@pseolint/core 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -9
- package/dist/ai/prompt.d.ts +1 -1
- package/dist/ai/prompt.d.ts.map +1 -1
- package/dist/ai/prompt.js +13 -1
- package/dist/ai/prompt.js.map +1 -1
- package/dist/auditor.d.ts.map +1 -1
- package/dist/auditor.js +122 -46
- package/dist/auditor.js.map +1 -1
- package/dist/formatters/console.d.ts +9 -0
- package/dist/formatters/console.d.ts.map +1 -1
- package/dist/formatters/console.js +53 -0
- package/dist/formatters/console.js.map +1 -1
- package/dist/formatters/html.d.ts.map +1 -1
- package/dist/formatters/html.js +363 -135
- package/dist/formatters/html.js.map +1 -1
- package/dist/index.d.ts +10 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +9 -0
- package/dist/index.js.map +1 -1
- package/dist/rule-references.d.ts.map +1 -1
- package/dist/rule-references.js +8 -0
- package/dist/rule-references.js.map +1 -1
- package/dist/rules/aeo/answer-first.d.ts +18 -0
- package/dist/rules/aeo/answer-first.d.ts.map +1 -0
- package/dist/rules/aeo/answer-first.js +191 -0
- package/dist/rules/aeo/answer-first.js.map +1 -0
- package/dist/rules/aeo/citable-facts.d.ts +9 -0
- package/dist/rules/aeo/citable-facts.d.ts.map +1 -0
- package/dist/rules/aeo/citable-facts.js +90 -0
- package/dist/rules/aeo/citable-facts.js.map +1 -0
- package/dist/rules/aeo/content-modularity.d.ts +11 -0
- package/dist/rules/aeo/content-modularity.d.ts.map +1 -0
- package/dist/rules/aeo/content-modularity.js +107 -0
- package/dist/rules/aeo/content-modularity.js.map +1 -0
- package/dist/rules/aeo/crawler-access.d.ts +25 -0
- package/dist/rules/aeo/crawler-access.d.ts.map +1 -0
- package/dist/rules/aeo/crawler-access.js +116 -0
- package/dist/rules/aeo/crawler-access.js.map +1 -0
- package/dist/rules/aeo/faq-coverage.d.ts +9 -0
- package/dist/rules/aeo/faq-coverage.d.ts.map +1 -0
- package/dist/rules/aeo/faq-coverage.js +71 -0
- package/dist/rules/aeo/faq-coverage.js.map +1 -0
- package/dist/rules/aeo/freshness-signals.d.ts +9 -0
- package/dist/rules/aeo/freshness-signals.d.ts.map +1 -0
- package/dist/rules/aeo/freshness-signals.js +109 -0
- package/dist/rules/aeo/freshness-signals.js.map +1 -0
- package/dist/rules/aeo/llms-txt.d.ts +24 -0
- package/dist/rules/aeo/llms-txt.d.ts.map +1 -0
- package/dist/rules/aeo/llms-txt.js +93 -0
- package/dist/rules/aeo/llms-txt.js.map +1 -0
- package/dist/rules/aeo/non-replicable-value.d.ts +9 -0
- package/dist/rules/aeo/non-replicable-value.d.ts.map +1 -0
- package/dist/rules/aeo/non-replicable-value.js +95 -0
- package/dist/rules/aeo/non-replicable-value.js.map +1 -0
- package/dist/rules/scope.d.ts +12 -0
- package/dist/rules/scope.d.ts.map +1 -0
- package/dist/rules/scope.js +66 -0
- package/dist/rules/scope.js.map +1 -0
- package/dist/types.d.ts +17 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +2 -2
package/dist/index.js
CHANGED
|
@@ -44,6 +44,14 @@ export * from "./renderer.js";
|
|
|
44
44
|
export * from "./enrich-findings.js";
|
|
45
45
|
export * from "./data-source-loader.js";
|
|
46
46
|
export * from "./rules/data/data-binding.js";
|
|
47
|
+
export { llmsTxtRule, validateLlmsTxt } from "./rules/aeo/llms-txt.js";
|
|
48
|
+
export { crawlerAccessRule, parseRobotsByUserAgent, isFullyDisallowed, DEFAULT_AI_CRAWLERS } from "./rules/aeo/crawler-access.js";
|
|
49
|
+
export { freshnessSignalsRule } from "./rules/aeo/freshness-signals.js";
|
|
50
|
+
export { faqCoverageRule } from "./rules/aeo/faq-coverage.js";
|
|
51
|
+
export { answerFirstRule, extractFirstParagraph } from "./rules/aeo/answer-first.js";
|
|
52
|
+
export { citableFactsRule } from "./rules/aeo/citable-facts.js";
|
|
53
|
+
export { nonReplicableValueRule } from "./rules/aeo/non-replicable-value.js";
|
|
54
|
+
export { contentModularityRule } from "./rules/aeo/content-modularity.js";
|
|
47
55
|
export { cachedFetch, cacheKeyFor } from "./cache.js";
|
|
48
56
|
export { stratifiedSample, inferUrlTemplate } from "./stratified-sample.js";
|
|
49
57
|
export { readState, writeState, computeContentHash, normalizeHtmlForHash, STATE_SCHEMA_VERSION } from "./state.js";
|
|
@@ -53,4 +61,5 @@ export { PROMPT_VERSION, assignFindingId } from "./ai/prompt.js";
|
|
|
53
61
|
export { estimateCostUsd } from "./ai/cost.js";
|
|
54
62
|
export * from "./telemetry/index.js";
|
|
55
63
|
export { promptTriageFeedback } from "./ai/feedback-prompt.js";
|
|
64
|
+
export { RULE_SCOPE, isRuleAllowedInDiff } from "./rules/scope.js";
|
|
56
65
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,YAAY,CAAC;AAC3B,cAAc,cAAc,CAAC;AAC7B,cAAc,aAAa,CAAC;AAC5B,cAAc,oBAAoB,CAAC;AACnC,cAAc,yBAAyB,CAAC;AACxC,cAAc,6BAA6B,CAAC;AAC5C,cAAc,gCAAgC,CAAC;AAC/C,cAAc,6BAA6B,CAAC;AAC5C,cAAc,8BAA8B,CAAC;AAC7C,cAAc,mCAAmC,CAAC;AAClD,cAAc,oCAAoC,CAAC;AACnD,cAAc,sCAAsC,CAAC;AACrD,cAAc,iCAAiC,CAAC;AAChD,cAAc,mCAAmC,CAAC;AAClD,cAAc,iCAAiC,CAAC;AAChD,cAAc,uCAAuC,CAAC;AACtD,cAAc,oCAAoC,CAAC;AACnD,cAAc,mCAAmC,CAAC;AAClD,cAAc,iCAAiC,CAAC;AAChD,cAAc,+BAA+B,CAAC;AAC9C,cAAc,4BAA4B,CAAC;AAC3C,cAAc,6BAA6B,CAAC;AAC5C,cAAc,uCAAuC,CAAC;AACtD,cAAc,4BAA4B,CAAC;AAC3C,cAAc,uCAAuC,CAAC;AACtD,cAAc,4CAA4C,CAAC;AAC3D,cAAc,yCAAyC,CAAC;AACxD,cAAc,sCAAsC,CAAC;AACrD,cAAc,gCAAgC,CAAC;AAC/C,cAAc,0BAA0B,CAAC;AACzC,cAAc,iCAAiC,CAAC;AAChD,cAAc,sCAAsC,CAAC;AACrD,cAAc,iCAAiC,CAAC;AAChD,cAAc,mCAAmC,CAAC;AAClD,cAAc,+BAA+B,CAAC;AAC9C,cAAc,wBAAwB,CAAC;AACvC,cAAc,mCAAmC,CAAC;AAClD,cAAc,uCAAuC,CAAC;AACtD,cAAc,iCAAiC,CAAC;AAChD,cAAc,sBAAsB,CAAC;AACrC,cAAc,sBAAsB,CAAC;AACrC,cAAc,uBAAuB,CAAC;AACtC,cAAc,eAAe,CAAC;AAC9B,cAAc,sBAAsB,CAAC;AACrC,cAAc,yBAAyB,CAAC;AACxC,cAAc,8BAA8B,CAAC;AAC7C,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAGtD,OAAO,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC5E,OAAO,EAAE,SAAS,EAAE,UAAU,EAAE,kBAAkB,EAAE,oBAAoB,EAAE,oBAAoB,EAAE,MAAM,YAAY,CAAC;AAYnH,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAEhD,OAAO,EAAE,mBAAmB,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AAE7E,OAAO,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACjE,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAE/C,cAAc,sBAAsB,CAAC;AAErC,OAAO,EAAE,oBAAoB,EAAE,MAAM,yBAAyB,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,YAAY,CAAC;AAC3B,cAAc,cAAc,CAAC;AAC7B,cAAc,aAAa,CAAC;AAC5B,cAAc,oBAAoB,CAAC;AACnC,cAAc,yBAAyB,CAAC;AACxC,cAAc,6BAA6B,CAAC;AAC5C,cAAc,gCAAgC,CAAC;AAC/C,cAAc,6BAA6B,CAAC;AAC5C,cAAc,8BAA8B,CAAC;AAC7C,cAAc,mCAAmC,CAAC;AAClD,cAAc,oCAAoC,CAAC;AACnD,cAAc,sCAAsC,CAAC;AACrD,cAAc,iCAAiC,CAAC;AAChD,cAAc,mCAAmC,CAAC;AAClD,cAAc,iCAAiC,CAAC;AAChD,cAAc,uCAAuC,CAAC;AACtD,cAAc,oCAAoC,CAAC;AACnD,cAAc,mCAAmC,CAAC;AAClD,cAAc,iCAAiC,CAAC;AAChD,cAAc,+BAA+B,CAAC;AAC9C,cAAc,4BAA4B,CAAC;AAC3C,cAAc,6BAA6B,CAAC;AAC5C,cAAc,uCAAuC,CAAC;AACtD,cAAc,4BAA4B,CAAC;AAC3C,cAAc,uCAAuC,CAAC;AACtD,cAAc,4CAA4C,CAAC;AAC3D,cAAc,yCAAyC,CAAC;AACxD,cAAc,sCAAsC,CAAC;AACrD,cAAc,gCAAgC,CAAC;AAC/C,cAAc,0BAA0B,CAAC;AACzC,cAAc,iCAAiC,CAAC;AAChD,cAAc,sCAAsC,CAAC;AACrD,cAAc,iCAAiC,CAAC;AAChD,cAAc,mCAAmC,CAAC;AAClD,cAAc,+BAA+B,CAAC;AAC9C,cAAc,wBAAwB,CAAC;AACvC,cAAc,mCAAmC,CAAC;AAClD,cAAc,uCAAuC,CAAC;AACtD,cAAc,iCAAiC,CAAC;AAChD,cAAc,sBAAsB,CAAC;AACrC,cAAc,sBAAsB,CAAC;AACrC,cAAc,uBAAuB,CAAC;AACtC,cAAc,eAAe,CAAC;AAC9B,cAAc,sBAAsB,CAAC;AACrC,cAAc,yBAAyB,CAAC;AACxC,cAAc,8BAA8B,CAAC;AAC7C,OAAO,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AACvE,OAAO,EAAE,iBAAiB,EAAE,sBAAsB,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,+BAA+B,CAAC;AAClI,OAAO,EAAE,oBAAoB,EAAE,MAAM,kCAAkC,CAAC;AACxE,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAC9D,OAAO,EAAE,eAAe,EAAE,qBAAqB,EAAE,MAAM,6BAA6B,CAAC;AACrF,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAChE,OAAO,EAAE,sBAAsB,EAAE,MAAM,qCAAqC,CAAC;AAC7E,OAAO,EAAE,qBAAqB,EAAE,MAAM,mCAAmC,CAAC;AAC1E,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAGtD,OAAO,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC5E,OAAO,EAAE,SAAS,EAAE,UAAU,EAAE,kBAAkB,EAAE,oBAAoB,EAAE,oBAAoB,EAAE,MAAM,YAAY,CAAC;AAYnH,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAEhD,OAAO,EAAE,mBAAmB,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AAE7E,OAAO,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACjE,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAE/C,cAAc,sBAAsB,CAAC;AAErC,OAAO,EAAE,oBAAoB,EAAE,M
AAM,yBAAyB,CAAC;AAE/D,OAAO,EAAE,UAAU,EAAE,mBAAmB,EAAE,MAAM,kBAAkB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"rule-references.d.ts","sourceRoot":"","sources":["../src/rule-references.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,
|
|
1
|
+
{"version":3,"file":"rule-references.d.ts","sourceRoot":"","sources":["../src/rule-references.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CA4ClD,CAAC"}
|
package/dist/rule-references.js
CHANGED
|
@@ -34,5 +34,13 @@ export const RULE_REFERENCES = {
|
|
|
34
34
|
"cannibal/url-pattern": "https://developers.google.com/search/docs/crawling-indexing/consolidate-duplicate-urls",
|
|
35
35
|
"data/missing-binding": "https://developers.google.com/search/docs/essentials/spam-policies#thin-content",
|
|
36
36
|
"data/identical-across-pages": "https://developers.google.com/search/docs/essentials/spam-policies#scraped-content",
|
|
37
|
+
"aeo/llms-txt": "https://llmstxt.org",
|
|
38
|
+
"aeo/crawler-access": "https://www.robotstxt.org/robotstxt.html",
|
|
39
|
+
"aeo/freshness-signals": "https://developers.google.com/search/docs/appearance/publication-dates",
|
|
40
|
+
"aeo/faq-coverage": "https://developers.google.com/search/docs/appearance/structured-data/faqpage",
|
|
41
|
+
"aeo/answer-first": "https://developers.google.com/search/docs/appearance/featured-snippets",
|
|
42
|
+
"aeo/citable-facts": "https://developers.google.com/search/docs/fundamentals/creating-helpful-content",
|
|
43
|
+
"aeo/non-replicable-value": "https://developers.google.com/search/docs/fundamentals/creating-helpful-content",
|
|
44
|
+
"aeo/content-modularity": "https://developers.google.com/search/docs/appearance/structured-data/intro-structured-data",
|
|
37
45
|
};
|
|
38
46
|
//# sourceMappingURL=rule-references.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"rule-references.js","sourceRoot":"","sources":["../src/rule-references.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,eAAe,GAA2B;IACrD,qBAAqB,EAAE,oFAAoF;IAC3G,kBAAkB,EAAE,kFAAkF;IACtG,mBAAmB,EAAE,iFAAiF;IACtG,wBAAwB,EAAE,iFAAiF;IAC3G,yBAAyB,EAAE,kFAAkF;IAC7G,2BAA2B,EAAE,kGAAkG;IAC/H,sBAAsB,EAAE,kFAAkF;IAC1G,wBAAwB,EAAE,kFAAkF;IAC5G,sBAAsB,EAAE,iFAAiF;IACzG,yBAAyB,EAAE,gFAAgF;IAC3G,4BAA4B,EAAE,uEAAuE;IACrG,wBAAwB,EAAE,sFAAsF;IAChH,sBAAsB,EAAE,sFAAsF;IAC9G,oBAAoB,EAAE,6EAA6E;IACnG,iBAAiB,EAAE,6EAA6E;IAChG,4BAA4B,EAAE,6EAA6E;IAC3G,iBAAiB,EAAE,6EAA6E;IAChG,kBAAkB,EAAE,6EAA6E;IACjG,wBAAwB,EAAE,0EAA0E;IACpG,4BAA4B,EAAE,wFAAwF;IACtH,iCAAiC,EAAE,wFAAwF;IAC3H,8BAA8B,EAAE,4EAA4E;IAC5G,2BAA2B,EAAE,+EAA+E;IAC5G,qBAAqB,EAAE,2EAA2E;IAClG,eAAe,EAAE,6EAA6E;IAC9F,sBAAsB,EAAE,8DAA8D;IACtF,2BAA2B,EAAE,iGAAiG;IAC9H,sBAAsB,EAAE,4FAA4F;IACpH,wBAAwB,EAAE,4FAA4F;IACtH,oBAAoB,EAAE,4FAA4F;IAClH,wBAAwB,EAAE,wFAAwF;IAClH,4BAA4B,EAAE,wFAAwF;IACtH,sBAAsB,EAAE,wFAAwF;IAChH,sBAAsB,EAAE,iFAAiF;IACzG,6BAA6B,EAAE,oFAAoF;
|
|
1
|
+
{"version":3,"file":"rule-references.js","sourceRoot":"","sources":["../src/rule-references.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,eAAe,GAA2B;IACrD,qBAAqB,EAAE,oFAAoF;IAC3G,kBAAkB,EAAE,kFAAkF;IACtG,mBAAmB,EAAE,iFAAiF;IACtG,wBAAwB,EAAE,iFAAiF;IAC3G,yBAAyB,EAAE,kFAAkF;IAC7G,2BAA2B,EAAE,kGAAkG;IAC/H,sBAAsB,EAAE,kFAAkF;IAC1G,wBAAwB,EAAE,kFAAkF;IAC5G,sBAAsB,EAAE,iFAAiF;IACzG,yBAAyB,EAAE,gFAAgF;IAC3G,4BAA4B,EAAE,uEAAuE;IACrG,wBAAwB,EAAE,sFAAsF;IAChH,sBAAsB,EAAE,sFAAsF;IAC9G,oBAAoB,EAAE,6EAA6E;IACnG,iBAAiB,EAAE,6EAA6E;IAChG,4BAA4B,EAAE,6EAA6E;IAC3G,iBAAiB,EAAE,6EAA6E;IAChG,kBAAkB,EAAE,6EAA6E;IACjG,wBAAwB,EAAE,0EAA0E;IACpG,4BAA4B,EAAE,wFAAwF;IACtH,iCAAiC,EAAE,wFAAwF;IAC3H,8BAA8B,EAAE,4EAA4E;IAC5G,2BAA2B,EAAE,+EAA+E;IAC5G,qBAAqB,EAAE,2EAA2E;IAClG,eAAe,EAAE,6EAA6E;IAC9F,sBAAsB,EAAE,8DAA8D;IACtF,2BAA2B,EAAE,iGAAiG;IAC9H,sBAAsB,EAAE,4FAA4F;IACpH,wBAAwB,EAAE,4FAA4F;IACtH,oBAAoB,EAAE,4FAA4F;IAClH,wBAAwB,EAAE,wFAAwF;IAClH,4BAA4B,EAAE,wFAAwF;IACtH,sBAAsB,EAAE,wFAAwF;IAChH,sBAAsB,EAAE,iFAAiF;IACzG,6BAA6B,EAAE,oFAAoF;IACnH,cAAc,EAAE,qBAAqB;IACrC,oBAAoB,EAAE,0CAA0C;IAChE,uBAAuB,EAAE,wEAAwE;IACjG,kBAAkB,EAAE,8EAA8E;IAClG,kBAAkB,EAAE,wEAAwE;IAC5F,mBAAmB,EAAE,iFAAiF;IACtG,0BAA0B,EAAE,iFAAiF;IAC7G,wBAAwB,EAAE,4FAA4F;CACvH,CAAC"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { EntityMaskPattern, ParsedPage, RuleResult } from "../../types.js";
|
|
2
|
+
export interface AnswerFirstOptions {
|
|
3
|
+
/** First paragraph should fit under this many words to be "extractable". Default: 100. */
|
|
4
|
+
maxFirstParagraphWords?: number;
|
|
5
|
+
/** Opener longer than this is penalized as too long to extract. Default: 150. */
|
|
6
|
+
paragraphTooLongWords?: number;
|
|
7
|
+
/** Minimum score-points for a passing opener. Default: 2. */
|
|
8
|
+
minScoreToPass?: number;
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Extract the first paragraph-ish content block after the H1. Walks the DOM in document
|
|
12
|
+
* order starting at the H1 and finds the next `<p>` with >=5 words that is NOT inside a
|
|
13
|
+
* nav/aside/footer/header/breadcrumb. Falls back to the first qualifying `<p>` inside
|
|
14
|
+
 * `main`, `article`, or `body` (tried in that order) if the walk from the H1 finds nothing.
|
|
15
|
+
*/
|
|
16
|
+
export declare function extractFirstParagraph(html: string): string;
|
|
17
|
+
export declare function answerFirstRule(pages: ParsedPage[], entityPatterns: EntityMaskPattern[], options?: AnswerFirstOptions): RuleResult[];
|
|
18
|
+
//# sourceMappingURL=answer-first.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"answer-first.d.ts","sourceRoot":"","sources":["../../../src/rules/aeo/answer-first.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAEhF,MAAM,WAAW,kBAAkB;IACjC,0FAA0F;IAC1F,sBAAsB,CAAC,EAAE,MAAM,CAAC;IAChC,iFAAiF;IACjF,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,6DAA6D;IAC7D,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AA6BD;;;;;GAKG;AACH,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CA0D1D;AA8CD,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,cAAc,EAAE,iBAAiB,EAAE,EACnC,OAAO,CAAC,EAAE,kBAAkB,GAC3B,UAAU,EAAE,CA0Dd"}
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
import { load } from "cheerio";
|
|
2
|
+
/**
 * Openers that start with one of these phrases are treated as template boilerplate
 * ("Welcome to…", "In this guide…", "Generate your…") rather than a direct answer.
 */
const BOILERPLATE_PATTERNS = [
    /^\s*(welcome\s+to|our\s+(complete|comprehensive|ultimate)|in\s+this\s+(post|article|guide)|home\s*»|skip\s+to)/i,
    /^\s*(generate|create|build)\s+your\s+/i,
];
/**
 * Patterns for concrete numeric facts: dollar amounts, percentages, time frames,
 * and four-digit years (19xx / 20xx).
 */
const NUMBER_PATTERNS = [
    /\$[\d,]+(\.\d{2})?/,
    /\b\d+(\.\d+)?\s*%/,
    /\b\d+\s*(days?|weeks?|months?|years?|business\s+days?|hours?|minutes?)\b/i,
    /\b(19|20)\d{2}\b/, // years
];
/**
 * "Proper-noun entity" detector. Accepts multi-word proper nouns (e.g. "Secretary of State",
 * "Delaware Division") AND standalone capitalized entities that are not sentence-initial
 * (e.g. "...with the Delaware office", "filed in California").
 */
const MULTI_WORD_PROPER_NOUN = /\b[A-Z][a-z]+(?:\s+(?:of\s+|de\s+|and\s+)?[A-Z][a-z]+)+\b/;
// The lookbehind requires whitespace that itself follows a character which is neither
// whitespace nor a sentence terminator. That rejects the first word of the text AND the
// first word of every later sentence ("... fee. The form ..."), which would otherwise make
// ordinary words such as "The" or "This" count as named entities.
const SINGLE_WORD_PROPER_NOUN = /(?<=[^\s.!?]\s+)[A-Z][a-z]{2,}\b/;
// Case-sensitive on purpose: with the `i` flag the verb phrase "form a company" matched as
// if it were a form identifier. Mirrors the "form" fact pattern used by the citable-facts rule.
const FORM_PATTERN = /\bForm\s+[A-Z0-9][A-Z0-9-]*\b/;
|
|
20
|
+
/**
 * Count whitespace-separated words in `text`.
 *
 * @param text - Any string; leading and trailing whitespace is ignored.
 * @returns The number of words, or 0 when the string is empty or whitespace only.
 */
function wordCount(text) {
    const stripped = text.trim();
    // Splitting an empty string would yield [""] (length 1), so answer 0 directly.
    return stripped === "" ? 0 : stripped.split(/\s+/).length;
}
|
|
26
|
+
/**
 * Return the text of the first "real" paragraph of a page.
 *
 * A paragraph qualifies when it has at least five words and is not nested inside
 * nav / aside / footer / header / `[role=navigation]` / `.breadcrumbs` / `.breadcrumb`.
 *
 * Lookup order:
 *  1. If the document has an `<h1>`, the first qualifying `<p>` that comes after the
 *     first `<h1>` in document order.
 *  2. Otherwise (or if step 1 finds nothing), the first qualifying `<p>` inside the
 *     first `<main>`, then the first `<article>`, then `<body>`.
 *
 * @param html - Raw HTML of the page.
 * @returns The trimmed paragraph text, or `""` when no paragraph qualifies.
 */
export function extractFirstParagraph(html) {
    const $ = load(html);
    const EXCLUDED_ANCESTORS = "nav, aside, footer, header, [role=navigation], .breadcrumbs, .breadcrumb";
    // Trimmed text of `node` when it is a qualifying paragraph, otherwise null.
    const qualifyingText = (node) => {
        if ($(node).closest(EXCLUDED_ANCESTORS).length > 0)
            return null;
        const text = $(node).text().trim();
        return wordCount(text) >= 5 ? text : null;
    };
    // Step 1: scan headings and paragraphs in document order, ignoring everything up to
    // and including the first <h1>. cheerio has no compareDocumentPosition, so a flag
    // marks the point where the H1 has been passed.
    const firstH1 = $("h1").first().get(0);
    if (firstH1 !== undefined) {
        let afterH1 = false;
        for (const node of $("h1, p").toArray()) {
            if (node === firstH1) {
                afterH1 = true;
                continue;
            }
            if (!afterH1)
                continue;
            // Later <h1> elements are part of the selection too; only <p> can qualify.
            if (node.tagName?.toLowerCase?.() !== "p")
                continue;
            const text = qualifyingText(node);
            if (text)
                return text;
        }
    }
    // Step 2: fall back to progressively broader containers.
    for (const selector of ["main", "article", "body"]) {
        const container = $(selector).first();
        if (container.length === 0)
            continue;
        for (const node of container.find("p").toArray()) {
            const text = qualifyingText(node);
            if (text)
                return text;
        }
    }
    return "";
}
|
|
96
|
+
/**
 * Replace entity mentions in `text` with their placeholders, applying the patterns
 * one after another in the order given.
 *
 * @param text - Text to mask.
 * @param patterns - Entries exposing `pattern` (passed to `String.prototype.replace`)
 *   and `placeholder` (the replacement).
 * @returns The masked text; `text` unchanged when `patterns` is empty.
 */
function applyEntityMask(text, patterns) {
    return patterns.reduce((masked, entry) => masked.replace(entry.pattern, entry.placeholder), text);
}
|
|
103
|
+
/**
 * Whether `text` contains at least one concrete fact: anything matched by
 * NUMBER_PATTERNS, or a form reference matched by FORM_PATTERN.
 *
 * @param text - Text to inspect.
 * @returns `true` when any pattern matches.
 */
function hasConcreteFact(text) {
    for (const numberPattern of NUMBER_PATTERNS) {
        if (numberPattern.test(text))
            return true;
    }
    return FORM_PATTERN.test(text);
}
|
|
106
|
+
/**
 * Whether `text` mentions a named entity.
 *
 * A match on either built-in proper-noun pattern counts, and so does a match on any
 * caller-supplied entity pattern — this is how pSEO operators declare their primary
 * entity dimension (states, cities, etc.).
 *
 * @param text - Text to inspect.
 * @param entityPatterns - Entries whose `pattern` is a RegExp to test against `text`.
 * @returns `true` when any pattern matches.
 */
function hasNamedEntity(text, entityPatterns) {
    if (MULTI_WORD_PROPER_NOUN.test(text) || SINGLE_WORD_PROPER_NOUN.test(text))
        return true;
    for (const { pattern } of entityPatterns) {
        // Global/sticky regexes start matching at `lastIndex`. Reset it BEFORE testing so a
        // position left behind by an earlier user of the same RegExp object cannot cause a
        // false negative, and AFTER testing so later callers see fresh state.
        pattern.lastIndex = 0;
        const matched = pattern.test(text);
        pattern.lastIndex = 0;
        if (matched)
            return true;
    }
    return false;
}
|
|
123
|
+
/**
 * Whether `text` reads as a complete sentence: it must end with `.`, `!` or `?`
 * and contain at least eight words.
 *
 * @param text - Text to inspect.
 * @returns `true` when both conditions hold.
 */
function hasCompleteSentence(text) {
    const endsWithTerminator = /[.!?]\s*$/.test(text.trim());
    if (!endsWithTerminator)
        return false;
    return wordCount(text) >= 8;
}
|
|
126
|
+
/**
 * Whether `text` begins with one of the known boilerplate phrases in BOILERPLATE_PATTERNS.
 *
 * @param text - Opening text of a page.
 * @returns `true` when any boilerplate pattern matches.
 */
function isBoilerplateOpener(text) {
    for (const boilerplate of BOILERPLATE_PATTERNS) {
        if (boilerplate.test(text))
            return true;
    }
    return false;
}
|
|
129
|
+
/**
 * Rule `aeo/answer-first`: flag pages whose opening paragraph is not a direct,
 * extractable answer.
 *
 * Scoring per page, on the first `maxFirstParagraphWords` words of the first paragraph:
 *   +1 concrete fact, +1 named entity, +1 complete sentence,
 *   -1 when the full paragraph exceeds `paragraphTooLongWords`,
 *   -2 when the opener is boilerplate,
 *   -3 when the entity-masked opener is shared by many pages (templated).
 * Pages scoring below `minScoreToPass` produce a finding: "error" when the final score
 * is <= 0, otherwise "warning". Pages with no extractable paragraph are skipped.
 *
 * @param pages - Pages to audit; `html` and `url` are read from each.
 * @param entityPatterns - Entity patterns used for entity detection and for masking.
 * @param options - Optional thresholds; defaults are 100 / 150 / 2.
 * @returns One finding per failing page.
 */
export function answerFirstRule(pages, entityPatterns, options) {
    const { maxFirstParagraphWords, paragraphTooLongWords, minScoreToPass } = options ?? {};
    const maxWords = maxFirstParagraphWords ?? 100;
    const tooLong = paragraphTooLongWords ?? 150;
    const minPass = minScoreToPass ?? 2;
    // Pass 1: score every page that has an extractable first paragraph.
    const candidates = [];
    for (const page of pages) {
        const firstParagraph = extractFirstParagraph(page.html);
        if (!firstParagraph)
            continue;
        // Signals are evaluated on the truncated opener; length is judged on the full paragraph.
        const opener = firstParagraph.split(/\s+/).slice(0, maxWords).join(" ");
        const totalWords = wordCount(firstParagraph);
        const score = (hasConcreteFact(opener) ? 1 : 0) +
            (hasNamedEntity(opener, entityPatterns) ? 1 : 0) +
            (hasCompleteSentence(opener) ? 1 : 0) -
            (totalWords > tooLong ? 1 : 0) -
            (isBoilerplateOpener(opener) ? 2 : 0);
        candidates.push({
            url: page.url,
            score,
            paragraph: opener,
            wordCount: totalWords,
            masked: applyEntityMask(opener, entityPatterns),
        });
    }
    // Count how many pages share each entity-masked opener.
    const occurrencesByMask = new Map();
    for (const { masked } of candidates) {
        occurrencesByMask.set(masked, (occurrencesByMask.get(masked) ?? 0) + 1);
    }
    const templateThreshold = Math.max(3, Math.floor(candidates.length * 0.2));
    // Template detection only applies when more than three pages were scored.
    const enoughPagesForTemplating = candidates.length > 3;
    // Pass 2: apply the template penalty and report pages below the pass mark.
    const findings = [];
    for (const candidate of candidates) {
        const templated = (occurrencesByMask.get(candidate.masked) ?? 0) >= templateThreshold && enoughPagesForTemplating;
        const finalScore = templated ? candidate.score - 3 : candidate.score;
        if (finalScore >= minPass)
            continue;
        const severity = finalScore <= 0 ? "error" : "warning";
        const reasons = [];
        if (templated) {
            reasons.push("opener is identical to other pages after entity masking");
        }
        if (candidate.wordCount > tooLong) {
            reasons.push(`opener is ${candidate.wordCount} words (too long to extract)`);
        }
        if (isBoilerplateOpener(candidate.paragraph)) {
            reasons.push("opener is boilerplate ('Welcome to...', 'Generate your...')");
        }
        if (!hasConcreteFact(candidate.paragraph)) {
            reasons.push("no specific numbers, dates, or dollar amounts");
        }
        if (!hasNamedEntity(candidate.paragraph, entityPatterns)) {
            reasons.push("no named entities (agencies, laws, proper nouns)");
        }
        const detail = reasons.length ? `: ${reasons.join("; ")}` : ".";
        findings.push({
            ruleId: "aeo/answer-first",
            severity,
            message: `${candidate.url} does not open with a direct, extractable answer${detail}`,
            pageUrl: candidate.url,
            fix: `Restructure the template to open with entity-specific facts. Instead of boilerplate or a topic preamble, ` +
                `lead with a complete-answer sentence containing the key number, date, agency, or form name a reader ` +
                `(or AI Overview) would cite. Target: <${maxWords} words, at least one concrete number, at least one named entity.`,
        });
    }
    return findings;
}
|
|
191
|
+
//# sourceMappingURL=answer-first.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"answer-first.js","sourceRoot":"","sources":["../../../src/rules/aeo/answer-first.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAY/B,MAAM,oBAAoB,GAAa;IACrC,iHAAiH;IACjH,wCAAwC;CACzC,CAAC;AAEF,MAAM,eAAe,GAAa;IAChC,oBAAoB;IACpB,mBAAmB;IACnB,2EAA2E;IAC3E,kBAAkB,EAAE,QAAQ;CAC7B,CAAC;AAEF;;;;GAIG;AACH,MAAM,sBAAsB,GAAG,2DAA2D,CAAC;AAC3F,MAAM,uBAAuB,GAAG,+BAA+B,CAAC;AAChE,MAAM,YAAY,GAAG,wBAAwB,CAAC;AAE9C,SAAS,SAAS,CAAC,IAAY;IAC7B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,CAAC,OAAO;QAAE,OAAO,CAAC,CAAC;IACvB,OAAO,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;AACrC,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,qBAAqB,CAAC,IAAY;IAChD,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC;IAE3B,MAAM,UAAU,GAAG,0EAA0E,CAAC;IAC9F,MAAM,UAAU,GAAG,CAAC,EAAW,EAAW,EAAE;QAC1C,MAAM,GAAG,GAAG,CAAC,CAAC,EAAW,CAAC,CAAC;QAC3B,OAAO,GAAG,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IAC5C,CAAC,CAAC;IAEF,MAAM,oBAAoB,GAAG,CAAC,KAA2B,EAAiB,EAAE;QAC1E,IAAI,KAAK,GAAkB,IAAI,CAAC;QAChC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;YAC7B,IAAI,KAAK;gBAAE,OAAO,KAAK,CAAC;YACxB,IAAI,UAAU,CAAC,EAAE,CAAC;gBAAE,OAAO,SAAS,CAAC;YACrC,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;YACjC,IAAI,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;gBACzB,KAAK,GAAG,IAAI,CAAC;gBACb,OAAO,KAAK,CAAC;YACf,CAAC;YACD,OAAO,SAAS,CAAC;QACnB,CAAC,CAAC,CAAC;QACH,OAAO,KAAK,CAAC;IACf,CAAC,CAAC;IAEF,2FAA2F;IAC3F,yFAAyF;IACzF,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAClB,MAAM,IAAI,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QACvB,IAAI,QAAQ,GAAG,KAAK,CAAC;QACrB,IAAI,KAAK,GAAkB,IAAI,CAAC;QAChC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;YACxB,IAAI,KAAK;gBAAE,OAAO,KAAK,CAAC;YACxB,IAAI,EAAE,KAAK,IAAI,EAAE,CAAC;gBAChB,QAAQ,GAAG,IAAI,CAAC;gBAChB,OAAO,SAAS,CAAC;YACnB,CAAC;YACD,IAAI,CAAC,QAAQ;gBAAE,OAAO,SAAS,CAAC;YAChC,MAAM,GAAG,GAAI,EAA2B,CAAC,OAAO,EAAE,WAAW,EAAE,EAAE,CAAC;YAClE,IAAI,GAAG,KAAK,GA
AG;gBAAE,OAAO,SAAS,CAAC;YAClC,IAAI,UAAU,CAAC,EAAE,CAAC;gBAAE,OAAO,SAAS,CAAC;YACrC,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;YACjC,IAAI,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;gBACzB,KAAK,GAAG,IAAI,CAAC;gBACb,OAAO,KAAK,CAAC;YACf,CAAC;YACD,OAAO,SAAS,CAAC;QACnB,CAAC,CAAC,CAAC;QACH,IAAI,KAAK;YAAE,OAAO,KAAK,CAAC;IAC1B,CAAC;IAED,qCAAqC;IACrC,KAAK,MAAM,KAAK,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC;QACjF,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QACjC,MAAM,KAAK,GAAG,oBAAoB,CAAC,KAAK,CAAC,CAAC;QAC1C,IAAI,KAAK;YAAE,OAAO,KAAK,CAAC;IAC1B,CAAC;IACD,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,SAAS,eAAe,CAAC,IAAY,EAAE,QAA6B;IAClE,IAAI,GAAG,GAAG,IAAI,CAAC;IACf,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,WAAW,CAAC,CAAC;IAC9C,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,eAAe,CAAC,IAAY;IACnC,OAAO,eAAe,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAChF,CAAC;AAED,SAAS,cAAc,CAAC,IAAY,EAAE,cAAmC;IACvE,IAAI,sBAAsB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IACnD,IAAI,uBAAuB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IACpD,oFAAoF;IACpF,4FAA4F;IAC5F,KAAK,MAAM,CAAC,IAAI,cAAc,EAAE,CAAC;QAC/B,IAAI,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YACzB,4EAA4E;YAC5E,CAAC,CAAC,OAAO,CAAC,SAAS,GAAG,CAAC,CAAC;YACxB,OAAO,IAAI,CAAC;QACd,CAAC;QACD,CAAC,CAAC,OAAO,CAAC,SAAS,GAAG,CAAC,CAAC;IAC1B,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,mBAAmB,CAAC,IAAY;IACvC,OAAO,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,IAAI,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC/D,CAAC;AAED,SAAS,mBAAmB,CAAC,IAAY;IACvC,OAAO,oBAAoB,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;AAC1D,CAAC;AAUD,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,cAAmC,EACnC,OAA4B;IAE5B,MAAM,QAAQ,GAAG,OAAO,EAAE,sBAAsB,IAAI,GAAG,CAAC;IACxD,MAAM,OAAO,GAAG,OAAO,EAAE,qBAAqB,IAAI,GAAG,CAAC;IACtD,MAAM,OAA
O,GAAG,OAAO,EAAE,cAAc,IAAI,CAAC,CAAC;IAE7C,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,SAAS,GAAG,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnD,IAAI,CAAC,SAAS;YAAE,SAAS;QACzB,MAAM,MAAM,GAAG,SAAS,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACnE,MAAM,EAAE,GAAG,SAAS,CAAC,SAAS,CAAC,CAAC;QAEhC,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,eAAe,CAAC,MAAM,CAAC;YAAE,KAAK,IAAI,CAAC,CAAC;QACxC,IAAI,cAAc,CAAC,MAAM,EAAE,cAAc,CAAC;YAAE,KAAK,IAAI,CAAC,CAAC;QACvD,IAAI,mBAAmB,CAAC,MAAM,CAAC;YAAE,KAAK,IAAI,CAAC,CAAC;QAC5C,IAAI,EAAE,GAAG,OAAO;YAAE,KAAK,IAAI,CAAC,CAAC;QAC7B,IAAI,mBAAmB,CAAC,MAAM,CAAC;YAAE,KAAK,IAAI,CAAC,CAAC;QAE5C,MAAM,MAAM,GAAG,eAAe,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC;QACvD,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC,CAAC;IAClF,CAAC;IAED,MAAM,aAAa,GAAG,IAAI,GAAG,EAAkB,CAAC;IAChD,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;QACvB,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACtE,CAAC;IACD,MAAM,iBAAiB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC;IAEvE,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;QACvB,IAAI,UAAU,GAAG,CAAC,CAAC,KAAK,CAAC;QACzB,MAAM,WAAW,GAAG,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,iBAAiB,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC;QACjG,IAAI,WAAW;YAAE,UAAU,IAAI,CAAC,CAAC;QAEjC,IAAI,UAAU,IAAI,OAAO;YAAE,SAAS;QAEpC,MAAM,QAAQ,GAAG,UAAU,IAAI,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC;QACvD,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,IAAI,WAAW;YAAE,OAAO,CAAC,IAAI,CAAC,yDAAyD,CAAC,CAAC;QACzF,IAAI,CAAC,CAAC,SAAS,GAAG,OAAO;YAAE,OAAO,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,SAAS,8BAA8B,CAAC,CAAC;QAChG,IAAI,mBAAmB,CAAC,CAAC,CAAC,SAAS,CAAC;YAAE,OAAO,CAAC,IAAI,CAAC,6DAA6D,CAAC,CAAC;QAClH,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC,SAAS,CAAC;YAAE,OAAO,CAAC,IAAI,CAAC,+CAA+C,CAAC,CAAC;QACjG,IAAI,CAAC,cAAc,CAAC,CAAC,C
AAC,SAAS,EAAE,cAAc,CAAC;YAAE,OAAO,CAAC,IAAI,CAAC,kDAAkD,CAAC,CAAC;QAEnH,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,kBAAkB;YAC1B,QAAQ;YACR,OAAO,EAAE,GAAG,CAAC,CAAC,GAAG,mDAAmD,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE;YACtH,OAAO,EAAE,CAAC,CAAC,GAAG;YACd,GAAG,EACD,2GAA2G;gBAC3G,sGAAsG;gBACtG,yCAAyC,QAAQ,kEAAkE;SACtH,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { EntityMaskPattern, ParsedPage, RuleResult } from "../../types.js";
|
|
2
|
+
export interface CitableFactsOptions {
|
|
3
|
+
/** Below this count → error. Default: 3. */
|
|
4
|
+
minFactsPerPage?: number;
|
|
5
|
+
/** At or above this count → pass. Default: 8. */
|
|
6
|
+
targetFactsPerPage?: number;
|
|
7
|
+
}
|
|
8
|
+
export declare function citableFactsRule(pages: ParsedPage[], entityPatterns: EntityMaskPattern[], options?: CitableFactsOptions): RuleResult[];
|
|
9
|
+
//# sourceMappingURL=citable-facts.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"citable-facts.d.ts","sourceRoot":"","sources":["../../../src/rules/aeo/citable-facts.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAEhF,MAAM,WAAW,mBAAmB;IAClC,4CAA4C;IAC5C,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,iDAAiD;IACjD,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAkCD,wBAAgB,gBAAgB,CAC9B,KAAK,EAAE,UAAU,EAAE,EACnB,cAAc,EAAE,iBAAiB,EAAE,EACnC,OAAO,CAAC,EAAE,mBAAmB,GAC5B,UAAU,EAAE,CAmEd"}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
 * Named, global regexes for the kinds of citable facts the rule looks for:
 * dollar amounts, percentages, time frames, month-day dates, ISO dates, form identifiers.
 */
const FACT_PATTERNS = [
    { name: "dollar", regex: /\$[\d,]+(\.\d{2})?/g },
    { name: "percent", regex: /\b\d+(\.\d+)?\s*%/g },
    { name: "timeframe", regex: /\b\d+(?:-\d+)?\s*(business\s+days?|days?|weeks?|months?|years?|hours?|minutes?)\b/gi },
    { name: "date", regex: /\b(january|february|march|april|may|june|july|august|september|october|november|december)\s+\d{1,2}(?:,\s*\d{4})?\b/gi },
    { name: "isoDate", regex: /\b\d{4}-\d{2}-\d{2}\b/g },
    { name: "form", regex: /\bForm\s+[A-Z0-9][A-Z0-9-]*\b/g },
];
/**
 * Collect the distinct facts found in `text`.
 *
 * Every match of every FACT_PATTERNS regex is trimmed and lower-cased, then
 * de-duplicated; results keep first-seen order (pattern order, then match order).
 *
 * @param text - Text to scan.
 * @returns Array of unique normalized fact strings (empty when nothing matches).
 */
function extractRawFacts(text) {
    const seen = new Set();
    for (const pattern of FACT_PATTERNS) {
        // `match` with a global regex returns null when there are no hits.
        for (const hit of text.match(pattern.regex) ?? []) {
            seen.add(hit.trim().toLowerCase());
        }
    }
    return [...seen];
}
|
|
26
|
+
/**
 * Substitute each entity pattern in `text` with its placeholder, in the order given.
 *
 * @param text - Text to mask.
 * @param patterns - Entries exposing `pattern` (passed to `String.prototype.replace`)
 *   and `placeholder` (the replacement).
 * @returns The masked text; `text` unchanged when `patterns` is empty.
 */
function applyEntityMask(text, patterns) {
    let masked = text;
    patterns.forEach(({ pattern, placeholder }) => {
        masked = masked.replace(pattern, placeholder);
    });
    return masked;
}
|
|
32
|
+
export function citableFactsRule(pages, entityPatterns, options) {
|
|
33
|
+
const minFacts = options?.minFactsPerPage ?? 3;
|
|
34
|
+
const targetFacts = options?.targetFactsPerPage ?? 8;
|
|
35
|
+
const findings = [];
|
|
36
|
+
// Build a global template-fact set: facts that appear verbatim on a majority of pages
|
|
37
|
+
// after entity masking — those are "template facts", not entity-specific data points.
|
|
38
|
+
//
|
|
39
|
+
// Scaling by page count:
|
|
40
|
+
// n == 1 → no template detection possible, all facts count as entity-specific.
|
|
41
|
+
// n in 2..=5 → fact is template when it appears on ALL pages (strict).
|
|
42
|
+
// n > 5 → fact is template when it appears on >= 50% of pages.
|
|
43
|
+
//
|
|
44
|
+
// Earlier cut-off (`> 5 ? ceil(n*0.5) : n + 1`) made small-sample audits silently
|
|
45
|
+
// ignore template facts, so a 3-page audit with "$70" on every page looked clean.
|
|
46
|
+
const templateThreshold = pages.length <= 1 ? Infinity :
|
|
47
|
+
pages.length <= 5 ? pages.length :
|
|
48
|
+
Math.ceil(pages.length * 0.5);
|
|
49
|
+
const factFrequency = new Map();
|
|
50
|
+
const perPageFacts = new Map();
|
|
51
|
+
for (const page of pages) {
|
|
52
|
+
const masked = applyEntityMask(page.contentText, entityPatterns);
|
|
53
|
+
const rawFacts = extractRawFacts(masked);
|
|
54
|
+
perPageFacts.set(page.url, rawFacts);
|
|
55
|
+
for (const f of rawFacts) {
|
|
56
|
+
factFrequency.set(f, (factFrequency.get(f) ?? 0) + 1);
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
const templateFacts = new Set();
|
|
60
|
+
for (const [f, count] of factFrequency.entries()) {
|
|
61
|
+
if (count >= templateThreshold)
|
|
62
|
+
templateFacts.add(f);
|
|
63
|
+
}
|
|
64
|
+
for (const page of pages) {
|
|
65
|
+
const facts = perPageFacts.get(page.url) ?? [];
|
|
66
|
+
const unique = facts.filter((f) => !templateFacts.has(f));
|
|
67
|
+
if (unique.length >= targetFacts)
|
|
68
|
+
continue;
|
|
69
|
+
const severity = unique.length < minFacts ? "error" : "warning";
|
|
70
|
+
const templateDrag = facts.length - unique.length;
|
|
71
|
+
const templateNote = templateDrag > 0
|
|
72
|
+
? ` (${templateDrag} additional fact${templateDrag === 1 ? "" : "s"} appear on most pages and don't count as entity-specific)`
|
|
73
|
+
: "";
|
|
74
|
+
findings.push({
|
|
75
|
+
ruleId: "aeo/citable-facts",
|
|
76
|
+
severity,
|
|
77
|
+
message: `${page.url} has ${unique.length} unique citable fact${unique.length === 1 ? "" : "s"}${templateNote}. ` +
|
|
78
|
+
`AI Overviews cite specific numbers and named references.`,
|
|
79
|
+
pageUrl: page.url,
|
|
80
|
+
fix: `Replace vague language ("varies", "several weeks", "affordable", "many options") with ` +
|
|
81
|
+
`specific values a reader or AI engine would cite: exact prices, percentages, dates, ` +
|
|
82
|
+
`timeframes, version numbers, named products, standards, or regulations that apply to this page. ` +
|
|
83
|
+
`For pSEO templates, bind these values from your data source so each page gets ` +
|
|
84
|
+
`entity-specific numbers instead of repeating the same template wording. ` +
|
|
85
|
+
`Target: ${targetFacts}+ unique facts per page.`,
|
|
86
|
+
});
|
|
87
|
+
}
|
|
88
|
+
return findings;
|
|
89
|
+
}
|
|
90
|
+
//# sourceMappingURL=citable-facts.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"citable-facts.js","sourceRoot":"","sources":["../../../src/rules/aeo/citable-facts.ts"],"names":[],"mappings":"AASA,MAAM,aAAa,GAA2C;IAC5D,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,qBAAqB,EAAE;IAChD,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,oBAAoB,EAAE;IAChD;QACE,IAAI,EAAE,WAAW;QACjB,KAAK,EAAE,qFAAqF;KAC7F;IACD;QACE,IAAI,EAAE,MAAM;QACZ,KAAK,EACH,uHAAuH;KAC1H;IACD,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,wBAAwB,EAAE;IACpD,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,gCAAgC,EAAE;CAC1D,CAAC;AAEF,SAAS,eAAe,CAAC,IAAY;IACnC,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAC;IAC9B,KAAK,MAAM,EAAE,KAAK,EAAE,IAAI,aAAa,EAAE,CAAC;QACtC,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAClC,IAAI,CAAC,OAAO;YAAE,SAAS;QACvB,KAAK,MAAM,CAAC,IAAI,OAAO;YAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC;IAC3D,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzB,CAAC;AAED,SAAS,eAAe,CAAC,IAAY,EAAE,QAA6B;IAClE,IAAI,GAAG,GAAG,IAAI,CAAC;IACf,KAAK,MAAM,CAAC,IAAI,QAAQ;QAAE,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,WAAW,CAAC,CAAC;IACtE,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,gBAAgB,CAC9B,KAAmB,EACnB,cAAmC,EACnC,OAA6B;IAE7B,MAAM,QAAQ,GAAG,OAAO,EAAE,eAAe,IAAI,CAAC,CAAC;IAC/C,MAAM,WAAW,GAAG,OAAO,EAAE,kBAAkB,IAAI,CAAC,CAAC;IACrD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,sFAAsF;IACtF,sFAAsF;IACtF,EAAE;IACF,yBAAyB;IACzB,sFAAsF;IACtF,0EAA0E;IAC1E,uEAAuE;IACvE,EAAE;IACF,kFAAkF;IAClF,kFAAkF;IAClF,MAAM,iBAAiB,GACrB,KAAK,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;QAC9B,KAAK,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;YAClC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC;IAEhC,MAAM,aAAa,GAAG,IAAI,GAAG,EAAkB,CAAC;IAChD,MAAM,YAAY,GAAG,IAAI,GAAG,EAAoB,CAAC;IAEjD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,eAAe,CAAC,IAAI,CAAC,WAAW,EAAE,cAAc,CAAC,CAAC;QACjE,MAAM,QAAQ,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC;QACzC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;QACrC,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACzB,aAAa,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAA
C,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACxD,CAAC;IACH,CAAC;IAED,MAAM,aAAa,GAAG,IAAI,GAAG,EAAU,CAAC;IACxC,KAAK,MAAM,CAAC,CAAC,EAAE,KAAK,CAAC,IAAI,aAAa,CAAC,OAAO,EAAE,EAAE,CAAC;QACjD,IAAI,KAAK,IAAI,iBAAiB;YAAE,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IACvD,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;QAC/C,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAE1D,IAAI,MAAM,CAAC,MAAM,IAAI,WAAW;YAAE,SAAS;QAE3C,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,GAAG,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC;QAChE,MAAM,YAAY,GAAG,KAAK,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAClD,MAAM,YAAY,GAAG,YAAY,GAAG,CAAC;YACnC,CAAC,CAAC,KAAK,YAAY,mBAAmB,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,2DAA2D;YAC9H,CAAC,CAAC,EAAE,CAAC;QAEP,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,mBAAmB;YAC3B,QAAQ;YACR,OAAO,EACL,GAAG,IAAI,CAAC,GAAG,QAAQ,MAAM,CAAC,MAAM,uBAAuB,MAAM,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,GAAG,YAAY,IAAI;gBACxG,0DAA0D;YAC5D,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,GAAG,EACD,wFAAwF;gBACxF,sFAAsF;gBACtF,kGAAkG;gBAClG,gFAAgF;gBAChF,0EAA0E;gBAC1E,WAAW,WAAW,0BAA0B;SACnD,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { ParsedPage, RuleResult } from "../../types.js";
|
|
2
|
+
export interface ContentModularityOptions {
|
|
3
|
+
/** Paragraphs longer than this many words are flagged. Default: 200. */
|
|
4
|
+
maxParagraphWords?: number;
|
|
5
|
+
/** Fraction of sections that must be self-contained to pass. Default: 0.7. */
|
|
6
|
+
minSelfContainedRatio?: number;
|
|
7
|
+
/** Extra cross-reference regexes (merged with defaults). */
|
|
8
|
+
crossRefPatterns?: RegExp[];
|
|
9
|
+
}
|
|
10
|
+
/**
 * Checks whether each page's sections can be extracted independently.
 *
 * A section is counted as not self-contained when it cross-references another part of
 * the page, has a vague heading, or contains a paragraph longer than `maxParagraphWords`.
 * Pages with fewer than two non-empty sections are skipped. A page produces one
 * `aeo/content-modularity` warning when its share of self-contained sections is below
 * `minSelfContainedRatio`.
 */
export declare function contentModularityRule(pages: ParsedPage[], options?: ContentModularityOptions): RuleResult[];
|
|
11
|
+
//# sourceMappingURL=content-modularity.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"content-modularity.d.ts","sourceRoot":"","sources":["../../../src/rules/aeo/content-modularity.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,MAAM,WAAW,wBAAwB;IACvC,wEAAwE;IACxE,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,8EAA8E;IAC9E,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,4DAA4D;IAC5D,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;CAC7B;AA+FD,wBAAgB,qBAAqB,CACnC,KAAK,EAAE,UAAU,EAAE,EACnB,OAAO,CAAC,EAAE,wBAAwB,GACjC,UAAU,EAAE,CAyCd"}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import { load } from "cheerio";
|
|
2
|
+
/**
 * Built-in phrases that make a section depend on another part of the page.
 * `re` is tested against the section text; `label` is the example quoted in findings.
 */
const DEFAULT_CROSS_REF_RULES = [
    // "mentioned above", "as discussed earlier", "shown below", ...
    {
        re: /\b(as\s+)?(mentioned|noted|discussed|shown)\s+(above|below|earlier|previously|later)\b/i,
        label: "'as mentioned above'",
    },
    // "in the previous section", "in the following section", ...
    {
        re: /\bin\s+the\s+(previous|next|following|preceding)\s+section\b/i,
        label: "'in the previous/next section'",
    },
    // "see above", "see below", "see the section", ...
    {
        re: /\bsee\s+(above|below|earlier|the\s+section)\b/i,
        label: "'see above/below'",
    },
    // "as noted above", "as stated above"
    {
        re: /\bas\s+(noted|stated)\s+above\b/i,
        label: "'as noted above'",
    },
];
|
|
8
|
+
// Intentionally conservative — only flag headings that are truly content-free.
|
|
9
|
+
// "FAQ", "Summary", "Conclusion", "Getting Started" are common legitimate headings
|
|
10
|
+
// and were removed from this list after feedback.
|
|
11
|
+
const VAGUE_HEADING_PATTERNS = [
|
|
12
|
+
/^\s*(more\s+information|more\s+info|details|additional\s+details|other|notes?)\s*$/i,
|
|
13
|
+
];
|
|
14
|
+
/**
 * Splits a page's HTML into heading-delimited sections.
 *
 * Only content inside the first available scope (`<article>`, else `<main>`, else
 * `<body>`) is considered. Every non-empty `<h2>`/`<h3>` opens a new section. Every
 * non-empty `<p>`/`<li>` is appended to the section currently open. Text that precedes
 * the first heading goes into a section whose heading is the empty string.
 *
 * @param html Raw page HTML.
 * @returns Sections in document order, each `{ heading, text, paragraphs }`, where `text`
 *   is the paragraphs joined with single spaces.
 */
function splitIntoSections(html) {
    const $ = load(html);
    const scope = ["article", "main"].map((sel) => $(sel)).find((found) => found.length > 0) ?? $("body");
    // Chrome/nav elements inside <article>/<main> (related-posts asides, breadcrumb nav,
    // footer CTAs) inflate paragraph counts and confuse cross-reference detection, so any
    // h2/h3/p/li nested inside one of them is ignored.
    const excludeSel = "nav, aside, footer, header, [role=navigation], .breadcrumbs, .breadcrumb";
    const sections = [];
    let current = null;
    const openSection = (heading) => {
        current = { heading, text: "", paragraphs: [] };
        sections.push(current);
        return current;
    };
    // NOTE(review): `find` also visits nested matches, so a <p> inside an <li> (or an <li>
    // inside an <li>) contributes its text once per matching ancestor.
    scope.find("h2, h3, p, li").each((_, el) => {
        if ($(el).closest(excludeSel).length > 0) {
            return;
        }
        const text = $(el).text().trim();
        if (text === "") {
            return;
        }
        const tag = el.tagName?.toLowerCase?.() ?? "";
        if (tag === "h2" || tag === "h3") {
            openSection(text);
            return;
        }
        const target = current ?? openSection("");
        target.paragraphs.push(text);
        target.text = target.text ? `${target.text} ${text}` : text;
    });
    return sections;
}
|
|
45
|
+
/**
 * Counts whitespace-separated words in `text`.
 *
 * @param text Text to measure.
 * @returns Number of runs of non-whitespace characters; 0 for empty or blank text.
 */
function wordCount(text) {
    const words = text.match(/\S+/g);
    return words === null ? 0 : words.length;
}
|
|
51
|
+
function analyzeSection(section, crossRefRules, maxParagraphWords) {
|
|
52
|
+
const reasons = [];
|
|
53
|
+
for (const { re, label } of crossRefRules) {
|
|
54
|
+
if (re.test(section.text)) {
|
|
55
|
+
reasons.push(`cross-references another section (${label})`);
|
|
56
|
+
break;
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
if (section.heading && VAGUE_HEADING_PATTERNS.some((re) => re.test(section.heading))) {
|
|
60
|
+
reasons.push(`heading "${section.heading}" is too vague`);
|
|
61
|
+
}
|
|
62
|
+
const longParagraphs = section.paragraphs.filter((p) => wordCount(p) > maxParagraphWords).length;
|
|
63
|
+
if (longParagraphs > 0) {
|
|
64
|
+
reasons.push(`${longParagraphs} paragraph${longParagraphs === 1 ? "" : "s"} exceed ${maxParagraphWords} words`);
|
|
65
|
+
}
|
|
66
|
+
if (reasons.length === 0)
|
|
67
|
+
return null;
|
|
68
|
+
return { heading: section.heading || "(pre-heading)", reasons };
|
|
69
|
+
}
|
|
70
|
+
export function contentModularityRule(pages, options) {
|
|
71
|
+
const maxParagraphWords = options?.maxParagraphWords ?? 200;
|
|
72
|
+
const minRatio = options?.minSelfContainedRatio ?? 0.7;
|
|
73
|
+
const extraCrossRef = (options?.crossRefPatterns ?? []).map((re) => ({ re, label: "custom pattern" }));
|
|
74
|
+
const crossRefRules = [...DEFAULT_CROSS_REF_RULES, ...extraCrossRef];
|
|
75
|
+
const findings = [];
|
|
76
|
+
for (const page of pages) {
|
|
77
|
+
const sections = splitIntoSections(page.html).filter((s) => s.text.length > 0);
|
|
78
|
+
if (sections.length < 2)
|
|
79
|
+
continue;
|
|
80
|
+
const issues = [];
|
|
81
|
+
for (const section of sections) {
|
|
82
|
+
const issue = analyzeSection(section, crossRefRules, maxParagraphWords);
|
|
83
|
+
if (issue)
|
|
84
|
+
issues.push(issue);
|
|
85
|
+
}
|
|
86
|
+
const selfContainedRatio = 1 - issues.length / sections.length;
|
|
87
|
+
if (selfContainedRatio >= minRatio)
|
|
88
|
+
continue;
|
|
89
|
+
const examples = issues
|
|
90
|
+
.slice(0, 3)
|
|
91
|
+
.map((i) => `"${i.heading}": ${i.reasons.join(", ")}`)
|
|
92
|
+
.join(" | ");
|
|
93
|
+
findings.push({
|
|
94
|
+
ruleId: "aeo/content-modularity",
|
|
95
|
+
severity: "warning",
|
|
96
|
+
message: `${page.url} has ${issues.length}/${sections.length} sections that are not independently extractable. ` +
|
|
97
|
+
`Examples — ${examples}.`,
|
|
98
|
+
pageUrl: page.url,
|
|
99
|
+
fix: `Each section should answer one question completely without referencing other parts of the page. ` +
|
|
100
|
+
`(1) Replace "see above" / "as mentioned" with the actual information the reader needs. ` +
|
|
101
|
+
`(2) Rename vague H2s ("More Info", "Details") to specific topics (e.g. "California LLC Annual Report Requirements"). ` +
|
|
102
|
+
`(3) Break paragraphs longer than ${maxParagraphWords} words into 2–3 focused paragraphs with their own sub-headings.`,
|
|
103
|
+
});
|
|
104
|
+
}
|
|
105
|
+
return findings;
|
|
106
|
+
}
|
|
107
|
+
//# sourceMappingURL=content-modularity.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"content-modularity.js","sourceRoot":"","sources":["../../../src/rules/aeo/content-modularity.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAiB/B,MAAM,uBAAuB,GAAmB;IAC9C,EAAE,EAAE,EAAE,yFAAyF,EAAE,KAAK,EAAE,sBAAsB,EAAE;IAChI,EAAE,EAAE,EAAE,+DAA+D,EAAE,KAAK,EAAE,gCAAgC,EAAE;IAChH,EAAE,EAAE,EAAE,gDAAgD,EAAE,KAAK,EAAE,mBAAmB,EAAE;IACpF,EAAE,EAAE,EAAE,kCAAkC,EAAE,KAAK,EAAE,kBAAkB,EAAE;CACtE,CAAC;AAEF,+EAA+E;AAC/E,mFAAmF;AACnF,kDAAkD;AAClD,MAAM,sBAAsB,GAAa;IACvC,qFAAqF;CACtF,CAAC;AAQF,SAAS,iBAAiB,CAAC,IAAY;IACrC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,MAAM,QAAQ,GAAc,EAAE,CAAC;IAC/B,IAAI,OAAO,GAAmB,IAAI,CAAC;IAEnC,MAAM,KAAK,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IAEpG,qFAAqF;IACrF,+EAA+E;IAC/E,6CAA6C;IAC7C,MAAM,UAAU,GAAG,0EAA0E,CAAC;IAC9F,MAAM,UAAU,GAAG,CAAC,EAAW,EAAW,EAAE,CAAC,CAAC,CAAC,EAAW,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IAE3F,KAAK,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QACzC,IAAI,UAAU,CAAC,EAAE,CAAC;YAAE,OAAO;QAC3B,MAAM,GAAG,GAAI,EAA2B,CAAC,OAAO,EAAE,WAAW,EAAE,EAAE,IAAI,EAAE,CAAC;QACxE,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACjC,IAAI,CAAC,IAAI;YAAE,OAAO;QAClB,IAAI,GAAG,KAAK,IAAI,IAAI,GAAG,KAAK,IAAI,EAAE,CAAC;YACjC,OAAO,GAAG,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC;YACtD,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACvB,OAAO;QACT,CAAC;QACD,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,OAAO,GAAG,EAAE,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC;YACpD,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACzB,CAAC;QACD,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC9B,OAAO,CAAC,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,IAAI,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;IACjE,CAAC,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,SAAS,CAAC,IAAY;IAC7B,MAAM,O
AAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,CAAC,OAAO;QAAE,OAAO,CAAC,CAAC;IACvB,OAAO,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;AACrC,CAAC;AAOD,SAAS,cAAc,CACrB,OAAgB,EAChB,aAA6B,EAC7B,iBAAyB;IAEzB,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,KAAK,MAAM,EAAE,EAAE,EAAE,KAAK,EAAE,IAAI,aAAa,EAAE,CAAC;QAC1C,IAAI,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;YAC1B,OAAO,CAAC,IAAI,CAAC,qCAAqC,KAAK,GAAG,CAAC,CAAC;YAC5D,MAAM;QACR,CAAC;IACH,CAAC;IACD,IAAI,OAAO,CAAC,OAAO,IAAI,sBAAsB,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,EAAE,CAAC;QACrF,OAAO,CAAC,IAAI,CAAC,YAAY,OAAO,CAAC,OAAO,gBAAgB,CAAC,CAAC;IAC5D,CAAC;IACD,MAAM,cAAc,GAAG,OAAO,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,CAAC,CAAC,GAAG,iBAAiB,CAAC,CAAC,MAAM,CAAC;IACjG,IAAI,cAAc,GAAG,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,IAAI,CAAC,GAAG,cAAc,aAAa,cAAc,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,WAAW,iBAAiB,QAAQ,CAAC,CAAC;IAClH,CAAC;IACD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACtC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,eAAe,EAAE,OAAO,EAAE,CAAC;AAClE,CAAC;AAED,MAAM,UAAU,qBAAqB,CACnC,KAAmB,EACnB,OAAkC;IAElC,MAAM,iBAAiB,GAAG,OAAO,EAAE,iBAAiB,IAAI,GAAG,CAAC;IAC5D,MAAM,QAAQ,GAAG,OAAO,EAAE,qBAAqB,IAAI,GAAG,CAAC;IACvD,MAAM,aAAa,GAAG,CAAC,OAAO,EAAE,gBAAgB,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,EAAE,KAAK,EAAE,gBAAgB,EAAE,CAAC,CAAC,CAAC;IACvG,MAAM,aAAa,GAAG,CAAC,GAAG,uBAAuB,EAAE,GAAG,aAAa,CAAC,CAAC;IACrE,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,QAAQ,GAAG,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAC/E,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAElC,MAAM,MAAM,GAAmB,EAAE,CAAC;QAClC,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,MAAM,KAAK,GAAG,cAAc,CAAC,OAAO,EAAE,aAAa,EAAE,iBAAiB,CAAC,CAAC;YACxE,IAAI,KAAK;gBAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAChC,CAAC;QAED,MAAM,kBAAkB,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC;QAC/D,IAAI,kBAAkB,I
AAI,QAAQ;YAAE,SAAS;QAE7C,MAAM,QAAQ,GAAG,MAAM;aACpB,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;aACX,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,OAAO,MAAM,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;aACrD,IAAI,CAAC,KAAK,CAAC,CAAC;QAEf,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,wBAAwB;YAChC,QAAQ,EAAE,SAAS;YACnB,OAAO,EACL,GAAG,IAAI,CAAC,GAAG,QAAQ,MAAM,CAAC,MAAM,IAAI,QAAQ,CAAC,MAAM,oDAAoD;gBACvG,cAAc,QAAQ,GAAG;YAC3B,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,GAAG,EACD,kGAAkG;gBAClG,yFAAyF;gBACzF,uHAAuH;gBACvH,oCAAoC,iBAAiB,iEAAiE;SACzH,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { RuleResult } from "../../types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Default AI crawler user-agents to check. Ordered by prevalence.
|
|
4
|
+
*/
|
|
5
|
+
export declare const DEFAULT_AI_CRAWLERS: readonly ["GPTBot", "ChatGPT-User", "ClaudeBot", "PerplexityBot", "Bytespider", "Google-Extended", "CCBot", "Applebot-Extended"];
|
|
6
|
+
/**
|
|
7
|
+
* Parse robots.txt into a map of user-agent -> list of Disallow patterns.
|
|
8
|
+
* User-agent keys are lowercased for case-insensitive lookup.
|
|
9
|
+
* A blank Disallow value is treated as "allow all" and produces an empty array
|
|
10
|
+
* (matches the robots spec: "Disallow: " with no value means no restrictions).
|
|
11
|
+
*/
|
|
12
|
+
export declare function parseRobotsByUserAgent(robotsTxt: string): Map<string, string[]>;
|
|
13
|
+
/** True if the Disallow list includes a root block (`/`). */
|
|
14
|
+
export declare function isFullyDisallowed(patterns: string[] | undefined): boolean;
|
|
15
|
+
export interface CrawlerAccessOptions {
|
|
16
|
+
/** List of AI crawler user-agents to check. */
|
|
17
|
+
crawlers?: readonly string[];
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Warn per blocked AI crawler; escalate to error when all configured crawlers are blocked.
|
|
21
|
+
* Wildcard blocks (`User-agent: *` + `Disallow: /`) also count as blocking each named crawler
|
|
22
|
+
* unless the crawler has its own more-permissive block.
|
|
23
|
+
*/
|
|
24
|
+
export declare function crawlerAccessRule(robotsTxtContent: string, options?: CrawlerAccessOptions): RuleResult[];
|
|
25
|
+
//# sourceMappingURL=crawler-access.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"crawler-access.d.ts","sourceRoot":"","sources":["../../../src/rules/aeo/crawler-access.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAEjD;;GAEG;AACH,eAAO,MAAM,mBAAmB,kIAStB,CAAC;AAEX;;;;;GAKG;AACH,wBAAgB,sBAAsB,CAAC,SAAS,EAAE,MAAM,GAAG,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,CAsC/E;AAED,6DAA6D;AAC7D,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,SAAS,GAAG,OAAO,CAGzE;AAED,MAAM,WAAW,oBAAoB;IACnC,+CAA+C;IAC/C,QAAQ,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;CAC9B;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAC/B,gBAAgB,EAAE,MAAM,EACxB,OAAO,CAAC,EAAE,oBAAoB,GAC7B,UAAU,EAAE,CAkDd"}
|