@pseolint/core 0.4.3 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +264 -169
- package/dist/ai/manifest/diff.d.ts +78 -0
- package/dist/ai/manifest/diff.d.ts.map +1 -0
- package/dist/ai/manifest/diff.js +139 -0
- package/dist/ai/manifest/diff.js.map +1 -0
- package/dist/ai/manifest/index.d.ts +18 -0
- package/dist/ai/manifest/index.d.ts.map +1 -0
- package/dist/ai/manifest/index.js +15 -0
- package/dist/ai/manifest/index.js.map +1 -0
- package/dist/ai/manifest/validate-manifest.d.ts +37 -0
- package/dist/ai/manifest/validate-manifest.d.ts.map +1 -0
- package/dist/ai/manifest/validate-manifest.js +67 -0
- package/dist/ai/manifest/validate-manifest.js.map +1 -0
- package/dist/ai/manifest/validators/domain-patches.d.ts +15 -0
- package/dist/ai/manifest/validators/domain-patches.d.ts.map +1 -0
- package/dist/ai/manifest/validators/domain-patches.js +110 -0
- package/dist/ai/manifest/validators/domain-patches.js.map +1 -0
- package/dist/ai/manifest/validators/index.d.ts +5 -0
- package/dist/ai/manifest/validators/index.d.ts.map +1 -0
- package/dist/ai/manifest/validators/index.js +4 -0
- package/dist/ai/manifest/validators/index.js.map +1 -0
- package/dist/ai/manifest/validators/page-changes.d.ts +36 -0
- package/dist/ai/manifest/validators/page-changes.d.ts.map +1 -0
- package/dist/ai/manifest/validators/page-changes.js +221 -0
- package/dist/ai/manifest/validators/page-changes.js.map +1 -0
- package/dist/ai/manifest/validators/types.d.ts +17 -0
- package/dist/ai/manifest/validators/types.d.ts.map +1 -0
- package/dist/ai/manifest/validators/types.js +5 -0
- package/dist/ai/manifest/validators/types.js.map +1 -0
- package/dist/ai/orchestrate.d.ts +74 -0
- package/dist/ai/orchestrate.d.ts.map +1 -0
- package/dist/ai/orchestrate.js +54 -0
- package/dist/ai/orchestrate.js.map +1 -0
- package/dist/ai/orchestrator/budget.d.ts +57 -0
- package/dist/ai/orchestrator/budget.d.ts.map +1 -0
- package/dist/ai/orchestrator/budget.js +114 -0
- package/dist/ai/orchestrator/budget.js.map +1 -0
- package/dist/ai/orchestrator/finish-tool.d.ts +568 -0
- package/dist/ai/orchestrator/finish-tool.d.ts.map +1 -0
- package/dist/ai/orchestrator/finish-tool.js +114 -0
- package/dist/ai/orchestrator/finish-tool.js.map +1 -0
- package/dist/ai/orchestrator/index.d.ts +25 -0
- package/dist/ai/orchestrator/index.d.ts.map +1 -0
- package/dist/ai/orchestrator/index.js +21 -0
- package/dist/ai/orchestrator/index.js.map +1 -0
- package/dist/ai/orchestrator/log.d.ts +24 -0
- package/dist/ai/orchestrator/log.d.ts.map +1 -0
- package/dist/ai/orchestrator/log.js +48 -0
- package/dist/ai/orchestrator/log.js.map +1 -0
- package/dist/ai/orchestrator/page-cache.d.ts +64 -0
- package/dist/ai/orchestrator/page-cache.d.ts.map +1 -0
- package/dist/ai/orchestrator/page-cache.js +127 -0
- package/dist/ai/orchestrator/page-cache.js.map +1 -0
- package/dist/ai/orchestrator/prompt.d.ts +16 -0
- package/dist/ai/orchestrator/prompt.d.ts.map +1 -0
- package/dist/ai/orchestrator/prompt.js +52 -0
- package/dist/ai/orchestrator/prompt.js.map +1 -0
- package/dist/ai/orchestrator/runner.d.ts +65 -0
- package/dist/ai/orchestrator/runner.d.ts.map +1 -0
- package/dist/ai/orchestrator/runner.js +223 -0
- package/dist/ai/orchestrator/runner.js.map +1 -0
- package/dist/ai/orchestrator/session.d.ts +44 -0
- package/dist/ai/orchestrator/session.d.ts.map +1 -0
- package/dist/ai/orchestrator/session.js +64 -0
- package/dist/ai/orchestrator/session.js.map +1 -0
- package/dist/ai/orchestrator/types.d.ts +99 -0
- package/dist/ai/orchestrator/types.d.ts.map +1 -0
- package/dist/ai/orchestrator/types.js +8 -0
- package/dist/ai/orchestrator/types.js.map +1 -0
- package/dist/ai/probes/cache.d.ts +12 -0
- package/dist/ai/probes/cache.d.ts.map +1 -0
- package/dist/ai/probes/cache.js +46 -0
- package/dist/ai/probes/cache.js.map +1 -0
- package/dist/ai/tools/ask-ai-engine.d.ts +77 -0
- package/dist/ai/tools/ask-ai-engine.d.ts.map +1 -0
- package/dist/ai/tools/ask-ai-engine.js +253 -0
- package/dist/ai/tools/ask-ai-engine.js.map +1 -0
- package/dist/ai/tools/check-domain-crawler-access.d.ts +71 -0
- package/dist/ai/tools/check-domain-crawler-access.d.ts.map +1 -0
- package/dist/ai/tools/check-domain-crawler-access.js +76 -0
- package/dist/ai/tools/check-domain-crawler-access.js.map +1 -0
- package/dist/ai/tools/check-domain-llms-txt.d.ts +70 -0
- package/dist/ai/tools/check-domain-llms-txt.d.ts.map +1 -0
- package/dist/ai/tools/check-domain-llms-txt.js +75 -0
- package/dist/ai/tools/check-domain-llms-txt.js.map +1 -0
- package/dist/ai/tools/check-indexability.d.ts +58 -0
- package/dist/ai/tools/check-indexability.d.ts.map +1 -0
- package/dist/ai/tools/check-indexability.js +64 -0
- package/dist/ai/tools/check-indexability.js.map +1 -0
- package/dist/ai/tools/check-robots.d.ts +68 -0
- package/dist/ai/tools/check-robots.d.ts.map +1 -0
- package/dist/ai/tools/check-robots.js +90 -0
- package/dist/ai/tools/check-robots.js.map +1 -0
- package/dist/ai/tools/check-rule-answer-first.d.ts +54 -0
- package/dist/ai/tools/check-rule-answer-first.d.ts.map +1 -0
- package/dist/ai/tools/check-rule-answer-first.js +50 -0
- package/dist/ai/tools/check-rule-answer-first.js.map +1 -0
- package/dist/ai/tools/check-rule-canonical-consistency.d.ts +66 -0
- package/dist/ai/tools/check-rule-canonical-consistency.d.ts.map +1 -0
- package/dist/ai/tools/check-rule-canonical-consistency.js +51 -0
- package/dist/ai/tools/check-rule-canonical-consistency.js.map +1 -0
- package/dist/ai/tools/check-rule-citable-facts.d.ts +58 -0
- package/dist/ai/tools/check-rule-citable-facts.d.ts.map +1 -0
- package/dist/ai/tools/check-rule-citable-facts.js +41 -0
- package/dist/ai/tools/check-rule-citable-facts.js.map +1 -0
- package/dist/ai/tools/check-rule-content-modularity.d.ts +58 -0
- package/dist/ai/tools/check-rule-content-modularity.d.ts.map +1 -0
- package/dist/ai/tools/check-rule-content-modularity.js +45 -0
- package/dist/ai/tools/check-rule-content-modularity.js.map +1 -0
- package/dist/ai/tools/check-rule-faq-coverage.d.ts +54 -0
- package/dist/ai/tools/check-rule-faq-coverage.d.ts.map +1 -0
- package/dist/ai/tools/check-rule-faq-coverage.js +39 -0
- package/dist/ai/tools/check-rule-faq-coverage.js.map +1 -0
- package/dist/ai/tools/check-rule-freshness-signals.d.ts +54 -0
- package/dist/ai/tools/check-rule-freshness-signals.d.ts.map +1 -0
- package/dist/ai/tools/check-rule-freshness-signals.js +45 -0
- package/dist/ai/tools/check-rule-freshness-signals.js.map +1 -0
- package/dist/ai/tools/check-rule-json-ld-valid.d.ts +54 -0
- package/dist/ai/tools/check-rule-json-ld-valid.d.ts.map +1 -0
- package/dist/ai/tools/check-rule-json-ld-valid.js +44 -0
- package/dist/ai/tools/check-rule-json-ld-valid.js.map +1 -0
- package/dist/ai/tools/check-rule-missing-author.d.ts +54 -0
- package/dist/ai/tools/check-rule-missing-author.d.ts.map +1 -0
- package/dist/ai/tools/check-rule-missing-author.js +45 -0
- package/dist/ai/tools/check-rule-missing-author.js.map +1 -0
- package/dist/ai/tools/check-rule-near-duplicate.d.ts +82 -0
- package/dist/ai/tools/check-rule-near-duplicate.d.ts.map +1 -0
- package/dist/ai/tools/check-rule-near-duplicate.js +63 -0
- package/dist/ai/tools/check-rule-near-duplicate.js.map +1 -0
- package/dist/ai/tools/check-rule-required-fields.d.ts +50 -0
- package/dist/ai/tools/check-rule-required-fields.d.ts.map +1 -0
- package/dist/ai/tools/check-rule-required-fields.js +38 -0
- package/dist/ai/tools/check-rule-required-fields.js.map +1 -0
- package/dist/ai/tools/check-rule-schema-consistency.d.ts +54 -0
- package/dist/ai/tools/check-rule-schema-consistency.d.ts.map +1 -0
- package/dist/ai/tools/check-rule-schema-consistency.js +44 -0
- package/dist/ai/tools/check-rule-schema-consistency.js.map +1 -0
- package/dist/ai/tools/check-rule-summary-bait.d.ts +54 -0
- package/dist/ai/tools/check-rule-summary-bait.d.ts.map +1 -0
- package/dist/ai/tools/check-rule-summary-bait.js +39 -0
- package/dist/ai/tools/check-rule-summary-bait.js.map +1 -0
- package/dist/ai/tools/check-rule-thin-content.d.ts +66 -0
- package/dist/ai/tools/check-rule-thin-content.d.ts.map +1 -0
- package/dist/ai/tools/check-rule-thin-content.js +58 -0
- package/dist/ai/tools/check-rule-thin-content.js.map +1 -0
- package/dist/ai/tools/detect-templates.d.ts +60 -0
- package/dist/ai/tools/detect-templates.d.ts.map +1 -0
- package/dist/ai/tools/detect-templates.js +43 -0
- package/dist/ai/tools/detect-templates.js.map +1 -0
- package/dist/ai/tools/fetch-page.d.ts +70 -0
- package/dist/ai/tools/fetch-page.d.ts.map +1 -0
- package/dist/ai/tools/fetch-page.js +93 -0
- package/dist/ai/tools/fetch-page.js.map +1 -0
- package/dist/ai/tools/fetch-sitemap.d.ts +60 -0
- package/dist/ai/tools/fetch-sitemap.d.ts.map +1 -0
- package/dist/ai/tools/fetch-sitemap.js +116 -0
- package/dist/ai/tools/fetch-sitemap.js.map +1 -0
- package/dist/ai/tools/index.d.ts +1555 -0
- package/dist/ai/tools/index.d.ts.map +1 -0
- package/dist/ai/tools/index.js +119 -0
- package/dist/ai/tools/index.js.map +1 -0
- package/dist/ai/tools/parse-page.d.ts +94 -0
- package/dist/ai/tools/parse-page.d.ts.map +1 -0
- package/dist/ai/tools/parse-page.js +108 -0
- package/dist/ai/tools/parse-page.js.map +1 -0
- package/dist/ai/tools/query-serp.d.ts +113 -0
- package/dist/ai/tools/query-serp.d.ts.map +1 -0
- package/dist/ai/tools/query-serp.js +131 -0
- package/dist/ai/tools/query-serp.js.map +1 -0
- package/dist/ai/tools/sample-template.d.ts +67 -0
- package/dist/ai/tools/sample-template.d.ts.map +1 -0
- package/dist/ai/tools/sample-template.js +75 -0
- package/dist/ai/tools/sample-template.js.map +1 -0
- package/dist/ai/tools/types.d.ts +73 -0
- package/dist/ai/tools/types.d.ts.map +1 -0
- package/dist/ai/tools/types.js +64 -0
- package/dist/ai/tools/types.js.map +1 -0
- package/dist/ai/tools/validate-jsonld.d.ts +62 -0
- package/dist/ai/tools/validate-jsonld.d.ts.map +1 -0
- package/dist/ai/tools/validate-jsonld.js +84 -0
- package/dist/ai/tools/validate-jsonld.js.map +1 -0
- package/dist/auditor.d.ts +4 -0
- package/dist/auditor.d.ts.map +1 -1
- package/dist/auditor.js +629 -64
- package/dist/auditor.js.map +1 -1
- package/dist/backpressure.d.ts.map +1 -1
- package/dist/backpressure.js +10 -3
- package/dist/backpressure.js.map +1 -1
- package/dist/enrich-findings.d.ts.map +1 -1
- package/dist/enrich-findings.js +15 -1
- package/dist/enrich-findings.js.map +1 -1
- package/dist/formatters/console.d.ts.map +1 -1
- package/dist/formatters/console.js +13 -0
- package/dist/formatters/console.js.map +1 -1
- package/dist/formatters/markdown.d.ts.map +1 -1
- package/dist/formatters/markdown.js +20 -2
- package/dist/formatters/markdown.js.map +1 -1
- package/dist/index.d.ts +12 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +8 -0
- package/dist/index.js.map +1 -1
- package/dist/rule-references.d.ts.map +1 -1
- package/dist/rule-references.js +5 -0
- package/dist/rule-references.js.map +1 -1
- package/dist/rules/content/heading-structure.d.ts +21 -0
- package/dist/rules/content/heading-structure.d.ts.map +1 -0
- package/dist/rules/content/heading-structure.js +56 -0
- package/dist/rules/content/heading-structure.js.map +1 -0
- package/dist/rules/content/image-alt-text.d.ts +18 -0
- package/dist/rules/content/image-alt-text.d.ts.map +1 -0
- package/dist/rules/content/image-alt-text.js +77 -0
- package/dist/rules/content/image-alt-text.js.map +1 -0
- package/dist/rules/content/title-uniqueness.d.ts +18 -0
- package/dist/rules/content/title-uniqueness.d.ts.map +1 -0
- package/dist/rules/content/title-uniqueness.js +70 -0
- package/dist/rules/content/title-uniqueness.js.map +1 -0
- package/dist/rules/links/host-section-divergence.d.ts +3 -0
- package/dist/rules/links/host-section-divergence.d.ts.map +1 -0
- package/dist/rules/links/host-section-divergence.js +158 -0
- package/dist/rules/links/host-section-divergence.js.map +1 -0
- package/dist/rules/links/link-depth.d.ts +12 -1
- package/dist/rules/links/link-depth.d.ts.map +1 -1
- package/dist/rules/links/link-depth.js +25 -12
- package/dist/rules/links/link-depth.js.map +1 -1
- package/dist/rules/scope.d.ts.map +1 -1
- package/dist/rules/scope.js +5 -0
- package/dist/rules/scope.js.map +1 -1
- package/dist/rules/spam/doorway-pattern.d.ts.map +1 -1
- package/dist/rules/spam/doorway-pattern.js +27 -4
- package/dist/rules/spam/doorway-pattern.js.map +1 -1
- package/dist/rules/spam/publication-velocity.d.ts +1 -1
- package/dist/rules/spam/publication-velocity.d.ts.map +1 -1
- package/dist/rules/spam/publication-velocity.js +9 -4
- package/dist/rules/spam/publication-velocity.js.map +1 -1
- package/dist/rules/spam/template-coverage.js +1 -1
- package/dist/rules/spam/template-coverage.js.map +1 -1
- package/dist/rules/spam/template-diversity.js +1 -1
- package/dist/rules/spam/template-diversity.js.map +1 -1
- package/dist/rules/tech/hreflang-consistency.d.ts.map +1 -1
- package/dist/rules/tech/hreflang-consistency.js +33 -4
- package/dist/rules/tech/hreflang-consistency.js.map +1 -1
- package/dist/rules/tech/og-completeness.d.ts +11 -0
- package/dist/rules/tech/og-completeness.d.ts.map +1 -1
- package/dist/rules/tech/og-completeness.js +22 -23
- package/dist/rules/tech/og-completeness.js.map +1 -1
- package/dist/ruleset-version.d.ts +8 -0
- package/dist/ruleset-version.d.ts.map +1 -0
- package/dist/ruleset-version.js +8 -0
- package/dist/ruleset-version.js.map +1 -0
- package/dist/scrape-strategy.d.ts +42 -0
- package/dist/scrape-strategy.d.ts.map +1 -0
- package/dist/scrape-strategy.js +101 -0
- package/dist/scrape-strategy.js.map +1 -0
- package/dist/site-classifier.d.ts.map +1 -1
- package/dist/site-classifier.js +1 -0
- package/dist/site-classifier.js.map +1 -1
- package/dist/state.d.ts +36 -1
- package/dist/state.d.ts.map +1 -1
- package/dist/state.js +3 -1
- package/dist/state.js.map +1 -1
- package/dist/stratified-sample.d.ts +9 -1
- package/dist/stratified-sample.d.ts.map +1 -1
- package/dist/stratified-sample.js +23 -6
- package/dist/stratified-sample.js.map +1 -1
- package/dist/types.d.ts +135 -2
- package/dist/types.d.ts.map +1 -1
- package/dist/url-normalize.d.ts.map +1 -1
- package/dist/url-normalize.js +13 -1
- package/dist/url-normalize.js.map +1 -1
- package/package.json +90 -90
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { parseHtmlPage } from "../../parser.js";
|
|
3
|
+
import { resolvePage } from "../orchestrator/page-cache.js";
|
|
4
|
+
import { defineTool } from "./types.js";
|
|
5
|
+
// Arguments accepted by check_indexability. The field shapes are spelled out
// first and wrapped in a zod object afterwards.
const inputShape = {
    pageId: z.string().describe("Page reference returned by fetch_page."),
    xRobotsTagHeader: z
        .string()
        .optional()
        .describe("Value of the X-Robots-Tag HTTP response header (if present). The orchestrator should pull this from the fetch_page result's `headers['x-robots-tag']`."),
};
const inputSchema = z.object(inputShape);
// Result payload of check_indexability: the verdict, the reasons behind it,
// and the raw signals the verdict was derived from.
const outputShape = {
    url: z.string(),
    indexable: z.boolean().describe("Final indexability verdict combining meta robots, X-Robots-Tag, and canonical."),
    reasons: z.array(z.string()).describe("Reasons the page is non-indexable, when applicable."),
    metaRobots: z.string(),
    metaNoindex: z.boolean(),
    xRobotsNoindex: z.boolean(),
    canonicalUrl: z.string(),
    canonicalSelfReferencing: z.boolean(),
};
const outputSchema = z.object(outputShape);
|
|
22
|
+
// Robots directives that carry a value after a colon. Their values (for
// example the `none` in `max-image-preview:none`) are settings, not indexing
// directives, so they must never be read as `none`/`noindex`/`nofollow`.
const VALUE_DIRECTIVE = /^(?:max-snippet|max-image-preview|max-video-preview|unavailable_after)\s*:/;
/**
 * Parse a robots directive list (the `content` of `<meta name="robots">` or an
 * `X-Robots-Tag` header value) into noindex / nofollow flags.
 *
 * The list is split on commas. A leading `useragent:` scope (as allowed in
 * `X-Robots-Tag`) is stripped, so a crawler-scoped `noindex` still counts.
 * Whitespace-separated directives inside one token are accepted as well.
 * `none` implies both noindex and nofollow.
 *
 * @param value Raw directive string; a missing or non-string value is treated as empty.
 * @returns `{ noindex, nofollow }` booleans.
 */
function parseRobotsDirectives(value) {
    const seen = new Set();
    for (const rawToken of String(value ?? "").toLowerCase().split(",")) {
        let directive = rawToken.trim();
        if (!VALUE_DIRECTIVE.test(directive)) {
            // Anything before a colon is a user-agent scope, e.g. "googlebot: noindex".
            const colon = directive.indexOf(":");
            if (colon !== -1)
                directive = directive.slice(colon + 1).trim();
        }
        if (directive === "" || VALUE_DIRECTIVE.test(directive))
            continue;
        for (const word of directive.split(/\s+/))
            seen.add(word);
    }
    const none = seen.has("none");
    return {
        noindex: none || seen.has("noindex"),
        nofollow: none || seen.has("nofollow"),
    };
}
|
|
29
|
+
/**
 * Report whether `canonical` designates the same document as `pageUrl`.
 *
 * Both values are compared after URL normalisation (relative canonicals are
 * resolved against the page URL, host case / default port / empty path are
 * normalised, fragments are ignored), so `https://example.com` and
 * `https://example.com/` count as the same page. Values that cannot be parsed
 * as URLs only match when the raw strings are identical.
 */
function canonicalMatchesPage(canonical, pageUrl) {
    if (canonical === pageUrl)
        return true;
    try {
        const target = new URL(canonical, pageUrl);
        const page = new URL(pageUrl);
        target.hash = "";
        page.hash = "";
        return target.href === page.href;
    }
    catch {
        return false;
    }
}
/**
 * `check_indexability` tool: combines the page's meta robots value, the
 * X-Robots-Tag header and the canonical link into a single verdict.
 *
 * The page is looked up via `resolvePage(pageId)` and re-parsed with
 * `parseHtmlPage`. The result lists every reason the page is considered
 * non-indexable alongside the raw signals.
 */
export const checkIndexabilityTool = defineTool({
    name: "check_indexability",
    description: "Determine whether a previously-fetched page is indexable by search engines. Combines <meta name=robots>, X-Robots-Tag (pass from fetch_page response headers), and canonical signals. Returns `indexable: false` with reasons when noindex or non-self-referencing canonical is present. Call this near the top of any per-page audit — non-indexable pages don't need rule checks.",
    inputSchema,
    outputSchema,
    async execute({ pageId, xRobotsTagHeader }) {
        const entry = resolvePage(pageId);
        const parsed = parseHtmlPage(entry.html, entry.url);
        // If the caller didn't pass X-Robots-Tag, pull it from the cached fetch
        // headers — saves the LLM a round-trip just to surface a header it
        // already saw in fetch_page's output.
        const xRobots = xRobotsTagHeader ?? entry.headers["x-robots-tag"] ?? "";
        const reasons = [];
        const meta = parseRobotsDirectives(parsed.robotsMeta);
        if (meta.noindex)
            reasons.push(`<meta name="robots"> contains noindex`);
        const xRobotsParsed = xRobots ? parseRobotsDirectives(xRobots) : { noindex: false, nofollow: false };
        if (xRobotsParsed.noindex)
            reasons.push(`X-Robots-Tag header contains noindex`);
        // An absent canonical ("") is treated as self-referencing. A present one
        // is compared after URL normalisation so cosmetic differences (trailing
        // slash on the root, host case, fragment) are not flagged.
        const canonicalSelfReferencing = parsed.canonical === "" || canonicalMatchesPage(parsed.canonical, parsed.url);
        if (parsed.canonical && !canonicalSelfReferencing) {
            reasons.push(`canonical points to ${parsed.canonical} (not self-referencing)`);
        }
        return {
            url: parsed.url,
            indexable: !meta.noindex && !xRobotsParsed.noindex && canonicalSelfReferencing,
            reasons,
            metaRobots: parsed.robotsMeta,
            metaNoindex: meta.noindex,
            xRobotsNoindex: xRobotsParsed.noindex,
            canonicalUrl: parsed.canonical,
            canonicalSelfReferencing,
        };
    },
});
|
|
64
|
+
//# sourceMappingURL=check-indexability.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"check-indexability.js","sourceRoot":"","sources":["../../../src/ai/tools/check-indexability.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAChD,OAAO,EAAE,WAAW,EAAE,MAAM,+BAA+B,CAAC;AAC5D,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAExC,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC;IAC3B,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,wCAAwC,CAAC;IACrE,gBAAgB,EAAE,CAAC;SAChB,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,CACP,wJAAwJ,CACzJ;CACJ,CAAC,CAAC;AAEH,MAAM,YAAY,GAAG,CAAC,CAAC,MAAM,CAAC;IAC5B,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE;IACf,SAAS,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,gFAAgF,CAAC;IACjH,OAAO,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,CAAC,qDAAqD,CAAC;IAC5F,UAAU,EAAE,CAAC,CAAC,MAAM,EAAE;IACtB,WAAW,EAAE,CAAC,CAAC,OAAO,EAAE;IACxB,cAAc,EAAE,CAAC,CAAC,OAAO,EAAE;IAC3B,YAAY,EAAE,CAAC,CAAC,MAAM,EAAE;IACxB,wBAAwB,EAAE,CAAC,CAAC,OAAO,EAAE;CACtC,CAAC,CAAC;AAEH,SAAS,qBAAqB,CAAC,KAAa;IAC1C,MAAM,KAAK,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;IAClC,OAAO;QACL,OAAO,EAAE,sBAAsB,CAAC,IAAI,CAAC,KAAK,CAAC;QAC3C,QAAQ,EAAE,uBAAuB,CAAC,IAAI,CAAC,KAAK,CAAC;KAC9C,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,MAAM,qBAAqB,GAAG,UAAU,CAAC;IAC9C,IAAI,EAAE,oBAAoB;IAC1B,WAAW,EACT,qXAAqX;IACvX,WAAW;IACX,YAAY;IACZ,KAAK,CAAC,OAAO,CAAC,EAAE,MAAM,EAAE,gBAAgB,EAAE;QACxC,MAAM,KAAK,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC;QAClC,MAAM,MAAM,GAAG,aAAa,CAAC,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC;QACpD,wEAAwE;QACxE,mEAAmE;QACnE,sCAAsC;QACtC,MAAM,OAAO,GAAG,gBAAgB,IAAI,KAAK,CAAC,OAAO,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;QACxE,MAAM,OAAO,GAAa,EAAE,CAAC;QAE7B,MAAM,IAAI,GAAG,qBAAqB,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;QACtD,IAAI,IAAI,CAAC,OAAO;YAAE,OAAO,CAAC,IAAI,CAAC,uCAAuC,CAAC,CAAC;QAExE,MAAM,aAAa,GAAG,OAAO,CAAC,CAAC,CAAC,qBAAqB,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;QACrG,IAAI,aAAa,CAAC,OAAO;YAAE,OAAO,CAAC,IAAI,CAAC,sCAAsC,CAAC,CAAC;QAEhF,MAAM,wBAAwB,GAAG,MAAM,CAAC,SAAS,KAAK,MAAM,CAAC,GAAG,IAAI,MAAM,CAAC,SAAS,KAAK,EAAE,CAAC;QAC5F,IAAI,MAAM,CAAC,
SAAS,IAAI,CAAC,wBAAwB,EAAE,CAAC;YAClD,OAAO,CAAC,IAAI,CAAC,uBAAuB,MAAM,CAAC,SAAS,yBAAyB,CAAC,CAAC;QACjF,CAAC;QAED,OAAO;YACL,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,SAAS,EAAE,CAAC,IAAI,CAAC,OAAO,IAAI,CAAC,aAAa,CAAC,OAAO,IAAI,wBAAwB;YAC9E,OAAO;YACP,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,WAAW,EAAE,IAAI,CAAC,OAAO;YACzB,cAAc,EAAE,aAAa,CAAC,OAAO;YACrC,YAAY,EAAE,MAAM,CAAC,SAAS;YAC9B,wBAAwB;SACzB,CAAC;IACJ,CAAC;CACF,CAAC,CAAC"}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
/**
 * General-purpose robots.txt fetch + parse + URL test. Wraps the existing
 * `parseDisallowPatterns`, `isBlockedByPattern`, and `parseCrawlDelaySeconds`
 * primitives. The AI-crawler-specific check lives in
 * `check_domain_crawler_access`; this is the lower-level building block.
 *
 * Input: `origin` is required; `testUrl`, `userAgent` and `timeoutMs` are
 * optional. Output: the robots.txt URL, whether it was present, the disallow
 * patterns, crawl delay, whether a Sitemap directive exists, and
 * `testUrlBlocked` (`boolean | null`).
 */
export declare const checkRobotsTool: {
    name: string;
    description: string;
    inputSchema: z.ZodType<{
        origin: string;
        testUrl?: string | undefined;
        userAgent?: string | undefined;
        timeoutMs?: number | undefined;
    }, unknown, z.core.$ZodTypeInternals<{
        origin: string;
        testUrl?: string | undefined;
        userAgent?: string | undefined;
        timeoutMs?: number | undefined;
    }, unknown>>;
    outputSchema: z.ZodType<{
        origin: string;
        robotsTxtUrl: string;
        robotsTxtPresent: boolean;
        disallowPatterns: string[];
        crawlDelaySeconds: number;
        hasSitemapDirective: boolean;
        testUrlBlocked: boolean | null;
    }, unknown, z.core.$ZodTypeInternals<{
        origin: string;
        robotsTxtUrl: string;
        robotsTxtPresent: boolean;
        disallowPatterns: string[];
        crawlDelaySeconds: number;
        hasSitemapDirective: boolean;
        testUrlBlocked: boolean | null;
    }, unknown>>;
    /** Adapter returning this tool as an `ai` package `Tool`. */
    toAiTool(): import("ai").Tool<{
        origin: string;
        testUrl?: string | undefined;
        userAgent?: string | undefined;
        timeoutMs?: number | undefined;
    }, import("./types.js").ToolResult<{
        origin: string;
        robotsTxtUrl: string;
        robotsTxtPresent: boolean;
        disallowPatterns: string[];
        crawlDelaySeconds: number;
        hasSitemapDirective: boolean;
        testUrlBlocked: boolean | null;
    }>>;
    /** Direct invocation with an optional execution context; resolves to a `ToolResult`. */
    run(input: {
        origin: string;
        testUrl?: string | undefined;
        userAgent?: string | undefined;
        timeoutMs?: number | undefined;
    }, ctx?: import("./types.js").ToolExecuteContext): Promise<import("./types.js").ToolResult<{
        origin: string;
        robotsTxtUrl: string;
        robotsTxtPresent: boolean;
        disallowPatterns: string[];
        crawlDelaySeconds: number;
        hasSitemapDirective: boolean;
        testUrlBlocked: boolean | null;
    }>>;
};
|
|
68
|
+
//# sourceMappingURL=check-robots.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"check-robots.d.ts","sourceRoot":"","sources":["../../../src/ai/tools/check-robots.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAyCxB;;;;;GAKG;AACH,eAAO,MAAM,eAAe;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAsD1B,CAAC"}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { cachedFetch } from "../../cache.js";
|
|
3
|
+
import { validateTargetHost } from "../../ssrf-guard.js";
|
|
4
|
+
import { parseDisallowPatterns, isBlockedByPattern, parseCrawlDelaySeconds, } from "../../rules/tech/robots-sitemap-presence.js";
|
|
5
|
+
import { defineTool } from "./types.js";
|
|
6
|
+
// Arguments accepted by check_robots. Only `origin` is mandatory; the timeout
// is capped at 15 seconds.
const inputShape = {
    origin: z
        .string()
        .url()
        .describe("Site origin (e.g. https://example.com). The tool fetches /robots.txt."),
    testUrl: z
        .string()
        .url()
        .optional()
        .describe("Optional URL to test against the robots.txt rules. Returns whether it would be blocked for the wildcard user agent."),
    userAgent: z
        .string()
        .optional()
        .describe("User agent to evaluate rules against. Default '*'. Pass 'GPTBot' / 'ClaudeBot' / 'PerplexityBot' to test AI-crawler-specific rules."),
    timeoutMs: z.number().int().positive().max(15_000).optional(),
};
const inputSchema = z.object(inputShape);
// Result payload of check_robots. `testUrlBlocked` is nullable because a URL
// test is only performed when `testUrl` is supplied.
const outputShape = {
    origin: z.string(),
    robotsTxtUrl: z.string(),
    robotsTxtPresent: z.boolean(),
    disallowPatterns: z.array(z.string()),
    crawlDelaySeconds: z.number().int().nonnegative(),
    hasSitemapDirective: z.boolean(),
    testUrlBlocked: z.boolean().nullable(),
};
const outputSchema = z.object(outputShape);
|
|
31
|
+
/**
 * General-purpose robots.txt fetch + parse + URL test. Wraps the existing
 * `parseDisallowPatterns`, `isBlockedByPattern`, and `parseCrawlDelaySeconds`
 * primitives. The AI-crawler-specific check lives in
 * `check_domain_crawler_access`; this is the lower-level building block.
 *
 * Behaviour notes:
 * - Only the origin part of `origin` is used; `/robots.txt` is appended to it.
 * - Every fetch hop is host-validated through `validateTargetHost`.
 * - Any fetch failure or non-2xx status is reported as `robotsTxtPresent: false`
 *   rather than thrown.
 * - `testUrlBlocked` is `null` when no `testUrl` is given (or it cannot be
 *   parsed); otherwise the URL's path *including its query string* is matched
 *   against the disallow patterns collected for `userAgent`.
 */
export const checkRobotsTool = defineTool({
    name: "check_robots",
    description: "Fetch and parse a domain's robots.txt. Returns disallow patterns, crawl-delay, and whether a Sitemap directive is present. Pass `testUrl` to check whether a specific URL would be blocked for the named user agent (default '*'). Use this when the orchestrator needs to know if a candidate URL is fetchable before scheduling fetch_page.",
    inputSchema,
    outputSchema,
    async execute({ origin, testUrl, userAgent = "*", timeoutMs = 10_000 }, ctx) {
        // Parse once; reused for the robots.txt URL and the echoed origin.
        const siteOrigin = new URL(origin).origin;
        const robotsUrl = `${siteOrigin}/robots.txt`;
        // SSRF guard applied to the initial request and every redirect hop.
        const validateHop = async (hopUrl) => {
            let host;
            try {
                host = new URL(hopUrl).hostname;
            }
            catch {
                throw new Error(`check_robots: invalid URL ${hopUrl}`);
            }
            await validateTargetHost(host);
        };
        let robotsTxtContent = "";
        let robotsTxtPresent = false;
        try {
            const res = await cachedFetch(robotsUrl, { timeoutMs, cache: null, validateHop, signal: ctx?.signal });
            if (res.status >= 200 && res.status < 300) {
                robotsTxtContent = res.body;
                robotsTxtPresent = true;
            }
        }
        catch {
            // robots.txt absent or unreachable — caller decides what to do.
        }
        const disallowPatterns = robotsTxtPresent ? parseDisallowPatterns(robotsTxtContent, [userAgent]) : [];
        const crawlDelaySeconds = robotsTxtPresent ? parseCrawlDelaySeconds(robotsTxtContent) : 0;
        // No `g` flag: a single existence test must not depend on `lastIndex`.
        const hasSitemapDirective = robotsTxtPresent && /^\s*sitemap\s*:/im.test(robotsTxtContent);
        let testUrlBlocked = null;
        if (testUrl) {
            try {
                const target = new URL(testUrl);
                // robots.txt rules match against path + query, so keep `search`;
                // otherwise rules such as `Disallow: /*?sort=` can never match.
                const path = `${target.pathname || "/"}${target.search}`;
                testUrlBlocked = disallowPatterns.some((p) => isBlockedByPattern(path, p));
            }
            catch {
                testUrlBlocked = null;
            }
        }
        return {
            origin: siteOrigin,
            robotsTxtUrl: robotsUrl,
            robotsTxtPresent,
            disallowPatterns,
            crawlDelaySeconds,
            hasSitemapDirective,
            testUrlBlocked,
        };
    },
});
|
|
90
|
+
//# sourceMappingURL=check-robots.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"check-robots.js","sourceRoot":"","sources":["../../../src/ai/tools/check-robots.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAC7C,OAAO,EAAE,kBAAkB,EAAE,MAAM,qBAAqB,CAAC;AACzD,OAAO,EACL,qBAAqB,EACrB,kBAAkB,EAClB,sBAAsB,GACvB,MAAM,6CAA6C,CAAC;AACrD,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAExC,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC;IAC3B,MAAM,EAAE,CAAC;SACN,MAAM,EAAE;SACR,GAAG,EAAE;SACL,QAAQ,CAAC,uEAAuE,CAAC;IACpF,OAAO,EAAE,CAAC;SACP,MAAM,EAAE;SACR,GAAG,EAAE;SACL,QAAQ,EAAE;SACV,QAAQ,CACP,qHAAqH,CACtH;IACH,SAAS,EAAE,CAAC;SACT,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,CACP,qIAAqI,CACtI;IACH,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE;CAC9D,CAAC,CAAC;AAEH,MAAM,YAAY,GAAG,CAAC,CAAC,MAAM,CAAC;IAC5B,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE;IAClB,YAAY,EAAE,CAAC,CAAC,MAAM,EAAE;IACxB,gBAAgB,EAAE,CAAC,CAAC,OAAO,EAAE;IAC7B,gBAAgB,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;IACrC,iBAAiB,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,WAAW,EAAE;IACjD,mBAAmB,EAAE,CAAC,CAAC,OAAO,EAAE;IAChC,cAAc,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE;CACvC,CAAC,CAAC;AAEH;;;;;GAKG;AACH,MAAM,CAAC,MAAM,eAAe,GAAG,UAAU,CAAC;IACxC,IAAI,EAAE,cAAc;IACpB,WAAW,EACT,+UAA+U;IACjV,WAAW;IACX,YAAY;IACZ,KAAK,CAAC,OAAO,CAAC,EAAE,MAAM,EAAE,OAAO,EAAE,SAAS,GAAG,GAAG,EAAE,SAAS,GAAG,MAAM,EAAE,EAAE,GAAG;QACzE,MAAM,SAAS,GAAG,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,MAAM,aAAa,CAAC;QACzD,MAAM,WAAW,GAAG,KAAK,EAAE,MAAc,EAAiB,EAAE;YAC1D,IAAI,IAAY,CAAC;YACjB,IAAI,CAAC;gBACH,IAAI,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC;YAClC,CAAC;YAAC,MAAM,CAAC;gBACP,MAAM,IAAI,KAAK,CAAC,6BAA6B,MAAM,EAAE,CAAC,CAAC;YACzD,CAAC;YACD,MAAM,kBAAkB,CAAC,IAAI,CAAC,CAAC;QACjC,CAAC,CAAC;QAEF,IAAI,gBAAgB,GAAG,EAAE,CAAC;QAC1B,IAAI,gBAAgB,GAAG,KAAK,CAAC;QAC7B,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,MAAM,WAAW,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,CAAC,CAAC;YACvG,IAAI,GAAG,CAAC,MAAM,IAAI,GAAG,IAAI,GAAG,CA
AC,MAAM,GAAG,GAAG,EAAE,CAAC;gBAC1C,gBAAgB,GAAG,GAAG,CAAC,IAAI,CAAC;gBAC5B,gBAAgB,GAAG,IAAI,CAAC;YAC1B,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,gEAAgE;QAClE,CAAC;QAED,MAAM,gBAAgB,GAAG,gBAAgB,CAAC,CAAC,CAAC,qBAAqB,CAAC,gBAAgB,EAAE,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACtG,MAAM,iBAAiB,GAAG,gBAAgB,CAAC,CAAC,CAAC,sBAAsB,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1F,MAAM,mBAAmB,GAAG,gBAAgB,IAAI,oBAAoB,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QAE5F,IAAI,cAAc,GAAmB,IAAI,CAAC;QAC1C,IAAI,OAAO,EAAE,CAAC;YACZ,IAAI,CAAC;gBACH,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,QAAQ,IAAI,GAAG,CAAC;gBAC9C,cAAc,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,kBAAkB,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;YAC7E,CAAC;YAAC,MAAM,CAAC;gBACP,cAAc,GAAG,IAAI,CAAC;YACxB,CAAC;QACH,CAAC;QAED,OAAO;YACL,MAAM,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,MAAM;YAC9B,YAAY,EAAE,SAAS;YACvB,gBAAgB;YAChB,gBAAgB;YAChB,iBAAiB;YACjB,mBAAmB;YACnB,cAAc;SACf,CAAC;IACJ,CAAC;CACF,CAAC,CAAC"}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
/**
 * Tool declaration for the `aeo/answer-first` rule check.
 *
 * Input: `pageId` plus an optional `maxFirstParagraphWords`. Output: a single
 * `finding` that is either an object tagged with `ruleId: "aeo/answer-first"`
 * (severity, confidence, message, optional fix) or `null`.
 * NOTE(review): `null` presumably means the rule raised nothing for the page —
 * confirm against the implementation.
 */
export declare const checkRuleAnswerFirstTool: {
    name: string;
    description: string;
    inputSchema: z.ZodType<{
        pageId: string;
        maxFirstParagraphWords?: number | undefined;
    }, unknown, z.core.$ZodTypeInternals<{
        pageId: string;
        maxFirstParagraphWords?: number | undefined;
    }, unknown>>;
    outputSchema: z.ZodType<{
        finding: {
            ruleId: "aeo/answer-first";
            severity: "warning" | "error";
            confidence: "high" | "medium" | "low" | "speculative";
            message: string;
            fix?: string | undefined;
        } | null;
    }, unknown, z.core.$ZodTypeInternals<{
        finding: {
            ruleId: "aeo/answer-first";
            severity: "warning" | "error";
            confidence: "high" | "medium" | "low" | "speculative";
            message: string;
            fix?: string | undefined;
        } | null;
    }, unknown>>;
    /** Adapter returning this tool as an `ai` package `Tool`. */
    toAiTool(): import("ai").Tool<{
        pageId: string;
        maxFirstParagraphWords?: number | undefined;
    }, import("./types.js").ToolResult<{
        finding: {
            ruleId: "aeo/answer-first";
            severity: "warning" | "error";
            confidence: "high" | "medium" | "low" | "speculative";
            message: string;
            fix?: string | undefined;
        } | null;
    }>>;
    /** Direct invocation with an optional execution context; resolves to a `ToolResult`. */
    run(input: {
        pageId: string;
        maxFirstParagraphWords?: number | undefined;
    }, ctx?: import("./types.js").ToolExecuteContext): Promise<import("./types.js").ToolResult<{
        finding: {
            ruleId: "aeo/answer-first";
            severity: "warning" | "error";
            confidence: "high" | "medium" | "low" | "speculative";
            message: string;
            fix?: string | undefined;
        } | null;
    }>>;
};
|
|
54
|
+
//# sourceMappingURL=check-rule-answer-first.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"check-rule-answer-first.d.ts","sourceRoot":"","sources":["../../../src/ai/tools/check-rule-answer-first.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AA6BxB,eAAO,MAAM,wBAAwB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAwBnC,CAAC"}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { parseHtmlPage } from "../../parser.js";
|
|
3
|
+
import { answerFirstRule } from "../../rules/aeo/answer-first.js";
|
|
4
|
+
import { resolvePage } from "../orchestrator/page-cache.js";
|
|
5
|
+
import { defineTool } from "./types.js";
|
|
6
|
+
// Field schemas for the answer-first tool input, named individually so the
// object schema below reads as a plain field list.
const answerFirstPageIdField = z.string().describe("Page reference returned by fetch_page.");
// Optional positive integer word budget, capped at 500.
const answerFirstWordBudgetField = z
    .number()
    .int()
    .positive()
    .max(500)
    .optional()
    .describe("Opener should fit under this many words to count as extractable. Default 100.");
const inputSchema = z.object({
    pageId: answerFirstPageIdField,
    maxFirstParagraphWords: answerFirstWordBudgetField,
});
|
|
16
|
+
// Shape of a single answer-first finding as exposed to the caller.
const answerFirstFindingSchema = z.object({
    ruleId: z.literal("aeo/answer-first"),
    severity: z.enum(["warning", "error"]),
    confidence: z.enum(["high", "medium", "low", "speculative"]),
    message: z.string(),
    fix: z.string().optional(),
});
// The tool reports at most one finding; `null` means nothing was reported.
const outputSchema = z.object({
    finding: answerFirstFindingSchema.nullable(),
});
|
|
27
|
+
/**
 * Tool wrapper around `answerFirstRule` for a single cached page.
 *
 * Resolves the page by id, parses its HTML, runs the rule with the requested
 * word budget (100 when omitted) and returns only the first finding, or
 * `null` when the rule reports nothing.
 */
export const checkRuleAnswerFirstTool = defineTool({
    name: "check_rule_answer_first",
    description: "Check whether a previously-fetched page opens with a direct, extractable answer (concrete fact, named entity, complete sentence, <100 words). Critical for AI Overview / Perplexity citability. Returns a finding when the opener is boilerplate, too long, or lacks specifics. Note: cross-page template detection (same opener across N pages) is NOT done here — call this per page and aggregate identical openers yourself.",
    inputSchema,
    outputSchema,
    async execute(args) {
        const { pageId } = args;
        // Same semantics as a destructuring default: only `undefined` falls back.
        const maxFirstParagraphWords = args.maxFirstParagraphWords === undefined ? 100 : args.maxFirstParagraphWords;
        const cached = resolvePage(pageId);
        const page = parseHtmlPage(cached.html, cached.url);
        const first = answerFirstRule([page], [], { maxFirstParagraphWords })[0];
        if (!first) {
            return { finding: null };
        }
        // The output schema only allows "warning" | "error": anything that is
        // not exactly "error" is reported as "warning".
        let severity = "warning";
        if (first.severity === "error") {
            severity = "error";
        }
        return {
            finding: {
                ruleId: "aeo/answer-first",
                severity,
                // A finding without a confidence is reported as "medium".
                confidence: first.confidence ?? "medium",
                message: first.message,
                fix: first.fix,
            },
        };
    },
});
|
|
50
|
+
//# sourceMappingURL=check-rule-answer-first.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"check-rule-answer-first.js","sourceRoot":"","sources":["../../../src/ai/tools/check-rule-answer-first.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAChD,OAAO,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAClE,OAAO,EAAE,WAAW,EAAE,MAAM,+BAA+B,CAAC;AAC5D,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAExC,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC;IAC3B,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,wCAAwC,CAAC;IACrE,sBAAsB,EAAE,CAAC;SACtB,MAAM,EAAE;SACR,GAAG,EAAE;SACL,QAAQ,EAAE;SACV,GAAG,CAAC,GAAG,CAAC;SACR,QAAQ,EAAE;SACV,QAAQ,CAAC,+EAA+E,CAAC;CAC7F,CAAC,CAAC;AAEH,MAAM,YAAY,GAAG,CAAC,CAAC,MAAM,CAAC;IAC5B,OAAO,EAAE,CAAC;SACP,MAAM,CAAC;QACN,MAAM,EAAE,CAAC,CAAC,OAAO,CAAC,kBAAkB,CAAC;QACrC,QAAQ,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;QACtC,UAAU,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,aAAa,CAAC,CAAC;QAC5D,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE;QACnB,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;KAC3B,CAAC;SACD,QAAQ,EAAE;CACd,CAAC,CAAC;AAEH,MAAM,CAAC,MAAM,wBAAwB,GAAG,UAAU,CAAC;IACjD,IAAI,EAAE,yBAAyB;IAC/B,WAAW,EACT,kaAAka;IACpa,WAAW;IACX,YAAY;IACZ,KAAK,CAAC,OAAO,CAAC,EAAE,MAAM,EAAE,sBAAsB,GAAG,GAAG,EAAE;QACpD,MAAM,KAAK,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC;QAClC,MAAM,MAAM,GAAG,aAAa,CAAC,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC;QACpD,MAAM,QAAQ,GAAG,eAAe,CAAC,CAAC,MAAM,CAAC,EAAE,EAAE,EAAE,EAAE,sBAAsB,EAAE,CAAC,CAAC;QAC3E,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;QAE5B,OAAO;YACL,OAAO,EAAE,OAAO;gBACd,CAAC,CAAC;oBACE,MAAM,EAAE,kBAA2B;oBACnC,QAAQ,EAAE,OAAO,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC,CAAE,OAAiB,CAAC,CAAC,CAAE,SAAmB;oBAClF,UAAU,EAAE,OAAO,CAAC,UAAU,IAAI,QAAQ;oBAC1C,OAAO,EAAE,OAAO,CAAC,OAAO;oBACxB,GAAG,EAAE,OAAO,CAAC,GAAG;iBACjB;gBACH,CAAC,CAAC,IAAI;SACT,CAAC;IACJ,CAAC;CACF,CAAC,CAAC"}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
/** Arguments accepted by the canonical-consistency tool. */
type CanonicalConsistencyToolInput = {
    pageId: string;
    knownUrls?: string[] | undefined;
};
/** Result payload of the canonical-consistency tool. */
type CanonicalConsistencyToolOutput = {
    hasCanonical: boolean;
    canonicalUrl: string | null;
    selfReferencing: boolean;
    findings: {
        ruleId: "tech/canonical-consistency";
        severity: "info" | "warning" | "error" | "critical";
        message: string;
        fix?: string | undefined;
        relatedUrls?: string[] | undefined;
    }[];
};
/**
 * Tool object for the `tech/canonical-consistency` rule: its schemas, an
 * adapter to an `ai` SDK `Tool`, and a `run` entry point resolving to a
 * `ToolResult`.
 */
export declare const checkRuleCanonicalConsistencyTool: {
    name: string;
    description: string;
    inputSchema: z.ZodType<CanonicalConsistencyToolInput, unknown, z.core.$ZodTypeInternals<CanonicalConsistencyToolInput, unknown>>;
    outputSchema: z.ZodType<CanonicalConsistencyToolOutput, unknown, z.core.$ZodTypeInternals<CanonicalConsistencyToolOutput, unknown>>;
    toAiTool(): import("ai").Tool<CanonicalConsistencyToolInput, import("./types.js").ToolResult<CanonicalConsistencyToolOutput>>;
    run(input: CanonicalConsistencyToolInput, ctx?: import("./types.js").ToolExecuteContext): Promise<import("./types.js").ToolResult<CanonicalConsistencyToolOutput>>;
};
|
|
66
|
+
//# sourceMappingURL=check-rule-canonical-consistency.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"check-rule-canonical-consistency.d.ts","sourceRoot":"","sources":["../../../src/ai/tools/check-rule-canonical-consistency.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAgCxB,eAAO,MAAM,iCAAiC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA0B5C,CAAC"}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { parseHtmlPage } from "../../parser.js";
|
|
3
|
+
import { canonicalConsistencyRule } from "../../rules/tech/canonical-consistency.js";
|
|
4
|
+
import { mergeNormalizeUrlOptions } from "../../url-normalize.js";
|
|
5
|
+
import { resolvePage } from "../orchestrator/page-cache.js";
|
|
6
|
+
import { defineTool } from "./types.js";
|
|
7
|
+
// Field schemas for the canonical-consistency tool input.
const canonicalPageIdField = z.string().describe("Page reference returned by fetch_page.");
// Optional list of URLs the caller has already crawled.
const canonicalKnownUrlsField = z
    .array(z.string())
    .optional()
    .describe("Other URLs already crawled in this audit. Lets the rule classify cross-page canonicalization severity (warning when canonical points to a known URL; info when it points outside).");
const inputSchema = z.object({
    pageId: canonicalPageIdField,
    knownUrls: canonicalKnownUrlsField,
});
|
|
14
|
+
// Shape of one canonical-consistency finding as exposed to the caller.
const canonicalFindingSchema = z.object({
    ruleId: z.literal("tech/canonical-consistency"),
    severity: z.enum(["info", "warning", "error", "critical"]),
    message: z.string(),
    fix: z.string().optional(),
    relatedUrls: z.array(z.string()).optional(),
});
// Summary flags about the page's canonical plus the rule's findings.
const outputSchema = z.object({
    hasCanonical: z.boolean(),
    canonicalUrl: z.string().nullable(),
    selfReferencing: z.boolean(),
    findings: z.array(canonicalFindingSchema),
});
|
|
26
|
+
/**
 * Tool wrapper around `canonicalConsistencyRule` for a single cached page.
 *
 * Resolves the page by id, parses its HTML and runs the rule against the set
 * of `knownUrls` (empty when omitted) with the default URL-normalization
 * options. Alongside the rule's findings it returns three summary fields
 * describing the parsed canonical.
 */
export const checkRuleCanonicalConsistencyTool = defineTool({
    name: "check_rule_canonical_consistency",
    description: "Check the canonical tag on a previously-fetched page: missing, invalid, points to another crawled page, or HTTP-Link/HTML-link mismatch. Pass `knownUrls` (URLs crawled so far) to differentiate cross-page canonicals from external ones. Returns 0-2 findings depending on what's wrong.",
    inputSchema,
    outputSchema,
    async execute({ pageId, knownUrls = [] }) {
        const entry = resolvePage(pageId);
        const parsed = parseHtmlPage(entry.html, entry.url);
        const known = new Set(knownUrls);
        const normalizeOpts = mergeNormalizeUrlOptions();
        const findings = canonicalConsistencyRule([parsed], known, normalizeOpts);
        // Derive every summary field from one value so they can never disagree:
        // previously `hasCanonical` tested `!== ""` while `canonicalUrl` tested
        // truthiness, so a non-string empty value produced `true` next to `null`.
        const canonicalUrl = parsed.canonical || null;
        return {
            hasCanonical: canonicalUrl !== null,
            canonicalUrl,
            // Exact string comparison against the page URL (no normalization is
            // applied here); a missing canonical is never self-referencing.
            selfReferencing: canonicalUrl !== null && canonicalUrl === parsed.url,
            findings: findings.map((f) => ({
                ruleId: "tech/canonical-consistency",
                severity: f.severity,
                message: f.message,
                fix: f.fix,
                relatedUrls: f.relatedUrls,
            })),
        };
    },
});
|
|
51
|
+
//# sourceMappingURL=check-rule-canonical-consistency.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"check-rule-canonical-consistency.js","sourceRoot":"","sources":["../../../src/ai/tools/check-rule-canonical-consistency.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAChD,OAAO,EAAE,wBAAwB,EAAE,MAAM,2CAA2C,CAAC;AACrF,OAAO,EAAE,wBAAwB,EAAE,MAAM,wBAAwB,CAAC;AAClE,OAAO,EAAE,WAAW,EAAE,MAAM,+BAA+B,CAAC;AAC5D,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAExC,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC;IAC3B,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,wCAAwC,CAAC;IACrE,SAAS,EAAE,CAAC;SACT,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;SACjB,QAAQ,EAAE;SACV,QAAQ,CACP,oLAAoL,CACrL;CACJ,CAAC,CAAC;AAEH,MAAM,YAAY,GAAG,CAAC,CAAC,MAAM,CAAC;IAC5B,YAAY,EAAE,CAAC,CAAC,OAAO,EAAE;IACzB,YAAY,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IACnC,eAAe,EAAE,CAAC,CAAC,OAAO,EAAE;IAC5B,QAAQ,EAAE,CAAC,CAAC,KAAK,CACf,CAAC,CAAC,MAAM,CAAC;QACP,MAAM,EAAE,CAAC,CAAC,OAAO,CAAC,4BAA4B,CAAC;QAC/C,QAAQ,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,UAAU,CAAC,CAAC;QAC1D,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE;QACnB,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;QAC1B,WAAW,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE;KAC5C,CAAC,CACH;CACF,CAAC,CAAC;AAEH,MAAM,CAAC,MAAM,iCAAiC,GAAG,UAAU,CAAC;IAC1D,IAAI,EAAE,kCAAkC;IACxC,WAAW,EACT,4RAA4R;IAC9R,WAAW;IACX,YAAY;IACZ,KAAK,CAAC,OAAO,CAAC,EAAE,MAAM,EAAE,SAAS,GAAG,EAAE,EAAE;QACtC,MAAM,KAAK,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC;QAClC,MAAM,MAAM,GAAG,aAAa,CAAC,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC;QACpD,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,CAAC;QACjC,MAAM,aAAa,GAAG,wBAAwB,EAAE,CAAC;QACjD,MAAM,QAAQ,GAAG,wBAAwB,CAAC,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,aAAa,CAAC,CAAC;QAE1E,OAAO;YACL,YAAY,EAAE,MAAM,CAAC,SAAS,KAAK,EAAE;YACrC,YAAY,EAAE,MAAM,CAAC,SAAS,IAAI,IAAI;YACtC,eAAe,EAAE,MAAM,CAAC,SAAS,KAAK,MAAM,CAAC,GAAG;YAChD,QAAQ,EAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBAC7B,MAAM,EAAE,4BAAqC;gBAC7C,QAAQ,EAAE,CAAC,CAAC,QAAQ;gBACpB,OAAO,EAAE,CAAC,CAAC,OAAO;gBAClB,GAAG,EAAE,CAAC,CAAC,GAAG;gBACV,WAAW,EAAE,C
AAC,CAAC,WAAW;aAC3B,CAAC,CAAC;SACJ,CAAC;IACJ,CAAC;CACF,CAAC,CAAC"}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
/** Arguments accepted by the citable-facts tool. */
type CitableFactsToolInput = {
    pageIds: string[];
    minFactsPerPage?: number | undefined;
};
/** Result payload of the citable-facts tool. */
type CitableFactsToolOutput = {
    findings: {
        ruleId: "aeo/citable-facts";
        severity: "info" | "warning" | "error" | "critical";
        message: string;
        confidence?: "high" | "medium" | "low" | "speculative" | undefined;
        pageUrl?: string | undefined;
        fix?: string | undefined;
    }[];
};
/**
 * Tool object for the `aeo/citable-facts` rule: its schemas, an adapter to an
 * `ai` SDK `Tool`, and a `run` entry point resolving to a `ToolResult`.
 */
export declare const checkRuleCitableFactsTool: {
    name: string;
    description: string;
    inputSchema: z.ZodType<CitableFactsToolInput, unknown, z.core.$ZodTypeInternals<CitableFactsToolInput, unknown>>;
    outputSchema: z.ZodType<CitableFactsToolOutput, unknown, z.core.$ZodTypeInternals<CitableFactsToolOutput, unknown>>;
    toAiTool(): import("ai").Tool<CitableFactsToolInput, import("./types.js").ToolResult<CitableFactsToolOutput>>;
    run(input: CitableFactsToolInput, ctx?: import("./types.js").ToolExecuteContext): Promise<import("./types.js").ToolResult<CitableFactsToolOutput>>;
};
|
|
58
|
+
//# sourceMappingURL=check-rule-citable-facts.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"check-rule-citable-facts.d.ts","sourceRoot":"","sources":["../../../src/ai/tools/check-rule-citable-facts.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAwBxB,eAAO,MAAM,yBAAyB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAyBpC,CAAC"}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { parseHtmlPage } from "../../parser.js";
|
|
3
|
+
import { citableFactsRule } from "../../rules/aeo/citable-facts.js";
|
|
4
|
+
import { resolvePages } from "../orchestrator/page-cache.js";
|
|
5
|
+
import { defineTool } from "./types.js";
|
|
6
|
+
// Input for the citable-facts tool: 1-50 page references plus an optional
// integer threshold in the range 0-20.
const inputSchema = z.object({
    pageIds: z.array(z.string()).min(1).max(50).describe("Page references from fetch_page."),
    // Described like every other tool input field, so the schema shown to the
    // model explains what the number controls.
    minFactsPerPage: z
        .number()
        .int()
        .nonnegative()
        .max(20)
        .optional()
        .describe("Citable-facts threshold per page. Default 3."),
});
|
|
10
|
+
// Shape of one citable-facts finding as exposed to the caller.
const citableFactsFindingSchema = z.object({
    ruleId: z.literal("aeo/citable-facts"),
    severity: z.enum(["info", "warning", "error", "critical"]),
    confidence: z.enum(["high", "medium", "low", "speculative"]).optional(),
    message: z.string(),
    pageUrl: z.string().optional(),
    fix: z.string().optional(),
});
// The tool returns every finding the rule produced for the given pages.
const outputSchema = z.object({
    findings: z.array(citableFactsFindingSchema),
});
|
|
20
|
+
/**
 * Tool wrapper around `citableFactsRule` for a batch of cached pages.
 *
 * Resolves the pages by id, parses each one, runs the rule (passing an
 * options object only when `minFactsPerPage` was supplied) and returns the
 * findings projected onto the tool's output shape.
 */
export const checkRuleCitableFactsTool = defineTool({
    name: "check_rule_citable_facts",
    description: "Count citable facts (numbers, dates, percentages, dollar amounts, named entities) per page. Pages with too few quotable specifics get skipped by AI Overview / Perplexity / ChatGPT for citation. Default threshold is 3 facts per page.",
    inputSchema,
    outputSchema,
    async execute({ pageIds, minFactsPerPage }) {
        const pages = resolvePages(pageIds).map((cached) => parseHtmlPage(cached.html, cached.url));
        // Leave the options argument undefined unless a threshold was given.
        let ruleOptions;
        if (minFactsPerPage !== undefined) {
            ruleOptions = { minFactsPerPage };
        }
        const reported = citableFactsRule(pages, [], ruleOptions);
        const findings = reported.map((item) => {
            return {
                ruleId: "aeo/citable-facts",
                severity: item.severity,
                confidence: item.confidence,
                message: item.message,
                pageUrl: item.pageUrl,
                fix: item.fix,
            };
        });
        return { findings };
    },
});
|
|
41
|
+
//# sourceMappingURL=check-rule-citable-facts.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"check-rule-citable-facts.js","sourceRoot":"","sources":["../../../src/ai/tools/check-rule-citable-facts.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAChD,OAAO,EAAE,gBAAgB,EAAE,MAAM,kCAAkC,CAAC;AACpE,OAAO,EAAE,YAAY,EAAE,MAAM,+BAA+B,CAAC;AAC7D,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAExC,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC;IAC3B,OAAO,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,QAAQ,CAAC,kCAAkC,CAAC;IACxF,eAAe,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,WAAW,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,QAAQ,EAAE;CACnE,CAAC,CAAC;AAEH,MAAM,YAAY,GAAG,CAAC,CAAC,MAAM,CAAC;IAC5B,QAAQ,EAAE,CAAC,CAAC,KAAK,CACf,CAAC,CAAC,MAAM,CAAC;QACP,MAAM,EAAE,CAAC,CAAC,OAAO,CAAC,mBAAmB,CAAC;QACtC,QAAQ,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,UAAU,CAAC,CAAC;QAC1D,UAAU,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,aAAa,CAAC,CAAC,CAAC,QAAQ,EAAE;QACvE,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE;QACnB,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;QAC9B,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;KAC3B,CAAC,CACH;CACF,CAAC,CAAC;AAEH,MAAM,CAAC,MAAM,yBAAyB,GAAG,UAAU,CAAC;IAClD,IAAI,EAAE,0BAA0B;IAChC,WAAW,EACT,0OAA0O;IAC5O,WAAW;IACX,YAAY;IACZ,KAAK,CAAC,OAAO,CAAC,EAAE,OAAO,EAAE,eAAe,EAAE;QACxC,MAAM,OAAO,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;QACtC,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAChE,MAAM,QAAQ,GAAG,gBAAgB,CAC/B,MAAM,EACN,EAAE,EACF,eAAe,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,eAAe,EAAE,CAAC,CAAC,CAAC,SAAS,CAChE,CAAC;QACF,OAAO;YACL,QAAQ,EAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBAC7B,MAAM,EAAE,mBAA4B;gBACpC,QAAQ,EAAE,CAAC,CAAC,QAAQ;gBACpB,UAAU,EAAE,CAAC,CAAC,UAAU;gBACxB,OAAO,EAAE,CAAC,CAAC,OAAO;gBAClB,OAAO,EAAE,CAAC,CAAC,OAAO;gBAClB,GAAG,EAAE,CAAC,CAAC,GAAG;aACX,CAAC,CAAC;SACJ,CAAC;IACJ,CAAC;CACF,CAAC,CAAC"}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
/** Arguments accepted by the content-modularity tool. */
type ContentModularityToolInput = {
    pageIds: string[];
    maxParagraphWords?: number | undefined;
    minSelfContainedRatio?: number | undefined;
};
/** Result payload of the content-modularity tool. */
type ContentModularityToolOutput = {
    findings: {
        ruleId: "aeo/content-modularity";
        severity: "info" | "warning" | "error" | "critical";
        message: string;
        pageUrl?: string | undefined;
        fix?: string | undefined;
    }[];
};
/**
 * Tool object for the `aeo/content-modularity` rule: its schemas, an adapter
 * to an `ai` SDK `Tool`, and a `run` entry point resolving to a `ToolResult`.
 */
export declare const checkRuleContentModularityTool: {
    name: string;
    description: string;
    inputSchema: z.ZodType<ContentModularityToolInput, unknown, z.core.$ZodTypeInternals<ContentModularityToolInput, unknown>>;
    outputSchema: z.ZodType<ContentModularityToolOutput, unknown, z.core.$ZodTypeInternals<ContentModularityToolOutput, unknown>>;
    toAiTool(): import("ai").Tool<ContentModularityToolInput, import("./types.js").ToolResult<ContentModularityToolOutput>>;
    run(input: ContentModularityToolInput, ctx?: import("./types.js").ToolExecuteContext): Promise<import("./types.js").ToolResult<ContentModularityToolOutput>>;
};
|
|
58
|
+
//# sourceMappingURL=check-rule-content-modularity.d.ts.map
|