@pseolint/core 0.4.3 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +264 -169
- package/dist/ai/manifest/diff.d.ts +78 -0
- package/dist/ai/manifest/diff.d.ts.map +1 -0
- package/dist/ai/manifest/diff.js +139 -0
- package/dist/ai/manifest/diff.js.map +1 -0
- package/dist/ai/manifest/index.d.ts +18 -0
- package/dist/ai/manifest/index.d.ts.map +1 -0
- package/dist/ai/manifest/index.js +15 -0
- package/dist/ai/manifest/index.js.map +1 -0
- package/dist/ai/manifest/validate-manifest.d.ts +37 -0
- package/dist/ai/manifest/validate-manifest.d.ts.map +1 -0
- package/dist/ai/manifest/validate-manifest.js +67 -0
- package/dist/ai/manifest/validate-manifest.js.map +1 -0
- package/dist/ai/manifest/validators/domain-patches.d.ts +15 -0
- package/dist/ai/manifest/validators/domain-patches.d.ts.map +1 -0
- package/dist/ai/manifest/validators/domain-patches.js +110 -0
- package/dist/ai/manifest/validators/domain-patches.js.map +1 -0
- package/dist/ai/manifest/validators/index.d.ts +5 -0
- package/dist/ai/manifest/validators/index.d.ts.map +1 -0
- package/dist/ai/manifest/validators/index.js +4 -0
- package/dist/ai/manifest/validators/index.js.map +1 -0
- package/dist/ai/manifest/validators/page-changes.d.ts +36 -0
- package/dist/ai/manifest/validators/page-changes.d.ts.map +1 -0
- package/dist/ai/manifest/validators/page-changes.js +221 -0
- package/dist/ai/manifest/validators/page-changes.js.map +1 -0
- package/dist/ai/manifest/validators/types.d.ts +17 -0
- package/dist/ai/manifest/validators/types.d.ts.map +1 -0
- package/dist/ai/manifest/validators/types.js +5 -0
- package/dist/ai/manifest/validators/types.js.map +1 -0
- package/dist/ai/orchestrate.d.ts +74 -0
- package/dist/ai/orchestrate.d.ts.map +1 -0
- package/dist/ai/orchestrate.js +54 -0
- package/dist/ai/orchestrate.js.map +1 -0
- package/dist/ai/orchestrator/budget.d.ts +57 -0
- package/dist/ai/orchestrator/budget.d.ts.map +1 -0
- package/dist/ai/orchestrator/budget.js +114 -0
- package/dist/ai/orchestrator/budget.js.map +1 -0
- package/dist/ai/orchestrator/finish-tool.d.ts +568 -0
- package/dist/ai/orchestrator/finish-tool.d.ts.map +1 -0
- package/dist/ai/orchestrator/finish-tool.js +114 -0
- package/dist/ai/orchestrator/finish-tool.js.map +1 -0
- package/dist/ai/orchestrator/index.d.ts +25 -0
- package/dist/ai/orchestrator/index.d.ts.map +1 -0
- package/dist/ai/orchestrator/index.js +21 -0
- package/dist/ai/orchestrator/index.js.map +1 -0
- package/dist/ai/orchestrator/log.d.ts +24 -0
- package/dist/ai/orchestrator/log.d.ts.map +1 -0
- package/dist/ai/orchestrator/log.js +48 -0
- package/dist/ai/orchestrator/log.js.map +1 -0
- package/dist/ai/orchestrator/page-cache.d.ts +64 -0
- package/dist/ai/orchestrator/page-cache.d.ts.map +1 -0
- package/dist/ai/orchestrator/page-cache.js +127 -0
- package/dist/ai/orchestrator/page-cache.js.map +1 -0
- package/dist/ai/orchestrator/prompt.d.ts +16 -0
- package/dist/ai/orchestrator/prompt.d.ts.map +1 -0
- package/dist/ai/orchestrator/prompt.js +52 -0
- package/dist/ai/orchestrator/prompt.js.map +1 -0
- package/dist/ai/orchestrator/runner.d.ts +65 -0
- package/dist/ai/orchestrator/runner.d.ts.map +1 -0
- package/dist/ai/orchestrator/runner.js +223 -0
- package/dist/ai/orchestrator/runner.js.map +1 -0
- package/dist/ai/orchestrator/session.d.ts +44 -0
- package/dist/ai/orchestrator/session.d.ts.map +1 -0
- package/dist/ai/orchestrator/session.js +64 -0
- package/dist/ai/orchestrator/session.js.map +1 -0
- package/dist/ai/orchestrator/types.d.ts +99 -0
- package/dist/ai/orchestrator/types.d.ts.map +1 -0
- package/dist/ai/orchestrator/types.js +8 -0
- package/dist/ai/orchestrator/types.js.map +1 -0
- package/dist/ai/probes/cache.d.ts +12 -0
- package/dist/ai/probes/cache.d.ts.map +1 -0
- package/dist/ai/probes/cache.js +46 -0
- package/dist/ai/probes/cache.js.map +1 -0
- package/dist/ai/tools/ask-ai-engine.d.ts +77 -0
- package/dist/ai/tools/ask-ai-engine.d.ts.map +1 -0
- package/dist/ai/tools/ask-ai-engine.js +253 -0
- package/dist/ai/tools/ask-ai-engine.js.map +1 -0
- package/dist/ai/tools/check-domain-crawler-access.d.ts +71 -0
- package/dist/ai/tools/check-domain-crawler-access.d.ts.map +1 -0
- package/dist/ai/tools/check-domain-crawler-access.js +76 -0
- package/dist/ai/tools/check-domain-crawler-access.js.map +1 -0
- package/dist/ai/tools/check-domain-llms-txt.d.ts +70 -0
- package/dist/ai/tools/check-domain-llms-txt.d.ts.map +1 -0
- package/dist/ai/tools/check-domain-llms-txt.js +75 -0
- package/dist/ai/tools/check-domain-llms-txt.js.map +1 -0
- package/dist/ai/tools/check-indexability.d.ts +58 -0
- package/dist/ai/tools/check-indexability.d.ts.map +1 -0
- package/dist/ai/tools/check-indexability.js +64 -0
- package/dist/ai/tools/check-indexability.js.map +1 -0
- package/dist/ai/tools/check-robots.d.ts +68 -0
- package/dist/ai/tools/check-robots.d.ts.map +1 -0
- package/dist/ai/tools/check-robots.js +90 -0
- package/dist/ai/tools/check-robots.js.map +1 -0
- package/dist/ai/tools/check-rule-answer-first.d.ts +54 -0
- package/dist/ai/tools/check-rule-answer-first.d.ts.map +1 -0
- package/dist/ai/tools/check-rule-answer-first.js +50 -0
- package/dist/ai/tools/check-rule-answer-first.js.map +1 -0
- package/dist/ai/tools/check-rule-canonical-consistency.d.ts +66 -0
- package/dist/ai/tools/check-rule-canonical-consistency.d.ts.map +1 -0
- package/dist/ai/tools/check-rule-canonical-consistency.js +51 -0
- package/dist/ai/tools/check-rule-canonical-consistency.js.map +1 -0
- package/dist/ai/tools/check-rule-citable-facts.d.ts +58 -0
- package/dist/ai/tools/check-rule-citable-facts.d.ts.map +1 -0
- package/dist/ai/tools/check-rule-citable-facts.js +41 -0
- package/dist/ai/tools/check-rule-citable-facts.js.map +1 -0
- package/dist/ai/tools/check-rule-content-modularity.d.ts +58 -0
- package/dist/ai/tools/check-rule-content-modularity.d.ts.map +1 -0
- package/dist/ai/tools/check-rule-content-modularity.js +45 -0
- package/dist/ai/tools/check-rule-content-modularity.js.map +1 -0
- package/dist/ai/tools/check-rule-faq-coverage.d.ts +54 -0
- package/dist/ai/tools/check-rule-faq-coverage.d.ts.map +1 -0
- package/dist/ai/tools/check-rule-faq-coverage.js +39 -0
- package/dist/ai/tools/check-rule-faq-coverage.js.map +1 -0
- package/dist/ai/tools/check-rule-freshness-signals.d.ts +54 -0
- package/dist/ai/tools/check-rule-freshness-signals.d.ts.map +1 -0
- package/dist/ai/tools/check-rule-freshness-signals.js +45 -0
- package/dist/ai/tools/check-rule-freshness-signals.js.map +1 -0
- package/dist/ai/tools/check-rule-json-ld-valid.d.ts +54 -0
- package/dist/ai/tools/check-rule-json-ld-valid.d.ts.map +1 -0
- package/dist/ai/tools/check-rule-json-ld-valid.js +44 -0
- package/dist/ai/tools/check-rule-json-ld-valid.js.map +1 -0
- package/dist/ai/tools/check-rule-missing-author.d.ts +54 -0
- package/dist/ai/tools/check-rule-missing-author.d.ts.map +1 -0
- package/dist/ai/tools/check-rule-missing-author.js +45 -0
- package/dist/ai/tools/check-rule-missing-author.js.map +1 -0
- package/dist/ai/tools/check-rule-near-duplicate.d.ts +82 -0
- package/dist/ai/tools/check-rule-near-duplicate.d.ts.map +1 -0
- package/dist/ai/tools/check-rule-near-duplicate.js +63 -0
- package/dist/ai/tools/check-rule-near-duplicate.js.map +1 -0
- package/dist/ai/tools/check-rule-required-fields.d.ts +50 -0
- package/dist/ai/tools/check-rule-required-fields.d.ts.map +1 -0
- package/dist/ai/tools/check-rule-required-fields.js +38 -0
- package/dist/ai/tools/check-rule-required-fields.js.map +1 -0
- package/dist/ai/tools/check-rule-schema-consistency.d.ts +54 -0
- package/dist/ai/tools/check-rule-schema-consistency.d.ts.map +1 -0
- package/dist/ai/tools/check-rule-schema-consistency.js +44 -0
- package/dist/ai/tools/check-rule-schema-consistency.js.map +1 -0
- package/dist/ai/tools/check-rule-summary-bait.d.ts +54 -0
- package/dist/ai/tools/check-rule-summary-bait.d.ts.map +1 -0
- package/dist/ai/tools/check-rule-summary-bait.js +39 -0
- package/dist/ai/tools/check-rule-summary-bait.js.map +1 -0
- package/dist/ai/tools/check-rule-thin-content.d.ts +66 -0
- package/dist/ai/tools/check-rule-thin-content.d.ts.map +1 -0
- package/dist/ai/tools/check-rule-thin-content.js +58 -0
- package/dist/ai/tools/check-rule-thin-content.js.map +1 -0
- package/dist/ai/tools/detect-templates.d.ts +60 -0
- package/dist/ai/tools/detect-templates.d.ts.map +1 -0
- package/dist/ai/tools/detect-templates.js +43 -0
- package/dist/ai/tools/detect-templates.js.map +1 -0
- package/dist/ai/tools/fetch-page.d.ts +70 -0
- package/dist/ai/tools/fetch-page.d.ts.map +1 -0
- package/dist/ai/tools/fetch-page.js +93 -0
- package/dist/ai/tools/fetch-page.js.map +1 -0
- package/dist/ai/tools/fetch-sitemap.d.ts +60 -0
- package/dist/ai/tools/fetch-sitemap.d.ts.map +1 -0
- package/dist/ai/tools/fetch-sitemap.js +116 -0
- package/dist/ai/tools/fetch-sitemap.js.map +1 -0
- package/dist/ai/tools/index.d.ts +1555 -0
- package/dist/ai/tools/index.d.ts.map +1 -0
- package/dist/ai/tools/index.js +119 -0
- package/dist/ai/tools/index.js.map +1 -0
- package/dist/ai/tools/parse-page.d.ts +94 -0
- package/dist/ai/tools/parse-page.d.ts.map +1 -0
- package/dist/ai/tools/parse-page.js +108 -0
- package/dist/ai/tools/parse-page.js.map +1 -0
- package/dist/ai/tools/query-serp.d.ts +113 -0
- package/dist/ai/tools/query-serp.d.ts.map +1 -0
- package/dist/ai/tools/query-serp.js +131 -0
- package/dist/ai/tools/query-serp.js.map +1 -0
- package/dist/ai/tools/sample-template.d.ts +67 -0
- package/dist/ai/tools/sample-template.d.ts.map +1 -0
- package/dist/ai/tools/sample-template.js +75 -0
- package/dist/ai/tools/sample-template.js.map +1 -0
- package/dist/ai/tools/types.d.ts +73 -0
- package/dist/ai/tools/types.d.ts.map +1 -0
- package/dist/ai/tools/types.js +64 -0
- package/dist/ai/tools/types.js.map +1 -0
- package/dist/ai/tools/validate-jsonld.d.ts +62 -0
- package/dist/ai/tools/validate-jsonld.d.ts.map +1 -0
- package/dist/ai/tools/validate-jsonld.js +84 -0
- package/dist/ai/tools/validate-jsonld.js.map +1 -0
- package/dist/auditor.d.ts +4 -0
- package/dist/auditor.d.ts.map +1 -1
- package/dist/auditor.js +629 -64
- package/dist/auditor.js.map +1 -1
- package/dist/backpressure.d.ts.map +1 -1
- package/dist/backpressure.js +10 -3
- package/dist/backpressure.js.map +1 -1
- package/dist/enrich-findings.d.ts.map +1 -1
- package/dist/enrich-findings.js +15 -1
- package/dist/enrich-findings.js.map +1 -1
- package/dist/formatters/console.d.ts.map +1 -1
- package/dist/formatters/console.js +13 -0
- package/dist/formatters/console.js.map +1 -1
- package/dist/formatters/markdown.d.ts.map +1 -1
- package/dist/formatters/markdown.js +20 -2
- package/dist/formatters/markdown.js.map +1 -1
- package/dist/index.d.ts +12 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +8 -0
- package/dist/index.js.map +1 -1
- package/dist/rule-references.d.ts.map +1 -1
- package/dist/rule-references.js +5 -0
- package/dist/rule-references.js.map +1 -1
- package/dist/rules/content/heading-structure.d.ts +21 -0
- package/dist/rules/content/heading-structure.d.ts.map +1 -0
- package/dist/rules/content/heading-structure.js +56 -0
- package/dist/rules/content/heading-structure.js.map +1 -0
- package/dist/rules/content/image-alt-text.d.ts +18 -0
- package/dist/rules/content/image-alt-text.d.ts.map +1 -0
- package/dist/rules/content/image-alt-text.js +77 -0
- package/dist/rules/content/image-alt-text.js.map +1 -0
- package/dist/rules/content/title-uniqueness.d.ts +18 -0
- package/dist/rules/content/title-uniqueness.d.ts.map +1 -0
- package/dist/rules/content/title-uniqueness.js +70 -0
- package/dist/rules/content/title-uniqueness.js.map +1 -0
- package/dist/rules/links/host-section-divergence.d.ts +3 -0
- package/dist/rules/links/host-section-divergence.d.ts.map +1 -0
- package/dist/rules/links/host-section-divergence.js +158 -0
- package/dist/rules/links/host-section-divergence.js.map +1 -0
- package/dist/rules/links/link-depth.d.ts +12 -1
- package/dist/rules/links/link-depth.d.ts.map +1 -1
- package/dist/rules/links/link-depth.js +25 -12
- package/dist/rules/links/link-depth.js.map +1 -1
- package/dist/rules/scope.d.ts.map +1 -1
- package/dist/rules/scope.js +5 -0
- package/dist/rules/scope.js.map +1 -1
- package/dist/rules/spam/doorway-pattern.d.ts.map +1 -1
- package/dist/rules/spam/doorway-pattern.js +27 -4
- package/dist/rules/spam/doorway-pattern.js.map +1 -1
- package/dist/rules/spam/publication-velocity.d.ts +1 -1
- package/dist/rules/spam/publication-velocity.d.ts.map +1 -1
- package/dist/rules/spam/publication-velocity.js +9 -4
- package/dist/rules/spam/publication-velocity.js.map +1 -1
- package/dist/rules/spam/template-coverage.js +1 -1
- package/dist/rules/spam/template-coverage.js.map +1 -1
- package/dist/rules/spam/template-diversity.js +1 -1
- package/dist/rules/spam/template-diversity.js.map +1 -1
- package/dist/rules/tech/hreflang-consistency.d.ts.map +1 -1
- package/dist/rules/tech/hreflang-consistency.js +33 -4
- package/dist/rules/tech/hreflang-consistency.js.map +1 -1
- package/dist/rules/tech/og-completeness.d.ts +11 -0
- package/dist/rules/tech/og-completeness.d.ts.map +1 -1
- package/dist/rules/tech/og-completeness.js +22 -23
- package/dist/rules/tech/og-completeness.js.map +1 -1
- package/dist/ruleset-version.d.ts +8 -0
- package/dist/ruleset-version.d.ts.map +1 -0
- package/dist/ruleset-version.js +8 -0
- package/dist/ruleset-version.js.map +1 -0
- package/dist/scrape-strategy.d.ts +42 -0
- package/dist/scrape-strategy.d.ts.map +1 -0
- package/dist/scrape-strategy.js +101 -0
- package/dist/scrape-strategy.js.map +1 -0
- package/dist/site-classifier.d.ts.map +1 -1
- package/dist/site-classifier.js +1 -0
- package/dist/site-classifier.js.map +1 -1
- package/dist/state.d.ts +36 -1
- package/dist/state.d.ts.map +1 -1
- package/dist/state.js +3 -1
- package/dist/state.js.map +1 -1
- package/dist/stratified-sample.d.ts +9 -1
- package/dist/stratified-sample.d.ts.map +1 -1
- package/dist/stratified-sample.js +23 -6
- package/dist/stratified-sample.js.map +1 -1
- package/dist/types.d.ts +135 -2
- package/dist/types.d.ts.map +1 -1
- package/dist/url-normalize.d.ts.map +1 -1
- package/dist/url-normalize.js +13 -1
- package/dist/url-normalize.js.map +1 -1
- package/package.json +90 -90
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { cachedFetch } from "../../cache.js";
|
|
3
|
+
import { validateTargetHost } from "../../ssrf-guard.js";
|
|
4
|
+
import { probeCacheKey, readProbeCache, writeProbeCache } from "../probes/cache.js";
|
|
5
|
+
import { defineTool } from "./types.js";
|
|
6
|
+
/** Answer engines this tool can probe; both schemas build their `engine` enum from this list. */
const ENGINES = ["anthropic", "perplexity", "gemini"];
/** Model-facing help text for the optional `candidateUrl` argument. */
const CANDIDATE_URL_HELP = "When provided, the tool reports whether the engine cited this URL in its answer. Used for AEO citability probes — does ChatGPT/Claude/Perplexity cite *your* page when asked the question your page is targeting?";
/** Model-facing help text for the optional `apiKey` argument. */
const API_KEY_HELP = "Engine-specific API key. Falls back to ANTHROPIC_API_KEY / PERPLEXITY_API_KEY / GOOGLE_GENERATIVE_AI_API_KEY env vars.";
/**
 * Arguments accepted by `ask_ai_engine`.
 *
 * - `engine`: one of ENGINES.
 * - `query`: the question to ask, 1-500 characters.
 * - `candidateUrl`: optional URL to look for among the citations.
 * - `apiKey`: optional key, at least 8 characters.
 * - `cacheDir`: optional directory for the probe cache; caching is skipped when omitted.
 */
const inputSchema = z.object({
    engine: z.enum(ENGINES),
    query: z.string().min(1).max(500),
    candidateUrl: z.string().url().optional().describe(CANDIDATE_URL_HELP),
    apiKey: z.string().min(8).optional().describe(API_KEY_HELP),
    cacheDir: z.string().optional(),
});
/**
 * Result reported by `ask_ai_engine`.
 *
 * `candidateCited` is nullable: the tool reports `null` when no `candidateUrl`
 * was supplied, otherwise a boolean.
 */
const outputSchema = z.object({
    engine: z.enum(ENGINES),
    query: z.string(),
    answer: z.string(),
    citedUrls: z.array(z.string()),
    candidateCited: z.boolean().nullable(),
    fromCache: z.boolean(),
    // Approximate cost of this call, in USD. The orchestrator's BudgetTracker
    // sums these into the session-level USD cap. Cache hits report 0.
    apiCostUsd: z.number().nonnegative(),
});
|
|
35
|
+
/** Milliseconds in one hour. */
const MS_PER_HOUR = 3_600_000;
/** How long a cached probe result stays valid: 24 hours, in milliseconds. */
const DEFAULT_TTL_MS = 24 * MS_PER_HOUR;
/**
 * Estimated cost of a single call to each engine, in USD (conservative
 * midpoints). Keyed by engine name.
 */
const COST_BY_ENGINE_USD = {
    anthropic: 0.003,
    perplexity: 0.001,
    gemini: 0.001,
};
|
|
42
|
+
/**
 * Matches an absolute http(s) URL up to the first whitespace, angle bracket,
 * double quote, closing parenthesis or closing square bracket.
 */
const URL_RE = /https?:\/\/[^\s<>"\)\]]+/g;
/**
 * Sentence / markdown punctuation that commonly follows a URL in prose
 * ("see https://example.com." or "**https://example.com**") but is not part
 * of the URL itself.
 */
const URL_TRAILING_PUNCTUATION_RE = /[.,;:!?'*`]+$/;
/**
 * Collect the distinct http(s) URLs mentioned in free text.
 *
 * Trailing punctuation is removed before de-duplication. Without that step a
 * URL at the end of a sentence keeps its full stop or comma, which then ends
 * up in the parsed hostname ("example.com.") and never compares equal to the
 * real host, and the same URL written twice with different punctuation would
 * be reported as two citations.
 *
 * @param text Free-form text, e.g. a model's answer.
 * @returns Unique URLs in first-seen order; an empty array when none are found.
 */
function extractUrls(text) {
    const unique = new Set();
    for (const raw of text.match(URL_RE) ?? []) {
        const url = raw.replace(URL_TRAILING_PUNCTUATION_RE, "");
        // A match that was only punctuation after the scheme ("https://.")
        // collapses to a bare scheme — not a usable URL, so skip it.
        if (/^https?:\/\/$/.test(url))
            continue;
        unique.add(url);
    }
    return Array.from(unique);
}
|
|
47
|
+
/**
 * Report whether two URLs point at exactly the same hostname.
 *
 * The comparison is a strict equality of the parsed `hostname` values, so
 * path, query, scheme and port are ignored, while `www.example.com` and
 * `example.com` are treated as different hosts.
 *
 * @param citedUrl URL reported by the engine.
 * @param candidateUrl URL the caller is checking for.
 * @returns `true` when both parse and their hostnames are equal; `false`
 *          otherwise, including when either string is not a valid URL.
 */
function hostMatches(citedUrl, candidateUrl) {
    let citedHost;
    let candidateHost;
    try {
        citedHost = new URL(citedUrl).hostname;
        candidateHost = new URL(candidateUrl).hostname;
    }
    catch {
        // Either input failed to parse — unparseable URLs never match.
        return false;
    }
    return citedHost === candidateHost;
}
|
|
55
|
+
/**
 * Run the SSRF guard against the host of an outbound request URL.
 *
 * @param hopUrl Absolute URL about to be requested.
 * @param label Prefix for the error message (the calling tool's name).
 * @throws Error `"<label>: invalid URL <hopUrl>"` when `hopUrl` cannot be parsed.
 *         Whatever `validateTargetHost` rejects with propagates unchanged.
 */
async function validateHostHop(hopUrl, label) {
    let target;
    try {
        target = new URL(hopUrl);
    }
    catch {
        throw new Error(`${label}: invalid URL ${hopUrl}`);
    }
    // Only the hostname is checked; scheme, port and path are not passed on.
    await validateTargetHost(target.hostname);
}
|
|
65
|
+
/**
 * Ask Anthropic's Messages API a question and collect the URLs mentioned in
 * the reply.
 *
 * The prompt instructs the model to answer in 2-3 sentences and to include
 * the URLs of any webpages it cites verbatim, because citations are recovered
 * here purely by scanning the answer text.
 *
 * @param query The question to ask.
 * @param apiKey Anthropic API key, sent in the `x-api-key` header.
 * @param signal Optional abort signal, handed to `cachedFetch`.
 * @returns `{ answer, citedUrls }` — the joined text parts and the URLs found in them.
 * @throws Error on an HTTP status >= 400 or an `error` object in the payload;
 *         also propagates SSRF-guard, fetch and JSON-parse failures.
 */
async function askAnthropic(query, apiKey, signal) {
    const endpoint = "https://api.anthropic.com/v1/messages";
    await validateHostHop("https://api.anthropic.com", "ask_ai_engine");
    const requestHeaders = {
        "x-api-key": apiKey,
        "anthropic-version": "2023-06-01",
        "content-type": "application/json",
    };
    const requestBody = JSON.stringify({
        model: "claude-sonnet-4-6",
        max_tokens: 600,
        messages: [
            {
                role: "user",
                content: `Answer this in 2-3 sentences. If you cite specific webpages, include their URLs verbatim in your answer.\n\nQuestion: ${query}`,
            },
        ],
    });
    // The custom fetcher turns the request into a POST with the JSON body.
    const postMessage = async (url, init) => fetch(url, {
        ...init,
        method: "POST",
        headers: requestHeaders,
        body: requestBody,
    });
    const res = await cachedFetch(endpoint, {
        timeoutMs: 30_000,
        // NOTE(review): presumably disables cachedFetch's own response cache — confirm.
        cache: null,
        signal,
        fetcher: postMessage,
    });
    if (res.status >= 400) {
        throw new Error(`ask_ai_engine[anthropic]: HTTP ${res.status}`);
    }
    const payload = JSON.parse(res.body);
    if (payload.error) {
        throw new Error(`ask_ai_engine[anthropic]: ${payload.error.message ?? "unknown error"}`);
    }
    // Keep only content parts that are text and actually carry a string.
    const textParts = (payload.content ?? []).flatMap((part) => part.type === "text" && typeof part.text === "string" ? [part.text] : []);
    const answer = textParts.join("\n");
    return { answer, citedUrls: extractUrls(answer) };
}
|
|
104
|
+
/**
 * Ask Perplexity's chat-completions API a question and report its citations.
 *
 * The query is sent as-is as the single user message. Citations come from the
 * payload's `citations` array when it is non-empty; otherwise they are
 * recovered by scanning the answer text for URLs.
 *
 * @param query The question to ask.
 * @param apiKey Perplexity API key, sent as a bearer token.
 * @param signal Optional abort signal, handed to `cachedFetch`.
 * @returns `{ answer, citedUrls }` with `citedUrls` de-duplicated.
 * @throws Error on an HTTP status >= 400 or an `error` object in the payload;
 *         also propagates SSRF-guard, fetch and JSON-parse failures.
 */
async function askPerplexity(query, apiKey, signal) {
    const endpoint = "https://api.perplexity.ai/chat/completions";
    await validateHostHop("https://api.perplexity.ai", "ask_ai_engine");
    const requestHeaders = {
        authorization: `Bearer ${apiKey}`,
        "content-type": "application/json",
    };
    const requestBody = JSON.stringify({
        model: "sonar",
        messages: [{ role: "user", content: query }],
        max_tokens: 600,
    });
    const postCompletion = async (url, init) => fetch(url, {
        ...init,
        method: "POST",
        headers: requestHeaders,
        body: requestBody,
    });
    const res = await cachedFetch(endpoint, {
        timeoutMs: 30_000,
        // NOTE(review): presumably disables cachedFetch's own response cache — confirm.
        cache: null,
        signal,
        fetcher: postCompletion,
    });
    if (res.status >= 400) {
        throw new Error(`ask_ai_engine[perplexity]: HTTP ${res.status}`);
    }
    const payload = JSON.parse(res.body);
    if (payload.error) {
        throw new Error(`ask_ai_engine[perplexity]: ${payload.error.message ?? "unknown error"}`);
    }
    const answer = payload.choices?.[0]?.message?.content ?? "";
    // Prefer the citations the API reports natively over inline URL extraction.
    const nativeCitations = payload.citations;
    if (nativeCitations && nativeCitations.length > 0) {
        return { answer, citedUrls: Array.from(new Set(nativeCitations)) };
    }
    return { answer, citedUrls: extractUrls(answer) };
}
|
|
138
|
+
/**
 * Ask Gemini (`gemini-2.5-flash`, v1beta `generateContent`) a question with
 * Google Search grounding enabled and report the URLs it cited.
 *
 * Citations are gathered from `citationMetadata.citationSources[].uri` and
 * `groundingMetadata.groundingChunks[].web.uri` of the first candidate; only
 * when both are empty is the answer text scanned for URLs.
 *
 * The API key is sent in the `x-goog-api-key` header rather than a `?key=`
 * query parameter so it cannot leak through logged or echoed request URLs.
 * Grounding is requested with the `google_search` tool, the form documented
 * for current Gemini models (`google_search_retrieval` is the legacy
 * 1.5-era tool).
 *
 * @param query The question to ask.
 * @param apiKey Google Generative AI API key.
 * @param signal Optional abort signal, handed to `cachedFetch`.
 * @returns `{ answer, citedUrls }` with `citedUrls` de-duplicated.
 * @throws Error on an HTTP status >= 400 or an `error` object in the payload;
 *         also propagates SSRF-guard, fetch and JSON-parse failures.
 */
async function askGemini(query, apiKey, signal) {
    const url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent";
    await validateHostHop(url, "ask_ai_engine");
    const res = await cachedFetch(url, {
        timeoutMs: 30_000,
        cache: null,
        signal,
        fetcher: async (u, init) => fetch(u, {
            ...init,
            method: "POST",
            headers: {
                "content-type": "application/json",
                "x-goog-api-key": apiKey,
            },
            body: JSON.stringify({
                contents: [{ parts: [{ text: query }] }],
                tools: [{ google_search: {} }],
            }),
        }),
    });
    if (res.status >= 400) {
        throw new Error(`ask_ai_engine[gemini]: HTTP ${res.status}`);
    }
    const payload = JSON.parse(res.body);
    if (payload.error) {
        throw new Error(`ask_ai_engine[gemini]: ${payload.error.message ?? "unknown error"}`);
    }
    // Only the first candidate is inspected.
    const candidate = payload.candidates?.[0];
    const answer = (candidate?.content?.parts ?? []).map((p) => p.text ?? "").join("");
    const citedUrls = new Set();
    for (const c of candidate?.citationMetadata?.citationSources ?? []) {
        if (c.uri)
            citedUrls.add(c.uri);
    }
    // NOTE(review): grounding chunk URIs may be Google redirect links rather
    // than the publisher's own URL — confirm that hostname matching against a
    // candidate URL behaves as intended for this engine.
    for (const chunk of candidate?.groundingMetadata?.groundingChunks ?? []) {
        if (chunk.web?.uri)
            citedUrls.add(chunk.web.uri);
    }
    // Fall back to URLs written inline in the answer when no structured
    // citation metadata came back.
    if (citedUrls.size === 0) {
        for (const u of extractUrls(answer))
            citedUrls.add(u);
    }
    return { answer, citedUrls: Array.from(citedUrls) };
}
|
|
179
|
+
/**
 * Probe an AI answer engine with a query — the AEO citability check. The
 * same engines users ask are asked directly, and the URLs they cite are
 * reported. When `candidateUrl` is provided, `candidateCited` says whether
 * any cited URL shares its hostname; otherwise `candidateCited` is `null`.
 *
 * Provider key resolution: explicit `apiKey` argument, then the
 * engine-specific environment variable. The call fails when neither is set.
 *
 * Caching (only when `cacheDir` is given): results are keyed on
 * `engine + query` and read back with a 24h TTL. A cache hit reports
 * `fromCache: true` and `apiCostUsd: 0`, and `candidateCited` is recomputed
 * against the cached citations so a different `candidateUrl` still gets a
 * correct answer. Both cache reads and cache writes are best-effort: a
 * failing or malformed cache entry is treated as a miss and the engine is
 * queried fresh.
 */
export const askAiEngineTool = defineTool({
    name: "ask_ai_engine",
    description: "Ask an AI answer engine (Anthropic, Perplexity, or Gemini) a query and report what it answered + which URLs it cited. Pass `candidateUrl` to check whether the engine cited that specific page (by hostname). This is the literal AEO citability test — no heuristic, no proxy, just measurement. Cached 24h per (engine, query). Costs vary: Anthropic ~$0.003/call, Perplexity ~$0.001, Gemini ~$0.001.",
    inputSchema,
    outputSchema,
    async execute({ engine, query, candidateUrl, apiKey, cacheDir }, ctx) {
        // Single source of truth for the env var, used both for the lookup
        // and for the error message.
        const envVar = engine === "anthropic" ? "ANTHROPIC_API_KEY" :
            engine === "perplexity" ? "PERPLEXITY_API_KEY" :
                "GOOGLE_GENERATIVE_AI_API_KEY";
        const key = apiKey ?? process.env[envVar];
        if (!key) {
            throw new Error(`ask_ai_engine: no API key for ${engine} (pass apiKey or set ${envVar})`);
        }
        // `null` when no candidate was given, otherwise whether any cited
        // URL shares the candidate's hostname.
        const wasCited = (urls) => candidateUrl
            ? urls.some((u) => hostMatches(u, candidateUrl))
            : null;
        // Computed once and shared by the read and the write below.
        const cacheKey = cacheDir ? probeCacheKey("ask_ai_engine", engine, query) : undefined;
        if (cacheDir) {
            let cached;
            try {
                cached = await readProbeCache(cacheDir, cacheKey, DEFAULT_TTL_MS);
            }
            catch {
                // Cache read failures are non-fatal — fall through to a fresh probe.
                cached = undefined;
            }
            // An entry without a citedUrls array is unusable; treat it as a miss.
            if (cached && Array.isArray(cached.citedUrls)) {
                // Cache hits don't bill; candidateCited is re-evaluated for
                // the candidateUrl of *this* call.
                return {
                    ...cached,
                    candidateCited: wasCited(cached.citedUrls),
                    fromCache: true,
                    apiCostUsd: 0,
                };
            }
        }
        const ask = engine === "anthropic" ? askAnthropic :
            engine === "perplexity" ? askPerplexity :
                askGemini;
        const probe = await ask(query, key, ctx?.signal);
        const result = {
            engine,
            query,
            answer: probe.answer,
            citedUrls: probe.citedUrls,
            candidateCited: wasCited(probe.citedUrls),
            fromCache: false,
            apiCostUsd: COST_BY_ENGINE_USD[engine],
        };
        if (cacheDir) {
            try {
                await writeProbeCache(cacheDir, cacheKey, result);
            }
            catch {
                // Cache write failures are non-fatal.
            }
        }
        return result;
    },
});
|
|
253
|
+
//# sourceMappingURL=ask-ai-engine.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ask-ai-engine.js","sourceRoot":"","sources":["../../../src/ai/tools/ask-ai-engine.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAC7C,OAAO,EAAE,kBAAkB,EAAE,MAAM,qBAAqB,CAAC;AACzD,OAAO,EAAE,aAAa,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACpF,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAExC,MAAM,OAAO,GAAG,CAAC,WAAW,EAAE,YAAY,EAAE,QAAQ,CAAU,CAAC;AAE/D,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC;IAC3B,MAAM,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC;IACvB,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC;IACjC,YAAY,EAAE,CAAC;SACZ,MAAM,EAAE;SACR,GAAG,EAAE;SACL,QAAQ,EAAE;SACV,QAAQ,CACP,mNAAmN,CACpN;IACH,MAAM,EAAE,CAAC;SACN,MAAM,EAAE;SACR,GAAG,CAAC,CAAC,CAAC;SACN,QAAQ,EAAE;SACV,QAAQ,CACP,wHAAwH,CACzH;IACH,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;CAChC,CAAC,CAAC;AAEH,MAAM,YAAY,GAAG,CAAC,CAAC,MAAM,CAAC;IAC5B,MAAM,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC;IACvB,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE;IACjB,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE;IAClB,SAAS,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;IAC9B,cAAc,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE;IACtC,SAAS,EAAE,CAAC,CAAC,OAAO,EAAE;IACtB;;;OAGG;IACH,UAAU,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,WAAW,EAAE;CACrC,CAAC,CAAC;AAEH,MAAM,cAAc,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;AAE3C,4DAA4D;AAC5D,MAAM,kBAAkB,GAA2C;IACjE,SAAS,EAAE,KAAK;IAChB,UAAU,EAAE,KAAK;IACjB,MAAM,EAAE,KAAK;CACd,CAAC;AAIF,MAAM,MAAM,GAAG,2BAA2B,CAAC;AAE3C,SAAS,WAAW,CAAC,IAAY;IAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;IACzC,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC;AACtC,CAAC;AAED,SAAS,WAAW,CAAC,QAAgB,EAAE,YAAoB;IACzD,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC,QAAQ,KAAK,IAAI,GAAG,CAAC,YAAY,CAAC,CAAC,QAAQ,CAAC;IACvE,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,KAAK,UAAU,eAAe,CAAC,MAAc,EAAE,KAAa;IAC1D,IAAI,IAAY,CAAC;IACjB,IAAI,CAAC;QACH,IAAI,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC;IAClC,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KA
AK,CAAC,GAAG,KAAK,iBAAiB,MAAM,EAAE,CAAC,CAAC;IACrD,CAAC;IACD,MAAM,kBAAkB,CAAC,IAAI,CAAC,CAAC;AACjC,CAAC;AAOD,KAAK,UAAU,YAAY,CAAC,KAAa,EAAE,MAAc,EAAE,MAAoB;IAC7E,MAAM,eAAe,CAAC,2BAA2B,EAAE,eAAe,CAAC,CAAC;IACpE,MAAM,GAAG,GAAG,MAAM,WAAW,CAAC,uCAAuC,EAAE;QACrE,SAAS,EAAE,MAAM;QACjB,KAAK,EAAE,IAAI;QACX,MAAM;QACN,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,IAAI,EAAE,EAAE,CAC3B,KAAK,CAAC,GAAG,EAAE;YACT,GAAG,IAAI;YACP,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,WAAW,EAAE,MAAM;gBACnB,mBAAmB,EAAE,YAAY;gBACjC,cAAc,EAAE,kBAAkB;aACnC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,KAAK,EAAE,mBAAmB;gBAC1B,UAAU,EAAE,GAAG;gBACf,QAAQ,EAAE;oBACR;wBACE,IAAI,EAAE,MAAM;wBACZ,OAAO,EAAE,yHAAyH,KAAK,EAAE;qBAC1I;iBACF;aACF,CAAC;SACH,CAAC;KACL,CAAC,CAAC;IACH,IAAI,GAAG,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;QACtB,MAAM,IAAI,KAAK,CAAC,kCAAkC,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC;IAClE,CAAC;IACD,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAsB,CAAC;IAC1D,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;QAClB,MAAM,IAAI,KAAK,CAAC,6BAA6B,OAAO,CAAC,KAAK,CAAC,OAAO,IAAI,eAAe,EAAE,CAAC,CAAC;IAC3F,CAAC;IACD,MAAM,MAAM,GAAG,CAAC,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC;SACnC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,IAAI,OAAO,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC;SAC9D,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAK,CAAC;SACnB,IAAI,CAAC,IAAI,CAAC,CAAC;IACd,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,WAAW,CAAC,MAAM,CAAC,EAAE,CAAC;AACpD,CAAC;AAQD,KAAK,UAAU,aAAa,CAAC,KAAa,EAAE,MAAc,EAAE,MAAoB;IAC9E,MAAM,eAAe,CAAC,2BAA2B,EAAE,eAAe,CAAC,CAAC;IACpE,MAAM,GAAG,GAAG,MAAM,WAAW,CAAC,4CAA4C,EAAE;QAC1E,SAAS,EAAE,MAAM;QACjB,KAAK,EAAE,IAAI;QACX,MAAM;QACN,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,IAAI,EAAE,EAAE,CAC3B,KAAK,CAAC,GAAG,EAAE;YACT,GAAG,IAAI;YACP,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,aAAa,EAAE,UAAU,MAAM,EAAE;gBACjC,cAAc,EAAE,kBAAkB;aACnC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,KAAK,EAAE,OAAO;gBACd,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;gBAC5C,UAAU,EAAE,GAAG;aAChB,CAAC;SACH,CAAC;KACL,CAAC,CAAC;IACH,IAAI,GAAG,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;QACtB,MAAM,IAAI,KAAK,CAAC,mCAA
mC,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC;IACnE,CAAC;IACD,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAuB,CAAC;IAC3D,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;QAClB,MAAM,IAAI,KAAK,CAAC,8BAA8B,OAAO,CAAC,KAAK,CAAC,OAAO,IAAI,eAAe,EAAE,CAAC,CAAC;IAC5F,CAAC;IACD,MAAM,MAAM,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,IAAI,EAAE,CAAC;IAC5D,4FAA4F;IAC5F,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,OAAO,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC;QACjE,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;QACxC,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;IACxB,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;AAC/B,CAAC;AAoBD,KAAK,UAAU,SAAS,CAAC,KAAa,EAAE,MAAc,EAAE,MAAoB;IAC1E,MAAM,GAAG,GAAG,gGAAgG,kBAAkB,CAAC,MAAM,CAAC,EAAE,CAAC;IACzI,MAAM,eAAe,CAAC,GAAG,EAAE,eAAe,CAAC,CAAC;IAC5C,MAAM,GAAG,GAAG,MAAM,WAAW,CAAC,GAAG,EAAE;QACjC,SAAS,EAAE,MAAM;QACjB,KAAK,EAAE,IAAI;QACX,MAAM;QACN,OAAO,EAAE,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE,EAAE,CACzB,KAAK,CAAC,CAAC,EAAE;YACP,GAAG,IAAI;YACP,MAAM,EAAE,MAAM;YACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE;YAC/C,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,QAAQ,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;gBACxC,KAAK,EAAE,CAAC,EAAE,qBAAqB,EAAE,EAAE,EAAE,CAAC;aACvC,CAAC;SACH,CAAC;KACL,CAAC,CAAC;IACH,IAAI,GAAG,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;QACtB,MAAM,IAAI,KAAK,CAAC,+BAA+B,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC;IAC/D,CAAC;IACD,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAmB,CAAC;IACvD,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;QAClB,MAAM,IAAI,KAAK,CAAC,0BAA0B,OAAO,CAAC,KAAK,CAAC,OAAO,IAAI,eAAe,EAAE,CAAC,CAAC;IACxF,CAAC;IACD,MAAM,SAAS,GAAG,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAG,CAAC,SAAS,EAAE,OAAO,EAAE,KAAK,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEnF,MAAM,SAAS,GAAG,IAAI,GAAG,EAAU,CAAC;IACpC,KAAK,MAAM,CAAC,IAAI,SAAS,EAAE,gBAAgB,EAAE,eAAe,IAAI,EAAE,EAAE,CAAC;QACnE,IAAI,CAAC,CAAC,GAAG;YAAE,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IAClC,CAAC;IACD,KAAK,MAAM,KAAK,IAAI,SAAS,EAAE,iBAAiB,EAAE,eAAe,I
AAI,EAAE,EAAE,CAAC;QACxE,IAAI,KAAK,CAAC,GAAG,EAAE,GAAG;YAAE,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IACnD,CAAC;IACD,IAAI,SAAS,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;QACzB,KAAK,MAAM,CAAC,IAAI,WAAW,CAAC,MAAM,CAAC;YAAE,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IACxD,CAAC;IACD,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;AACtD,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,CAAC,MAAM,eAAe,GAAG,UAAU,CAAC;IACxC,IAAI,EAAE,eAAe;IACrB,WAAW,EACT,2YAA2Y;IAC7Y,WAAW;IACX,YAAY;IACZ,KAAK,CAAC,OAAO,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,YAAY,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,GAAG;QAClE,MAAM,MAAM,GACV,MAAM,KAAK,WAAW,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAC;YACxD,MAAM,KAAK,YAAY,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAC,CAAC;gBAC1D,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC;QAC3C,MAAM,GAAG,GAAG,MAAM,IAAI,MAAM,CAAC;QAC7B,IAAI,CAAC,GAAG,EAAE,CAAC;YACT,MAAM,MAAM,GACV,MAAM,KAAK,WAAW,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC;gBAC9C,MAAM,KAAK,YAAY,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC;oBAChD,8BAA8B,CAAC;YACjC,MAAM,IAAI,KAAK,CAAC,iCAAiC,MAAM,wBAAwB,MAAM,GAAG,CAAC,CAAC;QAC5F,CAAC;QAED,IAAI,QAAQ,EAAE,CAAC;YACb,MAAM,QAAQ,GAAG,aAAa,CAAC,eAAe,EAAE,MAAM,EAAE,KAAK,CAAC,CAAC;YAC/D,MAAM,MAAM,GAAG,MAAM,cAAc,CAAY,QAAQ,EAAE,QAAQ,EAAE,cAAc,CAAC,CAAC;YACnF,IAAI,MAAM,EAAE,CAAC;gBACX,iEAAiE;gBACjE,8DAA8D;gBAC9D,wCAAwC;gBACxC,MAAM,cAAc,GAAG,YAAY;oBACjC,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,WAAW,CAAC,CAAC,EAAE,YAAY,CAAC,CAAC;oBAC5D,CAAC,CAAC,IAAI,CAAC;gBACT,OAAO,EAAE,GAAG,MAAM,EAAE,cAAc,EAAE,SAAS,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC,EAAE,CAAC;YACvE,CAAC;QACH,CAAC;QAED,IAAI,KAA8C,CAAC;QACnD,IAAI,MAAM,KAAK,WAAW,EAAE,CAAC;YAC3B,KAAK,GAAG,MAAM,YAAY,CAAC,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QACtD,CAAC;aAAM,IAAI,MAAM,KAAK,YAAY,EAAE,CAAC;YACnC,KAAK,GAAG,MAAM,aAAa,CAAC,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QACvD,CAAC;aAAM,CAAC;YACN,KAAK,GAAG,MAAM,SAAS,CAAC,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QACnD,CAAC;QAED,MAAM,cAAc,GAAG,YAAY;YACjC,CAAC,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,
EAAE,CAAC,WAAW,CAAC,CAAC,EAAE,YAAY,CAAC,CAAC;YAC3D,CAAC,CAAC,IAAI,CAAC;QAET,MAAM,MAAM,GAAc;YACxB,MAAM;YACN,KAAK;YACL,MAAM,EAAE,KAAK,CAAC,MAAM;YACpB,SAAS,EAAE,KAAK,CAAC,SAAS;YAC1B,cAAc;YACd,SAAS,EAAE,KAAK;YAChB,UAAU,EAAE,kBAAkB,CAAC,MAAM,CAAC;SACvC,CAAC;QAEF,IAAI,QAAQ,EAAE,CAAC;YACb,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,aAAa,CAAC,eAAe,EAAE,MAAM,EAAE,KAAK,CAAC,CAAC;gBAC/D,MAAM,eAAe,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC;YACpD,CAAC;YAAC,MAAM,CAAC;gBACP,sCAAsC;YACxC,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;CACF,CAAC,CAAC"}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
/**
|
|
3
|
+
* Domain-level AEO check: does the site's robots.txt block AI crawlers
|
|
4
|
+
* (GPTBot, ClaudeBot, PerplexityBot, Google-Extended, etc.)? Wraps
|
|
5
|
+
* `crawlerAccessRule` with a built-in fetcher.
|
|
6
|
+
*
|
|
7
|
+
* Call this once per domain near the start of an audit alongside
|
|
8
|
+
* `check_domain_llms_txt`. Sites that disallow GPTBot in robots.txt are
|
|
9
|
+
* uncitable in ChatGPT regardless of how good their content is.
|
|
10
|
+
*/
|
|
11
|
+
export declare const checkDomainCrawlerAccessTool: {
|
|
12
|
+
name: string;
|
|
13
|
+
description: string;
|
|
14
|
+
inputSchema: z.ZodType<{
|
|
15
|
+
origin: string;
|
|
16
|
+
timeoutMs?: number | undefined;
|
|
17
|
+
}, unknown, z.core.$ZodTypeInternals<{
|
|
18
|
+
origin: string;
|
|
19
|
+
timeoutMs?: number | undefined;
|
|
20
|
+
}, unknown>>;
|
|
21
|
+
outputSchema: z.ZodType<{
|
|
22
|
+
origin: string;
|
|
23
|
+
robotsTxtUrl: string;
|
|
24
|
+
robotsTxtPresent: boolean;
|
|
25
|
+
findings: {
|
|
26
|
+
ruleId: "aeo/crawler-access";
|
|
27
|
+
severity: "info" | "warning" | "error" | "critical";
|
|
28
|
+
message: string;
|
|
29
|
+
fix?: string | undefined;
|
|
30
|
+
}[];
|
|
31
|
+
}, unknown, z.core.$ZodTypeInternals<{
|
|
32
|
+
origin: string;
|
|
33
|
+
robotsTxtUrl: string;
|
|
34
|
+
robotsTxtPresent: boolean;
|
|
35
|
+
findings: {
|
|
36
|
+
ruleId: "aeo/crawler-access";
|
|
37
|
+
severity: "info" | "warning" | "error" | "critical";
|
|
38
|
+
message: string;
|
|
39
|
+
fix?: string | undefined;
|
|
40
|
+
}[];
|
|
41
|
+
}, unknown>>;
|
|
42
|
+
toAiTool(): import("ai").Tool<{
|
|
43
|
+
origin: string;
|
|
44
|
+
timeoutMs?: number | undefined;
|
|
45
|
+
}, import("./types.js").ToolResult<{
|
|
46
|
+
origin: string;
|
|
47
|
+
robotsTxtUrl: string;
|
|
48
|
+
robotsTxtPresent: boolean;
|
|
49
|
+
findings: {
|
|
50
|
+
ruleId: "aeo/crawler-access";
|
|
51
|
+
severity: "info" | "warning" | "error" | "critical";
|
|
52
|
+
message: string;
|
|
53
|
+
fix?: string | undefined;
|
|
54
|
+
}[];
|
|
55
|
+
}>>;
|
|
56
|
+
run(input: {
|
|
57
|
+
origin: string;
|
|
58
|
+
timeoutMs?: number | undefined;
|
|
59
|
+
}, ctx?: import("./types.js").ToolExecuteContext): Promise<import("./types.js").ToolResult<{
|
|
60
|
+
origin: string;
|
|
61
|
+
robotsTxtUrl: string;
|
|
62
|
+
robotsTxtPresent: boolean;
|
|
63
|
+
findings: {
|
|
64
|
+
ruleId: "aeo/crawler-access";
|
|
65
|
+
severity: "info" | "warning" | "error" | "critical";
|
|
66
|
+
message: string;
|
|
67
|
+
fix?: string | undefined;
|
|
68
|
+
}[];
|
|
69
|
+
}>>;
|
|
70
|
+
};
|
|
71
|
+
//# sourceMappingURL=check-domain-crawler-access.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"check-domain-crawler-access.d.ts","sourceRoot":"","sources":["../../../src/ai/tools/check-domain-crawler-access.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AA4BxB;;;;;;;;GAQG;AACH,eAAO,MAAM,4BAA4B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA4CvC,CAAC"}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { crawlerAccessRule } from "../../rules/aeo/crawler-access.js";
|
|
3
|
+
import { cachedFetch } from "../../cache.js";
|
|
4
|
+
import { validateTargetHost } from "../../ssrf-guard.js";
|
|
5
|
+
import { defineTool } from "./types.js";
|
|
6
|
+
// Input contract: the site origin plus an optional per-request timeout
// (a positive integer, capped at 15 000 by the schema).
const originField = z
    .string()
    .url()
    .describe("Site origin (e.g. https://example.com). The tool fetches /robots.txt.");
const timeoutField = z.number().int().positive().max(15_000).optional();
const inputSchema = z.object({
    origin: originField,
    timeoutMs: timeoutField,
});

// Shape of a single finding as returned by this tool; `ruleId` is pinned to
// the crawler-access rule and `fix` is optional.
const crawlerAccessFindingSchema = z.object({
    ruleId: z.literal("aeo/crawler-access"),
    severity: z.enum(["info", "warning", "error", "critical"]),
    message: z.string(),
    fix: z.string().optional(),
});

// Output contract: the normalised origin, the robots.txt URL that was
// requested, whether a 2xx response was received, and the rule findings.
const outputSchema = z.object({
    origin: z.string(),
    robotsTxtUrl: z.string(),
    robotsTxtPresent: z.boolean(),
    findings: z.array(crawlerAccessFindingSchema),
});
|
|
24
|
+
/**
 * Domain-level AEO tool: reports whether the site's `/robots.txt` blocks AI
 * crawlers (GPTBot, ClaudeBot, PerplexityBot, Google-Extended, etc.) by
 * running `crawlerAccessRule` over the fetched file.
 *
 * Flow, as implemented below:
 *  1. Normalise `origin` with `new URL(...).origin` and append `/robots.txt`.
 *  2. Fetch that URL via `cachedFetch` with `cache: null`, the caller's
 *     abort signal (when a context is supplied) and a `validateHop` callback
 *     that runs `validateTargetHost` on the hostname of the URL it is given.
 *  3. A 2xx status marks the file as present and its body becomes the rule
 *     input; any other status, or any thrown error, leaves the input as an
 *     empty string and `robotsTxtPresent` as `false`.
 *  4. Rule findings are re-emitted with a fixed `ruleId` of
 *     `"aeo/crawler-access"`.
 *
 * Call this once per domain near the start of an audit, alongside
 * `check_domain_llms_txt`.
 */
export const checkDomainCrawlerAccessTool = defineTool({
    name: "check_domain_crawler_access",
    description: "Check whether the site's /robots.txt blocks AI crawlers (GPTBot, ClaudeBot, PerplexityBot, Google-Extended, etc.). Call once per domain. Sites that disallow these user agents are uncitable in the corresponding AI engine, regardless of content quality. Returns `robotsTxtPresent: false` when the file is absent.",
    inputSchema,
    outputSchema,
    async execute({ origin, timeoutMs = 10_000 }, ctx) {
        // Normalise once; throws here (as before) if `origin` is not parseable.
        const siteOrigin = new URL(origin).origin;
        const robotsTxtUrl = `${siteOrigin}/robots.txt`;

        // Host validation callback handed to cachedFetch as `validateHop`.
        // NOTE(review): presumably invoked for every redirect hop - confirm in cache.js.
        const validateHop = async (hopUrl) => {
            let parsedHop;
            try {
                parsedHop = new URL(hopUrl);
            }
            catch {
                throw new Error(`check_domain_crawler_access: invalid URL ${hopUrl}`);
            }
            await validateTargetHost(parsedHop.hostname);
        };

        // Defaults describe the "no usable robots.txt" outcome.
        const fetched = { present: false, content: "" };
        try {
            const response = await cachedFetch(robotsTxtUrl, {
                timeoutMs,
                cache: null,
                validateHop,
                signal: ctx?.signal,
            });
            const isSuccess = response.status >= 200 && response.status < 300;
            if (isSuccess) {
                fetched.content = response.body;
                fetched.present = true;
            }
        }
        catch {
            // Best effort: a failed or rejected fetch is reported the same way
            // as a missing file, and the rule still runs on the empty string.
        }

        const ruleFindings = crawlerAccessRule(fetched.content);
        const toToolFinding = ({ severity, message, fix }) => ({
            ruleId: "aeo/crawler-access",
            severity,
            message,
            fix,
        });

        return {
            origin: siteOrigin,
            robotsTxtUrl,
            robotsTxtPresent: fetched.present,
            findings: ruleFindings.map(toToolFinding),
        };
    },
});
|
|
76
|
+
//# sourceMappingURL=check-domain-crawler-access.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"check-domain-crawler-access.js","sourceRoot":"","sources":["../../../src/ai/tools/check-domain-crawler-access.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,iBAAiB,EAAE,MAAM,mCAAmC,CAAC;AACtE,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAC7C,OAAO,EAAE,kBAAkB,EAAE,MAAM,qBAAqB,CAAC;AACzD,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAExC,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC;IAC3B,MAAM,EAAE,CAAC;SACN,MAAM,EAAE;SACR,GAAG,EAAE;SACL,QAAQ,CAAC,uEAAuE,CAAC;IACpF,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE;CAC9D,CAAC,CAAC;AAEH,MAAM,YAAY,GAAG,CAAC,CAAC,MAAM,CAAC;IAC5B,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE;IAClB,YAAY,EAAE,CAAC,CAAC,MAAM,EAAE;IACxB,gBAAgB,EAAE,CAAC,CAAC,OAAO,EAAE;IAC7B,QAAQ,EAAE,CAAC,CAAC,KAAK,CACf,CAAC,CAAC,MAAM,CAAC;QACP,MAAM,EAAE,CAAC,CAAC,OAAO,CAAC,oBAAoB,CAAC;QACvC,QAAQ,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,UAAU,CAAC,CAAC;QAC1D,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE;QACnB,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;KAC3B,CAAC,CACH;CACF,CAAC,CAAC;AAEH;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,4BAA4B,GAAG,UAAU,CAAC;IACrD,IAAI,EAAE,6BAA6B;IACnC,WAAW,EACT,wTAAwT;IAC1T,WAAW;IACX,YAAY;IACZ,KAAK,CAAC,OAAO,CAAC,EAAE,MAAM,EAAE,SAAS,GAAG,MAAM,EAAE,EAAE,GAAG;QAC/C,MAAM,SAAS,GAAG,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,MAAM,aAAa,CAAC;QACzD,MAAM,WAAW,GAAG,KAAK,EAAE,MAAc,EAAiB,EAAE;YAC1D,IAAI,IAAY,CAAC;YACjB,IAAI,CAAC;gBACH,IAAI,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC;YAClC,CAAC;YAAC,MAAM,CAAC;gBACP,MAAM,IAAI,KAAK,CAAC,4CAA4C,MAAM,EAAE,CAAC,CAAC;YACxE,CAAC;YACD,MAAM,kBAAkB,CAAC,IAAI,CAAC,CAAC;QACjC,CAAC,CAAC;QAEF,IAAI,gBAAgB,GAAG,EAAE,CAAC;QAC1B,IAAI,gBAAgB,GAAG,KAAK,CAAC;QAC7B,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,MAAM,WAAW,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,CAAC,CAAC;YACvG,IAAI,GAAG,CAAC,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;gBAC1C,gBAAgB,GAAG,GAAG,CAAC,IAAI,CAAC;gBAC5B,gBAAgB,GAAG,IAAI,CAAC;YAC1B,CAAC;QACH,CAAC;QAA
C,MAAM,CAAC;YACP,wEAAwE;QAC1E,CAAC;QAED,MAAM,QAAQ,GAAG,iBAAiB,CAAC,gBAAgB,CAAC,CAAC;QAErD,OAAO;YACL,MAAM,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,MAAM;YAC9B,YAAY,EAAE,SAAS;YACvB,gBAAgB;YAChB,QAAQ,EAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBAC7B,MAAM,EAAE,oBAA6B;gBACrC,QAAQ,EAAE,CAAC,CAAC,QAAQ;gBACpB,OAAO,EAAE,CAAC,CAAC,OAAO;gBAClB,GAAG,EAAE,CAAC,CAAC,GAAG;aACX,CAAC,CAAC;SACJ,CAAC;IACJ,CAAC;CACF,CAAC,CAAC"}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
/**
|
|
3
|
+
* Domain-level AEO check: does the site publish a valid /llms.txt? Wraps
|
|
4
|
+
* `llmsTxtRule` with a built-in fetcher that goes through the SSRF guard.
|
|
5
|
+
*
|
|
6
|
+
* Unlike the per-page rules, this is called once per domain — the
|
|
7
|
+
* orchestrator typically calls this near the top of the audit alongside
|
|
8
|
+
* `check_domain_crawler_access`.
|
|
9
|
+
*/
|
|
10
|
+
export declare const checkDomainLlmsTxtTool: {
|
|
11
|
+
name: string;
|
|
12
|
+
description: string;
|
|
13
|
+
inputSchema: z.ZodType<{
|
|
14
|
+
origin: string;
|
|
15
|
+
timeoutMs?: number | undefined;
|
|
16
|
+
}, unknown, z.core.$ZodTypeInternals<{
|
|
17
|
+
origin: string;
|
|
18
|
+
timeoutMs?: number | undefined;
|
|
19
|
+
}, unknown>>;
|
|
20
|
+
outputSchema: z.ZodType<{
|
|
21
|
+
origin: string;
|
|
22
|
+
llmsTxtUrl: string;
|
|
23
|
+
found: boolean;
|
|
24
|
+
findings: {
|
|
25
|
+
ruleId: "aeo/llms-txt";
|
|
26
|
+
severity: "info" | "warning" | "error" | "critical";
|
|
27
|
+
message: string;
|
|
28
|
+
fix?: string | undefined;
|
|
29
|
+
}[];
|
|
30
|
+
}, unknown, z.core.$ZodTypeInternals<{
|
|
31
|
+
origin: string;
|
|
32
|
+
llmsTxtUrl: string;
|
|
33
|
+
found: boolean;
|
|
34
|
+
findings: {
|
|
35
|
+
ruleId: "aeo/llms-txt";
|
|
36
|
+
severity: "info" | "warning" | "error" | "critical";
|
|
37
|
+
message: string;
|
|
38
|
+
fix?: string | undefined;
|
|
39
|
+
}[];
|
|
40
|
+
}, unknown>>;
|
|
41
|
+
toAiTool(): import("ai").Tool<{
|
|
42
|
+
origin: string;
|
|
43
|
+
timeoutMs?: number | undefined;
|
|
44
|
+
}, import("./types.js").ToolResult<{
|
|
45
|
+
origin: string;
|
|
46
|
+
llmsTxtUrl: string;
|
|
47
|
+
found: boolean;
|
|
48
|
+
findings: {
|
|
49
|
+
ruleId: "aeo/llms-txt";
|
|
50
|
+
severity: "info" | "warning" | "error" | "critical";
|
|
51
|
+
message: string;
|
|
52
|
+
fix?: string | undefined;
|
|
53
|
+
}[];
|
|
54
|
+
}>>;
|
|
55
|
+
run(input: {
|
|
56
|
+
origin: string;
|
|
57
|
+
timeoutMs?: number | undefined;
|
|
58
|
+
}, ctx?: import("./types.js").ToolExecuteContext): Promise<import("./types.js").ToolResult<{
|
|
59
|
+
origin: string;
|
|
60
|
+
llmsTxtUrl: string;
|
|
61
|
+
found: boolean;
|
|
62
|
+
findings: {
|
|
63
|
+
ruleId: "aeo/llms-txt";
|
|
64
|
+
severity: "info" | "warning" | "error" | "critical";
|
|
65
|
+
message: string;
|
|
66
|
+
fix?: string | undefined;
|
|
67
|
+
}[];
|
|
68
|
+
}>>;
|
|
69
|
+
};
|
|
70
|
+
//# sourceMappingURL=check-domain-llms-txt.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"check-domain-llms-txt.d.ts","sourceRoot":"","sources":["../../../src/ai/tools/check-domain-llms-txt.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AA4BxB;;;;;;;GAOG;AACH,eAAO,MAAM,sBAAsB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA2CjC,CAAC"}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { llmsTxtRule } from "../../rules/aeo/llms-txt.js";
|
|
3
|
+
import { cachedFetch } from "../../cache.js";
|
|
4
|
+
import { validateTargetHost } from "../../ssrf-guard.js";
|
|
5
|
+
import { defineTool } from "./types.js";
|
|
6
|
+
// Input contract: the site origin plus an optional per-request timeout
// (a positive integer, capped at 15 000 by the schema).
const originField = z
    .string()
    .url()
    .describe("Site origin (e.g. https://example.com). The tool fetches /llms.txt automatically.");
const timeoutField = z.number().int().positive().max(15_000).optional();
const inputSchema = z.object({
    origin: originField,
    timeoutMs: timeoutField,
});

// Shape of a single finding as returned by this tool; `ruleId` is pinned to
// the llms.txt rule and `fix` is optional.
const llmsTxtFindingSchema = z.object({
    ruleId: z.literal("aeo/llms-txt"),
    severity: z.enum(["info", "warning", "error", "critical"]),
    message: z.string(),
    fix: z.string().optional(),
});

// Output contract: the normalised origin, the llms.txt URL reported to the
// caller, the derived `found` flag, and the rule findings.
const outputSchema = z.object({
    origin: z.string(),
    llmsTxtUrl: z.string(),
    found: z.boolean(),
    findings: z.array(llmsTxtFindingSchema),
});
|
|
24
|
+
/**
 * Domain-level AEO tool: checks whether the site publishes a valid
 * `/llms.txt` by delegating to `llmsTxtRule`, supplying it with a fetcher
 * built on `cachedFetch`.
 *
 * The fetcher requests with `cache: null`, forwards the caller's abort signal
 * (when a context is supplied) and passes a `validateHop` callback that runs
 * `validateTargetHost` on the hostname of the URL it is given. It resolves to
 * the response body on a 2xx status and to `null` on any other status or on
 * any thrown error.
 *
 * Unlike the per-page rules, this is meant to be called once per domain,
 * typically near the top of an audit alongside `check_domain_crawler_access`.
 */
export const checkDomainLlmsTxtTool = defineTool({
    name: "check_domain_llms_txt",
    description: "Check whether the site publishes a valid /llms.txt file (the emerging AEO standard for telling LLMs which content to ingest and how). Call once per domain near the start of an audit. Returns `found: false` when the file is absent — that itself is informational, not a hard error.",
    inputSchema,
    outputSchema,
    async execute({ origin, timeoutMs = 10_000 }, ctx) {
        // Host validation callback handed to cachedFetch as `validateHop`.
        // NOTE(review): presumably invoked for every redirect hop - confirm in cache.js.
        const validateHop = async (hopUrl) => {
            let parsedHop;
            try {
                parsedHop = new URL(hopUrl);
            }
            catch {
                throw new Error(`check_domain_llms_txt: invalid URL ${hopUrl}`);
            }
            await validateTargetHost(parsedHop.hostname);
        };

        // Fetcher given to the rule: body on 2xx, otherwise null (never throws).
        const fetcher = async (url) => {
            try {
                const response = await cachedFetch(url, {
                    timeoutMs,
                    cache: null,
                    validateHop,
                    signal: ctx?.signal,
                });
                const isSuccess = response.status >= 200 && response.status < 300;
                return isSuccess ? response.body : null;
            }
            catch {
                return null;
            }
        };

        // NOTE(review): the rule receives the raw `origin`, while the URL
        // reported below is built from the normalised origin - confirm the two
        // agree when `origin` carries a path.
        const ruleFindings = await llmsTxtRule(origin, fetcher);

        const siteOrigin = new URL(origin).origin;
        const llmsTxtUrl = `${siteOrigin}/llms.txt`;

        // `found` is inferred from message wording: it is true only when no
        // finding message matches "not found" or "missing" (case-insensitive).
        // NOTE(review): this is coupled to the rule's message text - verify
        // that a present-but-incomplete file cannot trip it.
        const absencePattern = /not found|missing/i;
        const found = ruleFindings.every((finding) => !absencePattern.test(finding.message));

        const toToolFinding = ({ severity, message, fix }) => ({
            ruleId: "aeo/llms-txt",
            severity,
            message,
            fix,
        });

        return {
            origin: siteOrigin,
            llmsTxtUrl,
            found,
            findings: ruleFindings.map(toToolFinding),
        };
    },
});
|
|
75
|
+
//# sourceMappingURL=check-domain-llms-txt.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"check-domain-llms-txt.js","sourceRoot":"","sources":["../../../src/ai/tools/check-domain-llms-txt.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,WAAW,EAAE,MAAM,6BAA6B,CAAC;AAC1D,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAC7C,OAAO,EAAE,kBAAkB,EAAE,MAAM,qBAAqB,CAAC;AACzD,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAExC,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC;IAC3B,MAAM,EAAE,CAAC;SACN,MAAM,EAAE;SACR,GAAG,EAAE;SACL,QAAQ,CAAC,mFAAmF,CAAC;IAChG,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE;CAC9D,CAAC,CAAC;AAEH,MAAM,YAAY,GAAG,CAAC,CAAC,MAAM,CAAC;IAC5B,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE;IAClB,UAAU,EAAE,CAAC,CAAC,MAAM,EAAE;IACtB,KAAK,EAAE,CAAC,CAAC,OAAO,EAAE;IAClB,QAAQ,EAAE,CAAC,CAAC,KAAK,CACf,CAAC,CAAC,MAAM,CAAC;QACP,MAAM,EAAE,CAAC,CAAC,OAAO,CAAC,cAAc,CAAC;QACjC,QAAQ,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,UAAU,CAAC,CAAC;QAC1D,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE;QACnB,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;KAC3B,CAAC,CACH;CACF,CAAC,CAAC;AAEH;;;;;;;GAOG;AACH,MAAM,CAAC,MAAM,sBAAsB,GAAG,UAAU,CAAC;IAC/C,IAAI,EAAE,uBAAuB;IAC7B,WAAW,EACT,yRAAyR;IAC3R,WAAW;IACX,YAAY;IACZ,KAAK,CAAC,OAAO,CAAC,EAAE,MAAM,EAAE,SAAS,GAAG,MAAM,EAAE,EAAE,GAAG;QAC/C,MAAM,WAAW,GAAG,KAAK,EAAE,MAAc,EAAiB,EAAE;YAC1D,IAAI,IAAY,CAAC;YACjB,IAAI,CAAC;gBACH,IAAI,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC;YAClC,CAAC;YAAC,MAAM,CAAC;gBACP,MAAM,IAAI,KAAK,CAAC,sCAAsC,MAAM,EAAE,CAAC,CAAC;YAClE,CAAC;YACD,MAAM,kBAAkB,CAAC,IAAI,CAAC,CAAC;QACjC,CAAC,CAAC;QAEF,MAAM,OAAO,GAAG,KAAK,EAAE,GAAW,EAA0B,EAAE;YAC5D,IAAI,CAAC;gBACH,MAAM,GAAG,GAAG,MAAM,WAAW,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,CAAC,CAAC;gBACjG,IAAI,GAAG,CAAC,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,MAAM,GAAG,GAAG;oBAAE,OAAO,GAAG,CAAC,IAAI,CAAC;gBAC3D,OAAO,IAAI,CAAC;YACd,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QACpD,MAAM,UAAU,GAAG,GAAG
,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,MAAM,WAAW,CAAC;QACxD,MAAM,KAAK,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,oBAAoB,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;QAE1E,OAAO;YACL,MAAM,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,MAAM;YAC9B,UAAU;YACV,KAAK;YACL,QAAQ,EAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBAC7B,MAAM,EAAE,cAAuB;gBAC/B,QAAQ,EAAE,CAAC,CAAC,QAAQ;gBACpB,OAAO,EAAE,CAAC,CAAC,OAAO;gBAClB,GAAG,EAAE,CAAC,CAAC,GAAG;aACX,CAAC,CAAC;SACJ,CAAC;IACJ,CAAC;CACF,CAAC,CAAC"}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
export declare const checkIndexabilityTool: {
|
|
3
|
+
name: string;
|
|
4
|
+
description: string;
|
|
5
|
+
inputSchema: z.ZodType<{
|
|
6
|
+
pageId: string;
|
|
7
|
+
xRobotsTagHeader?: string | undefined;
|
|
8
|
+
}, unknown, z.core.$ZodTypeInternals<{
|
|
9
|
+
pageId: string;
|
|
10
|
+
xRobotsTagHeader?: string | undefined;
|
|
11
|
+
}, unknown>>;
|
|
12
|
+
outputSchema: z.ZodType<{
|
|
13
|
+
url: string;
|
|
14
|
+
indexable: boolean;
|
|
15
|
+
reasons: string[];
|
|
16
|
+
metaRobots: string;
|
|
17
|
+
metaNoindex: boolean;
|
|
18
|
+
xRobotsNoindex: boolean;
|
|
19
|
+
canonicalUrl: string;
|
|
20
|
+
canonicalSelfReferencing: boolean;
|
|
21
|
+
}, unknown, z.core.$ZodTypeInternals<{
|
|
22
|
+
url: string;
|
|
23
|
+
indexable: boolean;
|
|
24
|
+
reasons: string[];
|
|
25
|
+
metaRobots: string;
|
|
26
|
+
metaNoindex: boolean;
|
|
27
|
+
xRobotsNoindex: boolean;
|
|
28
|
+
canonicalUrl: string;
|
|
29
|
+
canonicalSelfReferencing: boolean;
|
|
30
|
+
}, unknown>>;
|
|
31
|
+
toAiTool(): import("ai").Tool<{
|
|
32
|
+
pageId: string;
|
|
33
|
+
xRobotsTagHeader?: string | undefined;
|
|
34
|
+
}, import("./types.js").ToolResult<{
|
|
35
|
+
url: string;
|
|
36
|
+
indexable: boolean;
|
|
37
|
+
reasons: string[];
|
|
38
|
+
metaRobots: string;
|
|
39
|
+
metaNoindex: boolean;
|
|
40
|
+
xRobotsNoindex: boolean;
|
|
41
|
+
canonicalUrl: string;
|
|
42
|
+
canonicalSelfReferencing: boolean;
|
|
43
|
+
}>>;
|
|
44
|
+
run(input: {
|
|
45
|
+
pageId: string;
|
|
46
|
+
xRobotsTagHeader?: string | undefined;
|
|
47
|
+
}, ctx?: import("./types.js").ToolExecuteContext): Promise<import("./types.js").ToolResult<{
|
|
48
|
+
url: string;
|
|
49
|
+
indexable: boolean;
|
|
50
|
+
reasons: string[];
|
|
51
|
+
metaRobots: string;
|
|
52
|
+
metaNoindex: boolean;
|
|
53
|
+
xRobotsNoindex: boolean;
|
|
54
|
+
canonicalUrl: string;
|
|
55
|
+
canonicalSelfReferencing: boolean;
|
|
56
|
+
}>>;
|
|
57
|
+
};
|
|
58
|
+
//# sourceMappingURL=check-indexability.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"check-indexability.d.ts","sourceRoot":"","sources":["../../../src/ai/tools/check-indexability.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAkCxB,eAAO,MAAM,qBAAqB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAqChC,CAAC"}
|