@automatelab/citation-intelligence 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -1
- package/dist/adapters/google-ai-mode.d.ts +6 -0
- package/dist/adapters/google-ai-mode.d.ts.map +1 -0
- package/dist/adapters/google-ai-mode.js +48 -0
- package/dist/adapters/google-ai-mode.js.map +1 -0
- package/dist/index.js +44 -2
- package/dist/index.js.map +1 -1
- package/dist/prompts.d.ts +3 -0
- package/dist/prompts.d.ts.map +1 -0
- package/dist/prompts.js +137 -0
- package/dist/prompts.js.map +1 -0
- package/dist/resources.d.ts +3 -0
- package/dist/resources.d.ts.map +1 -0
- package/dist/resources.js +242 -0
- package/dist/resources.js.map +1 -0
- package/dist/tools/am-i-cited.d.ts +4 -4
- package/dist/tools/am-i-cited.js +1 -1
- package/dist/tools/am-i-cited.js.map +1 -1
- package/dist/tools/canonical-competitor-set.d.ts +73 -0
- package/dist/tools/canonical-competitor-set.d.ts.map +1 -0
- package/dist/tools/canonical-competitor-set.js +209 -0
- package/dist/tools/canonical-competitor-set.js.map +1 -0
- package/dist/tools/check-citations.d.ts +4 -4
- package/dist/tools/check-citations.d.ts.map +1 -1
- package/dist/tools/check-citations.js +15 -1
- package/dist/tools/check-citations.js.map +1 -1
- package/dist/tools/citation-evidence.d.ts +54 -0
- package/dist/tools/citation-evidence.d.ts.map +1 -0
- package/dist/tools/citation-evidence.js +148 -0
- package/dist/tools/citation-evidence.js.map +1 -0
- package/dist/tools/crawler-access-audit.d.ts +49 -0
- package/dist/tools/crawler-access-audit.d.ts.map +1 -0
- package/dist/tools/crawler-access-audit.js +256 -0
- package/dist/tools/crawler-access-audit.js.map +1 -0
- package/dist/tools/run-panel.d.ts +5 -5
- package/dist/tools/run-panel.js +1 -1
- package/dist/tools/run-panel.js.map +1 -1
- package/dist/tools/sitemap-citation-map.d.ts +77 -0
- package/dist/tools/sitemap-citation-map.d.ts.map +1 -0
- package/dist/tools/sitemap-citation-map.js +149 -0
- package/dist/tools/sitemap-citation-map.js.map +1 -0
- package/dist/types.d.ts +2 -2
- package/dist/types.d.ts.map +1 -1
- package/package.json +2 -1
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"citation-evidence.js","sourceRoot":"","sources":["../../src/tools/citation-evidence.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,GAAG,EAAE,MAAM,eAAe,CAAC;AAEpC,MAAM,CAAC,MAAM,2BAA2B,GAAG;IACzC,KAAK,EAAE,CAAC;SACL,MAAM,EAAE;SACR,GAAG,CAAC,CAAC,CAAC;SACN,QAAQ,CAAC,iEAAiE,CAAC;IAC9E,MAAM,EAAE,CAAC;SACN,IAAI,CAAC,CAAC,YAAY,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;SAC3E,OAAO,CAAC,MAAM,CAAC;SACf,QAAQ,CACP,kFAAkF,CACnF;IACH,WAAW,EAAE,CAAC;SACX,MAAM,EAAE;SACR,GAAG,EAAE;SACL,GAAG,CAAC,CAAC,CAAC;SACN,GAAG,CAAC,EAAE,CAAC;SACP,OAAO,CAAC,EAAE,CAAC;SACX,QAAQ,CAAC,wCAAwC,CAAC;IACrD,aAAa,EAAE,CAAC;SACb,MAAM,EAAE;SACR,GAAG,EAAE;SACL,GAAG,CAAC,EAAE,CAAC;SACP,GAAG,CAAC,GAAG,CAAC;SACR,OAAO,CAAC,GAAG,CAAC;SACZ,QAAQ,CACP,wGAAwG,CACzG;CACJ,CAAC;AAEF,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC,2BAA2B,CAAC,CAAC;AAE1D,SAAS,iBAAiB,CAAC,GAAW;IACpC,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAC;IAC9B,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IACb,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC,CAAC,CAAC;IACzC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,sBAAsB,EAAE,EAAE,CAAC,CAAC,CAAC;IACjD,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QACvB,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;QACpB,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,CAAC;IAC5C,CAAC;IAAC,MAAM,CAAC;QACP,SAAS;IACX,CAAC;IACD,OAAO,CAAC,GAAG,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;AAClC,CAAC;AAED,SAAS,gBAAgB,CACvB,GAAW,EACX,IAAY;IAEZ,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IACvB,IAAI,IAAI,GAA8C,IAAI,CAAC;IAC3D,KAAK,MAAM,CAAC,IAAI,iBAAiB,CAAC,GAAG,CAAC,EAAE,CAAC;QACvC,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QAC5B,IAAI,GAAG,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,IAAI,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC;YAAE,IAAI,GAAG,EAAE,KAAK,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC;IACnF,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,aAAa,CACpB,IAAY,EACZ,WAAmB,EACnB,SAAiB,EACjB,SAAiB;IAEjB,MAAM,KAAK,GAA
G,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,WAAW,GAAG,SAAS,CAAC,CAAC;IACnD,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,WAAW,GAAG,SAAS,GAAG,SAAS,CAAC,CAAC;IACvE,IAAI,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;IACrC,IAAI,KAAK,GAAG,CAAC;QAAE,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;IACvC,IAAI,GAAG,GAAG,IAAI,CAAC,MAAM;QAAE,OAAO,GAAG,GAAG,OAAO,GAAG,CAAC;IAC/C,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,KAAK,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC;AAC3D,CAAC;AAED,2EAA2E;AAC3E,oDAAoD;AACpD,SAAS,uBAAuB,CAC9B,IAAY,EACZ,UAAkB;IAElB,IAAI,CAAC,IAAI,IAAI,UAAU,GAAG,CAAC;QAAE,OAAO,SAAS,CAAC;IAC9C,MAAM,WAAW,GAAG,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;IAC9C,8DAA8D;IAC9D,KAAK,MAAM,CAAC,IAAI,WAAW,EAAE,CAAC;QAC5B,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;QAC7C,IAAI,IAAI,KAAK,CAAC,CAAC;YAAE,SAAS;QAC1B,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;QAC1C,IAAI,KAAK,KAAK,CAAC,CAAC,IAAI,KAAK,GAAG,IAAI,GAAG,GAAG;YAAE,SAAS;QACjD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,EAAE,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;QACjD,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC;YAAE,OAAO,KAAK,CAAC;IACtC,CAAC;IACD,gCAAgC;IAChC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CACxB,IAAI,CAAC,WAAW,CAAC,GAAG,EAAE,UAAU,CAAC,EACjC,IAAI,CAAC,WAAW,CAAC,GAAG,EAAE,UAAU,CAAC,EACjC,IAAI,CAAC,WAAW,CAAC,GAAG,EAAE,UAAU,CAAC,EACjC,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,UAAU,CAAC,CACnC,CAAC;IACF,MAAM,OAAO,GAAG,CAAC,GAAG,EAAE;QACpB,MAAM,UAAU,GAAG;YACjB,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,UAAU,CAAC;YAC7B,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,UAAU,CAAC;YAC7B,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,UAAU,CAAC;YAC7B,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,UAAU,CAAC;SAC/B,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC1B,OAAO,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC;IACvE,CAAC,CAAC,EAAE,CAAC;IACL,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,CAAC,EAAE,OAAO,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAC3D,IAAI,IAAI,CAAC,MAAM,IAAI,EAAE,IAAI,IAAI,CAAC,MAAM,IAAI,GAAG;QAAE,OAAO,IAAI,CAAC;IACzD,OAAO,S
AAS,CAAC;AACnB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,gBAAgB,CAAC,KAAkC;IACvE,MAAM,MAAM,GAAG,WAAW,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACxC,GAAG,CAAC,KAAK,CAAC,yBAAyB,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;IAE9D,MAAM,GAAG,GAAG,MAAM,cAAc,CAAC;QAC/B,KAAK,EAAE,MAAM,CAAC,KAAK;QACnB,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,WAAW,EAAE,MAAM,CAAC,WAAW;KAChC,CAAC,CAAC;IAEH,MAAM,GAAG,GAAG,GAAG,CAAC,UAAU,IAAI,EAAE,CAAC;IACjC,MAAM,OAAO,GAAG,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC;IAE/B,MAAM,QAAQ,GAAG,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QACvC,MAAM,OAAO,GAAG,OAAO,CAAC,CAAC,CAAC,gBAAgB,CAAC,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QAC9D,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,OAAO;gBACL,GAAG,EAAE,CAAC,CAAC,GAAG;gBACV,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,KAAK,EAAE,KAAK;gBACZ,OAAO,EAAE,CAAC,CAAC,OAAO,IAAI,IAAI;gBAC1B,YAAY,EAAE,IAAI;gBAClB,YAAY,EAAE,IAAI;aACnB,CAAC;QACJ,CAAC;QACD,MAAM,EAAE,OAAO,EAAE,GAAG,aAAa,CAC/B,GAAG,EACH,OAAO,CAAC,KAAK,EACb,OAAO,CAAC,OAAO,CAAC,MAAM,EACtB,MAAM,CAAC,aAAa,CACrB,CAAC;QACF,OAAO;YACL,GAAG,EAAE,CAAC,CAAC,GAAG;YACV,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,KAAK,EAAE,IAAI;YACX,OAAO;YACP,YAAY,EAAE,uBAAuB,CAAC,GAAG,EAAE,OAAO,CAAC,KAAK,CAAC,IAAI,IAAI;YACjE,YAAY,EAAE,OAAO,CAAC,KAAK;YAC3B,OAAO,EAAE,OAAO,CAAC,OAAO;SACzB,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,OAAO;QACL,KAAK,EAAE,MAAM,CAAC,KAAK;QACnB,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,UAAU,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACpC,gBAAgB,EAAE,GAAG,CAAC,MAAM;QAC5B,cAAc,EAAE,OAAO;QACvB,eAAe,EAAE,GAAG,CAAC,SAAS,CAAC,MAAM;QACrC,cAAc,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,MAAM;QACtD,QAAQ;QACR,IAAI,EAAE,OAAO;YACX,CAAC,CAAC,kKAAkK;YACpK,CAAC,CAAC,sJAAsJ;KAC3J,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
export declare const crawlerAccessAuditInputSchema: {
|
|
3
|
+
url: z.ZodString;
|
|
4
|
+
bots: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
|
|
5
|
+
fetch_with_ua: z.ZodDefault<z.ZodBoolean>;
|
|
6
|
+
};
|
|
7
|
+
declare const inputSchema: z.ZodObject<{
|
|
8
|
+
url: z.ZodString;
|
|
9
|
+
bots: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
|
|
10
|
+
fetch_with_ua: z.ZodDefault<z.ZodBoolean>;
|
|
11
|
+
}, "strip", z.ZodTypeAny, {
|
|
12
|
+
url: string;
|
|
13
|
+
fetch_with_ua: boolean;
|
|
14
|
+
bots?: string[] | undefined;
|
|
15
|
+
}, {
|
|
16
|
+
url: string;
|
|
17
|
+
bots?: string[] | undefined;
|
|
18
|
+
fetch_with_ua?: boolean | undefined;
|
|
19
|
+
}>;
|
|
20
|
+
type BotResult = {
|
|
21
|
+
name: string;
|
|
22
|
+
ua_token: string;
|
|
23
|
+
operator: string;
|
|
24
|
+
purpose: string;
|
|
25
|
+
robots_allowed: boolean | "unknown";
|
|
26
|
+
robots_rule: string | null;
|
|
27
|
+
fetch_status: number | null;
|
|
28
|
+
fetch_ok: boolean | null;
|
|
29
|
+
fetch_error: string | null;
|
|
30
|
+
verdict: "allowed" | "blocked" | "robots_only_allowed" | "robots_only_blocked" | "unknown";
|
|
31
|
+
};
|
|
32
|
+
export declare function crawlerAccessAudit(input: z.infer<typeof inputSchema>): Promise<{
|
|
33
|
+
url: string;
|
|
34
|
+
robots_url: string;
|
|
35
|
+
robots_status: number | null;
|
|
36
|
+
robots_present: boolean;
|
|
37
|
+
robots_error: string | null;
|
|
38
|
+
fetched_at: string;
|
|
39
|
+
bots: BotResult[];
|
|
40
|
+
summary: {
|
|
41
|
+
total: number;
|
|
42
|
+
allowed: number;
|
|
43
|
+
blocked: number;
|
|
44
|
+
unknown: number;
|
|
45
|
+
};
|
|
46
|
+
note: string;
|
|
47
|
+
}>;
|
|
48
|
+
export {};
|
|
49
|
+
//# sourceMappingURL=crawler-access-audit.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"crawler-access-audit.d.ts","sourceRoot":"","sources":["../../src/tools/crawler-access-audit.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAiBxB,eAAO,MAAM,6BAA6B;;;;CAmBzC,CAAC;AAEF,QAAA,MAAM,WAAW;;;;;;;;;;;;EAA0C,CAAC;AA8G5D,KAAK,SAAS,GAAG;IACf,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,cAAc,EAAE,OAAO,GAAG,SAAS,CAAC;IACpC,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,QAAQ,EAAE,OAAO,GAAG,IAAI,CAAC;IACzB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,OAAO,EAAE,SAAS,GAAG,SAAS,GAAG,qBAAqB,GAAG,qBAAqB,GAAG,SAAS,CAAC;CAC5F,CAAC;AAEF,wBAAsB,kBAAkB,CAAC,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,OAAO,WAAW,CAAC;;;;;;;;;;;;;;;GAsJ1E"}
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import robotsParserDefault from "robots-parser";
|
|
3
|
+
import { fetchText } from "../lib/fetch.js";
|
|
4
|
+
import { log } from "../lib/log.js";
|
|
5
|
+
const robotsParser = robotsParserDefault;
|
|
6
|
+
// Field validators for the crawler-access-audit tool, declared one per
// constant so each constraint chain can be read on its own.

// Required: the page whose crawler accessibility is being tested.
const pageUrlField = z.string().url().describe("Page URL to test for AI crawler access.");

// Optional override of the built-in bot list: between 1 and 20 tokens.
const botTokensField = z.array(z.string()).min(1).max(20).optional().describe("Override the default bot list. Each entry is a User-Agent token (e.g. 'GPTBot', 'ClaudeBot').");

// Defaults to true; false restricts the audit to robots.txt parsing.
const liveFetchField = z.boolean().default(true).describe("If true, do a live GET as each bot's User-Agent and report status. Disable to only parse robots.txt (no extra requests).");

/**
 * Raw zod shape of the tool input (exported as a plain object of fields).
 * Key order is url, bots, fetch_with_ua.
 */
export const crawlerAccessAuditInputSchema = {
    url: pageUrlField,
    bots: botTokensField,
    fetch_with_ua: liveFetchField,
};

// Object schema built from the shape above; used to validate/parse input.
const inputSchema = z.object(crawlerAccessAuditInputSchema);
|
|
23
|
+
// AI crawlers relevant to LLM citation visibility, as curated by this module.
// Each row is [user-agent token, full User-Agent header, operator, purpose];
// the token doubles as the display name, so rows are expanded into
// { name, ua_token, ua_full, operator, purpose } records below.
const DEFAULT_BOTS = [
    ["GPTBot", "Mozilla/5.0 (compatible; GPTBot/1.0; +https://openai.com/gptbot)", "OpenAI", "ChatGPT training data"],
    ["OAI-SearchBot", "Mozilla/5.0 (compatible; OAI-SearchBot/1.0; +https://openai.com/searchbot)", "OpenAI", "ChatGPT Search index"],
    ["ChatGPT-User", "Mozilla/5.0 (compatible; ChatGPT-User/1.0; +https://openai.com/bot)", "OpenAI", "Real-time fetch on user prompt"],
    ["ClaudeBot", "Mozilla/5.0 (compatible; ClaudeBot/1.0; +claudebot@anthropic.com)", "Anthropic", "Claude training data"],
    ["Claude-Web", "Mozilla/5.0 (compatible; Claude-Web/1.0; +https://www.anthropic.com)", "Anthropic", "Real-time fetch when Claude browses"],
    ["PerplexityBot", "Mozilla/5.0 (compatible; PerplexityBot/1.0; +https://docs.perplexity.ai/guides/bots)", "Perplexity", "Perplexity index"],
    ["Perplexity-User", "Mozilla/5.0 (compatible; Perplexity-User/1.0; +https://docs.perplexity.ai/guides/bots)", "Perplexity", "Real-time fetch on user query"],
    ["CCBot", "CCBot/2.0 (https://commoncrawl.org/faq/)", "Common Crawl", "Used by many LLM training corpora"],
    ["Google-Extended", "Google-Extended", "Google", "Gemini training opt-out token (robots-only, no live fetch)"],
    ["Applebot-Extended", "Applebot-Extended", "Apple", "Apple Intelligence training opt-out (robots-only)"],
    ["Bytespider", "Mozilla/5.0 (compatible; Bytespider; spider-feedback@bytedance.com)", "ByteDance", "Doubao/Volcengine LLM training"],
    ["Meta-ExternalAgent", "meta-externalagent/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)", "Meta", "Llama training data fetch"],
].map(([name, ua_full, operator, purpose]) => ({
    name,
    ua_token: name,
    ua_full,
    operator,
    purpose,
}));

// Bots that are judged from robots.txt alone: these names are opt-out tokens
// rather than crawlers that fetch the URL, so no live request is made for them.
const ROBOTS_ONLY = new Set([
    "Google-Extended",
    "Applebot-Extended",
]);
|
|
114
|
+
/**
 * Builds the robots.txt location for the site that serves `target`.
 *
 * Only the scheme and host (including any port) of `target` are kept; path,
 * query, fragment and credentials are discarded.
 *
 * @param {string} target - Absolute URL of the page being audited.
 * @returns {string} `<scheme>//<host>/robots.txt`.
 * @throws {TypeError} If `target` cannot be parsed by the URL constructor.
 */
function robotsUrlFor(target) {
    const { protocol, host } = new URL(target);
    return [protocol, "//", host, "/robots.txt"].join("");
}
|
|
118
|
+
/**
 * Escapes every regular-expression metacharacter in `text` so the result can
 * be embedded in a RegExp source and match `text` literally.
 *
 * @param {string} text - Arbitrary text (here: a User-Agent token).
 * @returns {string} `text` with each metacharacter backslash-escaped.
 */
function escapeRegExp(text) {
    return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Best-effort, human-readable summary of the robots.txt group that names
 * `uaToken`: the group's allow / disallow / crawl-delay lines joined by " | ".
 *
 * robots-parser exposes the matched rule via getMatchingLineNumber, but not
 * all versions expose it, so the group is located with a simple text search.
 * The token is escaped first: it can come straight from caller input, and an
 * unescaped token would either be interpreted as a pattern (reporting some
 * other bot's rules) or fail to compile (silently reporting "no rule").
 *
 * @param {string|null} robotsBody - Raw robots.txt text; empty/null means none.
 * @param {string} uaToken - User-Agent token to look for, matched literally
 *   and case-insensitively at the start of a `User-agent:` value.
 * @returns {string|null} The joined rule lines, or null when there is no
 *   robots.txt body, no group for the token, or no rule lines in the group.
 */
function matchedRobotsRules(robotsBody, uaToken) {
    if (!robotsBody)
        return null;
    try {
        const re = new RegExp(`(^|\\n)User-agent:\\s*${escapeRegExp(uaToken)}([\\s\\S]*?)(?=\\n\\s*User-agent:|$)`, "i");
        const m = robotsBody.match(re);
        if (!m)
            return null;
        const block = m[2]
            .split("\n")
            .map((line) => line.trim())
            .filter((line) => /^(allow|disallow|crawl-delay):/i.test(line))
            .join(" | ");
        return block || null;
    }
    catch {
        // The rule summary is informational only; never fail the audit over it.
        return null;
    }
}

/**
 * Audits whether AI crawlers can access a page.
 *
 * Fetches the site's robots.txt, asks the robots parser whether each bot may
 * fetch the page, and (unless disabled, or the bot is a robots-only opt-out
 * token) issues a live GET with that bot's User-Agent header.
 *
 * @param {{url: string, bots?: string[], fetch_with_ua?: boolean}} input -
 *   Validated against `inputSchema`. `bots` replaces the default bot list;
 *   entries that match a known bot reuse its metadata, others are treated as
 *   user-supplied tokens.
 * @returns {Promise<object>} Report with `url`, `robots_url`, `robots_status`,
 *   `robots_present`, `robots_error`, `fetched_at`, per-bot `bots` results,
 *   a `summary` of verdict counts and an explanatory `note`.
 * @throws Whatever `inputSchema.parse` throws for invalid input. Failures of
 *   the robots.txt fetch or of a live GET are captured in the report instead.
 */
export async function crawlerAccessAudit(input) {
    const parsed = inputSchema.parse(input);
    log.debug("crawler_access_audit start", { url: parsed.url });
    const robotsUrl = robotsUrlFor(parsed.url);
    let robotsBody = null;
    let robotsStatus = null;
    let robotsError = null;
    try {
        const r = await fetchText(robotsUrl, { timeoutMs: 15_000 });
        robotsStatus = r.status;
        // An error status means there is no usable robots.txt: treat as empty.
        robotsBody = r.status < 400 ? r.text : "";
    }
    catch (err) {
        robotsError = err instanceof Error ? err.message : String(err);
        robotsBody = "";
    }
    const robots = robotsParser(robotsUrl, robotsBody ?? "");
    const botList = parsed.bots
        ? parsed.bots.map((token) => {
            // Reuse curated metadata when the token names a known bot.
            const known = DEFAULT_BOTS.find((b) => b.name.toLowerCase() === token.toLowerCase() || b.ua_token === token);
            return (known ?? {
                name: token,
                ua_token: token,
                ua_full: token,
                operator: "unknown",
                purpose: "user-supplied bot",
            });
        })
        : DEFAULT_BOTS;
    const results = await Promise.all(botList.map(async (bot) => {
        // Tri-state: true / false from the parser, otherwise "unknown".
        let allowed = "unknown";
        try {
            const a = robots.isAllowed(parsed.url, bot.ua_token);
            if (typeof a === "boolean")
                allowed = a;
        }
        catch {
            allowed = "unknown";
        }
        const rule = matchedRobotsRules(robotsBody, bot.ua_token);
        const isRobotsOnly = ROBOTS_ONLY.has(bot.name);
        if (isRobotsOnly || !parsed.fetch_with_ua) {
            // No live request: the verdict rests on robots.txt alone.
            const verdict = allowed === true
                ? isRobotsOnly
                    ? "robots_only_allowed"
                    : "allowed"
                : allowed === false
                    ? isRobotsOnly
                        ? "robots_only_blocked"
                        : "blocked"
                    : "unknown";
            return {
                name: bot.name,
                ua_token: bot.ua_token,
                operator: bot.operator,
                purpose: bot.purpose,
                robots_allowed: allowed,
                robots_rule: rule,
                fetch_status: null,
                fetch_ok: null,
                fetch_error: null,
                verdict,
            };
        }
        // live UA test
        let status = null;
        let err = null;
        try {
            const r = await fetchText(parsed.url, {
                method: "GET",
                headers: { "user-agent": bot.ua_full },
                timeoutMs: 15_000,
            });
            status = r.status;
        }
        catch (e) {
            err = e instanceof Error ? e.message : String(e);
        }
        const fetchOk = status !== null && status < 400;
        // robots.txt forbidding wins; otherwise the live fetch decides. A
        // failed fetch is "blocked" only when robots.txt explicitly allowed.
        const verdict = allowed === false
            ? "blocked"
            : fetchOk
                ? "allowed"
                : allowed === true
                    ? "blocked"
                    : "unknown";
        return {
            name: bot.name,
            ua_token: bot.ua_token,
            operator: bot.operator,
            purpose: bot.purpose,
            robots_allowed: allowed,
            robots_rule: rule,
            fetch_status: status,
            fetch_ok: fetchOk,
            fetch_error: err,
            verdict,
        };
    }));
    const summary = {
        total: results.length,
        allowed: results.filter((r) => r.verdict === "allowed" || r.verdict === "robots_only_allowed").length,
        blocked: results.filter((r) => r.verdict === "blocked" || r.verdict === "robots_only_blocked").length,
        unknown: results.filter((r) => r.verdict === "unknown").length,
    };
    return {
        url: parsed.url,
        robots_url: robotsUrl,
        robots_status: robotsStatus,
        robots_present: !!(robotsBody && robotsBody.length > 0),
        robots_error: robotsError,
        fetched_at: new Date().toISOString(),
        bots: results,
        summary,
        note: "verdict combines robots.txt parsing with a live GET using each bot's User-Agent (unless fetch_with_ua=false or the bot is opt-out-token only, like Google-Extended). 'blocked' = robots.txt forbids OR the page 4xx/5xx'd under that UA.",
    };
}
|
|
256
|
+
//# sourceMappingURL=crawler-access-audit.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"crawler-access-audit.js","sourceRoot":"","sources":["../../src/tools/crawler-access-audit.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,mBAAmB,MAAM,eAAe,CAAC;AAChD,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAC5C,OAAO,EAAE,GAAG,EAAE,MAAM,eAAe,CAAC;AASpC,MAAM,YAAY,GAAG,mBAGN,CAAC;AAEhB,MAAM,CAAC,MAAM,6BAA6B,GAAG;IAC3C,GAAG,EAAE,CAAC;SACH,MAAM,EAAE;SACR,GAAG,EAAE;SACL,QAAQ,CAAC,yCAAyC,CAAC;IACtD,IAAI,EAAE,CAAC;SACJ,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;SACjB,GAAG,CAAC,CAAC,CAAC;SACN,GAAG,CAAC,EAAE,CAAC;SACP,QAAQ,EAAE;SACV,QAAQ,CACP,+FAA+F,CAChG;IACH,aAAa,EAAE,CAAC;SACb,OAAO,EAAE;SACT,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CACP,0HAA0H,CAC3H;CACJ,CAAC;AAEF,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC,6BAA6B,CAAC,CAAC;AAE5D,2EAA2E;AAC3E,uEAAuE;AACvE,MAAM,YAAY,GAMb;IACH;QACE,IAAI,EAAE,QAAQ;QACd,QAAQ,EAAE,QAAQ;QAClB,OAAO,EAAE,kEAAkE;QAC3E,QAAQ,EAAE,QAAQ;QAClB,OAAO,EAAE,uBAAuB;KACjC;IACD;QACE,IAAI,EAAE,eAAe;QACrB,QAAQ,EAAE,eAAe;QACzB,OAAO,EACL,4EAA4E;QAC9E,QAAQ,EAAE,QAAQ;QAClB,OAAO,EAAE,sBAAsB;KAChC;IACD;QACE,IAAI,EAAE,cAAc;QACpB,QAAQ,EAAE,cAAc;QACxB,OAAO,EAAE,qEAAqE;QAC9E,QAAQ,EAAE,QAAQ;QAClB,OAAO,EAAE,gCAAgC;KAC1C;IACD;QACE,IAAI,EAAE,WAAW;QACjB,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE,mEAAmE;QAC5E,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE,sBAAsB;KAChC;IACD;QACE,IAAI,EAAE,YAAY;QAClB,QAAQ,EAAE,YAAY;QACtB,OAAO,EAAE,sEAAsE;QAC/E,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE,qCAAqC;KAC/C;IACD;QACE,IAAI,EAAE,eAAe;QACrB,QAAQ,EAAE,eAAe;QACzB,OAAO,EACL,sFAAsF;QACxF,QAAQ,EAAE,YAAY;QACtB,OAAO,EAAE,kBAAkB;KAC5B;IACD;QACE,IAAI,EAAE,iBAAiB;QACvB,QAAQ,EAAE,iBAAiB;QAC3B,OAAO,EACL,wFAAwF;QAC1F,QAAQ,EAAE,YAAY;QACtB,OAAO,EAAE,+BAA+B;KACzC;IACD;QACE,IAAI,EAAE,OAAO;QACb,QAAQ,EAAE,OAAO;QACjB,OAAO,EAAE,0CAA0C;QACnD,QAAQ,EAAE,cAAc;QACxB,OAAO,EAAE,mCAAmC;KAC7C;IACD;QACE,IAAI,EAAE,iBAAiB;QACvB,QAAQ,EAAE,iBAAiB;QAC3B,OAAO,EAAE,iBAAiB;QAC1B,QAAQ,EAAE,QAAQ;QAClB,OAAO,EAAE,4DAA4D;KACtE;IACD;QACE,IAAI,EAAE,mBAAmB;QACzB,QAAQ,EAAE,mBAAmB;QAC7B,OAAO,EAAE,mBAAmB;QAC5B,QAAQ,EAAE,OAAO;QACjB,OAAO,EAAE,mDAAmD;KAC7D;IACD
;QACE,IAAI,EAAE,YAAY;QAClB,QAAQ,EAAE,YAAY;QACtB,OAAO,EAAE,qEAAqE;QAC9E,QAAQ,EAAE,WAAW;QACrB,OAAO,EAAE,gCAAgC;KAC1C;IACD;QACE,IAAI,EAAE,oBAAoB;QAC1B,QAAQ,EAAE,oBAAoB;QAC9B,OAAO,EACL,2FAA2F;QAC7F,QAAQ,EAAE,MAAM;QAChB,OAAO,EAAE,2BAA2B;KACrC;CACF,CAAC;AAEF,4EAA4E;AAC5E,qEAAqE;AACrE,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,CAAC,iBAAiB,EAAE,mBAAmB,CAAC,CAAC,CAAC;AAEtE,SAAS,YAAY,CAAC,MAAc;IAClC,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;IAC1B,OAAO,GAAG,CAAC,CAAC,QAAQ,KAAK,CAAC,CAAC,IAAI,aAAa,CAAC;AAC/C,CAAC;AAeD,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,KAAkC;IACzE,MAAM,MAAM,GAAG,WAAW,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACxC,GAAG,CAAC,KAAK,CAAC,4BAA4B,EAAE,EAAE,GAAG,EAAE,MAAM,CAAC,GAAG,EAAE,CAAC,CAAC;IAE7D,MAAM,SAAS,GAAG,YAAY,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;IAC3C,IAAI,UAAU,GAAkB,IAAI,CAAC;IACrC,IAAI,YAAY,GAAkB,IAAI,CAAC;IACvC,IAAI,WAAW,GAAkB,IAAI,CAAC;IACtC,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,MAAM,SAAS,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,MAAM,EAAE,CAAC,CAAC;QAC5D,YAAY,GAAG,CAAC,CAAC,MAAM,CAAC;QACxB,UAAU,GAAG,CAAC,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;IAC5C,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,WAAW,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QAC/D,UAAU,GAAG,EAAE,CAAC;IAClB,CAAC;IAED,MAAM,MAAM,GAAG,YAAY,CAAC,SAAS,EAAE,UAAU,IAAI,EAAE,CAAC,CAAC;IAEzD,MAAM,OAAO,GAAG,MAAM,CAAC,IAAI;QACzB,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE;YACxB,MAAM,KAAK,GAAG,YAAY,CAAC,IAAI,CAC7B,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,KAAK,KAAK,CAAC,WAAW,EAAE,IAAI,CAAC,CAAC,QAAQ,KAAK,KAAK,CAC5E,CAAC;YACF,OAAO,CACL,KAAK,IAAI;gBACP,IAAI,EAAE,KAAK;gBACX,QAAQ,EAAE,KAAK;gBACf,OAAO,EAAE,KAAK;gBACd,QAAQ,EAAE,SAAS;gBACnB,OAAO,EAAE,mBAAmB;aAC7B,CACF,CAAC;QACJ,CAAC,CAAC;QACJ,CAAC,CAAC,YAAY,CAAC;IAEjB,MAAM,OAAO,GAAgB,MAAM,OAAO,CAAC,GAAG,CAC5C,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,EAAsB,EAAE;QAC5C,IAAI,OAAO,GAAwB,SAAS,CAAC;QAC7C,IAAI,CAAC;YACH,MAAM,CAAC,GAAG,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,QAAQ,CAAC,CAAC;YACrD,IAAI,OAAO,CAAC,K
AAK,SAAS;gBAAE,OAAO,GAAG,CAAC,CAAC;QAC1C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,GAAG,SAAS,CAAC;QACtB,CAAC;QACD,MAAM,IAAI,GAAG,CAAC,GAAG,EAAE;YACjB,IAAI,CAAC;gBACH,wEAAwE;gBACxE,oEAAoE;gBACpE,6DAA6D;gBAC7D,IAAI,CAAC,UAAU;oBAAE,OAAO,IAAI,CAAC;gBAC7B,MAAM,EAAE,GAAG,IAAI,MAAM,CACnB,yBAAyB,GAAG,CAAC,QAAQ,sCAAsC,EAC3E,GAAG,CACJ,CAAC;gBACF,MAAM,CAAC,GAAG,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;gBAC/B,IAAI,CAAC,CAAC;oBAAE,OAAO,IAAI,CAAC;gBACpB,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC;qBACf,KAAK,CAAC,IAAI,CAAC;qBACX,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;qBAC1B,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,iCAAiC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;qBAC9D,IAAI,CAAC,KAAK,CAAC,CAAC;gBACf,OAAO,KAAK,IAAI,IAAI,CAAC;YACvB,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC,CAAC,EAAE,CAAC;QAEL,MAAM,YAAY,GAAG,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAC/C,IAAI,YAAY,IAAI,CAAC,MAAM,CAAC,aAAa,EAAE,CAAC;YAC1C,MAAM,OAAO,GACX,OAAO,KAAK,IAAI;gBACd,CAAC,CAAC,YAAY;oBACZ,CAAC,CAAC,qBAAqB;oBACvB,CAAC,CAAC,SAAS;gBACb,CAAC,CAAC,OAAO,KAAK,KAAK;oBACjB,CAAC,CAAC,YAAY;wBACZ,CAAC,CAAC,qBAAqB;wBACvB,CAAC,CAAC,SAAS;oBACb,CAAC,CAAC,SAAS,CAAC;YAClB,OAAO;gBACL,IAAI,EAAE,GAAG,CAAC,IAAI;gBACd,QAAQ,EAAE,GAAG,CAAC,QAAQ;gBACtB,QAAQ,EAAE,GAAG,CAAC,QAAQ;gBACtB,OAAO,EAAE,GAAG,CAAC,OAAO;gBACpB,cAAc,EAAE,OAAO;gBACvB,WAAW,EAAE,IAAI;gBACjB,YAAY,EAAE,IAAI;gBAClB,QAAQ,EAAE,IAAI;gBACd,WAAW,EAAE,IAAI;gBACjB,OAAO;aACR,CAAC;QACJ,CAAC;QAED,eAAe;QACf,IAAI,MAAM,GAAkB,IAAI,CAAC;QACjC,IAAI,GAAG,GAAkB,IAAI,CAAC;QAC9B,IAAI,CAAC;YACH,MAAM,CAAC,GAAG,MAAM,SAAS,CAAC,MAAM,CAAC,GAAG,EAAE;gBACpC,MAAM,EAAE,KAAK;gBACb,OAAO,EAAE,EAAE,YAAY,EAAE,GAAG,CAAC,OAAO,EAAE;gBACtC,SAAS,EAAE,MAAM;aAClB,CAAC,CAAC;YACH,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC;QACpB,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,GAAG,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QACnD,CAAC;QACD,MAAM,OAAO,GAAG,MAAM,KAAK,IAAI,IAAI,MAAM,GAAG,GAAG,CAAC;QAChD,MAAM,OAAO,GACX,OAAO,KAAK,KAAK;YACf,CAAC,CAAC,SAAS;YACX,CAAC,CAAC,OAAO;gBACP,CAAC,CAAC,SAAS;gBACX,CAA
C,CAAC,OAAO,KAAK,IAAI;oBAChB,CAAC,CAAC,SAAS;oBACX,CAAC,CAAC,SAAS,CAAC;QACpB,OAAO;YACL,IAAI,EAAE,GAAG,CAAC,IAAI;YACd,QAAQ,EAAE,GAAG,CAAC,QAAQ;YACtB,QAAQ,EAAE,GAAG,CAAC,QAAQ;YACtB,OAAO,EAAE,GAAG,CAAC,OAAO;YACpB,cAAc,EAAE,OAAO;YACvB,WAAW,EAAE,IAAI;YACjB,YAAY,EAAE,MAAM;YACpB,QAAQ,EAAE,OAAO;YACjB,WAAW,EAAE,GAAG;YAChB,OAAO;SACR,CAAC;IACJ,CAAC,CAAC,CACH,CAAC;IAEF,MAAM,OAAO,GAAG;QACd,KAAK,EAAE,OAAO,CAAC,MAAM;QACrB,OAAO,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,SAAS,IAAI,CAAC,CAAC,OAAO,KAAK,qBAAqB,CAAC,CAAC,MAAM;QACrG,OAAO,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,SAAS,IAAI,CAAC,CAAC,OAAO,KAAK,qBAAqB,CAAC,CAAC,MAAM;QACrG,OAAO,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,MAAM;KAC/D,CAAC;IAEF,OAAO;QACL,GAAG,EAAE,MAAM,CAAC,GAAG;QACf,UAAU,EAAE,SAAS;QACrB,aAAa,EAAE,YAAY;QAC3B,cAAc,EAAE,CAAC,CAAC,CAAC,UAAU,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC;QACvD,YAAY,EAAE,WAAW;QACzB,UAAU,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACpC,IAAI,EAAE,OAAO;QACb,OAAO;QACP,IAAI,EACF,0OAA0O;KAC7O,CAAC;AACJ,CAAC"}
|
|
@@ -2,19 +2,19 @@ import { z } from "zod";
|
|
|
2
2
|
export declare const runPanelInputSchema: {
|
|
3
3
|
name: z.ZodString;
|
|
4
4
|
domain: z.ZodOptional<z.ZodString>;
|
|
5
|
-
engine: z.ZodDefault<z.ZodEnum<["perplexity", "claude", "openai", "gemini", "bing", "brave", "auto"]>>;
|
|
5
|
+
engine: z.ZodDefault<z.ZodEnum<["perplexity", "claude", "openai", "gemini", "bing", "brave", "google_ai_mode", "auto"]>>;
|
|
6
6
|
};
|
|
7
7
|
declare const inputSchema: z.ZodObject<{
|
|
8
8
|
name: z.ZodString;
|
|
9
9
|
domain: z.ZodOptional<z.ZodString>;
|
|
10
|
-
engine: z.ZodDefault<z.ZodEnum<["perplexity", "claude", "openai", "gemini", "bing", "brave", "auto"]>>;
|
|
10
|
+
engine: z.ZodDefault<z.ZodEnum<["perplexity", "claude", "openai", "gemini", "bing", "brave", "google_ai_mode", "auto"]>>;
|
|
11
11
|
}, "strip", z.ZodTypeAny, {
|
|
12
|
-
engine: "perplexity" | "claude" | "openai" | "gemini" | "bing" | "brave" | "auto";
|
|
12
|
+
engine: "perplexity" | "claude" | "openai" | "gemini" | "bing" | "brave" | "google_ai_mode" | "auto";
|
|
13
13
|
name: string;
|
|
14
14
|
domain?: string | undefined;
|
|
15
15
|
}, {
|
|
16
16
|
name: string;
|
|
17
|
-
engine?: "perplexity" | "claude" | "openai" | "gemini" | "bing" | "brave" | "auto" | undefined;
|
|
17
|
+
engine?: "perplexity" | "claude" | "openai" | "gemini" | "bing" | "brave" | "google_ai_mode" | "auto" | undefined;
|
|
18
18
|
domain?: string | undefined;
|
|
19
19
|
}>;
|
|
20
20
|
export declare function runPanel(input: z.infer<typeof inputSchema>): Promise<{
|
|
@@ -26,7 +26,7 @@ export declare function runPanel(input: z.infer<typeof inputSchema>): Promise<{
|
|
|
26
26
|
snapshot: {
|
|
27
27
|
panel: string;
|
|
28
28
|
domain: string;
|
|
29
|
-
engine: "perplexity" | "claude" | "openai" | "gemini" | "bing" | "brave" | "auto";
|
|
29
|
+
engine: "perplexity" | "claude" | "openai" | "gemini" | "bing" | "brave" | "google_ai_mode" | "auto";
|
|
30
30
|
taken_at: string;
|
|
31
31
|
per_query: {
|
|
32
32
|
query: string;
|
package/dist/tools/run-panel.js
CHANGED
|
@@ -11,7 +11,7 @@ export const runPanelInputSchema = {
|
|
|
11
11
|
.optional()
|
|
12
12
|
.describe("Override the panel's default domain for this run."),
|
|
13
13
|
engine: z
|
|
14
|
-
.enum(["perplexity", "claude", "openai", "gemini", "bing", "brave", "auto"])
|
|
14
|
+
.enum(["perplexity", "claude", "openai", "gemini", "bing", "brave", "google_ai_mode", "auto"])
|
|
15
15
|
.default("auto")
|
|
16
16
|
.describe("AI engine to query."),
|
|
17
17
|
};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run-panel.js","sourceRoot":"","sources":["../../src/tools/run-panel.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,QAAQ,EAAE,MAAM,iBAAiB,CAAC;AAC3C,OAAO,EAAE,SAAS,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAE7D,MAAM,CAAC,MAAM,mBAAmB,GAAG;IACjC,IAAI,EAAE,CAAC;SACJ,MAAM,EAAE;SACR,GAAG,CAAC,CAAC,CAAC;SACN,QAAQ,CAAC,gDAAgD,CAAC;IAC7D,MAAM,EAAE,CAAC;SACN,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,CAAC,mDAAmD,CAAC;IAChE,MAAM,EAAE,CAAC;SACN,IAAI,CAAC,CAAC,YAAY,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"run-panel.js","sourceRoot":"","sources":["../../src/tools/run-panel.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,QAAQ,EAAE,MAAM,iBAAiB,CAAC;AAC3C,OAAO,EAAE,SAAS,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAE7D,MAAM,CAAC,MAAM,mBAAmB,GAAG;IACjC,IAAI,EAAE,CAAC;SACJ,MAAM,EAAE;SACR,GAAG,CAAC,CAAC,CAAC;SACN,QAAQ,CAAC,gDAAgD,CAAC;IAC7D,MAAM,EAAE,CAAC;SACN,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,CAAC,mDAAmD,CAAC;IAChE,MAAM,EAAE,CAAC;SACN,IAAI,CAAC,CAAC,YAAY,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,gBAAgB,EAAE,MAAM,CAAC,CAAC;SAC7F,OAAO,CAAC,MAAM,CAAC;SACf,QAAQ,CAAC,qBAAqB,CAAC;CACnC,CAAC;AAEF,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC,mBAAmB,CAAC,CAAC;AAElD,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,KAAkC;IAC/D,MAAM,MAAM,GAAG,WAAW,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACxC,MAAM,KAAK,GAAG,MAAM,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAC3C,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,OAAO,EAAE,KAAK,EAAE,UAAU,MAAM,CAAC,IAAI,aAAa,EAAE,CAAC;IACvD,CAAC;IACD,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,KAAK,CAAC,MAAM,CAAC;IAC7C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO;YACL,KAAK,EACH,iFAAiF;SACpF,CAAC;IACJ,CAAC;IAED,MAAM,KAAK,GAAG,EAAE,CAAC;IACjB,MAAM,MAAM,GAAe,EAAE,CAAC;IAC9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,IAAI,KAAK,EAAE,CAAC;QACrD,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC;IACjD,CAAC;IAED,MAAM,QAAQ,GAAqF,EAAE,CAAC;IACtG,IAAI,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC;IAC/B,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAC3C,KAAK,MAAM,OAAO,IAAI,MAAM,EAAE,CAAC;QAC7B,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;QACvE,UAAU,GAAG,GAAG,CAAC,MAAM,CAAC;QACxB,QAAQ,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC;IAChC,CAAC;IAED,MAAM,YAAY,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;IAC5D,MAAM,QAAQ,GAAG;QACf,KAAK,EAAE,KAAK,CAAC,IAAI;QACjB,MAAM;QACN,MAAM,EAAE,UAAU;QAClB,QAAQ,EAAE,SAAS;QACnB,SAAS,EAAE,QAAQ;QACnB,OAAO,EAAE;YACP,aAAa,EA
AE,QAAQ,CAAC,MAAM;YAC9B,aAAa,EAAE,YAAY;YAC3B,aAAa,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;SACxE;KACF,CAAC;IACF,MAAM,IAAI,GAAG,MAAM,cAAc,CAAC,QAAQ,CAAC,CAAC;IAC5C,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;AACtC,CAAC"}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
export declare const sitemapCitationMapInputSchema: {
|
|
3
|
+
sitemap_url: z.ZodString;
|
|
4
|
+
domain: z.ZodOptional<z.ZodString>;
|
|
5
|
+
since: z.ZodOptional<z.ZodString>;
|
|
6
|
+
limit: z.ZodDefault<z.ZodNumber>;
|
|
7
|
+
};
|
|
8
|
+
declare const inputSchema: z.ZodObject<{
|
|
9
|
+
sitemap_url: z.ZodString;
|
|
10
|
+
domain: z.ZodOptional<z.ZodString>;
|
|
11
|
+
since: z.ZodOptional<z.ZodString>;
|
|
12
|
+
limit: z.ZodDefault<z.ZodNumber>;
|
|
13
|
+
}, "strip", z.ZodTypeAny, {
|
|
14
|
+
limit: number;
|
|
15
|
+
sitemap_url: string;
|
|
16
|
+
domain?: string | undefined;
|
|
17
|
+
since?: string | undefined;
|
|
18
|
+
}, {
|
|
19
|
+
sitemap_url: string;
|
|
20
|
+
domain?: string | undefined;
|
|
21
|
+
since?: string | undefined;
|
|
22
|
+
limit?: number | undefined;
|
|
23
|
+
}>;
|
|
24
|
+
export declare function sitemapCitationMap(input: z.infer<typeof inputSchema>): Promise<{
|
|
25
|
+
sitemap_url: string;
|
|
26
|
+
total_urls: number;
|
|
27
|
+
mapped: number;
|
|
28
|
+
unmapped: number;
|
|
29
|
+
message: string;
|
|
30
|
+
domain?: undefined;
|
|
31
|
+
since?: undefined;
|
|
32
|
+
fetched_at?: undefined;
|
|
33
|
+
coverage_pct?: undefined;
|
|
34
|
+
citations_in_cache?: undefined;
|
|
35
|
+
mapped_urls?: undefined;
|
|
36
|
+
unmapped_urls?: undefined;
|
|
37
|
+
note?: undefined;
|
|
38
|
+
} | {
|
|
39
|
+
sitemap_url: string;
|
|
40
|
+
domain: string;
|
|
41
|
+
total_urls: number;
|
|
42
|
+
mapped: number;
|
|
43
|
+
unmapped: number;
|
|
44
|
+
message: string;
|
|
45
|
+
since?: undefined;
|
|
46
|
+
fetched_at?: undefined;
|
|
47
|
+
coverage_pct?: undefined;
|
|
48
|
+
citations_in_cache?: undefined;
|
|
49
|
+
mapped_urls?: undefined;
|
|
50
|
+
unmapped_urls?: undefined;
|
|
51
|
+
note?: undefined;
|
|
52
|
+
} | {
|
|
53
|
+
sitemap_url: string;
|
|
54
|
+
domain: string;
|
|
55
|
+
since: string | undefined;
|
|
56
|
+
fetched_at: string;
|
|
57
|
+
total_urls: number;
|
|
58
|
+
mapped: number;
|
|
59
|
+
unmapped: number;
|
|
60
|
+
coverage_pct: number;
|
|
61
|
+
citations_in_cache: number;
|
|
62
|
+
mapped_urls: {
|
|
63
|
+
url: string;
|
|
64
|
+
citation_count: number;
|
|
65
|
+
unique_queries: number;
|
|
66
|
+
engines: string[];
|
|
67
|
+
last_seen: string;
|
|
68
|
+
sample_queries: string[];
|
|
69
|
+
}[];
|
|
70
|
+
unmapped_urls: {
|
|
71
|
+
url: string;
|
|
72
|
+
}[];
|
|
73
|
+
note: string;
|
|
74
|
+
message?: undefined;
|
|
75
|
+
}>;
|
|
76
|
+
export {};
|
|
77
|
+
//# sourceMappingURL=sitemap-citation-map.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sitemap-citation-map.d.ts","sourceRoot":"","sources":["../../src/tools/sitemap-citation-map.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAMxB,eAAO,MAAM,6BAA6B;;;;;CAuBzC,CAAC;AAEF,QAAA,MAAM,WAAW;;;;;;;;;;;;;;;EAA0C,CAAC;AAgC5D,wBAAsB,kBAAkB,CAAC,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,OAAO,WAAW,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;aAsElE,MAAM;wBACK,MAAM;wBACN,MAAM;iBACb,MAAM,EAAE;mBACN,MAAM;wBACD,MAAM,EAAE;;;aAEA,MAAM;;;;GA4CjC"}
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import * as cheerio from "cheerio";
|
|
3
|
+
import { fetchText } from "../lib/fetch.js";
|
|
4
|
+
import { citedForDomain } from "../lib/cache.js";
|
|
5
|
+
import { log } from "../lib/log.js";
|
|
6
|
+
// Validators for the sitemap_citation_map tool input, declared one per field
// and then assembled into the exported shape in the original key order.

// Required; must parse as a URL.
const sitemapUrlField = z
    .string()
    .url()
    .describe("URL of sitemap.xml (or a sitemap index). Nested sitemaps are followed.");

// Optional non-empty string.
const domainField = z
    .string()
    .min(1)
    .optional()
    .describe("Domain to look up citations for. If omitted, inferred from the sitemap host.");

// Optional string; no date-format validation is applied at this layer.
const sinceField = z
    .string()
    .optional()
    .describe("ISO date floor; only count citations recorded on or after this date.");

// Integer between 1 and 5000 inclusive, defaulting to 500.
const limitField = z
    .number()
    .int()
    .min(1)
    .max(5000)
    .default(500)
    .describe("Max sitemap URLs to consider.");

// Raw field map (not wrapped in z.object) exported for callers.
export const sitemapCitationMapInputSchema = {
    sitemap_url: sitemapUrlField,
    domain: domainField,
    since: sinceField,
    limit: limitField,
};

// Object schema used by sitemapCitationMap to validate and default its input.
const inputSchema = z.object(sitemapCitationMapInputSchema);
|
|
29
|
+
/**
 * Collects the page URLs listed by a sitemap, following sitemap-index entries.
 *
 * A document that contains any `sitemap > loc` entries is treated as an index:
 * every child is fetched (all in parallel) and their results are concatenated;
 * `url > loc` entries in that same document are not read. Otherwise the
 * document's own `url > loc` entries are returned.
 *
 * @param {string} url - Sitemap or sitemap-index URL to fetch.
 * @param {number} [depth=0] - Current recursion depth; calls with depth > 2
 *   return an empty list without fetching.
 * @returns {Promise<string[]>} Trimmed, non-empty `<loc>` values.
 */
async function parseSitemap(url, depth = 0) {
    if (depth > 2) {
        return [];
    }
    // timeoutMs is presumably milliseconds (15 s) - confirm against ../lib/fetch.js.
    const response = await fetchText(url, { timeoutMs: 15_000 });
    const $ = cheerio.load(response.text, { xmlMode: true });
    // Trimmed text of every element matching `selector`, skipping empty values.
    const readLocs = (selector) => {
        const found = [];
        for (const node of $(selector).toArray()) {
            const loc = $(node).text().trim();
            if (loc) {
                found.push(loc);
            }
        }
        return found;
    };
    const indexEntries = readLocs("sitemap > loc");
    if (indexEntries.length === 0) {
        return readLocs("url > loc");
    }
    const pages = await Promise.all(indexEntries.map((child) => parseSitemap(child, depth + 1)));
    return pages.flat();
}
|
|
47
|
+
/**
 * Canonicalizes a URL so sitemap entries and cached citations can be compared
 * as plain strings.
 *
 * For a parseable URL: the fragment is dropped, trailing slashes are stripped
 * from the path (an empty path becomes "/"), and the whole serialized URL is
 * lowercased - including path and query, so matching is case-insensitive.
 * For anything `new URL` rejects, the input is stringified and lowercased;
 * `null`/`undefined` become "" instead of raising a TypeError.
 *
 * @param {*} u - URL to normalize; expected to be a string.
 * @returns {string} The normalized comparison key.
 */
function normalizeUrl(u) {
    try {
        const parsed = new URL(u);
        parsed.hash = "";
        parsed.pathname = parsed.pathname.replace(/\/+$/, "") || "/";
        return parsed.toString().toLowerCase();
    }
    catch {
        // `u` may not be a string here (e.g. a cache row without a url), so
        // coerce before lowercasing rather than letting the fallback throw.
        return String(u ?? "").toLowerCase();
    }
}
|
|
59
|
+
/**
 * Cross-references the URLs listed in a sitemap against the citation cache.
 *
 * The input is validated with `inputSchema` (which throws on invalid input).
 * The lookup domain is `input.domain` when given, otherwise the sitemap URL's
 * hostname. Sitemap URLs are truncated to `limit`, cached citations for the
 * domain are bucketed by normalized URL, and each sitemap URL is reported as
 * mapped (present in the cache) or unmapped.
 *
 * @param {{sitemap_url: string, domain?: string, since?: string, limit?: number}} input
 * @returns {Promise<object>} One of three shapes: a short `message` result when
 *   no domain can be determined, a short `message` result when the sitemap
 *   yields no URLs, or the full report with `mapped_urls`, `unmapped_urls`
 *   (at most 200 entries), `coverage_pct` and `note`.
 */
export async function sitemapCitationMap(input) {
    const parsed = inputSchema.parse(input);
    log.debug("sitemap_citation_map start", { sitemap_url: parsed.sitemap_url });
    const inferredDomain = parsed.domain ?? (() => {
        try {
            return new URL(parsed.sitemap_url).hostname;
        }
        catch {
            return "";
        }
    })();
    if (!inferredDomain) {
        return {
            sitemap_url: parsed.sitemap_url,
            total_urls: 0,
            mapped: 0,
            unmapped: 0,
            message: "could not infer domain from sitemap_url and no domain provided",
        };
    }
    const urls = (await parseSitemap(parsed.sitemap_url)).slice(0, parsed.limit);
    if (urls.length === 0) {
        return {
            sitemap_url: parsed.sitemap_url,
            domain: inferredDomain,
            total_urls: 0,
            mapped: 0,
            unmapped: 0,
            message: "no URLs found in sitemap",
        };
    }
    // The last argument is presumably a row cap - confirm against ../lib/cache.js.
    const citations = await citedForDomain(inferredDomain, parsed.since, undefined, 100_000);
    // Bucket cached citations by normalized URL. The per-URL citation count is
    // accumulated here, in the same pass, so the mapping loop below does not
    // have to rescan (and re-normalize) the whole citation list per sitemap URL.
    const byUrl = new Map();
    for (const c of citations) {
        const norm = normalizeUrl(c.url);
        let entry = byUrl.get(norm);
        if (!entry) {
            entry = {
                count: 0,
                queries: new Set(),
                engines: new Set(),
                last_seen: c.fetched_at,
            };
            byUrl.set(norm, entry);
        }
        entry.count += 1;
        entry.queries.add(c.query);
        entry.engines.add(c.engine);
        // Keep the greatest fetched_at seen for this URL.
        if (c.fetched_at > entry.last_seen)
            entry.last_seen = c.fetched_at;
    }
    const mapped = [];
    const unmapped = [];
    for (const u of urls) {
        const hit = byUrl.get(normalizeUrl(u));
        if (!hit) {
            unmapped.push({ url: u });
            continue;
        }
        mapped.push({
            url: u,
            citation_count: hit.count,
            unique_queries: hit.queries.size,
            engines: [...hit.engines].sort(),
            last_seen: hit.last_seen,
            // First five distinct queries, in the order they were encountered.
            sample_queries: [...hit.queries].slice(0, 5),
        });
    }
    // Most-cited first; ties broken by number of distinct queries.
    mapped.sort((a, b) => b.citation_count - a.citation_count || b.unique_queries - a.unique_queries);
    // No traffic-potential signal is available for unmapped URLs, so emit them
    // in a deterministic (alphabetical) order instead.
    unmapped.sort((a, b) => a.url.localeCompare(b.url));
    return {
        sitemap_url: parsed.sitemap_url,
        domain: inferredDomain,
        since: parsed.since,
        fetched_at: new Date().toISOString(),
        total_urls: urls.length,
        mapped: mapped.length,
        unmapped: unmapped.length,
        // Percentage rounded to one decimal place.
        coverage_pct: urls.length === 0 ? 0 : Math.round((mapped.length / urls.length) * 1000) / 10,
        citations_in_cache: citations.length,
        mapped_urls: mapped,
        unmapped_urls: unmapped.slice(0, 200),
        note: "mapped_urls = sitemap URLs that appear in the citation cache. unmapped_urls = sitemap URLs the cache has never seen cited (next-action candidates). Coverage_pct = mapped / total_urls. Cache must be primed first via check_citations or run_panel.",
    };
}
|
|
149
|
+
//# sourceMappingURL=sitemap-citation-map.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sitemap-citation-map.js","sourceRoot":"","sources":["../../src/tools/sitemap-citation-map.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAC5C,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EAAE,GAAG,EAAE,MAAM,eAAe,CAAC;AAEpC,MAAM,CAAC,MAAM,6BAA6B,GAAG;IAC3C,WAAW,EAAE,CAAC;SACX,MAAM,EAAE;SACR,GAAG,EAAE;SACL,QAAQ,CAAC,wEAAwE,CAAC;IACrF,MAAM,EAAE,CAAC;SACN,MAAM,EAAE;SACR,GAAG,CAAC,CAAC,CAAC;SACN,QAAQ,EAAE;SACV,QAAQ,CACP,8EAA8E,CAC/E;IACH,KAAK,EAAE,CAAC;SACL,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,CAAC,sEAAsE,CAAC;IACnF,KAAK,EAAE,CAAC;SACL,MAAM,EAAE;SACR,GAAG,EAAE;SACL,GAAG,CAAC,CAAC,CAAC;SACN,GAAG,CAAC,IAAI,CAAC;SACT,OAAO,CAAC,GAAG,CAAC;SACZ,QAAQ,CAAC,+BAA+B,CAAC;CAC7C,CAAC;AAEF,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC,6BAA6B,CAAC,CAAC;AAE5D,KAAK,UAAU,YAAY,CAAC,GAAW,EAAE,KAAK,GAAG,CAAC;IAChD,IAAI,KAAK,GAAG,CAAC;QAAE,OAAO,EAAE,CAAC;IACzB,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,SAAS,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,MAAM,EAAE,CAAC,CAAC;IAC7D,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;IAChD,MAAM,aAAa,GAAG,CAAC,CAAC,eAAe,CAAC;SACrC,OAAO,EAAE;SACT,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;SAChC,MAAM,CAAC,OAAO,CAAC,CAAC;IACnB,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,YAAY,CAAC,CAAC,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACvF,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC;IACvB,CAAC;IACD,OAAO,CAAC,CAAC,WAAW,CAAC;SAClB,OAAO,EAAE;SACT,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;SAChC,MAAM,CAAC,OAAO,CAAC,CAAC;AACrB,CAAC;AAED,SAAS,YAAY,CAAC,CAAS;IAC7B,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC;QAC1B,MAAM,CAAC,IAAI,GAAG,EAAE,CAAC;QACjB,IAAI,CAAC,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,IAAI,GAAG,CAAC;QACnD,MAAM,CAAC,QAAQ,GAAG,CAAC,CAAC;QACpB,OAAO,MAAM,C
AAC,QAAQ,EAAE,CAAC,WAAW,EAAE,CAAC;IACzC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,CAAC,CAAC,WAAW,EAAE,CAAC;IACzB,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,KAAkC;IACzE,MAAM,MAAM,GAAG,WAAW,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACxC,GAAG,CAAC,KAAK,CAAC,4BAA4B,EAAE,EAAE,WAAW,EAAE,MAAM,CAAC,WAAW,EAAE,CAAC,CAAC;IAE7E,MAAM,cAAc,GAClB,MAAM,CAAC,MAAM,IAAI,CAAC,GAAG,EAAE;QACrB,IAAI,CAAC;YACH,OAAO,IAAI,GAAG,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,QAAQ,CAAC;QAC9C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC,CAAC,EAAE,CAAC;IACP,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,OAAO;YACL,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,UAAU,EAAE,CAAC;YACb,MAAM,EAAE,CAAC;YACT,QAAQ,EAAE,CAAC;YACX,OAAO,EAAE,gEAAgE;SAC1E,CAAC;IACJ,CAAC;IAED,MAAM,IAAI,GAAG,CAAC,MAAM,YAAY,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;IAC7E,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO;YACL,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,MAAM,EAAE,cAAc;YACtB,UAAU,EAAE,CAAC;YACb,MAAM,EAAE,CAAC;YACT,QAAQ,EAAE,CAAC;YACX,OAAO,EAAE,0BAA0B;SACpC,CAAC;IACJ,CAAC;IAED,MAAM,SAAS,GAAG,MAAM,cAAc,CACpC,cAAc,EACd,MAAM,CAAC,KAAK,EACZ,SAAS,EACT,OAAO,CACR,CAAC;IAEF,6CAA6C;IAC7C,MAAM,KAAK,GAAG,IAAI,GAAG,EAQlB,CAAC;IACJ,KAAK,MAAM,CAAC,IAAI,SAAS,EAAE,CAAC;QAC1B,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACjC,IAAI,KAAK,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAC5B,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,KAAK,GAAG;gBACN,OAAO,EAAE,IAAI,GAAG,EAAU;gBAC1B,OAAO,EAAE,IAAI,GAAG,EAAU;gBAC1B,SAAS,EAAE,CAAC,CAAC,UAAU;gBACvB,QAAQ,EAAE,IAAI,GAAG,EAAU;aAC5B,CAAC;YACF,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;QACzB,CAAC;QACD,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;QAC3B,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QAC5B,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAC1B,IAAI,CAAC,CAAC,UAAU,GAAG,KAAK,CAAC,SAAS;YAAE,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC,UAAU,CAAC;IACrE,CAAC;IAYD,MAAM,MAAM,GAAgB,EAAE,CAAC;IAC/B,MAAM,QAAQ,GAAkB,EAAE,CAAC;IAEnC,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;QACrB,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM
,GAAG,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAC5B,IAAI,CAAC,GAAG,EAAE,CAAC;YACT,QAAQ,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;YAC1B,SAAS;QACX,CAAC;QACD,MAAM,aAAa,GAAG,CAAC,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QACnD,MAAM,CAAC,IAAI,CAAC;YACV,GAAG,EAAE,CAAC;YACN,cAAc,EAAE,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,YAAY,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC,CAAC,MAAM;YAC5E,cAAc,EAAE,GAAG,CAAC,OAAO,CAAC,IAAI;YAChC,OAAO,EAAE,CAAC,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE;YAChC,SAAS,EAAE,GAAG,CAAC,SAAS;YACxB,cAAc,EAAE,aAAa;SAC9B,CAAC,CAAC;IACL,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,cAAc,GAAG,CAAC,CAAC,cAAc,IAAI,CAAC,CAAC,cAAc,GAAG,CAAC,CAAC,cAAc,CAAC,CAAC;IAElG,4EAA4E;IAC5E,qEAAqE;IACrE,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAEpD,OAAO;QACL,WAAW,EAAE,MAAM,CAAC,WAAW;QAC/B,MAAM,EAAE,cAAc;QACtB,KAAK,EAAE,MAAM,CAAC,KAAK;QACnB,UAAU,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACpC,UAAU,EAAE,IAAI,CAAC,MAAM;QACvB,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,QAAQ,EAAE,QAAQ,CAAC,MAAM;QACzB,YAAY,EAAE,IAAI,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE;QAC3F,kBAAkB,EAAE,SAAS,CAAC,MAAM;QACpC,WAAW,EAAE,MAAM;QACnB,aAAa,EAAE,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;QACrC,IAAI,EACF,sPAAsP;KACzP,CAAC;AACJ,CAAC"}
|
package/dist/types.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export type Engine = "perplexity" | "claude" | "openai" | "gemini" | "bing" | "brave" | "auto";
|
|
1
|
+
export type Engine = "perplexity" | "claude" | "openai" | "gemini" | "bing" | "brave" | "google_ai_mode" | "auto";
|
|
2
2
|
export type Citation = {
|
|
3
3
|
url: string;
|
|
4
4
|
title?: string;
|
|
@@ -19,7 +19,7 @@ export type AdapterResult = {
|
|
|
19
19
|
};
|
|
20
20
|
export type ToolError = {
|
|
21
21
|
type: "missing_key";
|
|
22
|
-
engine: Engine;
|
|
22
|
+
engine: Engine | "google_ai_mode";
|
|
23
23
|
env_var: string;
|
|
24
24
|
message: string;
|
|
25
25
|
} | {
|
package/dist/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,MAAM,GACd,YAAY,GACZ,QAAQ,GACR,QAAQ,GACR,QAAQ,GACR,MAAM,GACN,OAAO,GACP,MAAM,CAAC;AAEX,MAAM,MAAM,QAAQ,GAAG;IACrB,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB,CAAC;AAEF,MAAM,MAAM,wBAAwB,GAAG;IACrC,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,OAAO,CAAC;CACjB,CAAC;AAEF,MAAM,MAAM,aAAa,GAAG;IAC1B,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,SAAS,GACjB;IAAE,IAAI,EAAE,aAAa,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,MAAM,GACd,YAAY,GACZ,QAAQ,GACR,QAAQ,GACR,QAAQ,GACR,MAAM,GACN,OAAO,GACP,gBAAgB,GAChB,MAAM,CAAC;AAEX,MAAM,MAAM,QAAQ,GAAG;IACrB,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB,CAAC;AAEF,MAAM,MAAM,wBAAwB,GAAG;IACrC,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,OAAO,CAAC;CACjB,CAAC;AAEF,MAAM,MAAM,aAAa,GAAG;IAC1B,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,SAAS,GACjB;IAAE,IAAI,EAAE,aAAa,CAAC;IAAC,MAAM,EAAE,MAAM,GAAG,gBAAgB,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,GAC5F;IAAE,IAAI,EAAE,qBAAqB,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,GAChD;IAAE,IAAI,EAAE,aAAa,CAAC;IAAC,GAAG,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,GACtE;IAAE,IAAI,EAAE,cAAc,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,GACzD;IAAE,IAAI,EAAE,eAAe,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC"}
|