@pseolint/core 0.4.3 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270) hide show
  1. package/README.md +264 -169
  2. package/dist/ai/manifest/diff.d.ts +78 -0
  3. package/dist/ai/manifest/diff.d.ts.map +1 -0
  4. package/dist/ai/manifest/diff.js +139 -0
  5. package/dist/ai/manifest/diff.js.map +1 -0
  6. package/dist/ai/manifest/index.d.ts +18 -0
  7. package/dist/ai/manifest/index.d.ts.map +1 -0
  8. package/dist/ai/manifest/index.js +15 -0
  9. package/dist/ai/manifest/index.js.map +1 -0
  10. package/dist/ai/manifest/validate-manifest.d.ts +37 -0
  11. package/dist/ai/manifest/validate-manifest.d.ts.map +1 -0
  12. package/dist/ai/manifest/validate-manifest.js +67 -0
  13. package/dist/ai/manifest/validate-manifest.js.map +1 -0
  14. package/dist/ai/manifest/validators/domain-patches.d.ts +15 -0
  15. package/dist/ai/manifest/validators/domain-patches.d.ts.map +1 -0
  16. package/dist/ai/manifest/validators/domain-patches.js +110 -0
  17. package/dist/ai/manifest/validators/domain-patches.js.map +1 -0
  18. package/dist/ai/manifest/validators/index.d.ts +5 -0
  19. package/dist/ai/manifest/validators/index.d.ts.map +1 -0
  20. package/dist/ai/manifest/validators/index.js +4 -0
  21. package/dist/ai/manifest/validators/index.js.map +1 -0
  22. package/dist/ai/manifest/validators/page-changes.d.ts +36 -0
  23. package/dist/ai/manifest/validators/page-changes.d.ts.map +1 -0
  24. package/dist/ai/manifest/validators/page-changes.js +221 -0
  25. package/dist/ai/manifest/validators/page-changes.js.map +1 -0
  26. package/dist/ai/manifest/validators/types.d.ts +17 -0
  27. package/dist/ai/manifest/validators/types.d.ts.map +1 -0
  28. package/dist/ai/manifest/validators/types.js +5 -0
  29. package/dist/ai/manifest/validators/types.js.map +1 -0
  30. package/dist/ai/orchestrate.d.ts +74 -0
  31. package/dist/ai/orchestrate.d.ts.map +1 -0
  32. package/dist/ai/orchestrate.js +54 -0
  33. package/dist/ai/orchestrate.js.map +1 -0
  34. package/dist/ai/orchestrator/budget.d.ts +57 -0
  35. package/dist/ai/orchestrator/budget.d.ts.map +1 -0
  36. package/dist/ai/orchestrator/budget.js +114 -0
  37. package/dist/ai/orchestrator/budget.js.map +1 -0
  38. package/dist/ai/orchestrator/finish-tool.d.ts +568 -0
  39. package/dist/ai/orchestrator/finish-tool.d.ts.map +1 -0
  40. package/dist/ai/orchestrator/finish-tool.js +114 -0
  41. package/dist/ai/orchestrator/finish-tool.js.map +1 -0
  42. package/dist/ai/orchestrator/index.d.ts +25 -0
  43. package/dist/ai/orchestrator/index.d.ts.map +1 -0
  44. package/dist/ai/orchestrator/index.js +21 -0
  45. package/dist/ai/orchestrator/index.js.map +1 -0
  46. package/dist/ai/orchestrator/log.d.ts +24 -0
  47. package/dist/ai/orchestrator/log.d.ts.map +1 -0
  48. package/dist/ai/orchestrator/log.js +48 -0
  49. package/dist/ai/orchestrator/log.js.map +1 -0
  50. package/dist/ai/orchestrator/page-cache.d.ts +64 -0
  51. package/dist/ai/orchestrator/page-cache.d.ts.map +1 -0
  52. package/dist/ai/orchestrator/page-cache.js +127 -0
  53. package/dist/ai/orchestrator/page-cache.js.map +1 -0
  54. package/dist/ai/orchestrator/prompt.d.ts +16 -0
  55. package/dist/ai/orchestrator/prompt.d.ts.map +1 -0
  56. package/dist/ai/orchestrator/prompt.js +52 -0
  57. package/dist/ai/orchestrator/prompt.js.map +1 -0
  58. package/dist/ai/orchestrator/runner.d.ts +65 -0
  59. package/dist/ai/orchestrator/runner.d.ts.map +1 -0
  60. package/dist/ai/orchestrator/runner.js +223 -0
  61. package/dist/ai/orchestrator/runner.js.map +1 -0
  62. package/dist/ai/orchestrator/session.d.ts +44 -0
  63. package/dist/ai/orchestrator/session.d.ts.map +1 -0
  64. package/dist/ai/orchestrator/session.js +64 -0
  65. package/dist/ai/orchestrator/session.js.map +1 -0
  66. package/dist/ai/orchestrator/types.d.ts +99 -0
  67. package/dist/ai/orchestrator/types.d.ts.map +1 -0
  68. package/dist/ai/orchestrator/types.js +8 -0
  69. package/dist/ai/orchestrator/types.js.map +1 -0
  70. package/dist/ai/probes/cache.d.ts +12 -0
  71. package/dist/ai/probes/cache.d.ts.map +1 -0
  72. package/dist/ai/probes/cache.js +46 -0
  73. package/dist/ai/probes/cache.js.map +1 -0
  74. package/dist/ai/tools/ask-ai-engine.d.ts +77 -0
  75. package/dist/ai/tools/ask-ai-engine.d.ts.map +1 -0
  76. package/dist/ai/tools/ask-ai-engine.js +253 -0
  77. package/dist/ai/tools/ask-ai-engine.js.map +1 -0
  78. package/dist/ai/tools/check-domain-crawler-access.d.ts +71 -0
  79. package/dist/ai/tools/check-domain-crawler-access.d.ts.map +1 -0
  80. package/dist/ai/tools/check-domain-crawler-access.js +76 -0
  81. package/dist/ai/tools/check-domain-crawler-access.js.map +1 -0
  82. package/dist/ai/tools/check-domain-llms-txt.d.ts +70 -0
  83. package/dist/ai/tools/check-domain-llms-txt.d.ts.map +1 -0
  84. package/dist/ai/tools/check-domain-llms-txt.js +75 -0
  85. package/dist/ai/tools/check-domain-llms-txt.js.map +1 -0
  86. package/dist/ai/tools/check-indexability.d.ts +58 -0
  87. package/dist/ai/tools/check-indexability.d.ts.map +1 -0
  88. package/dist/ai/tools/check-indexability.js +64 -0
  89. package/dist/ai/tools/check-indexability.js.map +1 -0
  90. package/dist/ai/tools/check-robots.d.ts +68 -0
  91. package/dist/ai/tools/check-robots.d.ts.map +1 -0
  92. package/dist/ai/tools/check-robots.js +90 -0
  93. package/dist/ai/tools/check-robots.js.map +1 -0
  94. package/dist/ai/tools/check-rule-answer-first.d.ts +54 -0
  95. package/dist/ai/tools/check-rule-answer-first.d.ts.map +1 -0
  96. package/dist/ai/tools/check-rule-answer-first.js +50 -0
  97. package/dist/ai/tools/check-rule-answer-first.js.map +1 -0
  98. package/dist/ai/tools/check-rule-canonical-consistency.d.ts +66 -0
  99. package/dist/ai/tools/check-rule-canonical-consistency.d.ts.map +1 -0
  100. package/dist/ai/tools/check-rule-canonical-consistency.js +51 -0
  101. package/dist/ai/tools/check-rule-canonical-consistency.js.map +1 -0
  102. package/dist/ai/tools/check-rule-citable-facts.d.ts +58 -0
  103. package/dist/ai/tools/check-rule-citable-facts.d.ts.map +1 -0
  104. package/dist/ai/tools/check-rule-citable-facts.js +41 -0
  105. package/dist/ai/tools/check-rule-citable-facts.js.map +1 -0
  106. package/dist/ai/tools/check-rule-content-modularity.d.ts +58 -0
  107. package/dist/ai/tools/check-rule-content-modularity.d.ts.map +1 -0
  108. package/dist/ai/tools/check-rule-content-modularity.js +45 -0
  109. package/dist/ai/tools/check-rule-content-modularity.js.map +1 -0
  110. package/dist/ai/tools/check-rule-faq-coverage.d.ts +54 -0
  111. package/dist/ai/tools/check-rule-faq-coverage.d.ts.map +1 -0
  112. package/dist/ai/tools/check-rule-faq-coverage.js +39 -0
  113. package/dist/ai/tools/check-rule-faq-coverage.js.map +1 -0
  114. package/dist/ai/tools/check-rule-freshness-signals.d.ts +54 -0
  115. package/dist/ai/tools/check-rule-freshness-signals.d.ts.map +1 -0
  116. package/dist/ai/tools/check-rule-freshness-signals.js +45 -0
  117. package/dist/ai/tools/check-rule-freshness-signals.js.map +1 -0
  118. package/dist/ai/tools/check-rule-json-ld-valid.d.ts +54 -0
  119. package/dist/ai/tools/check-rule-json-ld-valid.d.ts.map +1 -0
  120. package/dist/ai/tools/check-rule-json-ld-valid.js +44 -0
  121. package/dist/ai/tools/check-rule-json-ld-valid.js.map +1 -0
  122. package/dist/ai/tools/check-rule-missing-author.d.ts +54 -0
  123. package/dist/ai/tools/check-rule-missing-author.d.ts.map +1 -0
  124. package/dist/ai/tools/check-rule-missing-author.js +45 -0
  125. package/dist/ai/tools/check-rule-missing-author.js.map +1 -0
  126. package/dist/ai/tools/check-rule-near-duplicate.d.ts +82 -0
  127. package/dist/ai/tools/check-rule-near-duplicate.d.ts.map +1 -0
  128. package/dist/ai/tools/check-rule-near-duplicate.js +63 -0
  129. package/dist/ai/tools/check-rule-near-duplicate.js.map +1 -0
  130. package/dist/ai/tools/check-rule-required-fields.d.ts +50 -0
  131. package/dist/ai/tools/check-rule-required-fields.d.ts.map +1 -0
  132. package/dist/ai/tools/check-rule-required-fields.js +38 -0
  133. package/dist/ai/tools/check-rule-required-fields.js.map +1 -0
  134. package/dist/ai/tools/check-rule-schema-consistency.d.ts +54 -0
  135. package/dist/ai/tools/check-rule-schema-consistency.d.ts.map +1 -0
  136. package/dist/ai/tools/check-rule-schema-consistency.js +44 -0
  137. package/dist/ai/tools/check-rule-schema-consistency.js.map +1 -0
  138. package/dist/ai/tools/check-rule-summary-bait.d.ts +54 -0
  139. package/dist/ai/tools/check-rule-summary-bait.d.ts.map +1 -0
  140. package/dist/ai/tools/check-rule-summary-bait.js +39 -0
  141. package/dist/ai/tools/check-rule-summary-bait.js.map +1 -0
  142. package/dist/ai/tools/check-rule-thin-content.d.ts +66 -0
  143. package/dist/ai/tools/check-rule-thin-content.d.ts.map +1 -0
  144. package/dist/ai/tools/check-rule-thin-content.js +58 -0
  145. package/dist/ai/tools/check-rule-thin-content.js.map +1 -0
  146. package/dist/ai/tools/detect-templates.d.ts +60 -0
  147. package/dist/ai/tools/detect-templates.d.ts.map +1 -0
  148. package/dist/ai/tools/detect-templates.js +43 -0
  149. package/dist/ai/tools/detect-templates.js.map +1 -0
  150. package/dist/ai/tools/fetch-page.d.ts +70 -0
  151. package/dist/ai/tools/fetch-page.d.ts.map +1 -0
  152. package/dist/ai/tools/fetch-page.js +93 -0
  153. package/dist/ai/tools/fetch-page.js.map +1 -0
  154. package/dist/ai/tools/fetch-sitemap.d.ts +60 -0
  155. package/dist/ai/tools/fetch-sitemap.d.ts.map +1 -0
  156. package/dist/ai/tools/fetch-sitemap.js +116 -0
  157. package/dist/ai/tools/fetch-sitemap.js.map +1 -0
  158. package/dist/ai/tools/index.d.ts +1555 -0
  159. package/dist/ai/tools/index.d.ts.map +1 -0
  160. package/dist/ai/tools/index.js +119 -0
  161. package/dist/ai/tools/index.js.map +1 -0
  162. package/dist/ai/tools/parse-page.d.ts +94 -0
  163. package/dist/ai/tools/parse-page.d.ts.map +1 -0
  164. package/dist/ai/tools/parse-page.js +108 -0
  165. package/dist/ai/tools/parse-page.js.map +1 -0
  166. package/dist/ai/tools/query-serp.d.ts +113 -0
  167. package/dist/ai/tools/query-serp.d.ts.map +1 -0
  168. package/dist/ai/tools/query-serp.js +131 -0
  169. package/dist/ai/tools/query-serp.js.map +1 -0
  170. package/dist/ai/tools/sample-template.d.ts +67 -0
  171. package/dist/ai/tools/sample-template.d.ts.map +1 -0
  172. package/dist/ai/tools/sample-template.js +75 -0
  173. package/dist/ai/tools/sample-template.js.map +1 -0
  174. package/dist/ai/tools/types.d.ts +73 -0
  175. package/dist/ai/tools/types.d.ts.map +1 -0
  176. package/dist/ai/tools/types.js +64 -0
  177. package/dist/ai/tools/types.js.map +1 -0
  178. package/dist/ai/tools/validate-jsonld.d.ts +62 -0
  179. package/dist/ai/tools/validate-jsonld.d.ts.map +1 -0
  180. package/dist/ai/tools/validate-jsonld.js +84 -0
  181. package/dist/ai/tools/validate-jsonld.js.map +1 -0
  182. package/dist/auditor.d.ts +4 -0
  183. package/dist/auditor.d.ts.map +1 -1
  184. package/dist/auditor.js +629 -64
  185. package/dist/auditor.js.map +1 -1
  186. package/dist/backpressure.d.ts.map +1 -1
  187. package/dist/backpressure.js +10 -3
  188. package/dist/backpressure.js.map +1 -1
  189. package/dist/enrich-findings.d.ts.map +1 -1
  190. package/dist/enrich-findings.js +15 -1
  191. package/dist/enrich-findings.js.map +1 -1
  192. package/dist/formatters/console.d.ts.map +1 -1
  193. package/dist/formatters/console.js +13 -0
  194. package/dist/formatters/console.js.map +1 -1
  195. package/dist/formatters/markdown.d.ts.map +1 -1
  196. package/dist/formatters/markdown.js +20 -2
  197. package/dist/formatters/markdown.js.map +1 -1
  198. package/dist/index.d.ts +12 -1
  199. package/dist/index.d.ts.map +1 -1
  200. package/dist/index.js +8 -0
  201. package/dist/index.js.map +1 -1
  202. package/dist/rule-references.d.ts.map +1 -1
  203. package/dist/rule-references.js +5 -0
  204. package/dist/rule-references.js.map +1 -1
  205. package/dist/rules/content/heading-structure.d.ts +21 -0
  206. package/dist/rules/content/heading-structure.d.ts.map +1 -0
  207. package/dist/rules/content/heading-structure.js +56 -0
  208. package/dist/rules/content/heading-structure.js.map +1 -0
  209. package/dist/rules/content/image-alt-text.d.ts +18 -0
  210. package/dist/rules/content/image-alt-text.d.ts.map +1 -0
  211. package/dist/rules/content/image-alt-text.js +77 -0
  212. package/dist/rules/content/image-alt-text.js.map +1 -0
  213. package/dist/rules/content/title-uniqueness.d.ts +18 -0
  214. package/dist/rules/content/title-uniqueness.d.ts.map +1 -0
  215. package/dist/rules/content/title-uniqueness.js +70 -0
  216. package/dist/rules/content/title-uniqueness.js.map +1 -0
  217. package/dist/rules/links/host-section-divergence.d.ts +3 -0
  218. package/dist/rules/links/host-section-divergence.d.ts.map +1 -0
  219. package/dist/rules/links/host-section-divergence.js +158 -0
  220. package/dist/rules/links/host-section-divergence.js.map +1 -0
  221. package/dist/rules/links/link-depth.d.ts +12 -1
  222. package/dist/rules/links/link-depth.d.ts.map +1 -1
  223. package/dist/rules/links/link-depth.js +25 -12
  224. package/dist/rules/links/link-depth.js.map +1 -1
  225. package/dist/rules/scope.d.ts.map +1 -1
  226. package/dist/rules/scope.js +5 -0
  227. package/dist/rules/scope.js.map +1 -1
  228. package/dist/rules/spam/doorway-pattern.d.ts.map +1 -1
  229. package/dist/rules/spam/doorway-pattern.js +27 -4
  230. package/dist/rules/spam/doorway-pattern.js.map +1 -1
  231. package/dist/rules/spam/publication-velocity.d.ts +1 -1
  232. package/dist/rules/spam/publication-velocity.d.ts.map +1 -1
  233. package/dist/rules/spam/publication-velocity.js +9 -4
  234. package/dist/rules/spam/publication-velocity.js.map +1 -1
  235. package/dist/rules/spam/template-coverage.js +1 -1
  236. package/dist/rules/spam/template-coverage.js.map +1 -1
  237. package/dist/rules/spam/template-diversity.js +1 -1
  238. package/dist/rules/spam/template-diversity.js.map +1 -1
  239. package/dist/rules/tech/hreflang-consistency.d.ts.map +1 -1
  240. package/dist/rules/tech/hreflang-consistency.js +33 -4
  241. package/dist/rules/tech/hreflang-consistency.js.map +1 -1
  242. package/dist/rules/tech/og-completeness.d.ts +11 -0
  243. package/dist/rules/tech/og-completeness.d.ts.map +1 -1
  244. package/dist/rules/tech/og-completeness.js +22 -23
  245. package/dist/rules/tech/og-completeness.js.map +1 -1
  246. package/dist/ruleset-version.d.ts +8 -0
  247. package/dist/ruleset-version.d.ts.map +1 -0
  248. package/dist/ruleset-version.js +8 -0
  249. package/dist/ruleset-version.js.map +1 -0
  250. package/dist/scrape-strategy.d.ts +42 -0
  251. package/dist/scrape-strategy.d.ts.map +1 -0
  252. package/dist/scrape-strategy.js +101 -0
  253. package/dist/scrape-strategy.js.map +1 -0
  254. package/dist/site-classifier.d.ts.map +1 -1
  255. package/dist/site-classifier.js +1 -0
  256. package/dist/site-classifier.js.map +1 -1
  257. package/dist/state.d.ts +36 -1
  258. package/dist/state.d.ts.map +1 -1
  259. package/dist/state.js +3 -1
  260. package/dist/state.js.map +1 -1
  261. package/dist/stratified-sample.d.ts +9 -1
  262. package/dist/stratified-sample.d.ts.map +1 -1
  263. package/dist/stratified-sample.js +23 -6
  264. package/dist/stratified-sample.js.map +1 -1
  265. package/dist/types.d.ts +135 -2
  266. package/dist/types.d.ts.map +1 -1
  267. package/dist/url-normalize.d.ts.map +1 -1
  268. package/dist/url-normalize.js +13 -1
  269. package/dist/url-normalize.js.map +1 -1
  270. package/package.json +90 -90
@@ -0,0 +1,93 @@
1
+ import { z } from "zod";
2
+ import { cachedFetch } from "../../cache.js";
3
+ import { currentPageCache } from "../orchestrator/page-cache.js";
4
+ import { validateTargetHost } from "../../ssrf-guard.js";
5
+ import { defineTool } from "./types.js";
6
// Field schemas for the `fetch_page` input, declared one by one so each
// constraint chain reads on its own before being assembled below.
const urlField = z.string().url().describe("Absolute http(s) URL to fetch.");

const timeoutMsField = z
    .number()
    .int()
    .positive()
    .max(30_000)
    .optional()
    .describe("Per-request timeout. Default 10s, max 30s.");

const cacheDirField = z
    .string()
    .optional()
    .describe("Directory to read/write cache entries. Omit to disable caching. Orchestrator typically passes a session-scoped temp dir.");

const cacheTtlMsField = z
    .number()
    .int()
    .nonnegative()
    .optional()
    .describe("Cache TTL for entries without ETag/Last-Modified validators. Default 5 minutes.");

/**
 * Input contract for the `fetch_page` tool. Only `url` is required; the
 * remaining fields are optional and the tool's `execute` supplies the
 * defaults for `timeoutMs` and `cacheTtlMs` when they are omitted.
 */
const inputSchema = z.object({
    url: urlField,
    timeoutMs: timeoutMsField,
    cacheDir: cacheDirField,
    cacheTtlMs: cacheTtlMsField,
});
26
// Shape of a successful `fetch_page` result, kept as a plain object so the
// per-field notes sit next to the field they describe.
const fetchPageOutputShape = {
    url: z.string().describe("Final URL after following up to 10 redirects."),
    status: z.number().int(),
    headers: z.record(z.string(), z.string()),
    // Handle for the cached page body. Downstream tools (parse_page,
    // check_rule_*, validate_jsonld, check_indexability, ...) take this id
    // rather than the HTML, so the markup never enters the LLM conversation
    // and token use stays bounded as more pages are pulled.
    pageId: z.string(),
    // Leading 500 characters of the body, a sniff aid only. Rule decisions
    // should NOT be based on this excerpt.
    bodyExcerpt: z.string(),
    fromCache: z.boolean(),
    bodyBytes: z.number().int().nonnegative(),
};

/** Output contract for the `fetch_page` tool. */
const outputSchema = z.object(fetchPageOutputShape);
42
/**
 * SSRF-guarded HTTP fetch with optional disk cache. Wraps the existing
 * `cachedFetch` primitive plus `validateTargetHost` so private/reserved IPs
 * are rejected on every redirect hop.
 *
 * Returned as a tool to the orchestrator — typically the first call the
 * model makes when auditing a domain.
 *
 * Input (see `inputSchema`):
 *   - `url`        absolute http(s) URL to fetch
 *   - `timeoutMs`  per-request timeout, defaults to 10 000 ms
 *   - `cacheDir`   disk-cache directory; when omitted, `cache: null` is passed
 *                  to `cachedFetch`
 *   - `cacheTtlMs` TTL handed to the disk cache, defaults to 300 000 ms
 *
 * Output (see `outputSchema`): final URL, status, headers, the `pageId` under
 * which the body was stored in the in-scope page cache, a 500-character
 * excerpt, the `fromCache` flag and the body size in bytes.
 *
 * Throws when no page cache is in scope, when a hop URL is unparsable or not
 * http(s), and whenever `validateTargetHost` or `cachedFetch` reject.
 */
export const fetchPageTool = defineTool({
    name: "fetch_page",
    description: "Fetch the HTML for a single URL with SSRF protection and disk caching. Follows up to 10 redirects. Returns the final URL, status, headers, and a `pageId` reference to the cached body — pass that pageId into parse_page / check_rule_* / validate_jsonld / check_indexability instead of repassing HTML. The HTML itself never travels through tool inputs, keeping token consumption bounded. Errors (private IP, redirect loop, timeout) come back as tool errors — keep going with another URL.",
    inputSchema,
    outputSchema,
    async execute({ url, timeoutMs = 10_000, cacheDir, cacheTtlMs = 300_000 }, ctx) {
        // Fail fast: without a page cache the body has nowhere to go, so do
        // not spend a network round-trip before reporting the wiring error.
        const cache = currentPageCache();
        if (!cache) {
            throw new Error("fetch_page: no page cache in scope (orchestrator runner must wrap generateText in withPageCache)");
        }
        const validateHop = async (hopUrl) => {
            let parsed;
            try {
                parsed = new URL(hopUrl);
            }
            catch {
                throw new Error(`fetch_page: invalid URL ${hopUrl}`);
            }
            // `z.string().url()` accepts any scheme; the tool is documented as
            // http(s)-only, so refuse everything else before the host check.
            if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
                throw new Error(`fetch_page: unsupported protocol ${parsed.protocol} in ${hopUrl}`);
            }
            await validateTargetHost(parsed.hostname);
        };
        const result = await cachedFetch(url, {
            timeoutMs,
            cache: cacheDir ? { dir: cacheDir, ttlMs: cacheTtlMs } : null,
            validateHop,
            signal: ctx?.signal,
        });
        const pageId = cache.put({
            url: result.url,
            html: result.body,
            status: result.status,
            headers: result.headers,
        });
        return {
            url: result.url,
            status: result.status,
            headers: result.headers,
            pageId,
            bodyExcerpt: result.body.slice(0, 500),
            fromCache: result.fromCache,
            // `String.length` counts UTF-16 code units, not bytes; report the
            // UTF-8 size of the decoded body so the field matches its name.
            bodyBytes: Buffer.byteLength(result.body, "utf8"),
        };
    },
});
93
+ //# sourceMappingURL=fetch-page.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch-page.js","sourceRoot":"","sources":["../../../src/ai/tools/fetch-page.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,+BAA+B,CAAC;AACjE,OAAO,EAAE,kBAAkB,EAAE,MAAM,qBAAqB,CAAC;AACzD,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAExC,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC;IAC3B,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,gCAAgC,CAAC;IAChE,SAAS,EAAE,CAAC;SACT,MAAM,EAAE;SACR,GAAG,EAAE;SACL,QAAQ,EAAE;SACV,GAAG,CAAC,MAAM,CAAC;SACX,QAAQ,EAAE;SACV,QAAQ,CAAC,4CAA4C,CAAC;IACzD,QAAQ,EAAE,CAAC;SACR,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,CACP,0HAA0H,CAC3H;IACH,UAAU,EAAE,CAAC;SACV,MAAM,EAAE;SACR,GAAG,EAAE;SACL,WAAW,EAAE;SACb,QAAQ,EAAE;SACV,QAAQ,CAAC,iFAAiF,CAAC;CAC/F,CAAC,CAAC;AAEH,MAAM,YAAY,GAAG,CAAC,CAAC,MAAM,CAAC;IAC5B,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,+CAA+C,CAAC;IACzE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE;IACxB,OAAO,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC;IACzC;;;;;OAKG;IACH,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE;IAClB,+HAA+H;IAC/H,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE;IACvB,SAAS,EAAE,CAAC,CAAC,OAAO,EAAE;IACtB,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,WAAW,EAAE;CAC1C,CAAC,CAAC;AAEH;;;;;;;GAOG;AACH,MAAM,CAAC,MAAM,aAAa,GAAG,UAAU,CAAC;IACtC,IAAI,EAAE,YAAY;IAClB,WAAW,EACT,seAAse;IACxe,WAAW;IACX,YAAY;IACZ,KAAK,CAAC,OAAO,CAAC,EAAE,GAAG,EAAE,SAAS,GAAG,MAAM,EAAE,QAAQ,EAAE,UAAU,GAAG,OAAO,EAAE,EAAE,GAAG;QAC5E,MAAM,WAAW,GAAG,KAAK,EAAE,MAAc,EAAiB,EAAE;YAC1D,IAAI,IAAY,CAAC;YACjB,IAAI,CAAC;gBACH,IAAI,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC;YAClC,CAAC;YAAC,MAAM,CAAC;gBACP,MAAM,IAAI,KAAK,CAAC,2BAA2B,MAAM,EAAE,CAAC,CAAC;YACvD,CAAC;YACD,MAAM,kBAAkB,CAAC,IAAI,CAAC,CAAC;QACjC,CAAC,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,GAAG,EAAE;YACpC,SAAS;YACT,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC,CAAC,CAAC,IAAI;YAC7D,WAAW;YACX,MAAM,EAAE,GAAG,EAAE,MAAM;SACpB,CAAC,CAAC;QAEH,MAAM,KAAK,GAAG,gBAAgB,EAAE
,CAAC;QACjC,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,MAAM,IAAI,KAAK,CACb,kGAAkG,CACnG,CAAC;QACJ,CAAC;QACD,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC;YACvB,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,OAAO,EAAE,MAAM,CAAC,OAAO;SACxB,CAAC,CAAC;QAEH,OAAO;YACL,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,OAAO,EAAE,MAAM,CAAC,OAAO;YACvB,MAAM;YACN,WAAW,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;YACtC,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,SAAS,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM;SAC9B,CAAC;IACJ,CAAC;CACF,CAAC,CAAC"}
@@ -0,0 +1,60 @@
1
+ import { z } from "zod";
2
+ /**
3
+ * Fetch + parse sitemap.xml, optionally recursing into a sitemap index. Caps
4
+ * the URL list at `maxUrls` so a 50K-URL sitemap doesn't blow up the model
5
+ * context. Reports `truncated: true` when the cap fires — the LLM should then
6
+ * call `sample_template` with the returned subset.
+ *
+ * Declared members of the tool object:
+ * - `name` / `description`: plain strings.
+ * - `inputSchema`: zod type whose value has a required `sitemapUrl` string and
+ *   optional numeric `maxUrls`, `maxDepth` and `timeoutMs`.
+ * - `outputSchema`: zod type whose value has `rootUrl`, `urlCount`, `urls`,
+ *   `truncated` and `childSitemaps`.
+ * - `toAiTool()`: returns a `Tool` from the `ai` package, typed with the input
+ *   shape and a `ToolResult` wrapping the output shape.
+ * - `run(input, ctx?)`: takes the input shape plus an optional
+ *   `ToolExecuteContext` and resolves to a `ToolResult` wrapping the output
+ *   shape.
7
+ */
8
+ export declare const fetchSitemapTool: {
9
+ name: string;
10
+ description: string;
11
+ inputSchema: z.ZodType<{
12
+ sitemapUrl: string;
13
+ maxUrls?: number | undefined;
14
+ maxDepth?: number | undefined;
15
+ timeoutMs?: number | undefined;
16
+ }, unknown, z.core.$ZodTypeInternals<{
17
+ sitemapUrl: string;
18
+ maxUrls?: number | undefined;
19
+ maxDepth?: number | undefined;
20
+ timeoutMs?: number | undefined;
21
+ }, unknown>>;
22
+ outputSchema: z.ZodType<{
23
+ rootUrl: string;
24
+ urlCount: number;
25
+ urls: string[];
26
+ truncated: boolean;
27
+ childSitemaps: string[];
28
+ }, unknown, z.core.$ZodTypeInternals<{
29
+ rootUrl: string;
30
+ urlCount: number;
31
+ urls: string[];
32
+ truncated: boolean;
33
+ childSitemaps: string[];
34
+ }, unknown>>;
35
+ toAiTool(): import("ai").Tool<{
36
+ sitemapUrl: string;
37
+ maxUrls?: number | undefined;
38
+ maxDepth?: number | undefined;
39
+ timeoutMs?: number | undefined;
40
+ }, import("./types.js").ToolResult<{
41
+ rootUrl: string;
42
+ urlCount: number;
43
+ urls: string[];
44
+ truncated: boolean;
45
+ childSitemaps: string[];
46
+ }>>;
47
+ run(input: {
48
+ sitemapUrl: string;
49
+ maxUrls?: number | undefined;
50
+ maxDepth?: number | undefined;
51
+ timeoutMs?: number | undefined;
52
+ }, ctx?: import("./types.js").ToolExecuteContext): Promise<import("./types.js").ToolResult<{
53
+ rootUrl: string;
54
+ urlCount: number;
55
+ urls: string[];
56
+ truncated: boolean;
57
+ childSitemaps: string[];
58
+ }>>;
59
+ };
60
+ //# sourceMappingURL=fetch-sitemap.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch-sitemap.d.ts","sourceRoot":"","sources":["../../../src/ai/tools/fetch-sitemap.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAqExB;;;;;GAKG;AACH,eAAO,MAAM,gBAAgB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA4C3B,CAAC"}
@@ -0,0 +1,116 @@
1
+ import { z } from "zod";
2
+ import { cachedFetch } from "../../cache.js";
3
+ import { validateTargetHost } from "../../ssrf-guard.js";
4
+ import { defineTool } from "./types.js";
5
/**
 * Input contract for the `fetch_sitemap` tool. Only `sitemapUrl` is required;
 * the tool's `execute` supplies the defaults for the optional fields
 * (5000 URLs, depth 1, 15 s timeout).
 */
const inputSchema = z.object({
    sitemapUrl: z
        .string()
        .url()
        .describe("Absolute URL of the sitemap.xml (or sitemap index)."),
    maxUrls: z
        .number()
        .int()
        .positive()
        .max(50_000)
        .optional()
        .describe("Hard cap on URLs returned. Default 5000."),
    maxDepth: z
        .number()
        .int()
        .nonnegative()
        .max(3)
        .optional()
        .describe("Maximum sitemap-index recursion depth. Default 1 (root + one level of nested sitemaps)."),
    // Described like every other field so the default and the cap are visible
    // to whoever reads the schema, not only to readers of `execute`.
    timeoutMs: z
        .number()
        .int()
        .positive()
        .max(30_000)
        .optional()
        .describe("Per-request timeout. Default 15s, max 30s."),
});
26
// Both list-valued fields are arrays of plain strings.
const stringList = () => z.array(z.string());

/**
 * Output contract for the `fetch_sitemap` tool: the requested root URL, the
 * collected page URLs with their count, the truncation flag and the child
 * sitemap URLs that were discovered.
 */
const outputSchema = z.object({
    rootUrl: z.string(),
    urlCount: z.number().int().nonnegative(),
    urls: stringList(),
    truncated: z.boolean().describe("True when output was capped by maxUrls."),
    childSitemaps: stringList().describe("Discovered child sitemap URLs (whether followed or not)."),
});
33
// Matches every <loc>…</loc> element, case-insensitively, across line breaks.
const LOC_RE = /<loc>([\s\S]*?)<\/loc>/gi;

// The five predefined XML entities; sitemap URLs must use them for & < > " '.
const XML_ENTITIES = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'" };

/**
 * Turn the raw text content of an XML element into the string it encodes:
 * unwraps a single CDATA section, otherwise resolves predefined entities and
 * numeric character references in one pass (so `&amp;lt;` becomes `&lt;`,
 * not `<`).
 */
function decodeXmlText(raw) {
    const cdata = /^<!\[CDATA\[([\s\S]*?)\]\]>$/.exec(raw);
    if (cdata) {
        // CDATA content is literal; entities inside it must not be decoded.
        return cdata[1].trim();
    }
    return raw.replace(/&(#x[0-9a-fA-F]+|#[0-9]+|amp|lt|gt|quot|apos);/g, (whole, ref) => {
        if (ref[0] !== "#") {
            return XML_ENTITIES[ref];
        }
        const codePoint = ref[1] === "x"
            ? Number.parseInt(ref.slice(2), 16)
            : Number.parseInt(ref.slice(1), 10);
        // Leave out-of-range references untouched instead of throwing.
        return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
    });
}

/**
 * Collect the decoded contents of every `<loc>` element in `xml`.
 *
 * @param {string} xml - Raw sitemap or sitemap-index document.
 * @returns {string[]} Trimmed, entity-decoded `<loc>` values in document
 *   order. Empty `<loc>` elements are skipped.
 */
function extractLocs(xml) {
    const out = [];
    // matchAll works on a private copy of the regex, so the shared /g
    // pattern's lastIndex is never touched and needs no manual reset.
    for (const match of xml.matchAll(LOC_RE)) {
        const value = decodeXmlText(match[1].trim());
        if (value !== "") {
            out.push(value);
        }
    }
    return out;
}
43
/**
 * Report whether `xml` contains an opening `<sitemapindex` tag, i.e. the tag
 * name followed by whitespace or `>`, matched case-insensitively.
 *
 * @param {string} xml - Raw XML document text.
 * @returns {boolean} True when such a tag occurs anywhere in the text.
 */
function isSitemapIndex(xml) {
    const openingTag = /<sitemapindex[\s>]/i;
    return openingTag.exec(xml) !== null;
}
46
/**
 * Download one sitemap document and return its body text.
 *
 * The request goes through `cachedFetch` with the disk cache disabled
 * (`cache: null`). A `validateHop` callback is handed to `cachedFetch`; it
 * rejects unparsable URLs, non-http(s) schemes, and hosts that
 * `validateTargetHost` refuses.
 *
 * @param {string} url - Sitemap URL to download.
 * @param {number} timeoutMs - Per-request timeout forwarded to `cachedFetch`.
 * @param {AbortSignal} [signal] - Optional abort signal forwarded to `cachedFetch`.
 * @returns {Promise<string>} The response body.
 * @throws {Error} When a hop fails validation, when `cachedFetch` rejects, or
 *   when the final status is 400 or above.
 */
async function fetchXml(url, timeoutMs, signal) {
    const validateHop = async (hopUrl) => {
        let parsed;
        try {
            parsed = new URL(hopUrl);
        }
        catch {
            throw new Error(`fetch_sitemap: invalid URL ${hopUrl}`);
        }
        // Sitemaps are served over http(s); refuse any other scheme before
        // the host is even looked at.
        if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
            throw new Error(`fetch_sitemap: unsupported protocol ${parsed.protocol} in ${hopUrl}`);
        }
        await validateTargetHost(parsed.hostname);
    };
    const result = await cachedFetch(url, { timeoutMs, cache: null, validateHop, signal });
    if (result.status >= 400) {
        throw new Error(`fetch_sitemap: ${url} returned status ${result.status}`);
    }
    return result.body;
}
63
/**
 * Fetch + parse sitemap.xml, recursing into sitemap indexes up to `maxDepth`
 * levels. Caps the URL list at `maxUrls` so a 50K-URL sitemap doesn't blow up
 * the model context.
 *
 * Input (see `inputSchema`): `sitemapUrl`, plus optional `maxUrls`
 * (default 5000), `maxDepth` (default 1) and `timeoutMs` (default 15 000 ms).
 *
 * Output (see `outputSchema`):
 *   - `rootUrl`       the `sitemapUrl` that was requested
 *   - `urls`          page URLs gathered from URL-set documents, in discovery
 *                     order, at most `maxUrls` of them
 *   - `urlCount`      `urls.length`
 *   - `truncated`     true only when the cap actually cut something off: a
 *                     document held more URLs than there was room for, or
 *                     child sitemaps were left unfetched because the cap had
 *                     been reached
 *   - `childSitemaps` every distinct sitemap URL listed by an index document,
 *                     whether it was followed or not
 *
 * A failure to fetch the root sitemap rejects the call. Failures on child
 * sitemaps are skipped so partial results are still returned, unless the
 * caller's abort signal has fired, in which case the error is rethrown.
 */
export const fetchSitemapTool = defineTool({
    name: "fetch_sitemap",
    description: "Fetch a sitemap.xml and return its URL list. Handles sitemap-index recursion (one level by default). Capped at 5000 URLs by default — use detect_templates + sample_template afterwards to pick a representative sample for auditing. Returns `truncated: true` when capped.",
    inputSchema,
    outputSchema,
    async execute({ sitemapUrl, maxUrls = 5000, maxDepth = 1, timeoutMs = 15_000 }, ctx) {
        const signal = ctx?.signal;
        const urls = [];
        const childSitemaps = [];
        // `listed` de-duplicates childSitemaps; `fetched` stops cycles and
        // repeat downloads when indexes reference each other.
        const listed = new Set();
        const fetched = new Set([sitemapUrl]);
        let truncated = false;

        // Processes one already-downloaded document found at `depth`
        // (0 = root) and, for indexes, descends while depth < maxDepth.
        const walk = async (xml, depth) => {
            const locs = extractLocs(xml);
            if (!isSitemapIndex(xml)) {
                for (const loc of locs) {
                    if (urls.length >= maxUrls) {
                        truncated = true;
                        return;
                    }
                    urls.push(loc);
                }
                return;
            }
            // Index document: its <loc> entries are sitemaps, never pages.
            for (const loc of locs) {
                if (!listed.has(loc)) {
                    listed.add(loc);
                    childSitemaps.push(loc);
                }
            }
            if (depth >= maxDepth) {
                return;
            }
            for (const child of locs) {
                if (fetched.has(child)) {
                    continue;
                }
                if (urls.length >= maxUrls) {
                    // Cap reached with sitemaps still unread: the result may
                    // be incomplete, so say so.
                    truncated = true;
                    return;
                }
                fetched.add(child);
                let childXml;
                try {
                    childXml = await fetchXml(child, timeoutMs, signal);
                }
                catch (err) {
                    // An aborted run must stop instead of failing through
                    // every remaining child one by one.
                    if (signal?.aborted) {
                        throw err;
                    }
                    // Skip unreachable child sitemaps; root sitemap may be
                    // misconfigured but we want to return whatever we did get.
                    continue;
                }
                await walk(childXml, depth + 1);
                if (truncated) {
                    return;
                }
            }
        };

        const rootXml = await fetchXml(sitemapUrl, timeoutMs, signal);
        await walk(rootXml, 0);
        return {
            rootUrl: sitemapUrl,
            urlCount: urls.length,
            urls,
            truncated,
            childSitemaps,
        };
    },
});
116
+ //# sourceMappingURL=fetch-sitemap.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch-sitemap.js","sourceRoot":"","sources":["../../../src/ai/tools/fetch-sitemap.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAC7C,OAAO,EAAE,kBAAkB,EAAE,MAAM,qBAAqB,CAAC;AACzD,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAExC,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC;IAC3B,UAAU,EAAE,CAAC;SACV,MAAM,EAAE;SACR,GAAG,EAAE;SACL,QAAQ,CAAC,qDAAqD,CAAC;IAClE,OAAO,EAAE,CAAC;SACP,MAAM,EAAE;SACR,GAAG,EAAE;SACL,QAAQ,EAAE;SACV,GAAG,CAAC,MAAM,CAAC;SACX,QAAQ,EAAE;SACV,QAAQ,CAAC,0CAA0C,CAAC;IACvD,QAAQ,EAAE,CAAC;SACR,MAAM,EAAE;SACR,GAAG,EAAE;SACL,WAAW,EAAE;SACb,GAAG,CAAC,CAAC,CAAC;SACN,QAAQ,EAAE;SACV,QAAQ,CAAC,yFAAyF,CAAC;IACtG,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE;CAC9D,CAAC,CAAC;AAEH,MAAM,YAAY,GAAG,CAAC,CAAC,MAAM,CAAC;IAC5B,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE;IACnB,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,WAAW,EAAE;IACxC,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;IACzB,SAAS,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,yCAAyC,CAAC;IAC1E,aAAa,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,CAAC,0DAA0D,CAAC;CACxG,CAAC,CAAC;AAEH,MAAM,MAAM,GAAG,0BAA0B,CAAC;AAE1C,SAAS,WAAW,CAAC,GAAW;IAC9B,MAAM,GAAG,GAAa,EAAE,CAAC;IACzB,IAAI,KAA6B,CAAC;IAClC,OAAO,CAAC,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC3C,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAC5B,CAAC;IACD,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC;IACrB,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,cAAc,CAAC,GAAW;IACjC,OAAO,qBAAqB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzC,CAAC;AAED,KAAK,UAAU,QAAQ,CAAC,GAAW,EAAE,SAAiB,EAAE,MAAoB;IAC1E,MAAM,WAAW,GAAG,KAAK,EAAE,MAAc,EAAiB,EAAE;QAC1D,IAAI,IAAY,CAAC;QACjB,IAAI,CAAC;YACH,IAAI,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC;QAClC,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,IAAI,KAAK,CAAC,8BAA8B,MAAM,EAAE,CAAC,CAAC;QAC1D,CAAC;QACD,MAAM,kBAAkB,CAAC,IAAI,CAAC,CAAC;IACjC,CAAC,CAAC;IAEF,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,KAAK
,EAAE,IAAI,EAAE,WAAW,EAAE,MAAM,EAAE,CAAC,CAAC;IACvF,IAAI,MAAM,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;QACzB,MAAM,IAAI,KAAK,CAAC,kBAAkB,GAAG,oBAAoB,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;IAC5E,CAAC;IACD,OAAO,MAAM,CAAC,IAAI,CAAC;AACrB,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,MAAM,gBAAgB,GAAG,UAAU,CAAC;IACzC,IAAI,EAAE,eAAe;IACrB,WAAW,EACT,8QAA8Q;IAChR,WAAW;IACX,YAAY;IACZ,KAAK,CAAC,OAAO,CAAC,EAAE,UAAU,EAAE,OAAO,GAAG,IAAI,EAAE,QAAQ,GAAG,CAAC,EAAE,SAAS,GAAG,MAAM,EAAE,EAAE,GAAG;QACjF,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,UAAU,EAAE,SAAS,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QACnE,MAAM,aAAa,GAAa,EAAE,CAAC;QACnC,MAAM,IAAI,GAAa,EAAE,CAAC;QAE1B,IAAI,cAAc,CAAC,OAAO,CAAC,IAAI,QAAQ,GAAG,CAAC,EAAE,CAAC;YAC5C,MAAM,SAAS,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;YACvC,aAAa,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;YACjC,KAAK,MAAM,KAAK,IAAI,SAAS,EAAE,CAAC;gBAC9B,IAAI,IAAI,CAAC,MAAM,IAAI,OAAO;oBAAE,MAAM;gBAClC,IAAI,CAAC;oBACH,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,KAAK,EAAE,SAAS,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;oBAC/D,MAAM,SAAS,GAAG,WAAW,CAAC,QAAQ,CAAC,CAAC;oBACxC,KAAK,MAAM,CAAC,IAAI,SAAS,EAAE,CAAC;wBAC1B,IAAI,IAAI,CAAC,MAAM,IAAI,OAAO;4BAAE,MAAM;wBAClC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;oBACf,CAAC;gBACH,CAAC;gBAAC,MAAM,CAAC;oBACP,qEAAqE;oBACrE,6CAA6C;gBAC/C,CAAC;YACH,CAAC;QACH,CAAC;aAAM,CAAC;YACN,MAAM,IAAI,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;YAClC,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;gBACrB,IAAI,IAAI,CAAC,MAAM,IAAI,OAAO;oBAAE,MAAM;gBAClC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACf,CAAC;QACH,CAAC;QAED,OAAO;YACL,OAAO,EAAE,UAAU;YACnB,QAAQ,EAAE,IAAI,CAAC,MAAM;YACrB,IAAI;YACJ,SAAS,EAAE,IAAI,CAAC,MAAM,IAAI,OAAO;YACjC,aAAa;SACd,CAAC;IACJ,CAAC;CACF,CAAC,CAAC"}