@pseolint/core 0.4.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (321)
  1. package/README.md +264 -169
  2. package/dist/ai/manifest/diff.d.ts +78 -0
  3. package/dist/ai/manifest/diff.d.ts.map +1 -0
  4. package/dist/ai/manifest/diff.js +139 -0
  5. package/dist/ai/manifest/diff.js.map +1 -0
  6. package/dist/ai/manifest/index.d.ts +18 -0
  7. package/dist/ai/manifest/index.d.ts.map +1 -0
  8. package/dist/ai/manifest/index.js +15 -0
  9. package/dist/ai/manifest/index.js.map +1 -0
  10. package/dist/ai/manifest/validate-manifest.d.ts +37 -0
  11. package/dist/ai/manifest/validate-manifest.d.ts.map +1 -0
  12. package/dist/ai/manifest/validate-manifest.js +67 -0
  13. package/dist/ai/manifest/validate-manifest.js.map +1 -0
  14. package/dist/ai/manifest/validators/domain-patches.d.ts +15 -0
  15. package/dist/ai/manifest/validators/domain-patches.d.ts.map +1 -0
  16. package/dist/ai/manifest/validators/domain-patches.js +110 -0
  17. package/dist/ai/manifest/validators/domain-patches.js.map +1 -0
  18. package/dist/ai/manifest/validators/index.d.ts +5 -0
  19. package/dist/ai/manifest/validators/index.d.ts.map +1 -0
  20. package/dist/ai/manifest/validators/index.js +4 -0
  21. package/dist/ai/manifest/validators/index.js.map +1 -0
  22. package/dist/ai/manifest/validators/page-changes.d.ts +36 -0
  23. package/dist/ai/manifest/validators/page-changes.d.ts.map +1 -0
  24. package/dist/ai/manifest/validators/page-changes.js +221 -0
  25. package/dist/ai/manifest/validators/page-changes.js.map +1 -0
  26. package/dist/ai/manifest/validators/types.d.ts +17 -0
  27. package/dist/ai/manifest/validators/types.d.ts.map +1 -0
  28. package/dist/ai/manifest/validators/types.js +5 -0
  29. package/dist/ai/manifest/validators/types.js.map +1 -0
  30. package/dist/ai/orchestrate.d.ts +74 -0
  31. package/dist/ai/orchestrate.d.ts.map +1 -0
  32. package/dist/ai/orchestrate.js +54 -0
  33. package/dist/ai/orchestrate.js.map +1 -0
  34. package/dist/ai/orchestrator/budget.d.ts +57 -0
  35. package/dist/ai/orchestrator/budget.d.ts.map +1 -0
  36. package/dist/ai/orchestrator/budget.js +114 -0
  37. package/dist/ai/orchestrator/budget.js.map +1 -0
  38. package/dist/ai/orchestrator/finish-tool.d.ts +568 -0
  39. package/dist/ai/orchestrator/finish-tool.d.ts.map +1 -0
  40. package/dist/ai/orchestrator/finish-tool.js +114 -0
  41. package/dist/ai/orchestrator/finish-tool.js.map +1 -0
  42. package/dist/ai/orchestrator/index.d.ts +25 -0
  43. package/dist/ai/orchestrator/index.d.ts.map +1 -0
  44. package/dist/ai/orchestrator/index.js +21 -0
  45. package/dist/ai/orchestrator/index.js.map +1 -0
  46. package/dist/ai/orchestrator/log.d.ts +24 -0
  47. package/dist/ai/orchestrator/log.d.ts.map +1 -0
  48. package/dist/ai/orchestrator/log.js +48 -0
  49. package/dist/ai/orchestrator/log.js.map +1 -0
  50. package/dist/ai/orchestrator/page-cache.d.ts +64 -0
  51. package/dist/ai/orchestrator/page-cache.d.ts.map +1 -0
  52. package/dist/ai/orchestrator/page-cache.js +127 -0
  53. package/dist/ai/orchestrator/page-cache.js.map +1 -0
  54. package/dist/ai/orchestrator/prompt.d.ts +16 -0
  55. package/dist/ai/orchestrator/prompt.d.ts.map +1 -0
  56. package/dist/ai/orchestrator/prompt.js +52 -0
  57. package/dist/ai/orchestrator/prompt.js.map +1 -0
  58. package/dist/ai/orchestrator/runner.d.ts +65 -0
  59. package/dist/ai/orchestrator/runner.d.ts.map +1 -0
  60. package/dist/ai/orchestrator/runner.js +223 -0
  61. package/dist/ai/orchestrator/runner.js.map +1 -0
  62. package/dist/ai/orchestrator/session.d.ts +44 -0
  63. package/dist/ai/orchestrator/session.d.ts.map +1 -0
  64. package/dist/ai/orchestrator/session.js +64 -0
  65. package/dist/ai/orchestrator/session.js.map +1 -0
  66. package/dist/ai/orchestrator/types.d.ts +99 -0
  67. package/dist/ai/orchestrator/types.d.ts.map +1 -0
  68. package/dist/ai/orchestrator/types.js +8 -0
  69. package/dist/ai/orchestrator/types.js.map +1 -0
  70. package/dist/ai/probes/cache.d.ts +12 -0
  71. package/dist/ai/probes/cache.d.ts.map +1 -0
  72. package/dist/ai/probes/cache.js +46 -0
  73. package/dist/ai/probes/cache.js.map +1 -0
  74. package/dist/ai/tools/ask-ai-engine.d.ts +77 -0
  75. package/dist/ai/tools/ask-ai-engine.d.ts.map +1 -0
  76. package/dist/ai/tools/ask-ai-engine.js +253 -0
  77. package/dist/ai/tools/ask-ai-engine.js.map +1 -0
  78. package/dist/ai/tools/check-domain-crawler-access.d.ts +71 -0
  79. package/dist/ai/tools/check-domain-crawler-access.d.ts.map +1 -0
  80. package/dist/ai/tools/check-domain-crawler-access.js +76 -0
  81. package/dist/ai/tools/check-domain-crawler-access.js.map +1 -0
  82. package/dist/ai/tools/check-domain-llms-txt.d.ts +70 -0
  83. package/dist/ai/tools/check-domain-llms-txt.d.ts.map +1 -0
  84. package/dist/ai/tools/check-domain-llms-txt.js +75 -0
  85. package/dist/ai/tools/check-domain-llms-txt.js.map +1 -0
  86. package/dist/ai/tools/check-indexability.d.ts +58 -0
  87. package/dist/ai/tools/check-indexability.d.ts.map +1 -0
  88. package/dist/ai/tools/check-indexability.js +64 -0
  89. package/dist/ai/tools/check-indexability.js.map +1 -0
  90. package/dist/ai/tools/check-robots.d.ts +68 -0
  91. package/dist/ai/tools/check-robots.d.ts.map +1 -0
  92. package/dist/ai/tools/check-robots.js +90 -0
  93. package/dist/ai/tools/check-robots.js.map +1 -0
  94. package/dist/ai/tools/check-rule-answer-first.d.ts +54 -0
  95. package/dist/ai/tools/check-rule-answer-first.d.ts.map +1 -0
  96. package/dist/ai/tools/check-rule-answer-first.js +50 -0
  97. package/dist/ai/tools/check-rule-answer-first.js.map +1 -0
  98. package/dist/ai/tools/check-rule-canonical-consistency.d.ts +66 -0
  99. package/dist/ai/tools/check-rule-canonical-consistency.d.ts.map +1 -0
  100. package/dist/ai/tools/check-rule-canonical-consistency.js +51 -0
  101. package/dist/ai/tools/check-rule-canonical-consistency.js.map +1 -0
  102. package/dist/ai/tools/check-rule-citable-facts.d.ts +58 -0
  103. package/dist/ai/tools/check-rule-citable-facts.d.ts.map +1 -0
  104. package/dist/ai/tools/check-rule-citable-facts.js +41 -0
  105. package/dist/ai/tools/check-rule-citable-facts.js.map +1 -0
  106. package/dist/ai/tools/check-rule-content-modularity.d.ts +58 -0
  107. package/dist/ai/tools/check-rule-content-modularity.d.ts.map +1 -0
  108. package/dist/ai/tools/check-rule-content-modularity.js +45 -0
  109. package/dist/ai/tools/check-rule-content-modularity.js.map +1 -0
  110. package/dist/ai/tools/check-rule-faq-coverage.d.ts +54 -0
  111. package/dist/ai/tools/check-rule-faq-coverage.d.ts.map +1 -0
  112. package/dist/ai/tools/check-rule-faq-coverage.js +39 -0
  113. package/dist/ai/tools/check-rule-faq-coverage.js.map +1 -0
  114. package/dist/ai/tools/check-rule-freshness-signals.d.ts +54 -0
  115. package/dist/ai/tools/check-rule-freshness-signals.d.ts.map +1 -0
  116. package/dist/ai/tools/check-rule-freshness-signals.js +45 -0
  117. package/dist/ai/tools/check-rule-freshness-signals.js.map +1 -0
  118. package/dist/ai/tools/check-rule-json-ld-valid.d.ts +54 -0
  119. package/dist/ai/tools/check-rule-json-ld-valid.d.ts.map +1 -0
  120. package/dist/ai/tools/check-rule-json-ld-valid.js +44 -0
  121. package/dist/ai/tools/check-rule-json-ld-valid.js.map +1 -0
  122. package/dist/ai/tools/check-rule-missing-author.d.ts +54 -0
  123. package/dist/ai/tools/check-rule-missing-author.d.ts.map +1 -0
  124. package/dist/ai/tools/check-rule-missing-author.js +45 -0
  125. package/dist/ai/tools/check-rule-missing-author.js.map +1 -0
  126. package/dist/ai/tools/check-rule-near-duplicate.d.ts +82 -0
  127. package/dist/ai/tools/check-rule-near-duplicate.d.ts.map +1 -0
  128. package/dist/ai/tools/check-rule-near-duplicate.js +63 -0
  129. package/dist/ai/tools/check-rule-near-duplicate.js.map +1 -0
  130. package/dist/ai/tools/check-rule-required-fields.d.ts +50 -0
  131. package/dist/ai/tools/check-rule-required-fields.d.ts.map +1 -0
  132. package/dist/ai/tools/check-rule-required-fields.js +38 -0
  133. package/dist/ai/tools/check-rule-required-fields.js.map +1 -0
  134. package/dist/ai/tools/check-rule-schema-consistency.d.ts +54 -0
  135. package/dist/ai/tools/check-rule-schema-consistency.d.ts.map +1 -0
  136. package/dist/ai/tools/check-rule-schema-consistency.js +44 -0
  137. package/dist/ai/tools/check-rule-schema-consistency.js.map +1 -0
  138. package/dist/ai/tools/check-rule-summary-bait.d.ts +54 -0
  139. package/dist/ai/tools/check-rule-summary-bait.d.ts.map +1 -0
  140. package/dist/ai/tools/check-rule-summary-bait.js +39 -0
  141. package/dist/ai/tools/check-rule-summary-bait.js.map +1 -0
  142. package/dist/ai/tools/check-rule-thin-content.d.ts +66 -0
  143. package/dist/ai/tools/check-rule-thin-content.d.ts.map +1 -0
  144. package/dist/ai/tools/check-rule-thin-content.js +58 -0
  145. package/dist/ai/tools/check-rule-thin-content.js.map +1 -0
  146. package/dist/ai/tools/detect-templates.d.ts +60 -0
  147. package/dist/ai/tools/detect-templates.d.ts.map +1 -0
  148. package/dist/ai/tools/detect-templates.js +43 -0
  149. package/dist/ai/tools/detect-templates.js.map +1 -0
  150. package/dist/ai/tools/fetch-page.d.ts +70 -0
  151. package/dist/ai/tools/fetch-page.d.ts.map +1 -0
  152. package/dist/ai/tools/fetch-page.js +93 -0
  153. package/dist/ai/tools/fetch-page.js.map +1 -0
  154. package/dist/ai/tools/fetch-sitemap.d.ts +60 -0
  155. package/dist/ai/tools/fetch-sitemap.d.ts.map +1 -0
  156. package/dist/ai/tools/fetch-sitemap.js +116 -0
  157. package/dist/ai/tools/fetch-sitemap.js.map +1 -0
  158. package/dist/ai/tools/index.d.ts +1555 -0
  159. package/dist/ai/tools/index.d.ts.map +1 -0
  160. package/dist/ai/tools/index.js +119 -0
  161. package/dist/ai/tools/index.js.map +1 -0
  162. package/dist/ai/tools/parse-page.d.ts +94 -0
  163. package/dist/ai/tools/parse-page.d.ts.map +1 -0
  164. package/dist/ai/tools/parse-page.js +108 -0
  165. package/dist/ai/tools/parse-page.js.map +1 -0
  166. package/dist/ai/tools/query-serp.d.ts +113 -0
  167. package/dist/ai/tools/query-serp.d.ts.map +1 -0
  168. package/dist/ai/tools/query-serp.js +131 -0
  169. package/dist/ai/tools/query-serp.js.map +1 -0
  170. package/dist/ai/tools/sample-template.d.ts +67 -0
  171. package/dist/ai/tools/sample-template.d.ts.map +1 -0
  172. package/dist/ai/tools/sample-template.js +75 -0
  173. package/dist/ai/tools/sample-template.js.map +1 -0
  174. package/dist/ai/tools/types.d.ts +73 -0
  175. package/dist/ai/tools/types.d.ts.map +1 -0
  176. package/dist/ai/tools/types.js +64 -0
  177. package/dist/ai/tools/types.js.map +1 -0
  178. package/dist/ai/tools/validate-jsonld.d.ts +62 -0
  179. package/dist/ai/tools/validate-jsonld.d.ts.map +1 -0
  180. package/dist/ai/tools/validate-jsonld.js +84 -0
  181. package/dist/ai/tools/validate-jsonld.js.map +1 -0
  182. package/dist/auditor.d.ts +16 -1
  183. package/dist/auditor.d.ts.map +1 -1
  184. package/dist/auditor.js +862 -88
  185. package/dist/auditor.js.map +1 -1
  186. package/dist/backpressure.d.ts.map +1 -1
  187. package/dist/backpressure.js +10 -3
  188. package/dist/backpressure.js.map +1 -1
  189. package/dist/enrich-findings.d.ts.map +1 -1
  190. package/dist/enrich-findings.js +15 -1
  191. package/dist/enrich-findings.js.map +1 -1
  192. package/dist/formatters/bucket-findings.d.ts +43 -0
  193. package/dist/formatters/bucket-findings.d.ts.map +1 -0
  194. package/dist/formatters/bucket-findings.js +110 -0
  195. package/dist/formatters/bucket-findings.js.map +1 -0
  196. package/dist/formatters/console.d.ts.map +1 -1
  197. package/dist/formatters/console.js +116 -34
  198. package/dist/formatters/console.js.map +1 -1
  199. package/dist/formatters/fixplan.d.ts +13 -0
  200. package/dist/formatters/fixplan.d.ts.map +1 -0
  201. package/dist/formatters/fixplan.js +328 -0
  202. package/dist/formatters/fixplan.js.map +1 -0
  203. package/dist/formatters/html.d.ts.map +1 -1
  204. package/dist/formatters/html.js +27 -0
  205. package/dist/formatters/html.js.map +1 -1
  206. package/dist/formatters/index.d.ts +2 -0
  207. package/dist/formatters/index.d.ts.map +1 -1
  208. package/dist/formatters/index.js +1 -0
  209. package/dist/formatters/index.js.map +1 -1
  210. package/dist/formatters/markdown.d.ts.map +1 -1
  211. package/dist/formatters/markdown.js +97 -9
  212. package/dist/formatters/markdown.js.map +1 -1
  213. package/dist/index.d.ts +12 -1
  214. package/dist/index.d.ts.map +1 -1
  215. package/dist/index.js +8 -0
  216. package/dist/index.js.map +1 -1
  217. package/dist/page-filter.d.ts +64 -6
  218. package/dist/page-filter.d.ts.map +1 -1
  219. package/dist/page-filter.js +124 -3
  220. package/dist/page-filter.js.map +1 -1
  221. package/dist/rule-references.d.ts.map +1 -1
  222. package/dist/rule-references.js +5 -0
  223. package/dist/rule-references.js.map +1 -1
  224. package/dist/rules/aeo/answer-first.d.ts.map +1 -1
  225. package/dist/rules/aeo/answer-first.js +17 -3
  226. package/dist/rules/aeo/answer-first.js.map +1 -1
  227. package/dist/rules/aeo/citable-facts.d.ts.map +1 -1
  228. package/dist/rules/aeo/citable-facts.js +12 -1
  229. package/dist/rules/aeo/citable-facts.js.map +1 -1
  230. package/dist/rules/aeo/content-modularity.d.ts.map +1 -1
  231. package/dist/rules/aeo/content-modularity.js +3 -0
  232. package/dist/rules/aeo/content-modularity.js.map +1 -1
  233. package/dist/rules/aeo/crawler-access.d.ts.map +1 -1
  234. package/dist/rules/aeo/crawler-access.js +6 -0
  235. package/dist/rules/aeo/crawler-access.js.map +1 -1
  236. package/dist/rules/aeo/faq-coverage.d.ts.map +1 -1
  237. package/dist/rules/aeo/faq-coverage.js +4 -0
  238. package/dist/rules/aeo/faq-coverage.js.map +1 -1
  239. package/dist/rules/aeo/freshness-signals.d.ts.map +1 -1
  240. package/dist/rules/aeo/freshness-signals.js +9 -2
  241. package/dist/rules/aeo/freshness-signals.js.map +1 -1
  242. package/dist/rules/aeo/llms-txt.d.ts.map +1 -1
  243. package/dist/rules/aeo/llms-txt.js +6 -1
  244. package/dist/rules/aeo/llms-txt.js.map +1 -1
  245. package/dist/rules/aeo/summary-bait.d.ts.map +1 -1
  246. package/dist/rules/aeo/summary-bait.js +5 -2
  247. package/dist/rules/aeo/summary-bait.js.map +1 -1
  248. package/dist/rules/content/heading-structure.d.ts +21 -0
  249. package/dist/rules/content/heading-structure.d.ts.map +1 -0
  250. package/dist/rules/content/heading-structure.js +56 -0
  251. package/dist/rules/content/heading-structure.js.map +1 -0
  252. package/dist/rules/content/image-alt-text.d.ts +18 -0
  253. package/dist/rules/content/image-alt-text.d.ts.map +1 -0
  254. package/dist/rules/content/image-alt-text.js +77 -0
  255. package/dist/rules/content/image-alt-text.js.map +1 -0
  256. package/dist/rules/content/missing-author.d.ts.map +1 -1
  257. package/dist/rules/content/missing-author.js +10 -2
  258. package/dist/rules/content/missing-author.js.map +1 -1
  259. package/dist/rules/content/title-uniqueness.d.ts +18 -0
  260. package/dist/rules/content/title-uniqueness.d.ts.map +1 -0
  261. package/dist/rules/content/title-uniqueness.js +70 -0
  262. package/dist/rules/content/title-uniqueness.js.map +1 -0
  263. package/dist/rules/links/host-section-divergence.d.ts +3 -0
  264. package/dist/rules/links/host-section-divergence.d.ts.map +1 -0
  265. package/dist/rules/links/host-section-divergence.js +158 -0
  266. package/dist/rules/links/host-section-divergence.js.map +1 -0
  267. package/dist/rules/links/link-depth.d.ts +12 -1
  268. package/dist/rules/links/link-depth.d.ts.map +1 -1
  269. package/dist/rules/links/link-depth.js +25 -12
  270. package/dist/rules/links/link-depth.js.map +1 -1
  271. package/dist/rules/scope.d.ts.map +1 -1
  272. package/dist/rules/scope.js +5 -0
  273. package/dist/rules/scope.js.map +1 -1
  274. package/dist/rules/spam/doorway-pattern.d.ts.map +1 -1
  275. package/dist/rules/spam/doorway-pattern.js +27 -4
  276. package/dist/rules/spam/doorway-pattern.js.map +1 -1
  277. package/dist/rules/spam/publication-velocity.d.ts +1 -1
  278. package/dist/rules/spam/publication-velocity.d.ts.map +1 -1
  279. package/dist/rules/spam/publication-velocity.js +9 -4
  280. package/dist/rules/spam/publication-velocity.js.map +1 -1
  281. package/dist/rules/spam/template-coverage.js +1 -1
  282. package/dist/rules/spam/template-coverage.js.map +1 -1
  283. package/dist/rules/spam/template-diversity.js +1 -1
  284. package/dist/rules/spam/template-diversity.js.map +1 -1
  285. package/dist/rules/spam/thin-content.d.ts.map +1 -1
  286. package/dist/rules/spam/thin-content.js +9 -1
  287. package/dist/rules/spam/thin-content.js.map +1 -1
  288. package/dist/rules/tech/hreflang-consistency.d.ts.map +1 -1
  289. package/dist/rules/tech/hreflang-consistency.js +33 -4
  290. package/dist/rules/tech/hreflang-consistency.js.map +1 -1
  291. package/dist/rules/tech/og-completeness.d.ts +11 -0
  292. package/dist/rules/tech/og-completeness.d.ts.map +1 -1
  293. package/dist/rules/tech/og-completeness.js +22 -23
  294. package/dist/rules/tech/og-completeness.js.map +1 -1
  295. package/dist/ruleset-version.d.ts +8 -0
  296. package/dist/ruleset-version.d.ts.map +1 -0
  297. package/dist/ruleset-version.js +8 -0
  298. package/dist/ruleset-version.js.map +1 -0
  299. package/dist/scrape-strategy.d.ts +42 -0
  300. package/dist/scrape-strategy.d.ts.map +1 -0
  301. package/dist/scrape-strategy.js +101 -0
  302. package/dist/scrape-strategy.js.map +1 -0
  303. package/dist/site-classifier.d.ts +1 -1
  304. package/dist/site-classifier.d.ts.map +1 -1
  305. package/dist/site-classifier.js +217 -0
  306. package/dist/site-classifier.js.map +1 -1
  307. package/dist/state.d.ts +36 -1
  308. package/dist/state.d.ts.map +1 -1
  309. package/dist/state.js +3 -1
  310. package/dist/state.js.map +1 -1
  311. package/dist/stratified-sample.d.ts +9 -1
  312. package/dist/stratified-sample.d.ts.map +1 -1
  313. package/dist/stratified-sample.js +23 -6
  314. package/dist/stratified-sample.js.map +1 -1
  315. package/dist/types.d.ts +179 -2
  316. package/dist/types.d.ts.map +1 -1
  317. package/dist/types.js.map +1 -1
  318. package/dist/url-normalize.d.ts.map +1 -1
  319. package/dist/url-normalize.js +13 -1
  320. package/dist/url-normalize.js.map +1 -1
  321. package/package.json +90 -90
@@ -0,0 +1,93 @@
1
+ import { z } from "zod";
2
+ import { cachedFetch } from "../../cache.js";
3
+ import { currentPageCache } from "../orchestrator/page-cache.js";
4
+ import { validateTargetHost } from "../../ssrf-guard.js";
5
+ import { defineTool } from "./types.js";
6
// Input contract for the fetch_page tool. Each field schema is declared on its
// own so the object schema at the bottom reads as a plain field list.
const urlField = z.string().url().describe("Absolute http(s) URL to fetch.");
const timeoutMsField = z
    .number()
    .int()
    .positive()
    .max(30_000)
    .optional()
    .describe("Per-request timeout. Default 10s, max 30s.");
const cacheDirField = z
    .string()
    .optional()
    .describe("Directory to read/write cache entries. Omit to disable caching. Orchestrator typically passes a session-scoped temp dir.");
const cacheTtlMsField = z
    .number()
    .int()
    .nonnegative()
    .optional()
    .describe("Cache TTL for entries without ETag/Last-Modified validators. Default 5 minutes.");
const inputSchema = z.object({
    url: urlField,
    timeoutMs: timeoutMsField,
    cacheDir: cacheDirField,
    cacheTtlMs: cacheTtlMsField,
});
26
// Result contract for the fetch_page tool, written as a shape object first and
// wrapped by z.object afterwards.
const outputFields = {
    url: z.string().describe("Final URL after following up to 10 redirects."),
    status: z.number().int(),
    headers: z.record(z.string(), z.string()),
    // Handle for the cached page body. Downstream tools (parse_page,
    // check_rule_*, validate_jsonld, check_indexability, ...) take this
    // `pageId` instead of the HTML, so the markup never passes through the
    // LLM conversation and token use stays bounded as more pages are pulled.
    pageId: z.string(),
    // Leading 500 characters of the body, as a quick sniff aid only; rule
    // decisions should NOT be based on it.
    bodyExcerpt: z.string(),
    fromCache: z.boolean(),
    bodyBytes: z.number().int().nonnegative(),
};
const outputSchema = z.object(outputFields);
42
/**
 * `fetch_page` tool: SSRF-guarded HTTP fetch with an optional disk cache.
 *
 * The request itself is delegated to `cachedFetch`, which receives a
 * `validateHop` callback that runs `validateTargetHost` on the hostname of
 * every URL it is asked to check (cachedFetch is expected to call it for each
 * redirect hop — confirm in cache.js). The response body is stored in the page
 * cache that is currently in scope and only a `pageId` reference plus a
 * 500-character excerpt are returned, so the HTML never travels through the
 * model conversation.
 *
 * Input (see `inputSchema`):
 *  - `url`        URL to fetch.
 *  - `timeoutMs`  per-request timeout, defaults to 10_000.
 *  - `cacheDir`   disk cache directory; when omitted `cache: null` is passed.
 *  - `cacheTtlMs` TTL handed to the disk cache, defaults to 300_000.
 * `ctx?.signal` is forwarded to `cachedFetch`.
 *
 * Returns `{ url, status, headers, pageId, bodyExcerpt, fromCache, bodyBytes }`
 * where `bodyBytes` is the UTF-8 encoded size of the body.
 *
 * Throws when no page cache is in scope, when a hop URL cannot be parsed, and
 * whenever `validateTargetHost` or `cachedFetch` reject.
 *
 * NOTE(review): `cacheDir` arrives through tool input; if the model can set it
 * freely it selects an arbitrary directory for cache reads/writes — confirm
 * the orchestrator pins it.
 */
export const fetchPageTool = defineTool({
    name: "fetch_page",
    description: "Fetch the HTML for a single URL with SSRF protection and disk caching. Follows up to 10 redirects. Returns the final URL, status, headers, and a `pageId` reference to the cached body — pass that pageId into parse_page / check_rule_* / validate_jsonld / check_indexability instead of repassing HTML. The HTML itself never travels through tool inputs, keeping token consumption bounded. Errors (private IP, redirect loop, timeout) come back as tool errors — keep going with another URL.",
    inputSchema,
    outputSchema,
    async execute({ url, timeoutMs = 10_000, cacheDir, cacheTtlMs = 300_000 }, ctx) {
        // Fail fast: without a page cache the result cannot be stored, so do
        // not spend a network round trip (or a disk-cache write) first.
        const cache = currentPageCache();
        if (!cache) {
            throw new Error("fetch_page: no page cache in scope (orchestrator runner must wrap generateText in withPageCache)");
        }
        const validateHop = async (hopUrl) => {
            let host;
            try {
                host = new URL(hopUrl).hostname;
            }
            catch {
                throw new Error(`fetch_page: invalid URL ${hopUrl}`);
            }
            await validateTargetHost(host);
        };
        const result = await cachedFetch(url, {
            timeoutMs,
            cache: cacheDir ? { dir: cacheDir, ttlMs: cacheTtlMs } : null,
            validateHop,
            signal: ctx?.signal,
        });
        const pageId = cache.put({
            url: result.url,
            html: result.body,
            status: result.status,
            headers: result.headers,
        });
        return {
            url: result.url,
            status: result.status,
            headers: result.headers,
            pageId,
            bodyExcerpt: result.body.slice(0, 500),
            fromCache: result.fromCache,
            // String.length counts UTF-16 code units; report real UTF-8 bytes
            // so non-ASCII pages are not under-counted.
            bodyBytes: Buffer.byteLength(result.body, "utf8"),
        };
    },
});
93
+ //# sourceMappingURL=fetch-page.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch-page.js","sourceRoot":"","sources":["../../../src/ai/tools/fetch-page.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,+BAA+B,CAAC;AACjE,OAAO,EAAE,kBAAkB,EAAE,MAAM,qBAAqB,CAAC;AACzD,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAExC,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC;IAC3B,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,gCAAgC,CAAC;IAChE,SAAS,EAAE,CAAC;SACT,MAAM,EAAE;SACR,GAAG,EAAE;SACL,QAAQ,EAAE;SACV,GAAG,CAAC,MAAM,CAAC;SACX,QAAQ,EAAE;SACV,QAAQ,CAAC,4CAA4C,CAAC;IACzD,QAAQ,EAAE,CAAC;SACR,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,CACP,0HAA0H,CAC3H;IACH,UAAU,EAAE,CAAC;SACV,MAAM,EAAE;SACR,GAAG,EAAE;SACL,WAAW,EAAE;SACb,QAAQ,EAAE;SACV,QAAQ,CAAC,iFAAiF,CAAC;CAC/F,CAAC,CAAC;AAEH,MAAM,YAAY,GAAG,CAAC,CAAC,MAAM,CAAC;IAC5B,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,+CAA+C,CAAC;IACzE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE;IACxB,OAAO,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC;IACzC;;;;;OAKG;IACH,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE;IAClB,+HAA+H;IAC/H,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE;IACvB,SAAS,EAAE,CAAC,CAAC,OAAO,EAAE;IACtB,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,WAAW,EAAE;CAC1C,CAAC,CAAC;AAEH;;;;;;;GAOG;AACH,MAAM,CAAC,MAAM,aAAa,GAAG,UAAU,CAAC;IACtC,IAAI,EAAE,YAAY;IAClB,WAAW,EACT,seAAse;IACxe,WAAW;IACX,YAAY;IACZ,KAAK,CAAC,OAAO,CAAC,EAAE,GAAG,EAAE,SAAS,GAAG,MAAM,EAAE,QAAQ,EAAE,UAAU,GAAG,OAAO,EAAE,EAAE,GAAG;QAC5E,MAAM,WAAW,GAAG,KAAK,EAAE,MAAc,EAAiB,EAAE;YAC1D,IAAI,IAAY,CAAC;YACjB,IAAI,CAAC;gBACH,IAAI,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC;YAClC,CAAC;YAAC,MAAM,CAAC;gBACP,MAAM,IAAI,KAAK,CAAC,2BAA2B,MAAM,EAAE,CAAC,CAAC;YACvD,CAAC;YACD,MAAM,kBAAkB,CAAC,IAAI,CAAC,CAAC;QACjC,CAAC,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,GAAG,EAAE;YACpC,SAAS;YACT,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC,CAAC,CAAC,IAAI;YAC7D,WAAW;YACX,MAAM,EAAE,GAAG,EAAE,MAAM;SACpB,CAAC,CAAC;QAEH,MAAM,KAAK,GAAG,gBAAgB,EAAE
,CAAC;QACjC,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,MAAM,IAAI,KAAK,CACb,kGAAkG,CACnG,CAAC;QACJ,CAAC;QACD,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC;YACvB,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,OAAO,EAAE,MAAM,CAAC,OAAO;SACxB,CAAC,CAAC;QAEH,OAAO;YACL,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,OAAO,EAAE,MAAM,CAAC,OAAO;YACvB,MAAM;YACN,WAAW,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;YACtC,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,SAAS,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM;SAC9B,CAAC;IACJ,CAAC;CACF,CAAC,CAAC"}
@@ -0,0 +1,60 @@
1
+ import { z } from "zod";
2
+ /**
3
+ * Fetch + parse sitemap.xml, optionally recursing into a sitemap index. Input: required `sitemapUrl`, optional `maxUrls`, `maxDepth`, `timeoutMs`.
4
+ * Caps the URL list at `maxUrls` so a 50K-URL sitemap doesn't blow up the model
5
+ * context. Reports `truncated: true` when the cap fires — the LLM should then
6
+ * call `sample_template` with the returned subset. Result fields: `rootUrl`, `urlCount`, `urls`, `truncated`, `childSitemaps`; invoke via `run(input, ctx?)` or wrap with `toAiTool()`.
7
+ */
8
+ export declare const fetchSitemapTool: {
9
+ name: string;
10
+ description: string;
11
+ inputSchema: z.ZodType<{
12
+ sitemapUrl: string;
13
+ maxUrls?: number | undefined;
14
+ maxDepth?: number | undefined;
15
+ timeoutMs?: number | undefined;
16
+ }, unknown, z.core.$ZodTypeInternals<{
17
+ sitemapUrl: string;
18
+ maxUrls?: number | undefined;
19
+ maxDepth?: number | undefined;
20
+ timeoutMs?: number | undefined;
21
+ }, unknown>>;
22
+ outputSchema: z.ZodType<{
23
+ rootUrl: string;
24
+ urlCount: number;
25
+ urls: string[];
26
+ truncated: boolean;
27
+ childSitemaps: string[];
28
+ }, unknown, z.core.$ZodTypeInternals<{
29
+ rootUrl: string;
30
+ urlCount: number;
31
+ urls: string[];
32
+ truncated: boolean;
33
+ childSitemaps: string[];
34
+ }, unknown>>;
35
+ toAiTool(): import("ai").Tool<{
36
+ sitemapUrl: string;
37
+ maxUrls?: number | undefined;
38
+ maxDepth?: number | undefined;
39
+ timeoutMs?: number | undefined;
40
+ }, import("./types.js").ToolResult<{
41
+ rootUrl: string;
42
+ urlCount: number;
43
+ urls: string[];
44
+ truncated: boolean;
45
+ childSitemaps: string[];
46
+ }>>;
47
+ run(input: {
48
+ sitemapUrl: string;
49
+ maxUrls?: number | undefined;
50
+ maxDepth?: number | undefined;
51
+ timeoutMs?: number | undefined;
52
+ }, ctx?: import("./types.js").ToolExecuteContext): Promise<import("./types.js").ToolResult<{
53
+ rootUrl: string;
54
+ urlCount: number;
55
+ urls: string[];
56
+ truncated: boolean;
57
+ childSitemaps: string[];
58
+ }>>;
59
+ };
60
+ //# sourceMappingURL=fetch-sitemap.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch-sitemap.d.ts","sourceRoot":"","sources":["../../../src/ai/tools/fetch-sitemap.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAqExB;;;;;GAKG;AACH,eAAO,MAAM,gBAAgB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA4C3B,CAAC"}
@@ -0,0 +1,116 @@
1
+ import { z } from "zod";
2
+ import { cachedFetch } from "../../cache.js";
3
+ import { validateTargetHost } from "../../ssrf-guard.js";
4
+ import { defineTool } from "./types.js";
5
// Input contract for the fetch_sitemap tool. Every field carries a
// description because these strings are what the model sees when deciding how
// to call the tool.
const inputSchema = z.object({
    sitemapUrl: z
        .string()
        .url()
        .describe("Absolute URL of the sitemap.xml (or sitemap index)."),
    maxUrls: z
        .number()
        .int()
        .positive()
        .max(50_000)
        .optional()
        .describe("Hard cap on URLs returned. Default 5000."),
    maxDepth: z
        .number()
        .int()
        .nonnegative()
        .max(3)
        .optional()
        .describe("Maximum sitemap-index recursion depth. Default 1 (root + one level of nested sitemaps)."),
    // Previously the only undescribed field; worded like fetch_page's
    // timeoutMs, with the 15s default that fetch_sitemap's execute applies.
    timeoutMs: z
        .number()
        .int()
        .positive()
        .max(30_000)
        .optional()
        .describe("Per-request timeout. Default 15s, max 30s."),
});
26
// Result contract for the fetch_sitemap tool, written as a shape object first
// and wrapped by z.object afterwards.
const outputFields = {
    rootUrl: z.string(),
    urlCount: z.number().int().nonnegative(),
    urls: z.array(z.string()),
    truncated: z.boolean().describe("True when output was capped by maxUrls."),
    childSitemaps: z.array(z.string()).describe("Discovered child sitemap URLs (whether followed or not)."),
};
const outputSchema = z.object(outputFields);
33
// Matches every <loc>…</loc> element (case-insensitive, body may span lines).
const LOC_RE = /<loc>([\s\S]*?)<\/loc>/gi;
// The five predefined XML entities plus decimal / hexadecimal character
// references. Decoded in a single pass so "&amp;lt;" becomes "&lt;", not "<".
const XML_ENTITY_RE = /&(amp|lt|gt|quot|apos|#\d+|#x[0-9a-fA-F]+);/g;
const XML_NAMED_ENTITIES = Object.freeze({ amp: "&", lt: "<", gt: ">", quot: '"', apos: "'" });
/**
 * Turn the raw text of one XML element into its character data: surrounding
 * whitespace is trimmed, a CDATA wrapper is removed (its content is taken
 * verbatim), otherwise entity and character references are decoded.
 * References that do not name a valid code point are left untouched.
 *
 * @param {string} raw - text found between the element's tags.
 * @returns {string} decoded text.
 */
function decodeXmlText(raw) {
    const text = raw.trim();
    const cdata = /^<!\[CDATA\[([\s\S]*)\]\]>$/.exec(text);
    if (cdata !== null) {
        return cdata[1].trim();
    }
    return text.replace(XML_ENTITY_RE, (whole, ref) => {
        if (ref[0] !== "#") {
            return XML_NAMED_ENTITIES[ref];
        }
        const isHex = ref[1] === "x";
        const codePoint = Number.parseInt(ref.slice(isHex ? 2 : 1), isHex ? 16 : 10);
        return codePoint > 0 && codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
    });
}
/**
 * Collect the values of all <loc> elements in a sitemap document.
 *
 * Sitemap URLs are entity-escaped in the XML (`&` is written `&amp;`), so each
 * value is decoded before it is returned; empty <loc> elements are skipped.
 * `matchAll` works on a private copy of the regex, so the shared `/g` pattern
 * never carries `lastIndex` state between calls.
 *
 * @param {string} xml - sitemap or sitemap-index document.
 * @returns {string[]} decoded URLs in document order.
 */
function extractLocs(xml) {
    const out = [];
    for (const match of xml.matchAll(LOC_RE)) {
        const loc = decodeXmlText(match[1]);
        if (loc !== "") {
            out.push(loc);
        }
    }
    return out;
}
43
/**
 * Report whether the document contains a `<sitemapindex` opening tag, i.e. the
 * tag name followed by whitespace or `>`, matched case-insensitively.
 *
 * @param {string} xml - document text to inspect.
 * @returns {boolean} true when such a tag is present.
 */
function isSitemapIndex(xml) {
    const indexTag = /<sitemapindex[\s>]/i;
    return indexTag.exec(xml) !== null;
}
46
/**
 * Download one sitemap document and hand back its body.
 *
 * The request goes through `cachedFetch` with caching switched off
 * (`cache: null`). The `validateHop` callback passed along parses each URL it
 * receives and runs `validateTargetHost` on the hostname.
 *
 * @param {string} url - sitemap URL to download.
 * @param {number} timeoutMs - timeout forwarded to `cachedFetch`.
 * @param {AbortSignal} [signal] - forwarded to `cachedFetch`.
 * @returns {Promise<string>} the response body.
 * @throws {Error} when a hop URL cannot be parsed or the response status is
 *   400 or above; rejections from `validateTargetHost` / `cachedFetch`
 *   propagate unchanged.
 */
async function fetchXml(url, timeoutMs, signal) {
    async function validateHop(hopUrl) {
        let parsed;
        try {
            parsed = new URL(hopUrl);
        }
        catch {
            throw new Error(`fetch_sitemap: invalid URL ${hopUrl}`);
        }
        await validateTargetHost(parsed.hostname);
    }
    const fetchOptions = { timeoutMs, cache: null, validateHop, signal };
    const response = await cachedFetch(url, fetchOptions);
    const failed = response.status >= 400;
    if (failed) {
        throw new Error(`fetch_sitemap: ${url} returned status ${response.status}`);
    }
    return response.body;
}
63
/**
 * `fetch_sitemap` tool: fetch + parse a sitemap.xml, following sitemap indexes
 * up to `maxDepth` levels below the root.
 *
 * Input (see `inputSchema`):
 *  - `sitemapUrl` root sitemap or sitemap index.
 *  - `maxUrls`    cap on returned page URLs, defaults to 5000, so a 50K-URL
 *                 sitemap doesn't blow up the model context.
 *  - `maxDepth`   how many index levels to follow, defaults to 1 (root + one
 *                 level of nested sitemaps); 0 fetches the root only.
 *  - `timeoutMs`  per-request timeout, defaults to 15_000.
 * `ctx?.signal` is forwarded to every request.
 *
 * Returns `{ rootUrl, urlCount, urls, truncated, childSitemaps }`:
 *  - `urls` holds page URLs only. Entries of a sitemap index are never mixed
 *    in; they go to `childSitemaps`, whether or not they were followed.
 *  - `truncated` is true only when something was actually left out: a page
 *    URL beyond the cap, or a child sitemap not visited because the cap had
 *    already been reached. A sitemap with exactly `maxUrls` URLs is complete.
 *    When it is true the LLM should call `sample_template` with the subset.
 *
 * A failing root request rejects. A failing child sitemap is skipped so the
 * caller still gets whatever could be collected, unless the failure is due to
 * cancellation, which is rethrown.
 */
export const fetchSitemapTool = defineTool({
    name: "fetch_sitemap",
    description: "Fetch a sitemap.xml and return its URL list. Handles sitemap-index recursion (one level by default). Capped at 5000 URLs by default — use detect_templates + sample_template afterwards to pick a representative sample for auditing. Returns `truncated: true` when capped.",
    inputSchema,
    outputSchema,
    async execute({ sitemapUrl, maxUrls = 5000, maxDepth = 1, timeoutMs = 15_000 }, ctx) {
        const signal = ctx?.signal;
        const urls = [];
        const childSitemaps = [];
        // Guards against indexes that list themselves or repeat a child.
        const seenSitemaps = new Set([sitemapUrl]);
        let truncated = false;
        // Walk one document: page URLs are appended to `urls`; index entries
        // are recorded and, while depth allows, fetched and walked in turn.
        const collect = async (xml, depth) => {
            const locs = extractLocs(xml);
            if (!isSitemapIndex(xml)) {
                for (const loc of locs) {
                    if (urls.length >= maxUrls) {
                        truncated = true;
                        return;
                    }
                    urls.push(loc);
                }
                return;
            }
            const newChildren = [];
            for (const loc of locs) {
                if (seenSitemaps.has(loc)) {
                    continue;
                }
                seenSitemaps.add(loc);
                newChildren.push(loc);
                childSitemaps.push(loc);
            }
            if (depth >= maxDepth) {
                // Discovered but not followed: reported via childSitemaps only.
                return;
            }
            for (const child of newChildren) {
                if (truncated) {
                    return;
                }
                if (urls.length >= maxUrls) {
                    // Cap reached with child sitemaps still unvisited.
                    truncated = true;
                    return;
                }
                let childXml;
                try {
                    childXml = await fetchXml(child, timeoutMs, signal);
                }
                catch (err) {
                    if (signal?.aborted) {
                        throw err;
                    }
                    // Skip unreachable child sitemaps; the index may be
                    // misconfigured but we want to return whatever we did get.
                    continue;
                }
                await collect(childXml, depth + 1);
            }
        };
        const rootXml = await fetchXml(sitemapUrl, timeoutMs, signal);
        await collect(rootXml, 0);
        return {
            rootUrl: sitemapUrl,
            urlCount: urls.length,
            urls,
            truncated,
            childSitemaps,
        };
    },
});
116
+ //# sourceMappingURL=fetch-sitemap.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch-sitemap.js","sourceRoot":"","sources":["../../../src/ai/tools/fetch-sitemap.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAC7C,OAAO,EAAE,kBAAkB,EAAE,MAAM,qBAAqB,CAAC;AACzD,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAExC,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC;IAC3B,UAAU,EAAE,CAAC;SACV,MAAM,EAAE;SACR,GAAG,EAAE;SACL,QAAQ,CAAC,qDAAqD,CAAC;IAClE,OAAO,EAAE,CAAC;SACP,MAAM,EAAE;SACR,GAAG,EAAE;SACL,QAAQ,EAAE;SACV,GAAG,CAAC,MAAM,CAAC;SACX,QAAQ,EAAE;SACV,QAAQ,CAAC,0CAA0C,CAAC;IACvD,QAAQ,EAAE,CAAC;SACR,MAAM,EAAE;SACR,GAAG,EAAE;SACL,WAAW,EAAE;SACb,GAAG,CAAC,CAAC,CAAC;SACN,QAAQ,EAAE;SACV,QAAQ,CAAC,yFAAyF,CAAC;IACtG,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE;CAC9D,CAAC,CAAC;AAEH,MAAM,YAAY,GAAG,CAAC,CAAC,MAAM,CAAC;IAC5B,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE;IACnB,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,WAAW,EAAE;IACxC,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;IACzB,SAAS,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,yCAAyC,CAAC;IAC1E,aAAa,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,CAAC,0DAA0D,CAAC;CACxG,CAAC,CAAC;AAEH,MAAM,MAAM,GAAG,0BAA0B,CAAC;AAE1C,SAAS,WAAW,CAAC,GAAW;IAC9B,MAAM,GAAG,GAAa,EAAE,CAAC;IACzB,IAAI,KAA6B,CAAC;IAClC,OAAO,CAAC,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC3C,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAC5B,CAAC;IACD,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC;IACrB,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,cAAc,CAAC,GAAW;IACjC,OAAO,qBAAqB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzC,CAAC;AAED,KAAK,UAAU,QAAQ,CAAC,GAAW,EAAE,SAAiB,EAAE,MAAoB;IAC1E,MAAM,WAAW,GAAG,KAAK,EAAE,MAAc,EAAiB,EAAE;QAC1D,IAAI,IAAY,CAAC;QACjB,IAAI,CAAC;YACH,IAAI,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC;QAClC,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,IAAI,KAAK,CAAC,8BAA8B,MAAM,EAAE,CAAC,CAAC;QAC1D,CAAC;QACD,MAAM,kBAAkB,CAAC,IAAI,CAAC,CAAC;IACjC,CAAC,CAAC;IAEF,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,KAAK
,EAAE,IAAI,EAAE,WAAW,EAAE,MAAM,EAAE,CAAC,CAAC;IACvF,IAAI,MAAM,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;QACzB,MAAM,IAAI,KAAK,CAAC,kBAAkB,GAAG,oBAAoB,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;IAC5E,CAAC;IACD,OAAO,MAAM,CAAC,IAAI,CAAC;AACrB,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,MAAM,gBAAgB,GAAG,UAAU,CAAC;IACzC,IAAI,EAAE,eAAe;IACrB,WAAW,EACT,8QAA8Q;IAChR,WAAW;IACX,YAAY;IACZ,KAAK,CAAC,OAAO,CAAC,EAAE,UAAU,EAAE,OAAO,GAAG,IAAI,EAAE,QAAQ,GAAG,CAAC,EAAE,SAAS,GAAG,MAAM,EAAE,EAAE,GAAG;QACjF,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,UAAU,EAAE,SAAS,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QACnE,MAAM,aAAa,GAAa,EAAE,CAAC;QACnC,MAAM,IAAI,GAAa,EAAE,CAAC;QAE1B,IAAI,cAAc,CAAC,OAAO,CAAC,IAAI,QAAQ,GAAG,CAAC,EAAE,CAAC;YAC5C,MAAM,SAAS,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;YACvC,aAAa,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;YACjC,KAAK,MAAM,KAAK,IAAI,SAAS,EAAE,CAAC;gBAC9B,IAAI,IAAI,CAAC,MAAM,IAAI,OAAO;oBAAE,MAAM;gBAClC,IAAI,CAAC;oBACH,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,KAAK,EAAE,SAAS,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;oBAC/D,MAAM,SAAS,GAAG,WAAW,CAAC,QAAQ,CAAC,CAAC;oBACxC,KAAK,MAAM,CAAC,IAAI,SAAS,EAAE,CAAC;wBAC1B,IAAI,IAAI,CAAC,MAAM,IAAI,OAAO;4BAAE,MAAM;wBAClC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;oBACf,CAAC;gBACH,CAAC;gBAAC,MAAM,CAAC;oBACP,qEAAqE;oBACrE,6CAA6C;gBAC/C,CAAC;YACH,CAAC;QACH,CAAC;aAAM,CAAC;YACN,MAAM,IAAI,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;YAClC,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;gBACrB,IAAI,IAAI,CAAC,MAAM,IAAI,OAAO;oBAAE,MAAM;gBAClC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACf,CAAC;QACH,CAAC;QAED,OAAO;YACL,OAAO,EAAE,UAAU;YACnB,QAAQ,EAAE,IAAI,CAAC,MAAM;YACrB,IAAI;YACJ,SAAS,EAAE,IAAI,CAAC,MAAM,IAAI,OAAO;YACjC,aAAa;SACd,CAAC;IACJ,CAAC;CACF,CAAC,CAAC"}