recker 1.0.42 → 1.0.43-next.7a08602

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458) hide show
  1. package/dist/bin/recker-linux-x64 +0 -0
  2. package/dist/bin/recker-macos-x64 +0 -0
  3. package/dist/bin/recker-win-x64.exe +0 -0
  4. package/dist/bin/rek.cjs +93958 -0
  5. package/dist/browser/ai/adaptive-timeout.d.ts +50 -0
  6. package/dist/browser/ai/adaptive-timeout.js +208 -0
  7. package/dist/browser/ai/client.d.ts +22 -0
  8. package/dist/browser/ai/client.js +294 -0
  9. package/dist/browser/ai/index.d.ts +14 -0
  10. package/dist/browser/ai/index.js +11 -0
  11. package/dist/browser/ai/providers/anthropic.d.ts +63 -0
  12. package/dist/browser/ai/providers/anthropic.js +370 -0
  13. package/dist/browser/ai/providers/base.d.ts +48 -0
  14. package/dist/browser/ai/providers/base.js +150 -0
  15. package/dist/browser/ai/providers/google.d.ts +59 -0
  16. package/dist/browser/ai/providers/google.js +305 -0
  17. package/dist/browser/ai/providers/ollama.d.ts +44 -0
  18. package/dist/browser/ai/providers/ollama.js +240 -0
  19. package/dist/browser/ai/providers/openai.d.ts +64 -0
  20. package/dist/browser/ai/providers/openai.js +298 -0
  21. package/dist/browser/ai/rate-limiter.d.ts +43 -0
  22. package/dist/browser/ai/rate-limiter.js +215 -0
  23. package/dist/browser/ai/vector/index.d.ts +2 -0
  24. package/dist/browser/ai/vector/index.js +2 -0
  25. package/dist/browser/ai/vector/similarity.d.ts +2 -0
  26. package/dist/browser/ai/vector/similarity.js +27 -0
  27. package/dist/browser/ai/vector/store.d.ts +27 -0
  28. package/dist/browser/ai/vector/store.js +82 -0
  29. package/dist/browser/browser/cache.d.ts +2 -40
  30. package/dist/browser/browser/cache.js +2 -199
  31. package/dist/browser/browser/index.d.ts +8 -0
  32. package/dist/browser/browser/index.js +8 -0
  33. package/dist/browser/browser/recker.d.ts +8 -1
  34. package/dist/browser/browser/recker.js +8 -2
  35. package/dist/browser/cache/indexed-db.d.ts +10 -0
  36. package/dist/browser/cache/indexed-db.js +88 -0
  37. package/dist/browser/cache/service-worker-cache.d.ts +18 -0
  38. package/dist/browser/cache/service-worker-cache.js +103 -0
  39. package/dist/browser/cache.d.ts +2 -40
  40. package/dist/browser/cache.js +2 -199
  41. package/dist/browser/constants/user-agents.d.ts +7 -0
  42. package/dist/browser/constants/user-agents.js +7 -0
  43. package/dist/browser/core/client.d.ts +2 -0
  44. package/dist/browser/core/client.js +19 -1
  45. package/dist/browser/index.d.ts +8 -0
  46. package/dist/browser/index.js +8 -0
  47. package/dist/browser/plugins/har-recorder.d.ts +40 -0
  48. package/dist/browser/plugins/har-recorder.js +120 -0
  49. package/dist/browser/plugins/network-simulation.d.ts +7 -0
  50. package/dist/browser/plugins/network-simulation.js +13 -0
  51. package/dist/browser/presets/android.d.ts +2 -0
  52. package/dist/browser/presets/android.js +16 -0
  53. package/dist/browser/presets/anthropic.d.ts +8 -0
  54. package/dist/browser/presets/anthropic.js +27 -0
  55. package/dist/browser/presets/aws.d.ts +19 -0
  56. package/dist/browser/presets/aws.js +68 -0
  57. package/dist/browser/presets/azure-openai.d.ts +10 -0
  58. package/dist/browser/presets/azure-openai.js +35 -0
  59. package/dist/browser/presets/azure.d.ts +41 -0
  60. package/dist/browser/presets/azure.js +104 -0
  61. package/dist/browser/presets/chaturbate.d.ts +2 -0
  62. package/dist/browser/presets/chaturbate.js +17 -0
  63. package/dist/browser/presets/cloudflare.d.ts +12 -0
  64. package/dist/browser/presets/cloudflare.js +39 -0
  65. package/dist/browser/presets/cohere.d.ts +7 -0
  66. package/dist/browser/presets/cohere.js +22 -0
  67. package/dist/browser/presets/deepseek.d.ts +7 -0
  68. package/dist/browser/presets/deepseek.js +22 -0
  69. package/dist/browser/presets/digitalocean.d.ts +5 -0
  70. package/dist/browser/presets/digitalocean.js +16 -0
  71. package/dist/browser/presets/discord.d.ts +6 -0
  72. package/dist/browser/presets/discord.js +17 -0
  73. package/dist/browser/presets/elevenlabs.d.ts +6 -0
  74. package/dist/browser/presets/elevenlabs.js +20 -0
  75. package/dist/browser/presets/enhancers.d.ts +20 -0
  76. package/dist/browser/presets/enhancers.js +85 -0
  77. package/dist/browser/presets/fireworks.d.ts +7 -0
  78. package/dist/browser/presets/fireworks.js +22 -0
  79. package/dist/browser/presets/gcp.d.ts +34 -0
  80. package/dist/browser/presets/gcp.js +91 -0
  81. package/dist/browser/presets/gemini.d.ts +7 -0
  82. package/dist/browser/presets/gemini.js +23 -0
  83. package/dist/browser/presets/github.d.ts +6 -0
  84. package/dist/browser/presets/github.js +17 -0
  85. package/dist/browser/presets/gitlab.d.ts +6 -0
  86. package/dist/browser/presets/gitlab.js +16 -0
  87. package/dist/browser/presets/groq.d.ts +7 -0
  88. package/dist/browser/presets/groq.js +22 -0
  89. package/dist/browser/presets/hubspot.d.ts +9 -0
  90. package/dist/browser/presets/hubspot.js +28 -0
  91. package/dist/browser/presets/huggingface.d.ts +7 -0
  92. package/dist/browser/presets/huggingface.js +23 -0
  93. package/dist/browser/presets/index.d.ts +47 -0
  94. package/dist/browser/presets/index.js +47 -0
  95. package/dist/browser/presets/ios.d.ts +2 -0
  96. package/dist/browser/presets/ios.js +13 -0
  97. package/dist/browser/presets/linear.d.ts +5 -0
  98. package/dist/browser/presets/linear.js +16 -0
  99. package/dist/browser/presets/mailgun.d.ts +7 -0
  100. package/dist/browser/presets/mailgun.js +20 -0
  101. package/dist/browser/presets/meta.d.ts +10 -0
  102. package/dist/browser/presets/meta.js +33 -0
  103. package/dist/browser/presets/mistral.d.ts +7 -0
  104. package/dist/browser/presets/mistral.js +22 -0
  105. package/dist/browser/presets/notion.d.ts +6 -0
  106. package/dist/browser/presets/notion.js +17 -0
  107. package/dist/browser/presets/openai.d.ts +9 -0
  108. package/dist/browser/presets/openai.js +30 -0
  109. package/dist/browser/presets/oracle.d.ts +19 -0
  110. package/dist/browser/presets/oracle.js +117 -0
  111. package/dist/browser/presets/perplexity.d.ts +7 -0
  112. package/dist/browser/presets/perplexity.js +22 -0
  113. package/dist/browser/presets/pinecone.d.ts +8 -0
  114. package/dist/browser/presets/pinecone.js +42 -0
  115. package/dist/browser/presets/registry.d.ts +23 -0
  116. package/dist/browser/presets/registry.js +519 -0
  117. package/dist/browser/presets/replicate.d.ts +7 -0
  118. package/dist/browser/presets/replicate.js +23 -0
  119. package/dist/browser/presets/sendgrid.d.ts +6 -0
  120. package/dist/browser/presets/sendgrid.js +20 -0
  121. package/dist/browser/presets/sentry.d.ts +11 -0
  122. package/dist/browser/presets/sentry.js +48 -0
  123. package/dist/browser/presets/sinch.d.ts +9 -0
  124. package/dist/browser/presets/sinch.js +39 -0
  125. package/dist/browser/presets/slack.d.ts +5 -0
  126. package/dist/browser/presets/slack.js +16 -0
  127. package/dist/browser/presets/square.d.ts +10 -0
  128. package/dist/browser/presets/square.js +33 -0
  129. package/dist/browser/presets/stripe.d.ts +7 -0
  130. package/dist/browser/presets/stripe.js +23 -0
  131. package/dist/browser/presets/supabase.d.ts +6 -0
  132. package/dist/browser/presets/supabase.js +18 -0
  133. package/dist/browser/presets/tiktok.d.ts +10 -0
  134. package/dist/browser/presets/tiktok.js +38 -0
  135. package/dist/browser/presets/together.d.ts +7 -0
  136. package/dist/browser/presets/together.js +22 -0
  137. package/dist/browser/presets/twilio.d.ts +6 -0
  138. package/dist/browser/presets/twilio.js +17 -0
  139. package/dist/browser/presets/vercel.d.ts +6 -0
  140. package/dist/browser/presets/vercel.js +23 -0
  141. package/dist/browser/presets/vultr.d.ts +5 -0
  142. package/dist/browser/presets/vultr.js +16 -0
  143. package/dist/browser/presets/xai.d.ts +8 -0
  144. package/dist/browser/presets/xai.js +23 -0
  145. package/dist/browser/presets/youtube.d.ts +5 -0
  146. package/dist/browser/presets/youtube.js +20 -0
  147. package/dist/browser/recker.d.ts +8 -1
  148. package/dist/browser/recker.js +8 -2
  149. package/dist/browser/scrape/document.d.ts +5 -4
  150. package/dist/browser/scrape/document.js +89 -76
  151. package/dist/browser/scrape/element.d.ts +10 -8
  152. package/dist/browser/scrape/element.js +295 -81
  153. package/dist/browser/scrape/extractors.d.ts +11 -11
  154. package/dist/browser/scrape/extractors.js +145 -113
  155. package/dist/browser/scrape/parser/back.d.ts +1 -0
  156. package/dist/browser/scrape/parser/back.js +3 -0
  157. package/dist/browser/scrape/parser/index.d.ts +20 -0
  158. package/dist/browser/scrape/parser/index.js +19 -0
  159. package/dist/browser/scrape/parser/matcher.d.ts +30 -0
  160. package/dist/browser/scrape/parser/matcher.js +99 -0
  161. package/dist/browser/scrape/parser/nodes/comment.d.ts +12 -0
  162. package/dist/browser/scrape/parser/nodes/comment.js +21 -0
  163. package/dist/browser/scrape/parser/nodes/html.d.ts +110 -0
  164. package/dist/browser/scrape/parser/nodes/html.js +978 -0
  165. package/dist/browser/scrape/parser/nodes/node.d.ts +18 -0
  166. package/dist/browser/scrape/parser/nodes/node.js +31 -0
  167. package/dist/browser/scrape/parser/nodes/text.d.ts +14 -0
  168. package/dist/browser/scrape/parser/nodes/text.js +30 -0
  169. package/dist/browser/scrape/parser/nodes/type.d.ts +6 -0
  170. package/dist/browser/scrape/parser/nodes/type.js +7 -0
  171. package/dist/browser/scrape/parser/parse.d.ts +1 -0
  172. package/dist/browser/scrape/parser/parse.js +1 -0
  173. package/dist/browser/scrape/parser/valid.d.ts +2 -0
  174. package/dist/browser/scrape/parser/valid.js +5 -0
  175. package/dist/browser/scrape/parser/void-tag.d.ts +7 -0
  176. package/dist/browser/scrape/parser/void-tag.js +43 -0
  177. package/dist/browser/scrape/types.d.ts +7 -0
  178. package/dist/browser/seo/analyzer.d.ts +59 -0
  179. package/dist/browser/seo/analyzer.js +1399 -0
  180. package/dist/browser/seo/keywords.d.ts +16 -0
  181. package/dist/browser/seo/keywords.js +55 -0
  182. package/dist/browser/seo/rules/accessibility.d.ts +2 -0
  183. package/dist/browser/seo/rules/accessibility.js +722 -0
  184. package/dist/browser/seo/rules/ai-search.d.ts +2 -0
  185. package/dist/browser/seo/rules/ai-search.js +436 -0
  186. package/dist/browser/seo/rules/analytics.d.ts +2 -0
  187. package/dist/browser/seo/rules/analytics.js +306 -0
  188. package/dist/browser/seo/rules/best-practices.d.ts +2 -0
  189. package/dist/browser/seo/rules/best-practices.js +195 -0
  190. package/dist/browser/seo/rules/canonical.d.ts +12 -0
  191. package/dist/browser/seo/rules/canonical.js +258 -0
  192. package/dist/browser/seo/rules/content.d.ts +2 -0
  193. package/dist/browser/seo/rules/content.js +424 -0
  194. package/dist/browser/seo/rules/crawl.d.ts +2 -0
  195. package/dist/browser/seo/rules/crawl.js +435 -0
  196. package/dist/browser/seo/rules/cwv.d.ts +2 -0
  197. package/dist/browser/seo/rules/cwv.js +248 -0
  198. package/dist/browser/seo/rules/ecommerce.d.ts +2 -0
  199. package/dist/browser/seo/rules/ecommerce.js +283 -0
  200. package/dist/browser/seo/rules/i18n.d.ts +2 -0
  201. package/dist/browser/seo/rules/i18n.js +277 -0
  202. package/dist/browser/seo/rules/images.d.ts +2 -0
  203. package/dist/browser/seo/rules/images.js +235 -0
  204. package/dist/browser/seo/rules/index.d.ts +52 -0
  205. package/dist/browser/seo/rules/index.js +159 -0
  206. package/dist/browser/seo/rules/internal-linking.d.ts +2 -0
  207. package/dist/browser/seo/rules/internal-linking.js +394 -0
  208. package/dist/browser/seo/rules/links.d.ts +2 -0
  209. package/dist/browser/seo/rules/links.js +490 -0
  210. package/dist/browser/seo/rules/local.d.ts +2 -0
  211. package/dist/browser/seo/rules/local.js +289 -0
  212. package/dist/browser/seo/rules/meta.d.ts +2 -0
  213. package/dist/browser/seo/rules/meta.js +646 -0
  214. package/dist/browser/seo/rules/mobile.d.ts +2 -0
  215. package/dist/browser/seo/rules/mobile.js +161 -0
  216. package/dist/browser/seo/rules/performance.d.ts +2 -0
  217. package/dist/browser/seo/rules/performance.js +625 -0
  218. package/dist/browser/seo/rules/pwa.d.ts +2 -0
  219. package/dist/browser/seo/rules/pwa.js +299 -0
  220. package/dist/browser/seo/rules/readability.d.ts +2 -0
  221. package/dist/browser/seo/rules/readability.js +264 -0
  222. package/dist/browser/seo/rules/redirects.d.ts +16 -0
  223. package/dist/browser/seo/rules/redirects.js +199 -0
  224. package/dist/browser/seo/rules/resources.d.ts +2 -0
  225. package/dist/browser/seo/rules/resources.js +390 -0
  226. package/dist/browser/seo/rules/schema.d.ts +2 -0
  227. package/dist/browser/seo/rules/schema.js +379 -0
  228. package/dist/browser/seo/rules/security.d.ts +2 -0
  229. package/dist/browser/seo/rules/security.js +877 -0
  230. package/dist/browser/seo/rules/social.d.ts +2 -0
  231. package/dist/browser/seo/rules/social.js +603 -0
  232. package/dist/browser/seo/rules/structural.d.ts +2 -0
  233. package/dist/browser/seo/rules/structural.js +179 -0
  234. package/dist/browser/seo/rules/technical-advanced.d.ts +10 -0
  235. package/dist/browser/seo/rules/technical-advanced.js +288 -0
  236. package/dist/browser/seo/rules/technical.d.ts +2 -0
  237. package/dist/browser/seo/rules/technical.js +480 -0
  238. package/dist/browser/seo/rules/thresholds.d.ts +196 -0
  239. package/dist/browser/seo/rules/thresholds.js +118 -0
  240. package/dist/browser/seo/rules/types.d.ts +498 -0
  241. package/dist/browser/seo/rules/types.js +11 -0
  242. package/dist/browser/seo/types.d.ts +211 -0
  243. package/dist/browser/seo/types.js +1 -0
  244. package/dist/browser/transport/curl.d.ts +4 -0
  245. package/dist/browser/transport/curl.js +101 -0
  246. package/dist/browser/transport/undici.js +1 -2
  247. package/dist/browser/transport/worker.d.ts +18 -0
  248. package/dist/browser/transport/worker.js +278 -0
  249. package/dist/browser/types/index.d.ts +4 -1
  250. package/dist/browser/utils/binary-manager.d.ts +4 -0
  251. package/dist/browser/utils/binary-manager.js +72 -0
  252. package/dist/browser/utils/user-agent.js +2 -13
  253. package/dist/cache/indexed-db.d.ts +10 -0
  254. package/dist/cache/indexed-db.js +88 -0
  255. package/dist/cache/service-worker-cache.d.ts +18 -0
  256. package/dist/cache/service-worker-cache.js +103 -0
  257. package/dist/cli/commands/ai.d.ts +2 -0
  258. package/dist/cli/commands/ai.js +162 -0
  259. package/dist/cli/commands/bench.d.ts +2 -0
  260. package/dist/cli/commands/bench.js +51 -0
  261. package/dist/cli/commands/dns.d.ts +2 -0
  262. package/dist/cli/commands/dns.js +295 -0
  263. package/dist/cli/commands/har.d.ts +2 -0
  264. package/dist/cli/commands/har.js +171 -0
  265. package/dist/cli/commands/hls.d.ts +2 -0
  266. package/dist/cli/commands/hls.js +192 -0
  267. package/dist/cli/commands/network.d.ts +2 -0
  268. package/dist/cli/commands/network.js +288 -0
  269. package/dist/cli/commands/protocols.d.ts +2 -0
  270. package/dist/cli/commands/protocols.js +344 -0
  271. package/dist/cli/commands/scrape.d.ts +2 -0
  272. package/dist/cli/commands/scrape.js +176 -0
  273. package/dist/cli/commands/security.d.ts +2 -0
  274. package/dist/cli/commands/security.js +57 -0
  275. package/dist/cli/commands/seo.d.ts +2 -0
  276. package/dist/cli/commands/seo.js +125 -0
  277. package/dist/cli/commands/serve.d.ts +2 -0
  278. package/dist/cli/commands/serve.js +531 -0
  279. package/dist/cli/commands/spider.d.ts +3 -0
  280. package/dist/cli/commands/spider.js +456 -0
  281. package/dist/cli/commands/utils.d.ts +2 -0
  282. package/dist/cli/commands/utils.js +176 -0
  283. package/dist/cli/commands/vector.d.ts +2 -0
  284. package/dist/cli/commands/vector.js +158 -0
  285. package/dist/cli/handler.d.ts +2 -2
  286. package/dist/cli/handler.js +6 -6
  287. package/dist/cli/helpers.d.ts +7 -0
  288. package/dist/cli/helpers.js +128 -0
  289. package/dist/cli/index.js +96 -5228
  290. package/dist/cli/parser/help.d.ts +2 -0
  291. package/dist/cli/parser/help.js +52 -0
  292. package/dist/cli/parser/index.d.ts +3 -0
  293. package/dist/cli/parser/index.js +3 -0
  294. package/dist/cli/parser/parser.d.ts +4 -0
  295. package/dist/cli/parser/parser.js +146 -0
  296. package/dist/cli/parser/types.d.ts +41 -0
  297. package/dist/cli/parser/types.js +1 -0
  298. package/dist/cli/presets.d.ts +1 -1
  299. package/dist/cli/presets.js +1 -1
  300. package/dist/cli/router.d.ts +36 -0
  301. package/dist/cli/router.js +195 -0
  302. package/dist/cli/tui/ai-chat.js +1 -1
  303. package/dist/cli/tui/commands/context.d.ts +9 -0
  304. package/dist/cli/tui/commands/context.js +1 -0
  305. package/dist/cli/tui/commands/dns.d.ts +10 -0
  306. package/dist/cli/tui/commands/dns.js +461 -0
  307. package/dist/cli/tui/commands/hls.d.ts +2 -0
  308. package/dist/cli/tui/commands/hls.js +162 -0
  309. package/dist/cli/tui/commands/ip.d.ts +2 -0
  310. package/dist/cli/tui/commands/ip.js +45 -0
  311. package/dist/cli/tui/commands/network.d.ts +3 -0
  312. package/dist/cli/tui/commands/network.js +81 -0
  313. package/dist/cli/tui/commands/protocols.d.ts +6 -0
  314. package/dist/cli/tui/commands/protocols.js +531 -0
  315. package/dist/cli/tui/commands/security.d.ts +2 -0
  316. package/dist/cli/tui/commands/security.js +48 -0
  317. package/dist/cli/tui/commands/seo.d.ts +2 -0
  318. package/dist/cli/tui/commands/seo.js +74 -0
  319. package/dist/cli/tui/context.d.ts +12 -0
  320. package/dist/cli/tui/context.js +1 -0
  321. package/dist/cli/tui/shell.d.ts +11 -20
  322. package/dist/cli/tui/shell.js +216 -1873
  323. package/dist/constants/user-agents.d.ts +7 -0
  324. package/dist/constants/user-agents.js +7 -0
  325. package/dist/core/client.d.ts +2 -0
  326. package/dist/core/client.js +19 -1
  327. package/dist/index.d.ts +1 -0
  328. package/dist/index.js +1 -0
  329. package/dist/mcp/cli.js +2 -3
  330. package/dist/mcp/data/embeddings.json +1 -0
  331. package/dist/mcp/tools/network.js +298 -158
  332. package/dist/plugins/har-player.d.ts +23 -0
  333. package/dist/plugins/har-player.js +49 -0
  334. package/dist/plugins/har-recorder.d.ts +37 -3
  335. package/dist/plugins/har-recorder.js +116 -63
  336. package/dist/plugins/network-simulation.d.ts +7 -0
  337. package/dist/plugins/network-simulation.js +13 -0
  338. package/dist/presets/android.d.ts +2 -0
  339. package/dist/presets/android.js +16 -0
  340. package/dist/presets/chaturbate.d.ts +2 -0
  341. package/dist/presets/chaturbate.js +17 -0
  342. package/dist/presets/elevenlabs.d.ts +6 -0
  343. package/dist/presets/elevenlabs.js +20 -0
  344. package/dist/presets/enhancers.d.ts +20 -0
  345. package/dist/presets/enhancers.js +85 -0
  346. package/dist/presets/hubspot.d.ts +9 -0
  347. package/dist/presets/hubspot.js +28 -0
  348. package/dist/presets/index.d.ts +10 -0
  349. package/dist/presets/index.js +10 -0
  350. package/dist/presets/ios.d.ts +2 -0
  351. package/dist/presets/ios.js +13 -0
  352. package/dist/presets/pinecone.d.ts +8 -0
  353. package/dist/presets/pinecone.js +42 -0
  354. package/dist/presets/registry.js +60 -0
  355. package/dist/presets/sendgrid.d.ts +6 -0
  356. package/dist/presets/sendgrid.js +20 -0
  357. package/dist/presets/sentry.d.ts +11 -0
  358. package/dist/presets/sentry.js +48 -0
  359. package/dist/presets/square.d.ts +10 -0
  360. package/dist/presets/square.js +33 -0
  361. package/dist/recker.d.ts +3 -0
  362. package/dist/recker.js +4 -0
  363. package/dist/scrape/document.d.ts +5 -4
  364. package/dist/scrape/document.js +89 -76
  365. package/dist/scrape/element.d.ts +10 -8
  366. package/dist/scrape/element.js +295 -81
  367. package/dist/scrape/extractors.d.ts +11 -11
  368. package/dist/scrape/extractors.js +145 -113
  369. package/dist/scrape/index.d.ts +2 -0
  370. package/dist/scrape/index.js +1 -0
  371. package/dist/scrape/parser/back.d.ts +1 -0
  372. package/dist/scrape/parser/back.js +3 -0
  373. package/dist/scrape/parser/index.d.ts +20 -0
  374. package/dist/scrape/parser/index.js +19 -0
  375. package/dist/scrape/parser/matcher.d.ts +30 -0
  376. package/dist/scrape/parser/matcher.js +99 -0
  377. package/dist/scrape/parser/nodes/comment.d.ts +12 -0
  378. package/dist/scrape/parser/nodes/comment.js +21 -0
  379. package/dist/scrape/parser/nodes/html.d.ts +110 -0
  380. package/dist/scrape/parser/nodes/html.js +978 -0
  381. package/dist/scrape/parser/nodes/node.d.ts +18 -0
  382. package/dist/scrape/parser/nodes/node.js +31 -0
  383. package/dist/scrape/parser/nodes/text.d.ts +14 -0
  384. package/dist/scrape/parser/nodes/text.js +30 -0
  385. package/dist/scrape/parser/nodes/type.d.ts +6 -0
  386. package/dist/scrape/parser/nodes/type.js +7 -0
  387. package/dist/scrape/parser/parse.d.ts +1 -0
  388. package/dist/scrape/parser/parse.js +1 -0
  389. package/dist/scrape/parser/valid.d.ts +2 -0
  390. package/dist/scrape/parser/valid.js +5 -0
  391. package/dist/scrape/parser/void-tag.d.ts +7 -0
  392. package/dist/scrape/parser/void-tag.js +43 -0
  393. package/dist/scrape/spider.d.ts +19 -0
  394. package/dist/scrape/spider.js +28 -3
  395. package/dist/scrape/types.d.ts +7 -0
  396. package/dist/seo/analyzer.d.ts +15 -5
  397. package/dist/seo/analyzer.js +636 -175
  398. package/dist/seo/formatter.d.ts +16 -0
  399. package/dist/seo/formatter.js +228 -0
  400. package/dist/seo/index.d.ts +2 -0
  401. package/dist/seo/index.js +1 -0
  402. package/dist/seo/keywords.d.ts +16 -0
  403. package/dist/seo/keywords.js +55 -0
  404. package/dist/seo/rules/accessibility.js +85 -57
  405. package/dist/seo/rules/ai-search.js +44 -31
  406. package/dist/seo/rules/analytics.d.ts +2 -0
  407. package/dist/seo/rules/analytics.js +306 -0
  408. package/dist/seo/rules/best-practices.js +21 -14
  409. package/dist/seo/rules/canonical.js +31 -22
  410. package/dist/seo/rules/content.js +207 -19
  411. package/dist/seo/rules/crawl.js +55 -40
  412. package/dist/seo/rules/cwv.js +21 -15
  413. package/dist/seo/rules/ecommerce.js +51 -20
  414. package/dist/seo/rules/i18n.js +61 -33
  415. package/dist/seo/rules/images.js +87 -28
  416. package/dist/seo/rules/index.js +2 -0
  417. package/dist/seo/rules/internal-linking.js +58 -39
  418. package/dist/seo/rules/links.js +70 -51
  419. package/dist/seo/rules/local.js +49 -25
  420. package/dist/seo/rules/meta.js +161 -62
  421. package/dist/seo/rules/mobile.js +112 -2
  422. package/dist/seo/rules/performance.js +309 -54
  423. package/dist/seo/rules/pwa.js +36 -39
  424. package/dist/seo/rules/readability.js +31 -22
  425. package/dist/seo/rules/redirects.js +21 -15
  426. package/dist/seo/rules/resources.js +59 -42
  427. package/dist/seo/rules/schema.js +333 -8
  428. package/dist/seo/rules/security.js +142 -80
  429. package/dist/seo/rules/social.js +277 -47
  430. package/dist/seo/rules/structural.js +40 -16
  431. package/dist/seo/rules/technical-advanced.js +27 -22
  432. package/dist/seo/rules/technical.js +243 -42
  433. package/dist/seo/rules/types.d.ts +53 -1
  434. package/dist/seo/seo-spider.d.ts +22 -0
  435. package/dist/seo/seo-spider.js +77 -13
  436. package/dist/seo/types.d.ts +8 -1
  437. package/dist/seo/validators/llms-txt.js +19 -0
  438. package/dist/seo/validators/rss.d.ts +11 -0
  439. package/dist/seo/validators/rss.js +93 -0
  440. package/dist/seo/validators/sitemap.js +36 -26
  441. package/dist/transport/curl.d.ts +4 -0
  442. package/dist/transport/curl.js +101 -0
  443. package/dist/transport/udp.js +0 -1
  444. package/dist/transport/undici.js +1 -2
  445. package/dist/transport/worker.d.ts +18 -0
  446. package/dist/transport/worker.js +278 -0
  447. package/dist/types/index.d.ts +4 -1
  448. package/dist/utils/binary-manager.d.ts +4 -0
  449. package/dist/utils/binary-manager.js +72 -0
  450. package/dist/utils/optional-require.d.ts +7 -8
  451. package/dist/utils/optional-require.js +2 -21
  452. package/dist/utils/upload.d.ts +6 -0
  453. package/dist/utils/upload.js +11 -0
  454. package/dist/utils/user-agent.js +2 -13
  455. package/dist/version.js +1 -1
  456. package/package.json +14 -5
  457. package/dist/browser/utils/optional-require.d.ts +0 -19
  458. package/dist/browser/utils/optional-require.js +0 -105
@@ -0,0 +1,435 @@
1
+ import { createResult } from './types.js';
2
+ export const crawlRules = [
3
+ {
4
+ id: 'crawl-sitemap-reference',
5
+ name: 'Sitemap Reference',
6
+ category: 'technical',
7
+ severity: 'info',
8
+ description: 'Page should reference sitemap location',
9
+ check: (ctx) => {
10
+ if (ctx.hasSitemapLink) {
11
+ return createResult({ id: 'crawl-sitemap-reference', name: 'Sitemap Reference', category: 'technical', severity: 'info' }, 'pass', `Sitemap link found: ${ctx.sitemapUrl || 'referenced'}`);
12
+ }
13
+ if (ctx.robotsHasSitemap) {
14
+ return createResult({ id: 'crawl-sitemap-reference', name: 'Sitemap Reference', category: 'technical', severity: 'info' }, 'pass', 'Sitemap referenced in robots.txt');
15
+ }
16
+ return createResult({ id: 'crawl-sitemap-reference', name: 'Sitemap Reference', category: 'technical', severity: 'info' }, 'info', 'No sitemap reference found', {
17
+ recommendation: 'Add sitemap reference in robots.txt or HTML',
18
+ evidence: {
19
+ expected: 'Sitemap: https://example.com/sitemap.xml in robots.txt',
20
+ example: '<link rel="sitemap" type="application/xml" href="/sitemap.xml">',
21
+ impact: 'Helps search engines discover all pages',
22
+ learnMore: 'https://developers.google.com/search/docs/crawling-indexing/sitemaps/overview',
23
+ },
24
+ });
25
+ },
26
+ },
27
+ {
28
+ id: 'crawl-robots-noindex',
29
+ name: 'Robots Noindex',
30
+ category: 'technical',
31
+ severity: 'error',
32
+ description: 'Check if page is blocked from indexing',
33
+ check: (ctx) => {
34
+ if (!ctx.metaRobots) {
35
+ return createResult({ id: 'crawl-robots-noindex', name: 'Robots Noindex', category: 'technical', severity: 'error' }, 'info', 'Not applicable (robots meta tag not present)', { recommendation: 'This rule checks for noindex directives when robots meta tags are present' });
36
+ }
37
+ const hasNoindex = ctx.metaRobots.some(r => r.toLowerCase().includes('noindex'));
38
+ if (hasNoindex) {
39
+ return createResult({ id: 'crawl-robots-noindex', name: 'Robots Noindex', category: 'technical', severity: 'error' }, 'fail', 'Page is set to noindex', {
40
+ evidence: {
41
+ found: ctx.metaRobots.join(', '),
42
+ issue: 'This page will NOT appear in search results',
43
+ impact: 'Remove noindex if this page should be indexed',
44
+ },
45
+ });
46
+ }
47
+ return createResult({ id: 'crawl-robots-noindex', name: 'Robots Noindex', category: 'technical', severity: 'error' }, 'info', 'Not applicable (noindex not detected)', { recommendation: 'This rule checks for noindex directives that would prevent page indexing' });
48
+ },
49
+ },
50
+ {
51
+ id: 'crawl-robots-nofollow',
52
+ name: 'Robots Nofollow',
53
+ category: 'technical',
54
+ severity: 'warning',
55
+ description: 'Check if page links are blocked from following',
56
+ check: (ctx) => {
57
+ if (!ctx.metaRobots) {
58
+ return createResult({ id: 'crawl-robots-nofollow', name: 'Robots Nofollow', category: 'technical', severity: 'warning' }, 'info', 'Not applicable (robots meta tag not present)', { recommendation: 'This rule checks for nofollow directives when robots meta tags are present' });
59
+ }
60
+ const hasNofollow = ctx.metaRobots.some(r => r.toLowerCase().includes('nofollow'));
61
+ if (hasNofollow) {
62
+ return createResult({ id: 'crawl-robots-nofollow', name: 'Robots Nofollow', category: 'technical', severity: 'warning' }, 'warn', 'Page has nofollow directive', {
63
+ evidence: {
64
+ found: ctx.metaRobots.join(', '),
65
+ issue: 'Links on this page will not pass PageRank',
66
+ impact: 'Internal pages should usually not have nofollow',
67
+ },
68
+ });
69
+ }
70
+ return createResult({ id: 'crawl-robots-nofollow', name: 'Robots Nofollow', category: 'technical', severity: 'warning' }, 'info', 'Not applicable (nofollow not detected)', { recommendation: 'This rule checks for nofollow directives that would prevent link following' });
71
+ },
72
+ },
73
+ {
74
+ id: 'crawl-robots-combined',
75
+ name: 'Robots Directives',
76
+ category: 'technical',
77
+ severity: 'info',
78
+ description: 'Review all robots meta directives',
79
+ check: (ctx) => {
80
+ if (!ctx.metaRobots || ctx.metaRobots.length === 0) {
81
+ return createResult({ id: 'crawl-robots-combined', name: 'Robots Directives', category: 'technical', severity: 'info' }, 'info', 'No robots meta tag (defaults to index, follow)', {
82
+ recommendation: 'Explicitly set robots directives if needed',
83
+ evidence: {
84
+ expected: '<meta name="robots" content="index, follow">',
85
+ impact: 'Default behavior allows full indexing and following',
86
+ },
87
+ });
88
+ }
89
+ const directives = ctx.metaRobots.join(', ').toLowerCase();
90
+ const hasIndex = directives.includes('index') && !directives.includes('noindex');
91
+ const hasNoindex = directives.includes('noindex');
92
+ if (hasIndex && hasNoindex) {
93
+ return createResult({ id: 'crawl-robots-combined', name: 'Robots Directives', category: 'technical', severity: 'info' }, 'warn', 'Conflicting robots directives detected', {
94
+ evidence: {
95
+ found: ctx.metaRobots.join(', '),
96
+ issue: 'Both index and noindex specified',
97
+ },
98
+ });
99
+ }
100
+ return createResult({ id: 'crawl-robots-combined', name: 'Robots Directives', category: 'technical', severity: 'info' }, 'pass', `Robots: ${ctx.metaRobots.join(', ')}`);
101
+ },
102
+ },
103
+ {
104
+ id: 'crawl-x-robots-tag',
105
+ name: 'X-Robots-Tag Header',
106
+ category: 'technical',
107
+ severity: 'warning',
108
+ description: 'Check X-Robots-Tag HTTP header for indexing directives',
109
+ check: (ctx) => {
110
+ if (!ctx.responseHeaders) {
111
+ return createResult({ id: 'crawl-x-robots-tag', name: 'X-Robots-Tag Header', category: 'technical', severity: 'warning' }, 'info', 'Not applicable (response headers unavailable)', { recommendation: 'This rule checks for X-Robots-Tag headers when HTTP response headers are available' });
112
+ }
113
+ const xRobotsTag = ctx.responseHeaders['x-robots-tag'] ||
114
+ ctx.responseHeaders['X-Robots-Tag'];
115
+ if (!xRobotsTag) {
116
+ return createResult({ id: 'crawl-x-robots-tag', name: 'X-Robots-Tag Header', category: 'technical', severity: 'warning' }, 'info', 'Not applicable (X-Robots-Tag header not present)', { recommendation: 'This rule checks for X-Robots-Tag headers when the header is present' });
117
+ }
118
+ const tagValue = Array.isArray(xRobotsTag) ? xRobotsTag.join(', ') : xRobotsTag;
119
+ if (tagValue.toLowerCase().includes('noindex')) {
120
+ return createResult({ id: 'crawl-x-robots-tag', name: 'X-Robots-Tag Header', category: 'technical', severity: 'warning' }, 'fail', 'X-Robots-Tag contains noindex', {
121
+ evidence: {
122
+ found: tagValue,
123
+ issue: 'HTTP header is blocking indexing',
124
+ impact: 'Page will not appear in search results',
125
+ },
126
+ });
127
+ }
128
+ return createResult({ id: 'crawl-x-robots-tag', name: 'X-Robots-Tag Header', category: 'technical', severity: 'warning' }, 'info', `X-Robots-Tag: ${tagValue}`);
129
+ },
130
+ },
131
+ {
132
+ id: 'crawl-canonical-present',
133
+ name: 'Canonical URL',
134
+ category: 'technical',
135
+ severity: 'warning',
136
+ description: 'Pages should have a canonical URL to prevent duplicate content',
137
+ check: (ctx) => {
138
+ if (!ctx.hasCanonical) {
139
+ return createResult({ id: 'crawl-canonical-present', name: 'Canonical URL', category: 'technical', severity: 'warning' }, 'warn', 'Missing canonical URL', {
140
+ recommendation: 'Add a canonical link to prevent duplicate content issues',
141
+ evidence: {
142
+ expected: '<link rel="canonical" href="https://example.com/page">',
143
+ impact: 'Without canonical, search engines may index multiple versions',
144
+ learnMore: 'https://developers.google.com/search/docs/crawling-indexing/canonicalization',
145
+ },
146
+ });
147
+ }
148
+ return createResult({ id: 'crawl-canonical-present', name: 'Canonical URL', category: 'technical', severity: 'warning' }, 'pass', `Canonical: ${ctx.canonicalUrl}`);
149
+ },
150
+ },
151
+ {
152
+ id: 'crawl-canonical-self',
153
+ name: 'Canonical Self-Reference',
154
+ category: 'technical',
155
+ severity: 'info',
156
+ description: 'Canonical should point to the current page or explicit alternate',
157
+ check: (ctx) => {
158
+ if (!ctx.hasCanonical || !ctx.canonicalUrl || !ctx.url) {
159
+ return createResult({ id: 'crawl-canonical-self', name: 'Canonical Self-Reference', category: 'technical', severity: 'info' }, 'info', 'Not applicable (canonical URL or page URL unavailable)', { recommendation: 'This rule checks canonical self-reference when canonical and page URL information is available' });
160
+ }
161
+ const normalizeUrl = (url) => {
162
+ try {
163
+ const u = new URL(url);
164
+ let normalized = u.origin + u.pathname.replace(/\/$/, '');
165
+ return normalized.toLowerCase();
166
+ }
167
+ catch {
168
+ return url.toLowerCase().replace(/\/$/, '');
169
+ }
170
+ };
171
+ const currentNorm = normalizeUrl(ctx.url);
172
+ const canonicalNorm = normalizeUrl(ctx.canonicalUrl);
173
+ if (currentNorm !== canonicalNorm) {
174
+ return createResult({ id: 'crawl-canonical-self', name: 'Canonical Self-Reference', category: 'technical', severity: 'info' }, 'info', 'Canonical points to different URL', {
175
+ evidence: {
176
+ found: [`Current: ${ctx.url}`, `Canonical: ${ctx.canonicalUrl}`],
177
+ issue: 'This page canonicalizes to a different URL',
178
+ impact: 'Ensure this is intentional (e.g., www vs non-www consolidation)',
179
+ },
180
+ });
181
+ }
182
+ return createResult({ id: 'crawl-canonical-self', name: 'Canonical Self-Reference', category: 'technical', severity: 'info' }, 'info', 'Not applicable (canonical is self-referencing)', { recommendation: 'This rule checks for non-self-referencing canonicals which may indicate URL consolidation' });
183
+ },
184
+ },
185
+ {
186
+ id: 'crawl-canonical-absolute',
187
+ name: 'Canonical Absolute URL',
188
+ category: 'technical',
189
+ severity: 'warning',
190
+ description: 'Canonical URL should be absolute, not relative',
191
+ check: (ctx) => {
192
+ if (!ctx.canonicalUrl) {
193
+ return createResult({ id: 'crawl-canonical-absolute', name: 'Canonical Absolute URL', category: 'technical', severity: 'warning' }, 'info', 'Not applicable (canonical URL not present)', { recommendation: 'This rule checks canonical URL format when a canonical URL exists' });
194
+ }
195
+ const isAbsolute = ctx.canonicalUrl.startsWith('http://') ||
196
+ ctx.canonicalUrl.startsWith('https://');
197
+ if (!isAbsolute) {
198
+ return createResult({ id: 'crawl-canonical-absolute', name: 'Canonical Absolute URL', category: 'technical', severity: 'warning' }, 'warn', 'Canonical URL is relative', {
199
+ evidence: {
200
+ found: ctx.canonicalUrl,
201
+ expected: 'Absolute URL starting with https://',
202
+ impact: 'Relative canonicals may be misinterpreted',
203
+ },
204
+ });
205
+ }
206
+ return createResult({ id: 'crawl-canonical-absolute', name: 'Canonical Absolute URL', category: 'technical', severity: 'warning' }, 'info', 'Not applicable (canonical URL is absolute)', { recommendation: 'This rule checks for relative canonical URLs which should be avoided' });
207
+ },
208
+ },
209
+ {
210
+ id: 'crawl-canonical-https',
211
+ name: 'Canonical HTTPS',
212
+ category: 'technical',
213
+ severity: 'warning',
214
+ description: 'Canonical URL should use HTTPS',
215
+ check: (ctx) => {
216
+ if (!ctx.canonicalUrl) {
217
+ return createResult({ id: 'crawl-canonical-https', name: 'Canonical HTTPS', category: 'technical', severity: 'warning' }, 'info', 'Not applicable (canonical URL not present)', { recommendation: 'This rule checks canonical URL protocol when a canonical URL exists' });
218
+ }
219
+ if (ctx.canonicalUrl.startsWith('http://')) {
220
+ return createResult({ id: 'crawl-canonical-https', name: 'Canonical HTTPS', category: 'technical', severity: 'warning' }, 'warn', 'Canonical URL uses HTTP instead of HTTPS', {
221
+ evidence: {
222
+ found: ctx.canonicalUrl,
223
+ expected: 'HTTPS canonical URL',
224
+ impact: 'Google prefers HTTPS URLs for ranking',
225
+ },
226
+ });
227
+ }
228
+ return createResult({ id: 'crawl-canonical-https', name: 'Canonical HTTPS', category: 'technical', severity: 'warning' }, 'info', 'Not applicable (canonical URL uses HTTPS)', { recommendation: 'This rule checks for HTTP canonical URLs which should use HTTPS instead' });
229
+ },
230
+ },
231
+ {
232
+ id: 'crawl-url-parameters',
233
+ name: 'URL Parameters',
234
+ category: 'technical',
235
+ severity: 'info',
236
+ description: 'URLs with tracking parameters should have proper canonical',
237
+ check: (ctx) => {
238
+ if (!ctx.url) {
239
+ return createResult({ id: 'crawl-url-parameters', name: 'URL Parameters', category: 'technical', severity: 'info' }, 'info', 'Not applicable (page URL unavailable)', { recommendation: 'This rule checks for tracking parameters when page URL information is available' });
240
+ }
241
+ try {
242
+ const url = new URL(ctx.url);
243
+ const trackingParams = ['utm_source', 'utm_medium', 'utm_campaign', 'fbclid', 'gclid', 'ref'];
244
+ const hasTracking = trackingParams.some(p => url.searchParams.has(p));
245
+ if (hasTracking && !ctx.hasCanonical) {
246
+ return createResult({ id: 'crawl-url-parameters', name: 'URL Parameters', category: 'technical', severity: 'info' }, 'warn', 'URL has tracking parameters but no canonical', {
247
+ recommendation: 'Add canonical pointing to clean URL without parameters',
248
+ evidence: {
249
+ found: url.search,
250
+ expected: 'Canonical to base URL without tracking params',
251
+ impact: 'Tracking parameters can cause duplicate content',
252
+ },
253
+ });
254
+ }
255
+ }
256
+ catch {
257
+ }
258
+ return createResult({ id: 'crawl-url-parameters', name: 'URL Parameters', category: 'technical', severity: 'info' }, 'info', 'Not applicable (no tracking parameters detected or canonical is present)', { recommendation: 'This rule checks for tracking parameters without canonical tags' });
259
+ },
260
+ },
261
+ {
262
+ id: 'crawl-pagination-rel',
263
+ name: 'Pagination Links',
264
+ category: 'technical',
265
+ severity: 'info',
266
+ description: 'Paginated content should use proper rel attributes',
267
+ check: (ctx) => {
268
+ if (!ctx.isPaginatedPage) {
269
+ return createResult({ id: 'crawl-pagination-rel', name: 'Pagination Links', category: 'technical', severity: 'info' }, 'info', 'Not applicable (page is not paginated)', { recommendation: 'This rule checks pagination links on paginated pages only' });
270
+ }
271
+ const hasPrevNext = ctx.hasRelPrev || ctx.hasRelNext;
272
+ if (!hasPrevNext) {
273
+ return createResult({ id: 'crawl-pagination-rel', name: 'Pagination Links', category: 'technical', severity: 'info' }, 'info', 'Paginated page missing rel="prev/next" (deprecated but still useful)', {
274
+ recommendation: 'Consider using rel="prev" and rel="next" for pagination',
275
+ evidence: {
276
+ example: '<link rel="prev" href="/page/1">\n<link rel="next" href="/page/3">',
277
+ impact: 'Helps search engines understand pagination structure',
278
+ learnMore: 'https://developers.google.com/search/docs/specialty/ecommerce/pagination-and-incremental-page-loading',
279
+ },
280
+ });
281
+ }
282
+ return createResult({ id: 'crawl-pagination-rel', name: 'Pagination Links', category: 'technical', severity: 'info' }, 'pass', 'Pagination links present');
283
+ },
284
+ },
285
+ {
286
+ id: 'crawl-noarchive',
287
+ name: 'Cache Directives',
288
+ category: 'technical',
289
+ severity: 'info',
290
+ description: 'Check for noarchive and nocache directives',
291
+ check: (ctx) => {
292
+ if (!ctx.metaRobots) {
293
+ return createResult({ id: 'crawl-noarchive', name: 'Cache Directives', category: 'technical', severity: 'info' }, 'info', 'Not applicable (robots meta tag not present)', { recommendation: 'This rule checks for cache directives when robots meta tags are present' });
294
+ }
295
+ const directives = ctx.metaRobots.join(', ').toLowerCase();
296
+ const hasNoarchive = directives.includes('noarchive');
297
+ const hasNocache = directives.includes('nocache');
298
+ const hasNosnippet = directives.includes('nosnippet');
299
+ const restrictions = [];
300
+ if (hasNoarchive)
301
+ restrictions.push('noarchive (no cached version)');
302
+ if (hasNocache)
303
+ restrictions.push('nocache (no cache)');
304
+ if (hasNosnippet)
305
+ restrictions.push('nosnippet (no search snippet)');
306
+ if (restrictions.length > 0) {
307
+ return createResult({ id: 'crawl-noarchive', name: 'Cache Directives', category: 'technical', severity: 'info' }, 'info', `Search restrictions: ${restrictions.join(', ')}`, {
308
+ evidence: {
309
+ found: restrictions,
310
+ impact: 'These directives limit how search engines display your page',
311
+ },
312
+ });
313
+ }
314
+ return createResult({ id: 'crawl-noarchive', name: 'Cache Directives', category: 'technical', severity: 'info' }, 'info', 'Not applicable (no cache restriction directives present)', { recommendation: 'This rule checks for cache restriction directives in robots meta tags' });
315
+ },
316
+ },
317
+ {
318
+ id: 'robots-txt-exists',
319
+ name: 'robots.txt Exists',
320
+ category: 'crawlability',
321
+ severity: 'info',
322
+ description: 'Website should have a robots.txt file',
323
+ check: (ctx) => {
324
+ if (ctx.robotsTxtExists === undefined) {
325
+ return createResult({ id: 'robots-txt-exists', name: 'robots.txt Exists', category: 'crawlability', severity: 'info' }, 'info', 'Not applicable (robots.txt status unavailable)', { recommendation: 'This rule checks for robots.txt file when robots.txt information is available' });
326
+ }
327
+ if (!ctx.robotsTxtExists) {
328
+ return createResult({ id: 'robots-txt-exists', name: 'robots.txt Exists', category: 'crawlability', severity: 'info' }, 'info', 'No robots.txt file found', {
329
+ recommendation: 'Create a robots.txt file to control search engine crawling',
330
+ evidence: {
331
+ expected: '/robots.txt',
332
+ impact: 'Without robots.txt, search engines will crawl everything by default',
333
+ learnMore: 'https://developers.google.com/search/docs/crawling-indexing/robots/intro'
334
+ }
335
+ });
336
+ }
337
+ return createResult({ id: 'robots-txt-exists', name: 'robots.txt Exists', category: 'crawlability', severity: 'info' }, 'pass', 'robots.txt file exists');
338
+ },
339
+ },
340
+ {
341
+ id: 'sitemap-in-robots',
342
+ name: 'Sitemap Reference in robots.txt',
343
+ category: 'crawlability',
344
+ severity: 'warning',
345
+ description: 'robots.txt should reference sitemap.xml location',
346
+ check: (ctx) => {
347
+ if (ctx.robotsTxtHasSitemap === undefined) {
348
+ return createResult({ id: 'sitemap-in-robots', name: 'Sitemap Reference in robots.txt', category: 'crawlability', severity: 'warning' }, 'info', 'Not applicable (robots.txt sitemap reference status unavailable)', { recommendation: 'This rule checks for sitemap references in robots.txt when robots.txt information is available' });
349
+ }
350
+ if (!ctx.robotsTxtHasSitemap) {
351
+ return createResult({ id: 'sitemap-in-robots', name: 'Sitemap Reference in robots.txt', category: 'crawlability', severity: 'warning' }, 'warn', 'robots.txt does not reference sitemap.xml', {
352
+ recommendation: 'Add sitemap location to robots.txt',
353
+ evidence: {
354
+ expected: 'Sitemap: https://example.com/sitemap.xml',
355
+ impact: 'Search engines may not discover your sitemap automatically',
356
+ learnMore: 'https://developers.google.com/search/docs/crawling-indexing/sitemaps/build-sitemap'
357
+ }
358
+ });
359
+ }
360
+ return createResult({ id: 'sitemap-in-robots', name: 'Sitemap Reference in robots.txt', category: 'crawlability', severity: 'warning' }, 'pass', 'Sitemap is referenced in robots.txt');
361
+ },
362
+ },
363
+ {
364
+ id: 'blocked-resources',
365
+ name: 'Blocked Resources',
366
+ category: 'crawlability',
367
+ severity: 'warning',
368
+ description: 'CSS/JS resources should not be blocked by robots.txt',
369
+ check: (ctx) => {
370
+ if (ctx.blockedResources === undefined) {
371
+ return createResult({ id: 'blocked-resources', name: 'Blocked Resources', category: 'crawlability', severity: 'warning' }, 'info', 'Not applicable (blocked resources information unavailable)', { recommendation: 'This rule checks for blocked resources when resource blocking information is available' });
372
+ }
373
+ if (ctx.blockedResources > 0) {
374
+ return createResult({ id: 'blocked-resources', name: 'Blocked Resources', category: 'crawlability', severity: 'warning' }, 'warn', `${ctx.blockedResources} resources blocked by robots.txt`, {
375
+ value: ctx.blockedResources,
376
+ recommendation: 'Update robots.txt to allow crawling of CSS and JS files',
377
+ evidence: {
378
+ found: `${ctx.blockedResources} blocked resources`,
379
+ expected: 'CSS, JS, and image files should be crawlable',
380
+ impact: 'Blocked resources prevent proper page rendering and indexing',
381
+ learnMore: 'https://developers.google.com/search/docs/crawling-indexing/robots/intro'
382
+ }
383
+ });
384
+ }
385
+ return createResult({ id: 'blocked-resources', name: 'Blocked Resources', category: 'crawlability', severity: 'warning' }, 'info', 'Not applicable (no resources blocked by robots.txt)', { recommendation: 'This rule checks for CSS/JS resources blocked by robots.txt' });
386
+ },
387
+ },
388
+ {
389
+ id: 'x-robots-tag-noindex',
390
+ name: 'X-Robots-Tag Noindex',
391
+ category: 'crawlability',
392
+ severity: 'warning',
393
+ description: 'X-Robots-Tag should not block important pages',
394
+ check: (ctx) => {
395
+ if (!ctx.xRobotsTag) {
396
+ return createResult({ id: 'x-robots-tag-noindex', name: 'X-Robots-Tag Noindex', category: 'crawlability', severity: 'warning' }, 'info', 'Not applicable (X-Robots-Tag header not present)', { recommendation: 'This rule checks X-Robots-Tag for noindex directives when the header is present' });
397
+ }
398
+ const tag = ctx.xRobotsTag.toLowerCase();
399
+ if (tag.includes('noindex')) {
400
+ return createResult({ id: 'x-robots-tag-noindex', name: 'X-Robots-Tag Noindex', category: 'crawlability', severity: 'warning' }, 'warn', 'Page blocked by X-Robots-Tag: noindex', {
401
+ value: ctx.xRobotsTag,
402
+ recommendation: 'Verify this page should not be indexed; remove X-Robots-Tag if unintentional',
403
+ evidence: {
404
+ found: `X-Robots-Tag: ${ctx.xRobotsTag}`,
405
+ impact: 'This page will not appear in search results'
406
+ }
407
+ });
408
+ }
409
+ return createResult({ id: 'x-robots-tag-noindex', name: 'X-Robots-Tag Noindex', category: 'crawlability', severity: 'warning' }, 'info', 'Not applicable (X-Robots-Tag does not contain noindex)', { recommendation: 'This rule checks for noindex in X-Robots-Tag header' });
410
+ },
411
+ },
412
+ {
413
+ id: 'blocked-external-resources',
414
+ name: 'Blocked External Resources',
415
+ category: 'crawlability',
416
+ severity: 'info',
417
+ description: 'External resources blocked by robots.txt may affect rendering',
418
+ check: (ctx) => {
419
+ if (ctx.blockedExternalResources === undefined) {
420
+ return createResult({ id: 'blocked-external-resources', name: 'Blocked External Resources', category: 'crawlability', severity: 'info' }, 'info', 'Not applicable (blocked external resources information unavailable)', { recommendation: 'This rule checks for blocked external resources when resource blocking information is available' });
421
+ }
422
+ if (ctx.blockedExternalResources > 0) {
423
+ return createResult({ id: 'blocked-external-resources', name: 'Blocked External Resources', category: 'crawlability', severity: 'info' }, 'info', `${ctx.blockedExternalResources} external resources blocked by robots.txt`, {
424
+ value: ctx.blockedExternalResources,
425
+ recommendation: 'Verify blocked resources are not critical for page rendering',
426
+ evidence: {
427
+ found: `${ctx.blockedExternalResources} blocked external resources`,
428
+ impact: 'If critical resources are blocked, search engines may not render pages correctly'
429
+ }
430
+ });
431
+ }
432
+ return createResult({ id: 'blocked-external-resources', name: 'Blocked External Resources', category: 'crawlability', severity: 'info' }, 'info', 'Not applicable (no external resources blocked by robots.txt)', { recommendation: 'This rule checks for external resources blocked by robots.txt' });
433
+ },
434
+ },
435
+ ];
@@ -0,0 +1,2 @@
1
+ import { SeoRule } from './types.js';
2
/** Exported array of `SeoRule` entries (type imported from './types.js'); presumably the Core Web Vitals rule set — confirm against the implementation file. */
+ export declare const cwvRules: SeoRule[];