recker 1.0.43 → 1.0.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (459)
  1. package/README.md +47 -0
  2. package/dist/bin/recker-linux-x64 +0 -0
  3. package/dist/bin/recker-macos-x64 +0 -0
  4. package/dist/bin/recker-win-x64.exe +0 -0
  5. package/dist/bin/rek.cjs +85152 -100207
  6. package/dist/browser/ai/adaptive-timeout.d.ts +50 -0
  7. package/dist/browser/ai/adaptive-timeout.js +208 -0
  8. package/dist/browser/ai/client.d.ts +22 -0
  9. package/dist/browser/ai/client.js +294 -0
  10. package/dist/browser/ai/index.d.ts +14 -0
  11. package/dist/browser/ai/index.js +11 -0
  12. package/dist/browser/ai/providers/anthropic.d.ts +63 -0
  13. package/dist/browser/ai/providers/anthropic.js +370 -0
  14. package/dist/browser/ai/providers/base.d.ts +48 -0
  15. package/dist/browser/ai/providers/base.js +150 -0
  16. package/dist/browser/ai/providers/google.d.ts +59 -0
  17. package/dist/browser/ai/providers/google.js +305 -0
  18. package/dist/browser/ai/providers/ollama.d.ts +44 -0
  19. package/dist/browser/ai/providers/ollama.js +240 -0
  20. package/dist/browser/ai/providers/openai.d.ts +64 -0
  21. package/dist/browser/ai/providers/openai.js +298 -0
  22. package/dist/browser/ai/rate-limiter.d.ts +43 -0
  23. package/dist/browser/ai/rate-limiter.js +215 -0
  24. package/dist/browser/ai/vector/index.d.ts +2 -0
  25. package/dist/browser/ai/vector/index.js +2 -0
  26. package/dist/browser/ai/vector/similarity.d.ts +2 -0
  27. package/dist/browser/ai/vector/similarity.js +27 -0
  28. package/dist/browser/ai/vector/store.d.ts +27 -0
  29. package/dist/browser/ai/vector/store.js +82 -0
  30. package/dist/browser/browser/cache.d.ts +2 -40
  31. package/dist/browser/browser/cache.js +2 -199
  32. package/dist/browser/browser/index.d.ts +8 -0
  33. package/dist/browser/browser/index.js +8 -0
  34. package/dist/browser/browser/recker.d.ts +8 -1
  35. package/dist/browser/browser/recker.js +8 -2
  36. package/dist/browser/cache/indexed-db.d.ts +10 -0
  37. package/dist/browser/cache/indexed-db.js +88 -0
  38. package/dist/browser/cache/service-worker-cache.d.ts +18 -0
  39. package/dist/browser/cache/service-worker-cache.js +103 -0
  40. package/dist/browser/cache.d.ts +2 -40
  41. package/dist/browser/cache.js +2 -199
  42. package/dist/browser/constants/user-agents.d.ts +7 -0
  43. package/dist/browser/constants/user-agents.js +7 -0
  44. package/dist/browser/core/client.d.ts +2 -0
  45. package/dist/browser/core/client.js +19 -1
  46. package/dist/browser/index.d.ts +8 -0
  47. package/dist/browser/index.js +8 -0
  48. package/dist/browser/plugins/har-recorder.d.ts +40 -0
  49. package/dist/browser/plugins/har-recorder.js +120 -0
  50. package/dist/browser/plugins/network-simulation.d.ts +7 -0
  51. package/dist/browser/plugins/network-simulation.js +13 -0
  52. package/dist/browser/presets/android.d.ts +2 -0
  53. package/dist/browser/presets/android.js +16 -0
  54. package/dist/browser/presets/anthropic.d.ts +8 -0
  55. package/dist/browser/presets/anthropic.js +27 -0
  56. package/dist/browser/presets/aws.d.ts +19 -0
  57. package/dist/browser/presets/aws.js +68 -0
  58. package/dist/browser/presets/azure-openai.d.ts +10 -0
  59. package/dist/browser/presets/azure-openai.js +35 -0
  60. package/dist/browser/presets/azure.d.ts +41 -0
  61. package/dist/browser/presets/azure.js +104 -0
  62. package/dist/browser/presets/chaturbate.d.ts +2 -0
  63. package/dist/browser/presets/chaturbate.js +17 -0
  64. package/dist/browser/presets/cloudflare.d.ts +12 -0
  65. package/dist/browser/presets/cloudflare.js +39 -0
  66. package/dist/browser/presets/cohere.d.ts +7 -0
  67. package/dist/browser/presets/cohere.js +22 -0
  68. package/dist/browser/presets/deepseek.d.ts +7 -0
  69. package/dist/browser/presets/deepseek.js +22 -0
  70. package/dist/browser/presets/digitalocean.d.ts +5 -0
  71. package/dist/browser/presets/digitalocean.js +16 -0
  72. package/dist/browser/presets/discord.d.ts +6 -0
  73. package/dist/browser/presets/discord.js +17 -0
  74. package/dist/browser/presets/elevenlabs.d.ts +6 -0
  75. package/dist/browser/presets/elevenlabs.js +20 -0
  76. package/dist/browser/presets/enhancers.d.ts +20 -0
  77. package/dist/browser/presets/enhancers.js +85 -0
  78. package/dist/browser/presets/fireworks.d.ts +7 -0
  79. package/dist/browser/presets/fireworks.js +22 -0
  80. package/dist/browser/presets/gcp.d.ts +34 -0
  81. package/dist/browser/presets/gcp.js +91 -0
  82. package/dist/browser/presets/gemini.d.ts +7 -0
  83. package/dist/browser/presets/gemini.js +23 -0
  84. package/dist/browser/presets/github.d.ts +6 -0
  85. package/dist/browser/presets/github.js +17 -0
  86. package/dist/browser/presets/gitlab.d.ts +6 -0
  87. package/dist/browser/presets/gitlab.js +16 -0
  88. package/dist/browser/presets/groq.d.ts +7 -0
  89. package/dist/browser/presets/groq.js +22 -0
  90. package/dist/browser/presets/hubspot.d.ts +9 -0
  91. package/dist/browser/presets/hubspot.js +28 -0
  92. package/dist/browser/presets/huggingface.d.ts +7 -0
  93. package/dist/browser/presets/huggingface.js +23 -0
  94. package/dist/browser/presets/index.d.ts +47 -0
  95. package/dist/browser/presets/index.js +47 -0
  96. package/dist/browser/presets/ios.d.ts +2 -0
  97. package/dist/browser/presets/ios.js +13 -0
  98. package/dist/browser/presets/linear.d.ts +5 -0
  99. package/dist/browser/presets/linear.js +16 -0
  100. package/dist/browser/presets/mailgun.d.ts +7 -0
  101. package/dist/browser/presets/mailgun.js +20 -0
  102. package/dist/browser/presets/meta.d.ts +10 -0
  103. package/dist/browser/presets/meta.js +33 -0
  104. package/dist/browser/presets/mistral.d.ts +7 -0
  105. package/dist/browser/presets/mistral.js +22 -0
  106. package/dist/browser/presets/notion.d.ts +6 -0
  107. package/dist/browser/presets/notion.js +17 -0
  108. package/dist/browser/presets/openai.d.ts +9 -0
  109. package/dist/browser/presets/openai.js +30 -0
  110. package/dist/browser/presets/oracle.d.ts +19 -0
  111. package/dist/browser/presets/oracle.js +117 -0
  112. package/dist/browser/presets/perplexity.d.ts +7 -0
  113. package/dist/browser/presets/perplexity.js +22 -0
  114. package/dist/browser/presets/pinecone.d.ts +8 -0
  115. package/dist/browser/presets/pinecone.js +42 -0
  116. package/dist/browser/presets/registry.d.ts +23 -0
  117. package/dist/browser/presets/registry.js +519 -0
  118. package/dist/browser/presets/replicate.d.ts +7 -0
  119. package/dist/browser/presets/replicate.js +23 -0
  120. package/dist/browser/presets/sendgrid.d.ts +6 -0
  121. package/dist/browser/presets/sendgrid.js +20 -0
  122. package/dist/browser/presets/sentry.d.ts +11 -0
  123. package/dist/browser/presets/sentry.js +48 -0
  124. package/dist/browser/presets/sinch.d.ts +9 -0
  125. package/dist/browser/presets/sinch.js +39 -0
  126. package/dist/browser/presets/slack.d.ts +5 -0
  127. package/dist/browser/presets/slack.js +16 -0
  128. package/dist/browser/presets/square.d.ts +10 -0
  129. package/dist/browser/presets/square.js +33 -0
  130. package/dist/browser/presets/stripe.d.ts +7 -0
  131. package/dist/browser/presets/stripe.js +23 -0
  132. package/dist/browser/presets/supabase.d.ts +6 -0
  133. package/dist/browser/presets/supabase.js +18 -0
  134. package/dist/browser/presets/tiktok.d.ts +10 -0
  135. package/dist/browser/presets/tiktok.js +38 -0
  136. package/dist/browser/presets/together.d.ts +7 -0
  137. package/dist/browser/presets/together.js +22 -0
  138. package/dist/browser/presets/twilio.d.ts +6 -0
  139. package/dist/browser/presets/twilio.js +17 -0
  140. package/dist/browser/presets/vercel.d.ts +6 -0
  141. package/dist/browser/presets/vercel.js +23 -0
  142. package/dist/browser/presets/vultr.d.ts +5 -0
  143. package/dist/browser/presets/vultr.js +16 -0
  144. package/dist/browser/presets/xai.d.ts +8 -0
  145. package/dist/browser/presets/xai.js +23 -0
  146. package/dist/browser/presets/youtube.d.ts +5 -0
  147. package/dist/browser/presets/youtube.js +20 -0
  148. package/dist/browser/recker.d.ts +8 -1
  149. package/dist/browser/recker.js +8 -2
  150. package/dist/browser/scrape/document.d.ts +5 -4
  151. package/dist/browser/scrape/document.js +89 -76
  152. package/dist/browser/scrape/element.d.ts +10 -8
  153. package/dist/browser/scrape/element.js +295 -81
  154. package/dist/browser/scrape/extractors.d.ts +11 -11
  155. package/dist/browser/scrape/extractors.js +145 -113
  156. package/dist/browser/scrape/parser/back.d.ts +1 -0
  157. package/dist/browser/scrape/parser/back.js +3 -0
  158. package/dist/browser/scrape/parser/index.d.ts +20 -0
  159. package/dist/browser/scrape/parser/index.js +19 -0
  160. package/dist/browser/scrape/parser/matcher.d.ts +30 -0
  161. package/dist/browser/scrape/parser/matcher.js +99 -0
  162. package/dist/browser/scrape/parser/nodes/comment.d.ts +12 -0
  163. package/dist/browser/scrape/parser/nodes/comment.js +21 -0
  164. package/dist/browser/scrape/parser/nodes/html.d.ts +110 -0
  165. package/dist/browser/scrape/parser/nodes/html.js +978 -0
  166. package/dist/browser/scrape/parser/nodes/node.d.ts +18 -0
  167. package/dist/browser/scrape/parser/nodes/node.js +31 -0
  168. package/dist/browser/scrape/parser/nodes/text.d.ts +14 -0
  169. package/dist/browser/scrape/parser/nodes/text.js +30 -0
  170. package/dist/browser/scrape/parser/nodes/type.d.ts +6 -0
  171. package/dist/browser/scrape/parser/nodes/type.js +7 -0
  172. package/dist/browser/scrape/parser/parse.d.ts +1 -0
  173. package/dist/browser/scrape/parser/parse.js +1 -0
  174. package/dist/browser/scrape/parser/valid.d.ts +2 -0
  175. package/dist/browser/scrape/parser/valid.js +5 -0
  176. package/dist/browser/scrape/parser/void-tag.d.ts +7 -0
  177. package/dist/browser/scrape/parser/void-tag.js +43 -0
  178. package/dist/browser/scrape/types.d.ts +7 -0
  179. package/dist/browser/seo/analyzer.d.ts +59 -0
  180. package/dist/browser/seo/analyzer.js +1399 -0
  181. package/dist/browser/seo/keywords.d.ts +16 -0
  182. package/dist/browser/seo/keywords.js +55 -0
  183. package/dist/browser/seo/rules/accessibility.d.ts +2 -0
  184. package/dist/browser/seo/rules/accessibility.js +733 -0
  185. package/dist/browser/seo/rules/ai-search.d.ts +2 -0
  186. package/dist/browser/seo/rules/ai-search.js +436 -0
  187. package/dist/browser/seo/rules/analytics.d.ts +2 -0
  188. package/dist/browser/seo/rules/analytics.js +306 -0
  189. package/dist/browser/seo/rules/best-practices.d.ts +2 -0
  190. package/dist/browser/seo/rules/best-practices.js +195 -0
  191. package/dist/browser/seo/rules/canonical.d.ts +12 -0
  192. package/dist/browser/seo/rules/canonical.js +270 -0
  193. package/dist/browser/seo/rules/content.d.ts +2 -0
  194. package/dist/browser/seo/rules/content.js +522 -0
  195. package/dist/browser/seo/rules/crawl.d.ts +2 -0
  196. package/dist/browser/seo/rules/crawl.js +435 -0
  197. package/dist/browser/seo/rules/cwv.d.ts +2 -0
  198. package/dist/browser/seo/rules/cwv.js +248 -0
  199. package/dist/browser/seo/rules/ecommerce.d.ts +2 -0
  200. package/dist/browser/seo/rules/ecommerce.js +312 -0
  201. package/dist/browser/seo/rules/i18n.d.ts +2 -0
  202. package/dist/browser/seo/rules/i18n.js +288 -0
  203. package/dist/browser/seo/rules/images.d.ts +2 -0
  204. package/dist/browser/seo/rules/images.js +255 -0
  205. package/dist/browser/seo/rules/index.d.ts +52 -0
  206. package/dist/browser/seo/rules/index.js +159 -0
  207. package/dist/browser/seo/rules/internal-linking.d.ts +2 -0
  208. package/dist/browser/seo/rules/internal-linking.js +394 -0
  209. package/dist/browser/seo/rules/links.d.ts +2 -0
  210. package/dist/browser/seo/rules/links.js +498 -0
  211. package/dist/browser/seo/rules/local.d.ts +2 -0
  212. package/dist/browser/seo/rules/local.js +289 -0
  213. package/dist/browser/seo/rules/meta.d.ts +2 -0
  214. package/dist/browser/seo/rules/meta.js +805 -0
  215. package/dist/browser/seo/rules/mobile.d.ts +2 -0
  216. package/dist/browser/seo/rules/mobile.js +161 -0
  217. package/dist/browser/seo/rules/performance.d.ts +2 -0
  218. package/dist/browser/seo/rules/performance.js +738 -0
  219. package/dist/browser/seo/rules/pwa.d.ts +2 -0
  220. package/dist/browser/seo/rules/pwa.js +299 -0
  221. package/dist/browser/seo/rules/readability.d.ts +2 -0
  222. package/dist/browser/seo/rules/readability.js +264 -0
  223. package/dist/browser/seo/rules/redirects.d.ts +16 -0
  224. package/dist/browser/seo/rules/redirects.js +199 -0
  225. package/dist/browser/seo/rules/resources.d.ts +2 -0
  226. package/dist/browser/seo/rules/resources.js +390 -0
  227. package/dist/browser/seo/rules/schema.d.ts +2 -0
  228. package/dist/browser/seo/rules/schema.js +379 -0
  229. package/dist/browser/seo/rules/security.d.ts +2 -0
  230. package/dist/browser/seo/rules/security.js +877 -0
  231. package/dist/browser/seo/rules/social.d.ts +2 -0
  232. package/dist/browser/seo/rules/social.js +603 -0
  233. package/dist/browser/seo/rules/structural.d.ts +2 -0
  234. package/dist/browser/seo/rules/structural.js +223 -0
  235. package/dist/browser/seo/rules/technical-advanced.d.ts +10 -0
  236. package/dist/browser/seo/rules/technical-advanced.js +289 -0
  237. package/dist/browser/seo/rules/technical.d.ts +2 -0
  238. package/dist/browser/seo/rules/technical.js +480 -0
  239. package/dist/browser/seo/rules/thresholds.d.ts +196 -0
  240. package/dist/browser/seo/rules/thresholds.js +118 -0
  241. package/dist/browser/seo/rules/types.d.ts +498 -0
  242. package/dist/browser/seo/rules/types.js +11 -0
  243. package/dist/browser/seo/types.d.ts +211 -0
  244. package/dist/browser/seo/types.js +1 -0
  245. package/dist/browser/transport/curl.d.ts +4 -0
  246. package/dist/browser/transport/curl.js +101 -0
  247. package/dist/browser/transport/undici.js +1 -2
  248. package/dist/browser/transport/worker.d.ts +18 -0
  249. package/dist/browser/transport/worker.js +278 -0
  250. package/dist/browser/types/index.d.ts +4 -1
  251. package/dist/browser/utils/binary-manager.d.ts +4 -0
  252. package/dist/browser/utils/binary-manager.js +72 -0
  253. package/dist/browser/utils/user-agent.js +2 -13
  254. package/dist/cache/indexed-db.d.ts +10 -0
  255. package/dist/cache/indexed-db.js +88 -0
  256. package/dist/cache/service-worker-cache.d.ts +18 -0
  257. package/dist/cache/service-worker-cache.js +103 -0
  258. package/dist/cli/commands/ai.d.ts +2 -0
  259. package/dist/cli/commands/ai.js +162 -0
  260. package/dist/cli/commands/bench.d.ts +2 -0
  261. package/dist/cli/commands/bench.js +51 -0
  262. package/dist/cli/commands/dns.d.ts +2 -0
  263. package/dist/cli/commands/dns.js +295 -0
  264. package/dist/cli/commands/har.d.ts +2 -0
  265. package/dist/cli/commands/har.js +171 -0
  266. package/dist/cli/commands/hls.d.ts +2 -0
  267. package/dist/cli/commands/hls.js +192 -0
  268. package/dist/cli/commands/network.d.ts +2 -0
  269. package/dist/cli/commands/network.js +288 -0
  270. package/dist/cli/commands/protocols.d.ts +2 -0
  271. package/dist/cli/commands/protocols.js +344 -0
  272. package/dist/cli/commands/scrape.d.ts +2 -0
  273. package/dist/cli/commands/scrape.js +176 -0
  274. package/dist/cli/commands/security.d.ts +2 -0
  275. package/dist/cli/commands/security.js +57 -0
  276. package/dist/cli/commands/seo.d.ts +2 -0
  277. package/dist/cli/commands/seo.js +125 -0
  278. package/dist/cli/commands/serve.d.ts +2 -0
  279. package/dist/cli/commands/serve.js +531 -0
  280. package/dist/cli/commands/spider.d.ts +3 -0
  281. package/dist/cli/commands/spider.js +456 -0
  282. package/dist/cli/commands/utils.d.ts +2 -0
  283. package/dist/cli/commands/utils.js +176 -0
  284. package/dist/cli/commands/vector.d.ts +2 -0
  285. package/dist/cli/commands/vector.js +158 -0
  286. package/dist/cli/handler.d.ts +2 -2
  287. package/dist/cli/handler.js +6 -6
  288. package/dist/cli/helpers.d.ts +7 -0
  289. package/dist/cli/helpers.js +128 -0
  290. package/dist/cli/index.js +96 -5228
  291. package/dist/cli/parser/help.d.ts +2 -0
  292. package/dist/cli/parser/help.js +52 -0
  293. package/dist/cli/parser/index.d.ts +3 -0
  294. package/dist/cli/parser/index.js +3 -0
  295. package/dist/cli/parser/parser.d.ts +4 -0
  296. package/dist/cli/parser/parser.js +146 -0
  297. package/dist/cli/parser/types.d.ts +41 -0
  298. package/dist/cli/parser/types.js +1 -0
  299. package/dist/cli/presets.d.ts +1 -1
  300. package/dist/cli/presets.js +1 -1
  301. package/dist/cli/router.d.ts +36 -0
  302. package/dist/cli/router.js +195 -0
  303. package/dist/cli/tui/ai-chat.js +1 -1
  304. package/dist/cli/tui/commands/context.d.ts +9 -0
  305. package/dist/cli/tui/commands/context.js +1 -0
  306. package/dist/cli/tui/commands/dns.d.ts +10 -0
  307. package/dist/cli/tui/commands/dns.js +461 -0
  308. package/dist/cli/tui/commands/hls.d.ts +2 -0
  309. package/dist/cli/tui/commands/hls.js +162 -0
  310. package/dist/cli/tui/commands/ip.d.ts +2 -0
  311. package/dist/cli/tui/commands/ip.js +45 -0
  312. package/dist/cli/tui/commands/network.d.ts +3 -0
  313. package/dist/cli/tui/commands/network.js +81 -0
  314. package/dist/cli/tui/commands/protocols.d.ts +6 -0
  315. package/dist/cli/tui/commands/protocols.js +531 -0
  316. package/dist/cli/tui/commands/security.d.ts +2 -0
  317. package/dist/cli/tui/commands/security.js +48 -0
  318. package/dist/cli/tui/commands/seo.d.ts +2 -0
  319. package/dist/cli/tui/commands/seo.js +74 -0
  320. package/dist/cli/tui/context.d.ts +12 -0
  321. package/dist/cli/tui/context.js +1 -0
  322. package/dist/cli/tui/shell.d.ts +11 -20
  323. package/dist/cli/tui/shell.js +216 -1873
  324. package/dist/constants/user-agents.d.ts +7 -0
  325. package/dist/constants/user-agents.js +7 -0
  326. package/dist/core/client.d.ts +2 -0
  327. package/dist/core/client.js +19 -1
  328. package/dist/index.d.ts +1 -0
  329. package/dist/index.js +1 -0
  330. package/dist/mcp/cli.js +2 -3
  331. package/dist/mcp/data/embeddings.json +1 -1
  332. package/dist/mcp/tools/network.js +298 -158
  333. package/dist/plugins/har-player.d.ts +23 -0
  334. package/dist/plugins/har-player.js +49 -0
  335. package/dist/plugins/har-recorder.d.ts +37 -3
  336. package/dist/plugins/har-recorder.js +116 -63
  337. package/dist/plugins/network-simulation.d.ts +7 -0
  338. package/dist/plugins/network-simulation.js +13 -0
  339. package/dist/presets/android.d.ts +2 -0
  340. package/dist/presets/android.js +16 -0
  341. package/dist/presets/chaturbate.d.ts +2 -0
  342. package/dist/presets/chaturbate.js +17 -0
  343. package/dist/presets/elevenlabs.d.ts +6 -0
  344. package/dist/presets/elevenlabs.js +20 -0
  345. package/dist/presets/enhancers.d.ts +20 -0
  346. package/dist/presets/enhancers.js +85 -0
  347. package/dist/presets/hubspot.d.ts +9 -0
  348. package/dist/presets/hubspot.js +28 -0
  349. package/dist/presets/index.d.ts +10 -0
  350. package/dist/presets/index.js +10 -0
  351. package/dist/presets/ios.d.ts +2 -0
  352. package/dist/presets/ios.js +13 -0
  353. package/dist/presets/pinecone.d.ts +8 -0
  354. package/dist/presets/pinecone.js +42 -0
  355. package/dist/presets/registry.js +60 -0
  356. package/dist/presets/sendgrid.d.ts +6 -0
  357. package/dist/presets/sendgrid.js +20 -0
  358. package/dist/presets/sentry.d.ts +11 -0
  359. package/dist/presets/sentry.js +48 -0
  360. package/dist/presets/square.d.ts +10 -0
  361. package/dist/presets/square.js +33 -0
  362. package/dist/recker.d.ts +3 -0
  363. package/dist/recker.js +4 -0
  364. package/dist/scrape/document.d.ts +5 -4
  365. package/dist/scrape/document.js +89 -76
  366. package/dist/scrape/element.d.ts +10 -8
  367. package/dist/scrape/element.js +295 -81
  368. package/dist/scrape/extractors.d.ts +11 -11
  369. package/dist/scrape/extractors.js +145 -113
  370. package/dist/scrape/index.d.ts +2 -0
  371. package/dist/scrape/index.js +1 -0
  372. package/dist/scrape/parser/back.d.ts +1 -0
  373. package/dist/scrape/parser/back.js +3 -0
  374. package/dist/scrape/parser/index.d.ts +20 -0
  375. package/dist/scrape/parser/index.js +19 -0
  376. package/dist/scrape/parser/matcher.d.ts +30 -0
  377. package/dist/scrape/parser/matcher.js +99 -0
  378. package/dist/scrape/parser/nodes/comment.d.ts +12 -0
  379. package/dist/scrape/parser/nodes/comment.js +21 -0
  380. package/dist/scrape/parser/nodes/html.d.ts +110 -0
  381. package/dist/scrape/parser/nodes/html.js +978 -0
  382. package/dist/scrape/parser/nodes/node.d.ts +18 -0
  383. package/dist/scrape/parser/nodes/node.js +31 -0
  384. package/dist/scrape/parser/nodes/text.d.ts +14 -0
  385. package/dist/scrape/parser/nodes/text.js +30 -0
  386. package/dist/scrape/parser/nodes/type.d.ts +6 -0
  387. package/dist/scrape/parser/nodes/type.js +7 -0
  388. package/dist/scrape/parser/parse.d.ts +1 -0
  389. package/dist/scrape/parser/parse.js +1 -0
  390. package/dist/scrape/parser/valid.d.ts +2 -0
  391. package/dist/scrape/parser/valid.js +5 -0
  392. package/dist/scrape/parser/void-tag.d.ts +7 -0
  393. package/dist/scrape/parser/void-tag.js +43 -0
  394. package/dist/scrape/spider.d.ts +19 -0
  395. package/dist/scrape/spider.js +28 -3
  396. package/dist/scrape/types.d.ts +7 -0
  397. package/dist/seo/analyzer.d.ts +15 -5
  398. package/dist/seo/analyzer.js +636 -175
  399. package/dist/seo/formatter.d.ts +16 -0
  400. package/dist/seo/formatter.js +228 -0
  401. package/dist/seo/index.d.ts +2 -0
  402. package/dist/seo/index.js +1 -0
  403. package/dist/seo/keywords.d.ts +16 -0
  404. package/dist/seo/keywords.js +55 -0
  405. package/dist/seo/rules/accessibility.js +96 -57
  406. package/dist/seo/rules/ai-search.js +44 -31
  407. package/dist/seo/rules/analytics.d.ts +2 -0
  408. package/dist/seo/rules/analytics.js +306 -0
  409. package/dist/seo/rules/best-practices.js +21 -14
  410. package/dist/seo/rules/canonical.js +53 -32
  411. package/dist/seo/rules/content.js +317 -31
  412. package/dist/seo/rules/crawl.js +55 -40
  413. package/dist/seo/rules/cwv.js +21 -15
  414. package/dist/seo/rules/ecommerce.js +82 -22
  415. package/dist/seo/rules/i18n.js +75 -36
  416. package/dist/seo/rules/images.js +109 -30
  417. package/dist/seo/rules/index.js +2 -0
  418. package/dist/seo/rules/internal-linking.js +58 -39
  419. package/dist/seo/rules/links.js +79 -52
  420. package/dist/seo/rules/local.js +49 -25
  421. package/dist/seo/rules/meta.js +339 -81
  422. package/dist/seo/rules/mobile.js +112 -2
  423. package/dist/seo/rules/performance.js +434 -66
  424. package/dist/seo/rules/pwa.js +36 -39
  425. package/dist/seo/rules/readability.js +31 -22
  426. package/dist/seo/rules/redirects.js +21 -15
  427. package/dist/seo/rules/resources.js +59 -42
  428. package/dist/seo/rules/schema.js +333 -8
  429. package/dist/seo/rules/security.js +142 -80
  430. package/dist/seo/rules/social.js +277 -47
  431. package/dist/seo/rules/structural.js +87 -19
  432. package/dist/seo/rules/technical-advanced.js +30 -24
  433. package/dist/seo/rules/technical.js +243 -42
  434. package/dist/seo/rules/types.d.ts +53 -1
  435. package/dist/seo/seo-spider.d.ts +22 -0
  436. package/dist/seo/seo-spider.js +77 -13
  437. package/dist/seo/types.d.ts +8 -1
  438. package/dist/seo/validators/llms-txt.js +19 -0
  439. package/dist/seo/validators/rss.d.ts +11 -0
  440. package/dist/seo/validators/rss.js +93 -0
  441. package/dist/seo/validators/sitemap.js +36 -26
  442. package/dist/transport/curl.d.ts +4 -0
  443. package/dist/transport/curl.js +101 -0
  444. package/dist/transport/udp.js +0 -1
  445. package/dist/transport/undici.js +1 -2
  446. package/dist/transport/worker.d.ts +18 -0
  447. package/dist/transport/worker.js +278 -0
  448. package/dist/types/index.d.ts +4 -1
  449. package/dist/utils/binary-manager.d.ts +4 -0
  450. package/dist/utils/binary-manager.js +72 -0
  451. package/dist/utils/optional-require.d.ts +7 -8
  452. package/dist/utils/optional-require.js +2 -21
  453. package/dist/utils/upload.d.ts +6 -0
  454. package/dist/utils/upload.js +11 -0
  455. package/dist/utils/user-agent.js +2 -13
  456. package/dist/version.js +1 -1
  457. package/package.json +12 -6
  458. package/dist/browser/utils/optional-require.d.ts +0 -19
  459. package/dist/browser/utils/optional-require.js +0 -105
@@ -0,0 +1,522 @@
1
+ import { createResult } from './types.js';
2
+ import { SEO_THRESHOLDS } from './thresholds.js';
3
+ export const contentRules = [
4
+ {
5
+ id: 'content-depth-word-count',
6
+ name: 'Content Depth (Word Count)',
7
+ category: 'content',
8
+ severity: 'warning',
9
+ description: 'Page should meet minimum word count for its purpose.',
10
+ check: (ctx) => {
11
+ if (ctx.wordCount === undefined) {
12
+ return createResult({ id: 'content-depth-word-count', name: 'Content Depth (Word Count)', category: 'content', severity: 'warning' }, 'info', 'Not applicable (word count not available)', { recommendation: 'This rule checks content depth based on word count when available' });
13
+ }
14
+ const { minWordsSimple, minWordsRanking, minWordsAuthority } = SEO_THRESHOLDS.content;
15
+ const veryThinWords = SEO_THRESHOLDS.thinContent.veryThinWords;
16
+ if (ctx.wordCount < veryThinWords) {
17
+ return createResult({ id: 'content-depth-word-count', name: 'Content Depth', category: 'content', severity: 'error' }, 'fail', `Very thin content (${ctx.wordCount} words, min: ${veryThinWords})`, {
18
+ recommendation: 'Add more substantial content (at least 150-300 words).',
19
+ evidence: {
20
+ found: `${ctx.wordCount} words on page`,
21
+ expected: `At least ${veryThinWords} words for minimal content`,
22
+ impact: 'Search engines may consider very thin content as low-quality and not rank it well. Users expect more comprehensive information.',
23
+ example: 'Pages with <100 words are often flagged as thin content by Google and may be penalized in rankings.',
24
+ },
25
+ });
26
+ }
27
+ if (ctx.wordCount < minWordsSimple) {
28
+ return createResult({ id: 'content-depth-word-count', name: 'Content Depth', category: 'content', severity: 'warning' }, 'warn', `Thin content (${ctx.wordCount} words, min for simple: ${minWordsSimple})`, {
29
+ recommendation: `Consider expanding to at least ${minWordsSimple} words for simple pages, or more for ranking.`,
30
+ evidence: {
31
+ found: `${ctx.wordCount} words on page`,
32
+ expected: `At least ${minWordsSimple} words for simple pages, ${minWordsRanking}+ for competitive ranking`,
33
+ impact: 'Thin content may rank poorly for competitive keywords. More comprehensive content typically performs better.',
34
+ example: 'Simple landing pages need 300+ words, blog posts need 600+ for ranking potential.',
35
+ },
36
+ });
37
+ }
38
+ if (ctx.wordCount < minWordsRanking) {
39
+ return createResult({ id: 'content-depth-word-count', name: 'Content Depth', category: 'content', severity: 'info' }, 'warn', `Content may be too short to rank (${ctx.wordCount} words, min for ranking: ${minWordsRanking})`, {
40
+ recommendation: `Expand content to at least ${minWordsRanking} words for competitive keywords.`,
41
+ evidence: {
42
+ found: `${ctx.wordCount} words on page`,
43
+ expected: `At least ${minWordsRanking} words for competitive ranking`,
44
+ impact: 'Pages competing for competitive keywords typically need 600-1000+ words. Shorter content may not rank in top positions.',
45
+ example: 'Top-ranking blog posts average 1,500-2,500 words. Product pages need 600-1,000 words.',
46
+ },
47
+ });
48
+ }
49
+ if (ctx.wordCount < minWordsAuthority) {
50
+ return createResult({ id: 'content-depth-word-count', name: 'Content Depth', category: 'content', severity: 'info' }, 'info', `Content is not authority-level (${ctx.wordCount} words, min for authority: ${minWordsAuthority})`, { recommendation: `For authority content, aim for ${minWordsAuthority} words or more.` });
51
+ }
52
+ return createResult({ id: 'content-depth-word-count', name: 'Content Depth', category: 'content', severity: 'info' }, 'pass', `Good content depth (${ctx.wordCount} words)`, { value: ctx.wordCount });
53
+ },
54
+ },
55
+ {
56
+ id: 'content-readability-sentence-length',
57
+ name: 'Readability (Sentence Length)',
58
+ category: 'content',
59
+ severity: 'info',
60
+ description: 'Sentences should average under 25 words for better readability.',
61
+ check: (ctx) => {
62
+ if (ctx.avgWordsPerSentence === undefined) {
63
+ return createResult({ id: 'content-readability-sentence-length', name: 'Readability (Sentence Length)', category: 'content', severity: 'info' }, 'info', 'Not applicable (sentence metrics not available)', { recommendation: 'This rule checks average sentence length when metrics are available' });
64
+ }
65
+ const max = SEO_THRESHOLDS.content.maxWordsPerSentence;
66
+ if (ctx.avgWordsPerSentence > max) {
67
+ return createResult({ id: 'content-readability-sentence-length', name: 'Readability', category: 'content', severity: 'info' }, 'warn', `Long sentences (avg ${ctx.avgWordsPerSentence} words/sentence)`, {
68
+ value: ctx.avgWordsPerSentence,
69
+ recommendation: `Aim for under ${max} words per sentence for better readability.`,
70
+ evidence: {
71
+ found: `Average sentence length: ${ctx.avgWordsPerSentence.toFixed(1)} words`,
72
+ expected: `Target: under ${max} words per sentence`,
73
+ impact: 'Long sentences reduce readability, especially on mobile. They can increase bounce rates and decrease user engagement.',
74
+ example: 'Break up complex sentences: "Our service helps businesses grow by providing tools, analytics, and support" → "Our service helps businesses grow. We provide tools, analytics, and support."',
75
+ },
76
+ });
77
+ }
78
+ return createResult({ id: 'content-readability-sentence-length', name: 'Readability', category: 'content', severity: 'info' }, 'pass', `Good sentence length (avg ${ctx.avgWordsPerSentence} words)`, { value: ctx.avgWordsPerSentence });
79
+ },
80
+ },
81
+ {
82
+ id: 'content-paragraph-length',
83
+ name: 'Paragraph Length',
84
+ category: 'content',
85
+ severity: 'info',
86
+ description: 'Paragraphs should be concise (ideal 40-90 words) for mobile readability.',
87
+ check: (ctx) => {
88
+ if (!ctx.paragraphWordCounts || ctx.paragraphWordCounts.length === 0) {
89
+ return createResult({ id: 'content-paragraph-length', name: 'Paragraph Length', category: 'content', severity: 'info' }, 'info', 'Not applicable (paragraph data not available)', { recommendation: 'This rule checks paragraph length when paragraph data is available' });
90
+ }
91
+ const { minWordsPerParagraph, maxWordsPerParagraph } = SEO_THRESHOLDS.content;
92
+ let tooShort = 0;
93
+ let tooLong = 0;
94
+ ctx.paragraphWordCounts.forEach(count => {
95
+ if (count < minWordsPerParagraph)
96
+ tooShort++;
97
+ if (count > maxWordsPerParagraph)
98
+ tooLong++;
99
+ });
100
+ if (tooShort > 0 || tooLong > 0) {
101
+ let message = '';
102
+ const recs = [];
103
+ if (tooShort > 0) {
104
+ message += `${tooShort} paragraph(s) are too short (min: ${minWordsPerParagraph} words). `;
105
+ recs.push('Expand short paragraphs.');
106
+ }
107
+ if (tooLong > 0) {
108
+ message += `${tooLong} paragraph(s) are too long (max: ${maxWordsPerParagraph} words).`;
109
+ recs.push('Break long paragraphs into smaller ones.');
110
+ }
111
+ return createResult({ id: 'content-paragraph-length', name: 'Paragraph Length', category: 'content', severity: 'warning' }, 'warn', message.trim(), {
112
+ recommendation: `Aim for paragraphs between ${minWordsPerParagraph}-${maxWordsPerParagraph} words. ${recs.join(' ')}`,
113
+ evidence: {
114
+ found: `${tooShort} paragraph(s) too short, ${tooLong} paragraph(s) too long`,
115
+ expected: `All paragraphs should be ${minWordsPerParagraph}-${maxWordsPerParagraph} words`,
116
+ impact: 'Very short paragraphs appear incomplete. Very long paragraphs are hard to read on mobile and reduce engagement.',
117
+ example: '<p>Short.</p> is too brief. <p>Very long paragraph with 200+ words that goes on and on...</p> should be broken into 2-3 smaller paragraphs.',
118
+ },
119
+ });
120
+ }
121
+ return createResult({ id: 'content-paragraph-length', name: 'Paragraph Length', category: 'content', severity: 'warning' }, 'info', 'Not applicable (paragraph lengths are appropriate)', { recommendation: 'This rule checks for paragraphs that are too short or too long' });
122
+ },
123
+ },
124
+ {
125
+ id: 'content-lists-presence',
126
+ name: 'Lists Usage',
127
+ category: 'content',
128
+ severity: 'info',
129
+ description: 'Use lists (ul/ol) to improve readability and SGE compatibility.',
130
+ check: (ctx) => {
131
+ if (ctx.listCount === undefined || ctx.listCount === 0) {
132
+ return createResult({ id: 'content-lists-presence', name: 'Lists Usage', category: 'content', severity: 'info' }, 'info', 'No lists (ul/ol) found', { recommendation: 'Consider using bullet points or numbered lists for better scannability and AI summarization.' });
133
+ }
134
+ return createResult({ id: 'content-lists-presence', name: 'Lists Usage', category: 'content', severity: 'info' }, 'info', 'Not applicable (page has lists)', { recommendation: 'This rule checks for the presence of ul/ol lists' });
135
+ },
136
+ },
137
+ {
138
+ id: 'content-subheading-frequency',
139
+ name: 'Subheading Frequency',
140
+ category: 'content',
141
+ severity: 'info',
142
+ description: 'Subheadings (H2/H3) should be used frequently to break up content.',
143
+ check: (ctx) => {
144
+ const idealFrequencyPer100Words = 0.5;
145
+ if (ctx.wordCount && ctx.wordCount > 300 && (ctx.subheadingFrequency ?? 0) < idealFrequencyPer100Words) {
146
+ return createResult({ id: 'content-subheading-frequency', name: 'Subheading Frequency', category: 'content', severity: 'info' }, 'warn', `Low subheading frequency (${(ctx.subheadingFrequency ?? 0).toFixed(2)} per 100 words)`, {
147
+ recommendation: `Add more subheadings (H2/H3) to break up long text blocks.`,
148
+ evidence: {
149
+ found: `${(ctx.subheadingFrequency ?? 0).toFixed(2)} subheadings per 100 words`,
150
+ expected: `At least ${idealFrequencyPer100Words} subheadings per 100 words (roughly 1 every 200 words)`,
151
+ impact: 'Insufficient subheadings make content difficult to scan. Users and search engines prefer well-structured, scannable content.',
152
+ example: '<h2>Main Topic</h2><p>300+ words...</p> → Add <h3> subheadings every 150-200 words to improve structure.',
153
+ },
154
+ });
155
+ }
156
+ return createResult({ id: 'content-subheading-frequency', name: 'Subheading Frequency', category: 'content', severity: 'info' }, 'info', 'Not applicable (subheading frequency is adequate or content is short)', { recommendation: 'This rule checks subheading frequency for long content' });
157
+ },
158
+ },
159
+ {
160
+ id: 'content-emphasis-tags',
161
+ name: 'Emphasis Tags Usage',
162
+ category: 'content',
163
+ severity: 'info',
164
+ description: 'Use strong/em tags moderately for emphasis, avoid keyword stuffing.',
165
+ check: (ctx) => {
166
+ const totalEmphasisTags = (ctx.strongTagCount ?? 0) + (ctx.emTagCount ?? 0);
167
+ if (ctx.wordCount && ctx.wordCount > 200 && totalEmphasisTags === 0) {
168
+ return createResult({ id: 'content-emphasis-tags', name: 'Emphasis Tags Usage', category: 'content', severity: 'info' }, 'info', 'No strong/em tags found in substantial content', { recommendation: 'Use <strong> or <em> tags to highlight important keywords or phrases.' });
169
+ }
170
+ const emphasisRatio = totalEmphasisTags / (ctx.wordCount || 1);
171
+ const maxEmphasisRatio = 0.05;
172
+ if (ctx.wordCount && ctx.wordCount > 100 && emphasisRatio > maxEmphasisRatio) {
173
+ return createResult({ id: 'content-emphasis-tags', name: 'Emphasis Tags Usage', category: 'content', severity: 'warning' }, 'warn', `Potentially excessive emphasis tags (${totalEmphasisTags} tags for ${ctx.wordCount} words)`, {
174
+ recommendation: 'Moderate the use of <strong> and <em> tags to avoid over-optimization.',
175
+ evidence: {
176
+ found: `${totalEmphasisTags} emphasis tags in ${ctx.wordCount} words (${(emphasisRatio * 100).toFixed(1)}% of content)`,
177
+ expected: `Less than ${(maxEmphasisRatio * 100).toFixed(0)}% of content should be emphasized`,
178
+ impact: 'Excessive emphasis can be seen as keyword stuffing and may trigger spam filters. It also reduces the impact of actual important content.',
179
+ example: '<strong>best</strong> <strong>product</strong> for <strong>keyword</strong> → Use emphasis sparingly on truly important terms only.',
180
+ },
181
+ });
182
+ }
183
+ return createResult({ id: 'content-emphasis-tags', name: 'Emphasis Tags Usage', category: 'content', severity: 'info' }, 'info', 'Not applicable (emphasis tag usage is appropriate)', { recommendation: 'This rule checks for proper use of strong/em tags' });
184
+ },
185
+ },
186
+ {
187
+ id: 'multimedia-video-audio',
188
+ name: 'Multimedia Content',
189
+ category: 'content',
190
+ severity: 'info',
191
+ description: 'Rich content with videos and audio can enhance user experience.',
192
+ check: (ctx) => {
193
+ const totalMultimedia = (ctx.videoCount ?? 0) + (ctx.audioCount ?? 0);
194
+ if (totalMultimedia === 0 && ctx.wordCount && ctx.wordCount > 500) {
195
+ return createResult({ id: 'multimedia-video-audio', name: 'Multimedia Content', category: 'content', severity: 'info' }, 'info', 'No video or audio elements found for substantial content', { recommendation: 'Consider adding relevant videos, audio, or other rich media to engage users.' });
196
+ }
197
+ return createResult({ id: 'multimedia-video-audio', name: 'Multimedia Content', category: 'content', severity: 'info' }, 'info', 'Not applicable (page has multimedia or content is short)', { recommendation: 'This rule checks for video/audio elements in substantial content' });
198
+ },
199
+ },
200
+ {
201
+ id: 'content-sge-optimization',
202
+ name: 'AI Overview (SGE) Optimization',
203
+ category: 'content',
204
+ severity: 'info',
205
+ description: 'Optimize content for Google AI Overviews by using question-based headings and clear lists.',
206
+ check: (ctx) => {
207
+ if (ctx.wordCount && ctx.wordCount > 300) {
208
+ let messages = [];
209
+ let recommendation = '';
210
+ if (!ctx.hasQuestionHeadings) {
211
+ messages.push('No question-based headings (H2/H3) found.');
212
+ recommendation += 'Use H2/H3 headings that phrase common questions (e.g., "What is X?", "How to do Y?"). ';
213
+ }
214
+ if (ctx.listCount === 0) {
215
+ messages.push('No lists (ul/ol) found.');
216
+ recommendation += 'Incorporate clear numbered or bulleted lists for steps, features, or summaries.';
217
+ }
218
+ if (messages.length > 0) {
219
+ return createResult({ id: 'content-sge-optimization', name: 'AI Overview Optimization', category: 'content', severity: 'info' }, 'info', `Content could be better optimized for AI Overviews: ${messages.join(' ')}`, { recommendation: recommendation.trim() });
220
+ }
221
+ }
222
+ return createResult({ id: 'content-sge-optimization', name: 'AI Overview (SGE) Optimization', category: 'content', severity: 'info' }, 'info', 'Not applicable (content is optimized for AI Overviews or content is short)', { recommendation: 'This rule checks for question-based headings and lists for AI compatibility' });
223
+ },
224
+ },
225
+ {
226
+ id: 'content-flesch-readability',
227
+ name: 'Flesch Reading Ease Score',
228
+ category: 'content',
229
+ severity: 'info',
230
+ description: 'Measures readability, aiming for a Flesch score above 60 for broad audiences.',
231
+ check: (ctx) => {
232
+ if (ctx.fleschReadingEase === undefined) {
233
+ return createResult({ id: 'content-flesch-readability', name: 'Flesch Reading Ease', category: 'content', severity: 'info' }, 'info', 'Flesch Reading Ease score could not be calculated.', { recommendation: 'To enable Flesch score analysis, ensure the analyzer has capabilities to count syllables per word.' });
234
+ }
235
+ const score = ctx.fleschReadingEase;
236
+ if (score < 60) {
237
+ return createResult({ id: 'content-flesch-readability', name: 'Flesch Reading Ease', category: 'content', severity: 'warning' }, 'warn', `Low Flesch Reading Ease score: ${score.toFixed(2)} (target > 60)`, {
238
+ value: score,
239
+ recommendation: 'Simplify sentence structure and vocabulary to improve readability for a broader audience.',
240
+ evidence: {
241
+ found: `Flesch Reading Ease score: ${score.toFixed(1)}`,
242
+ expected: 'Target score: 60+ (plain English, 8th-9th grade level)',
243
+ impact: 'Low readability scores indicate difficult text. Most web users prefer content at 8th-9th grade reading level. Difficult content increases bounce rates.',
244
+ example: 'Score 0-30: College graduate. 60-70: 8th-9th grade. 90-100: 5th grade. Aim for 60+ for broad appeal.',
245
+ },
246
+ });
247
+ }
248
+ return createResult({ id: 'content-flesch-readability', name: 'Flesch Reading Ease', category: 'content', severity: 'info' }, 'pass', `Good Flesch Reading Ease score: ${score.toFixed(2)}`, { value: score });
249
+ },
250
+ },
251
+ {
252
+ id: 'content-faq-mandatory',
253
+ name: 'Mandatory FAQ Section',
254
+ category: 'content',
255
+ severity: 'info',
256
+ description: 'For comprehensive content, an FAQ section is recommended.',
257
+ check: (ctx) => {
258
+ if (ctx.wordCount && ctx.wordCount > SEO_THRESHOLDS.content.minWordsAuthority && (ctx.faqCount ?? 0) < 3) {
259
+ return createResult({ id: 'content-faq-mandatory', name: 'FAQ Section', category: 'content', severity: 'info' }, 'warn', 'Consider adding a dedicated FAQ section for comprehensive content', {
260
+ recommendation: 'Include 3-7 common questions as H3s and optionally use Schema.org FAQPage markup for rich results.',
261
+ evidence: {
262
+ found: `${ctx.faqCount ?? 0} FAQ items detected`,
263
+ expected: 'At least 3-7 FAQ items for comprehensive content (1000+ words)',
264
+ impact: 'FAQ sections help with featured snippets, voice search, and AI overviews. They address common user questions directly.',
265
+ example: '<h2>Frequently Asked Questions</h2>\n<h3>What is X?</h3>\n<p>Answer...</p>\n<h3>How do I use Y?</h3>\n<p>Answer...</p>',
266
+ },
267
+ });
268
+ }
269
+ return createResult({ id: 'content-faq-mandatory', name: 'Mandatory FAQ Section', category: 'content', severity: 'info' }, 'info', 'Not applicable (page has FAQ or content is not comprehensive)', { recommendation: 'This rule checks for FAQ sections in comprehensive content' });
270
+ },
271
+ },
272
+ {
273
+ id: 'content-image-text-proportion',
274
+ name: 'Image-Text Proportion',
275
+ category: 'content',
276
+ severity: 'info',
277
+ description: 'Maintain a healthy image-to-text ratio for engaging content.',
278
+ check: (ctx) => {
279
+ if (ctx.wordCount === undefined || ctx.wordCount < 200 || ctx.totalImages === undefined || ctx.totalImages === 0) {
280
+ return createResult({ id: 'content-image-text-proportion', name: 'Image-Text Proportion', category: 'content', severity: 'info' }, 'info', 'Not applicable (insufficient content, word count, or no images)', { recommendation: 'This rule checks image-to-text ratio for substantial content with images' });
281
+ }
282
+ const { min: minWords, max: maxWords } = SEO_THRESHOLDS.content.imageWordRatio;
283
+ const actualWordsPerImage = ctx.wordCount / ctx.totalImages;
284
+ if (actualWordsPerImage > maxWords) {
285
+ return createResult({ id: 'content-image-text-proportion', name: 'Image-Text Proportion', category: 'content', severity: 'info' }, 'warn', `Low image density (1 image per ${actualWordsPerImage.toFixed(0)} words, ideal: 1 per ${minWords}-${maxWords})`, {
286
+ recommendation: 'Add more relevant images to break up text and improve engagement.',
287
+ evidence: {
288
+ found: `1 image per ${actualWordsPerImage.toFixed(0)} words (${ctx.totalImages} images for ${ctx.wordCount} words)`,
289
+ expected: `Ideal ratio: 1 image per ${minWords}-${maxWords} words`,
290
+ impact: 'Too few images makes content appear text-heavy and less engaging. Images help break up content and improve visual appeal.',
291
+ example: 'For a 1,000-word article, include 3-5 relevant images (screenshots, diagrams, photos) to maintain reader interest.',
292
+ },
293
+ });
294
+ }
295
+ if (actualWordsPerImage < minWords) {
296
+ return createResult({ id: 'content-image-text-proportion', name: 'Image-Text Proportion', category: 'content', severity: 'info' }, 'warn', `High image density (1 image per ${actualWordsPerImage.toFixed(0)} words, ideal: 1 per ${minWords}-${maxWords})`, {
297
+ recommendation: 'Ensure images are relevant and not excessive, as too many can be distracting.',
298
+ evidence: {
299
+ found: `1 image per ${actualWordsPerImage.toFixed(0)} words (${ctx.totalImages} images for ${ctx.wordCount} words)`,
300
+ expected: `Ideal ratio: 1 image per ${minWords}-${maxWords} words`,
301
+ impact: 'Too many images can slow page load, distract from content, and reduce text-to-HTML ratio. Quality over quantity.',
302
+ example: 'A 500-word article with 10 images is excessive. Aim for 2-3 high-quality, relevant images instead.',
303
+ },
304
+ });
305
+ }
306
+ return createResult({ id: 'content-image-text-proportion', name: 'Image-Text Proportion', category: 'content', severity: 'info' }, 'info', 'Not applicable (image-text proportion is balanced)', { recommendation: 'This rule checks if images are proportional to content' });
307
+ },
308
+ },
309
+ {
310
+ id: 'content-main-keyword-redundancy',
311
+ name: 'Main Keyword Redundancy',
312
+ category: 'content',
313
+ severity: 'info',
314
+ description: 'Avoid keyword stuffing in key areas (title, H1, first paragraph, image alt).',
315
+ check: (ctx) => {
316
+ if (!ctx.mainKeyword || !ctx.title || !ctx.h1Text || !ctx.metaDescription || !ctx.paragraphWordCounts || ctx.paragraphWordCounts.length === 0) {
317
+ return createResult({ id: 'content-main-keyword-redundancy', name: 'Main Keyword Redundancy', category: 'content', severity: 'info' }, 'info', 'Not applicable (insufficient data for keyword redundancy check)', { recommendation: 'This rule checks keyword stuffing when main keyword and content data are available' });
318
+ }
319
+ const keyword = ctx.mainKeyword.toLowerCase();
320
+ let redundancyCount = 0;
321
+ if (ctx.title.toLowerCase().includes(keyword))
322
+ redundancyCount++;
323
+ if (ctx.h1Text.toLowerCase().includes(keyword))
324
+ redundancyCount++;
325
+ if (redundancyCount > SEO_THRESHOLDS.content.redundancyTolerance) {
326
+ return createResult({ id: 'content-main-keyword-redundancy', name: 'Main Keyword Redundancy', category: 'content', severity: 'warning' }, 'warn', `Main keyword "${ctx.mainKeyword}" appears too often in key SEO elements.`, {
327
+ recommendation: 'Ensure natural language use of keywords. Avoid over-optimization (keyword stuffing) in title, H1, meta description, and alt texts.',
328
+ evidence: {
329
+ found: `Keyword appears in ${redundancyCount} of ${SEO_THRESHOLDS.content.redundancyTolerance} tracked elements`,
330
+ expected: `Keyword should appear naturally, not forced into every element`,
331
+ impact: 'Keyword stuffing across all SEO elements can trigger over-optimization penalties. Use synonyms and natural language.',
332
+ example: 'Instead of repeating "best dog food" everywhere, vary with "quality pet nutrition", "premium canine meals", etc.',
333
+ },
334
+ });
335
+ }
336
+ return createResult({ id: 'content-main-keyword-redundancy', name: 'Main Keyword Redundancy', category: 'content', severity: 'warning' }, 'info', 'Not applicable (keyword usage is natural)', { recommendation: 'This rule checks for excessive keyword repetition in key SEO elements' });
337
+ },
338
+ },
339
+ {
340
+ id: 'content-freshness',
341
+ name: 'Content Freshness',
342
+ category: 'content',
343
+ severity: 'info',
344
+ description: 'Content should have indicators of recency (dates).',
345
+ check: (ctx) => {
346
+ const hasDate = ctx.lastModified || ctx.ogArticlePublishedTime;
347
+ if (!hasDate) {
348
+ return createResult({ id: 'content-freshness', name: 'Content Freshness', category: 'content', severity: 'info' }, 'info', 'No content freshness information found (Last-Modified, og:updated_time)', { recommendation: 'Keep content updated and ensure dates are visible to crawlers (meta tags, sitemap, or headers).' });
349
+ }
350
+ return createResult({ id: 'content-freshness', name: 'Content Freshness', category: 'content', severity: 'info' }, 'pass', 'Content freshness signal found', { value: hasDate });
351
+ },
352
+ },
353
+ {
354
+ id: 'email-privacy',
355
+ name: 'Email Privacy',
356
+ category: 'content',
357
+ severity: 'warning',
358
+ description: 'Avoid plain text email addresses to prevent spam.',
359
+ check: (ctx) => {
360
+ if (ctx.emailsFound && ctx.emailsFound.length > 0) {
361
+ return createResult({ id: 'email-privacy', name: 'Email Privacy', category: 'content', severity: 'warning' }, 'warn', `${ctx.emailsFound.length} plain text email address(es) found`, {
362
+ value: ctx.emailsFound.length,
363
+ recommendation: 'Remove plain text emails. Use contact forms or obfuscation (e.g., "user [at] domain").',
364
+ evidence: {
365
+ found: ctx.emailsFound.slice(0, 3),
366
+ impact: 'Spam bots scrape plain text emails.'
367
+ }
368
+ });
369
+ }
370
+ return createResult({ id: 'email-privacy', name: 'Email Privacy', category: 'content', severity: 'info' }, 'pass', 'No plain text emails found');
371
+ },
372
+ },
373
+ {
374
+ id: 'keyword-in-url',
375
+ name: 'Keyword in URL',
376
+ category: 'content',
377
+ severity: 'info',
378
+ description: 'URLs should contain relevant keywords for better SEO signals.',
379
+ check: (ctx) => {
380
+ if (!ctx.topKeywords || ctx.topKeywords.length === 0) {
381
+ return createResult({ id: 'keyword-in-url', name: 'Keyword in URL', category: 'content', severity: 'info' }, 'info', 'Not applicable (no keyword data available)', { recommendation: 'This rule checks if keywords are present in URL when keyword data is available' });
382
+ }
383
+ if (ctx.keywordsInUrl === undefined) {
384
+ return createResult({ id: 'keyword-in-url', name: 'Keyword in URL', category: 'content', severity: 'info' }, 'info', 'Not applicable (keyword URL check not performed)', { recommendation: 'This rule checks URL for relevant keywords when data is available' });
385
+ }
386
+ const mainKeyword = ctx.mainKeyword || ctx.topKeywords[0];
387
+ if (!ctx.keywordsInUrl) {
388
+ return createResult({ id: 'keyword-in-url', name: 'Keyword in URL', category: 'content', severity: 'info' }, 'warn', 'Main keyword not found in URL', {
389
+ recommendation: `Include "${mainKeyword}" or related keywords in your URL slug for better SEO.`,
390
+ evidence: {
391
+ found: 'No keywords in URL path',
392
+ expected: 'URL slug should contain target keywords',
393
+ impact: 'URLs with keywords rank slightly better and are more descriptive to users.',
394
+ learnMore: 'https://developers.google.com/search/docs/crawling-indexing/url-structure',
395
+ },
396
+ });
397
+ }
398
+ return createResult({ id: 'keyword-in-url', name: 'Keyword in URL', category: 'content', severity: 'info' }, 'pass', 'URL contains relevant keywords');
399
+ },
400
+ },
401
+ {
402
+ id: 'keyword-consistency',
403
+ name: 'Keyword Consistency Score',
404
+ category: 'content',
405
+ severity: 'warning',
406
+ description: 'Main keyword should appear consistently across title, description, H1, URL, first paragraph, and image alt text.',
407
+ check: (ctx) => {
408
+ if (ctx.keywordConsistencyScore === undefined || !ctx.keywordConsistencyDetails) {
409
+ return createResult({ id: 'keyword-consistency', name: 'Keyword Consistency Score', category: 'content', severity: 'warning' }, 'info', 'Not applicable (keyword consistency data not available)', { recommendation: 'This rule checks keyword consistency when data is available' });
410
+ }
411
+ if (!ctx.mainKeyword) {
412
+ return createResult({ id: 'keyword-consistency', name: 'Keyword Consistency Score', category: 'content', severity: 'warning' }, 'info', 'Not applicable (no main keyword specified)', { recommendation: 'This rule checks keyword consistency when a main keyword is provided' });
413
+ }
414
+ const score = ctx.keywordConsistencyScore;
415
+ const details = ctx.keywordConsistencyDetails;
416
+ const maxScore = 6;
417
+ const missing = [];
418
+ if (!details.inTitle)
419
+ missing.push('title');
420
+ if (!details.inDescription)
421
+ missing.push('meta description');
422
+ if (!details.inH1)
423
+ missing.push('H1 heading');
424
+ if (!details.inUrl)
425
+ missing.push('URL');
426
+ if (!details.inFirstParagraph)
427
+ missing.push('first paragraph');
428
+ if (!details.inAltText)
429
+ missing.push('image alt text');
430
+ const present = [];
431
+ if (details.inTitle)
432
+ present.push('title');
433
+ if (details.inDescription)
434
+ present.push('meta description');
435
+ if (details.inH1)
436
+ present.push('H1');
437
+ if (details.inUrl)
438
+ present.push('URL');
439
+ if (details.inFirstParagraph)
440
+ present.push('first paragraph');
441
+ if (details.inAltText)
442
+ present.push('alt text');
443
+ if (score <= 2) {
444
+ return createResult({ id: 'keyword-consistency', name: 'Keyword Consistency', category: 'content', severity: 'warning' }, 'fail', `Low keyword consistency (${score}/${maxScore}): "${ctx.mainKeyword}" missing from most key locations`, {
445
+ value: score,
446
+ recommendation: `Add "${ctx.mainKeyword}" to: ${missing.join(', ')}`,
447
+ evidence: {
448
+ found: present.length > 0 ? `Found in: ${present.join(', ')}` : 'Not found in any key location',
449
+ expected: 'Keyword should appear in at least 4-5 of: title, description, H1, URL, first paragraph, image alt',
450
+ impact: 'Consistent keyword placement signals topical relevance to search engines.',
451
+ },
452
+ });
453
+ }
454
+ if (score <= 4) {
455
+ return createResult({ id: 'keyword-consistency', name: 'Keyword Consistency', category: 'content', severity: 'info' }, 'warn', `Moderate keyword consistency (${score}/${maxScore}): "${ctx.mainKeyword}" missing from some locations`, {
456
+ value: score,
457
+ recommendation: `Consider adding "${ctx.mainKeyword}" to: ${missing.join(', ')}`,
458
+ evidence: {
459
+ found: `Found in: ${present.join(', ')}`,
460
+ expected: 'Keyword should appear in at least 5-6 key locations for optimal SEO',
461
+ },
462
+ });
463
+ }
464
+ return createResult({ id: 'keyword-consistency', name: 'Keyword Consistency', category: 'content', severity: 'info' }, 'pass', `Excellent keyword consistency (${score}/${maxScore}): "${ctx.mainKeyword}" found in ${present.join(', ')}`, { value: score });
465
+ },
466
+ },
467
+ {
468
+ id: 'keyword-in-first-paragraph',
469
+ name: 'Keyword in First Paragraph',
470
+ category: 'content',
471
+ severity: 'info',
472
+ description: 'Including the main keyword in the first paragraph helps establish topic relevance.',
473
+ check: (ctx) => {
474
+ if (!ctx.topKeywords || ctx.topKeywords.length === 0) {
475
+ return createResult({ id: 'keyword-in-first-paragraph', name: 'Keyword in First Paragraph', category: 'content', severity: 'info' }, 'info', 'Not applicable (no keyword data available)', { recommendation: 'This rule checks first paragraph for keywords when keyword data is available' });
476
+ }
477
+ if (ctx.keywordsInFirstParagraph === undefined) {
478
+ return createResult({ id: 'keyword-in-first-paragraph', name: 'Keyword in First Paragraph', category: 'content', severity: 'info' }, 'info', 'Not applicable (first paragraph keyword check not performed)', { recommendation: 'This rule checks for keywords in first paragraph when data is available' });
479
+ }
480
+ const mainKeyword = ctx.mainKeyword || ctx.topKeywords[0];
481
+ if (!ctx.keywordsInFirstParagraph) {
482
+ return createResult({ id: 'keyword-in-first-paragraph', name: 'Keyword in First Paragraph', category: 'content', severity: 'info' }, 'warn', 'Main keyword not found in first paragraph', {
483
+ recommendation: `Include "${mainKeyword}" naturally in your opening paragraph to establish topic relevance.`,
484
+ evidence: {
485
+ expected: 'Main keyword should appear in the first 100 words',
486
+ impact: 'Early keyword placement signals the main topic to search engines and readers.',
487
+ },
488
+ });
489
+ }
490
+ return createResult({ id: 'keyword-in-first-paragraph', name: 'Keyword in First Paragraph', category: 'content', severity: 'info' }, 'pass', 'Keyword found in first paragraph');
491
+ },
492
+ },
493
+ {
494
+ id: 'keyword-in-image-alt',
495
+ name: 'Keyword in Image Alt Text',
496
+ category: 'content',
497
+ severity: 'info',
498
+ description: 'At least one image should have alt text containing the main keyword.',
499
+ check: (ctx) => {
500
+ if (!ctx.topKeywords || ctx.topKeywords.length === 0) {
501
+ return createResult({ id: 'keyword-in-image-alt', name: 'Keyword in Image Alt Text', category: 'content', severity: 'info' }, 'info', 'Not applicable (no keyword data available)', { recommendation: 'This rule checks image alt text for keywords when keyword data is available' });
502
+ }
503
+ if (ctx.keywordsInAltText === undefined) {
504
+ return createResult({ id: 'keyword-in-image-alt', name: 'Keyword in Image Alt Text', category: 'content', severity: 'info' }, 'info', 'Not applicable (alt text keyword check not performed)', { recommendation: 'This rule checks for keywords in image alt text when data is available' });
505
+ }
506
+ if (ctx.totalImages === 0) {
507
+ return createResult({ id: 'keyword-in-image-alt', name: 'Keyword in Image Alt Text', category: 'content', severity: 'info' }, 'info', 'Not applicable (no images on page)', { recommendation: 'This rule checks image alt text for keywords when images are present' });
508
+ }
509
+ const mainKeyword = ctx.mainKeyword || ctx.topKeywords[0];
510
+ if (!ctx.keywordsInAltText) {
511
+ return createResult({ id: 'keyword-in-image-alt', name: 'Keyword in Image Alt', category: 'content', severity: 'info' }, 'warn', 'Main keyword not found in any image alt text', {
512
+ recommendation: `Include "${mainKeyword}" naturally in at least one image alt attribute.`,
513
+ evidence: {
514
+ expected: 'At least one image alt should contain the target keyword',
515
+ impact: 'Image alt text contributes to keyword relevance and helps with image search rankings.',
516
+ },
517
+ });
518
+ }
519
+ return createResult({ id: 'keyword-in-image-alt', name: 'Keyword in Image Alt', category: 'content', severity: 'info' }, 'pass', 'Keyword found in image alt text');
520
+ },
521
+ },
522
+ ];
@@ -0,0 +1,2 @@
1
+ import { SeoRule } from './types.js';
2
+ export declare const crawlRules: SeoRule[]; // Ambient declaration only: an exported array of SeoRule entries (type imported from './types.js').