recker 1.0.43 → 1.0.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -0
- package/dist/bin/recker-linux-x64 +0 -0
- package/dist/bin/recker-macos-x64 +0 -0
- package/dist/bin/recker-win-x64.exe +0 -0
- package/dist/bin/rek.cjs +85152 -100207
- package/dist/browser/ai/adaptive-timeout.d.ts +50 -0
- package/dist/browser/ai/adaptive-timeout.js +208 -0
- package/dist/browser/ai/client.d.ts +22 -0
- package/dist/browser/ai/client.js +294 -0
- package/dist/browser/ai/index.d.ts +14 -0
- package/dist/browser/ai/index.js +11 -0
- package/dist/browser/ai/providers/anthropic.d.ts +63 -0
- package/dist/browser/ai/providers/anthropic.js +370 -0
- package/dist/browser/ai/providers/base.d.ts +48 -0
- package/dist/browser/ai/providers/base.js +150 -0
- package/dist/browser/ai/providers/google.d.ts +59 -0
- package/dist/browser/ai/providers/google.js +305 -0
- package/dist/browser/ai/providers/ollama.d.ts +44 -0
- package/dist/browser/ai/providers/ollama.js +240 -0
- package/dist/browser/ai/providers/openai.d.ts +64 -0
- package/dist/browser/ai/providers/openai.js +298 -0
- package/dist/browser/ai/rate-limiter.d.ts +43 -0
- package/dist/browser/ai/rate-limiter.js +215 -0
- package/dist/browser/ai/vector/index.d.ts +2 -0
- package/dist/browser/ai/vector/index.js +2 -0
- package/dist/browser/ai/vector/similarity.d.ts +2 -0
- package/dist/browser/ai/vector/similarity.js +27 -0
- package/dist/browser/ai/vector/store.d.ts +27 -0
- package/dist/browser/ai/vector/store.js +82 -0
- package/dist/browser/browser/cache.d.ts +2 -40
- package/dist/browser/browser/cache.js +2 -199
- package/dist/browser/browser/index.d.ts +8 -0
- package/dist/browser/browser/index.js +8 -0
- package/dist/browser/browser/recker.d.ts +8 -1
- package/dist/browser/browser/recker.js +8 -2
- package/dist/browser/cache/indexed-db.d.ts +10 -0
- package/dist/browser/cache/indexed-db.js +88 -0
- package/dist/browser/cache/service-worker-cache.d.ts +18 -0
- package/dist/browser/cache/service-worker-cache.js +103 -0
- package/dist/browser/cache.d.ts +2 -40
- package/dist/browser/cache.js +2 -199
- package/dist/browser/constants/user-agents.d.ts +7 -0
- package/dist/browser/constants/user-agents.js +7 -0
- package/dist/browser/core/client.d.ts +2 -0
- package/dist/browser/core/client.js +19 -1
- package/dist/browser/index.d.ts +8 -0
- package/dist/browser/index.js +8 -0
- package/dist/browser/plugins/har-recorder.d.ts +40 -0
- package/dist/browser/plugins/har-recorder.js +120 -0
- package/dist/browser/plugins/network-simulation.d.ts +7 -0
- package/dist/browser/plugins/network-simulation.js +13 -0
- package/dist/browser/presets/android.d.ts +2 -0
- package/dist/browser/presets/android.js +16 -0
- package/dist/browser/presets/anthropic.d.ts +8 -0
- package/dist/browser/presets/anthropic.js +27 -0
- package/dist/browser/presets/aws.d.ts +19 -0
- package/dist/browser/presets/aws.js +68 -0
- package/dist/browser/presets/azure-openai.d.ts +10 -0
- package/dist/browser/presets/azure-openai.js +35 -0
- package/dist/browser/presets/azure.d.ts +41 -0
- package/dist/browser/presets/azure.js +104 -0
- package/dist/browser/presets/chaturbate.d.ts +2 -0
- package/dist/browser/presets/chaturbate.js +17 -0
- package/dist/browser/presets/cloudflare.d.ts +12 -0
- package/dist/browser/presets/cloudflare.js +39 -0
- package/dist/browser/presets/cohere.d.ts +7 -0
- package/dist/browser/presets/cohere.js +22 -0
- package/dist/browser/presets/deepseek.d.ts +7 -0
- package/dist/browser/presets/deepseek.js +22 -0
- package/dist/browser/presets/digitalocean.d.ts +5 -0
- package/dist/browser/presets/digitalocean.js +16 -0
- package/dist/browser/presets/discord.d.ts +6 -0
- package/dist/browser/presets/discord.js +17 -0
- package/dist/browser/presets/elevenlabs.d.ts +6 -0
- package/dist/browser/presets/elevenlabs.js +20 -0
- package/dist/browser/presets/enhancers.d.ts +20 -0
- package/dist/browser/presets/enhancers.js +85 -0
- package/dist/browser/presets/fireworks.d.ts +7 -0
- package/dist/browser/presets/fireworks.js +22 -0
- package/dist/browser/presets/gcp.d.ts +34 -0
- package/dist/browser/presets/gcp.js +91 -0
- package/dist/browser/presets/gemini.d.ts +7 -0
- package/dist/browser/presets/gemini.js +23 -0
- package/dist/browser/presets/github.d.ts +6 -0
- package/dist/browser/presets/github.js +17 -0
- package/dist/browser/presets/gitlab.d.ts +6 -0
- package/dist/browser/presets/gitlab.js +16 -0
- package/dist/browser/presets/groq.d.ts +7 -0
- package/dist/browser/presets/groq.js +22 -0
- package/dist/browser/presets/hubspot.d.ts +9 -0
- package/dist/browser/presets/hubspot.js +28 -0
- package/dist/browser/presets/huggingface.d.ts +7 -0
- package/dist/browser/presets/huggingface.js +23 -0
- package/dist/browser/presets/index.d.ts +47 -0
- package/dist/browser/presets/index.js +47 -0
- package/dist/browser/presets/ios.d.ts +2 -0
- package/dist/browser/presets/ios.js +13 -0
- package/dist/browser/presets/linear.d.ts +5 -0
- package/dist/browser/presets/linear.js +16 -0
- package/dist/browser/presets/mailgun.d.ts +7 -0
- package/dist/browser/presets/mailgun.js +20 -0
- package/dist/browser/presets/meta.d.ts +10 -0
- package/dist/browser/presets/meta.js +33 -0
- package/dist/browser/presets/mistral.d.ts +7 -0
- package/dist/browser/presets/mistral.js +22 -0
- package/dist/browser/presets/notion.d.ts +6 -0
- package/dist/browser/presets/notion.js +17 -0
- package/dist/browser/presets/openai.d.ts +9 -0
- package/dist/browser/presets/openai.js +30 -0
- package/dist/browser/presets/oracle.d.ts +19 -0
- package/dist/browser/presets/oracle.js +117 -0
- package/dist/browser/presets/perplexity.d.ts +7 -0
- package/dist/browser/presets/perplexity.js +22 -0
- package/dist/browser/presets/pinecone.d.ts +8 -0
- package/dist/browser/presets/pinecone.js +42 -0
- package/dist/browser/presets/registry.d.ts +23 -0
- package/dist/browser/presets/registry.js +519 -0
- package/dist/browser/presets/replicate.d.ts +7 -0
- package/dist/browser/presets/replicate.js +23 -0
- package/dist/browser/presets/sendgrid.d.ts +6 -0
- package/dist/browser/presets/sendgrid.js +20 -0
- package/dist/browser/presets/sentry.d.ts +11 -0
- package/dist/browser/presets/sentry.js +48 -0
- package/dist/browser/presets/sinch.d.ts +9 -0
- package/dist/browser/presets/sinch.js +39 -0
- package/dist/browser/presets/slack.d.ts +5 -0
- package/dist/browser/presets/slack.js +16 -0
- package/dist/browser/presets/square.d.ts +10 -0
- package/dist/browser/presets/square.js +33 -0
- package/dist/browser/presets/stripe.d.ts +7 -0
- package/dist/browser/presets/stripe.js +23 -0
- package/dist/browser/presets/supabase.d.ts +6 -0
- package/dist/browser/presets/supabase.js +18 -0
- package/dist/browser/presets/tiktok.d.ts +10 -0
- package/dist/browser/presets/tiktok.js +38 -0
- package/dist/browser/presets/together.d.ts +7 -0
- package/dist/browser/presets/together.js +22 -0
- package/dist/browser/presets/twilio.d.ts +6 -0
- package/dist/browser/presets/twilio.js +17 -0
- package/dist/browser/presets/vercel.d.ts +6 -0
- package/dist/browser/presets/vercel.js +23 -0
- package/dist/browser/presets/vultr.d.ts +5 -0
- package/dist/browser/presets/vultr.js +16 -0
- package/dist/browser/presets/xai.d.ts +8 -0
- package/dist/browser/presets/xai.js +23 -0
- package/dist/browser/presets/youtube.d.ts +5 -0
- package/dist/browser/presets/youtube.js +20 -0
- package/dist/browser/recker.d.ts +8 -1
- package/dist/browser/recker.js +8 -2
- package/dist/browser/scrape/document.d.ts +5 -4
- package/dist/browser/scrape/document.js +89 -76
- package/dist/browser/scrape/element.d.ts +10 -8
- package/dist/browser/scrape/element.js +295 -81
- package/dist/browser/scrape/extractors.d.ts +11 -11
- package/dist/browser/scrape/extractors.js +145 -113
- package/dist/browser/scrape/parser/back.d.ts +1 -0
- package/dist/browser/scrape/parser/back.js +3 -0
- package/dist/browser/scrape/parser/index.d.ts +20 -0
- package/dist/browser/scrape/parser/index.js +19 -0
- package/dist/browser/scrape/parser/matcher.d.ts +30 -0
- package/dist/browser/scrape/parser/matcher.js +99 -0
- package/dist/browser/scrape/parser/nodes/comment.d.ts +12 -0
- package/dist/browser/scrape/parser/nodes/comment.js +21 -0
- package/dist/browser/scrape/parser/nodes/html.d.ts +110 -0
- package/dist/browser/scrape/parser/nodes/html.js +978 -0
- package/dist/browser/scrape/parser/nodes/node.d.ts +18 -0
- package/dist/browser/scrape/parser/nodes/node.js +31 -0
- package/dist/browser/scrape/parser/nodes/text.d.ts +14 -0
- package/dist/browser/scrape/parser/nodes/text.js +30 -0
- package/dist/browser/scrape/parser/nodes/type.d.ts +6 -0
- package/dist/browser/scrape/parser/nodes/type.js +7 -0
- package/dist/browser/scrape/parser/parse.d.ts +1 -0
- package/dist/browser/scrape/parser/parse.js +1 -0
- package/dist/browser/scrape/parser/valid.d.ts +2 -0
- package/dist/browser/scrape/parser/valid.js +5 -0
- package/dist/browser/scrape/parser/void-tag.d.ts +7 -0
- package/dist/browser/scrape/parser/void-tag.js +43 -0
- package/dist/browser/scrape/types.d.ts +7 -0
- package/dist/browser/seo/analyzer.d.ts +59 -0
- package/dist/browser/seo/analyzer.js +1399 -0
- package/dist/browser/seo/keywords.d.ts +16 -0
- package/dist/browser/seo/keywords.js +55 -0
- package/dist/browser/seo/rules/accessibility.d.ts +2 -0
- package/dist/browser/seo/rules/accessibility.js +733 -0
- package/dist/browser/seo/rules/ai-search.d.ts +2 -0
- package/dist/browser/seo/rules/ai-search.js +436 -0
- package/dist/browser/seo/rules/analytics.d.ts +2 -0
- package/dist/browser/seo/rules/analytics.js +306 -0
- package/dist/browser/seo/rules/best-practices.d.ts +2 -0
- package/dist/browser/seo/rules/best-practices.js +195 -0
- package/dist/browser/seo/rules/canonical.d.ts +12 -0
- package/dist/browser/seo/rules/canonical.js +270 -0
- package/dist/browser/seo/rules/content.d.ts +2 -0
- package/dist/browser/seo/rules/content.js +522 -0
- package/dist/browser/seo/rules/crawl.d.ts +2 -0
- package/dist/browser/seo/rules/crawl.js +435 -0
- package/dist/browser/seo/rules/cwv.d.ts +2 -0
- package/dist/browser/seo/rules/cwv.js +248 -0
- package/dist/browser/seo/rules/ecommerce.d.ts +2 -0
- package/dist/browser/seo/rules/ecommerce.js +312 -0
- package/dist/browser/seo/rules/i18n.d.ts +2 -0
- package/dist/browser/seo/rules/i18n.js +288 -0
- package/dist/browser/seo/rules/images.d.ts +2 -0
- package/dist/browser/seo/rules/images.js +255 -0
- package/dist/browser/seo/rules/index.d.ts +52 -0
- package/dist/browser/seo/rules/index.js +159 -0
- package/dist/browser/seo/rules/internal-linking.d.ts +2 -0
- package/dist/browser/seo/rules/internal-linking.js +394 -0
- package/dist/browser/seo/rules/links.d.ts +2 -0
- package/dist/browser/seo/rules/links.js +498 -0
- package/dist/browser/seo/rules/local.d.ts +2 -0
- package/dist/browser/seo/rules/local.js +289 -0
- package/dist/browser/seo/rules/meta.d.ts +2 -0
- package/dist/browser/seo/rules/meta.js +805 -0
- package/dist/browser/seo/rules/mobile.d.ts +2 -0
- package/dist/browser/seo/rules/mobile.js +161 -0
- package/dist/browser/seo/rules/performance.d.ts +2 -0
- package/dist/browser/seo/rules/performance.js +738 -0
- package/dist/browser/seo/rules/pwa.d.ts +2 -0
- package/dist/browser/seo/rules/pwa.js +299 -0
- package/dist/browser/seo/rules/readability.d.ts +2 -0
- package/dist/browser/seo/rules/readability.js +264 -0
- package/dist/browser/seo/rules/redirects.d.ts +16 -0
- package/dist/browser/seo/rules/redirects.js +199 -0
- package/dist/browser/seo/rules/resources.d.ts +2 -0
- package/dist/browser/seo/rules/resources.js +390 -0
- package/dist/browser/seo/rules/schema.d.ts +2 -0
- package/dist/browser/seo/rules/schema.js +379 -0
- package/dist/browser/seo/rules/security.d.ts +2 -0
- package/dist/browser/seo/rules/security.js +877 -0
- package/dist/browser/seo/rules/social.d.ts +2 -0
- package/dist/browser/seo/rules/social.js +603 -0
- package/dist/browser/seo/rules/structural.d.ts +2 -0
- package/dist/browser/seo/rules/structural.js +223 -0
- package/dist/browser/seo/rules/technical-advanced.d.ts +10 -0
- package/dist/browser/seo/rules/technical-advanced.js +289 -0
- package/dist/browser/seo/rules/technical.d.ts +2 -0
- package/dist/browser/seo/rules/technical.js +480 -0
- package/dist/browser/seo/rules/thresholds.d.ts +196 -0
- package/dist/browser/seo/rules/thresholds.js +118 -0
- package/dist/browser/seo/rules/types.d.ts +498 -0
- package/dist/browser/seo/rules/types.js +11 -0
- package/dist/browser/seo/types.d.ts +211 -0
- package/dist/browser/seo/types.js +1 -0
- package/dist/browser/transport/curl.d.ts +4 -0
- package/dist/browser/transport/curl.js +101 -0
- package/dist/browser/transport/undici.js +1 -2
- package/dist/browser/transport/worker.d.ts +18 -0
- package/dist/browser/transport/worker.js +278 -0
- package/dist/browser/types/index.d.ts +4 -1
- package/dist/browser/utils/binary-manager.d.ts +4 -0
- package/dist/browser/utils/binary-manager.js +72 -0
- package/dist/browser/utils/user-agent.js +2 -13
- package/dist/cache/indexed-db.d.ts +10 -0
- package/dist/cache/indexed-db.js +88 -0
- package/dist/cache/service-worker-cache.d.ts +18 -0
- package/dist/cache/service-worker-cache.js +103 -0
- package/dist/cli/commands/ai.d.ts +2 -0
- package/dist/cli/commands/ai.js +162 -0
- package/dist/cli/commands/bench.d.ts +2 -0
- package/dist/cli/commands/bench.js +51 -0
- package/dist/cli/commands/dns.d.ts +2 -0
- package/dist/cli/commands/dns.js +295 -0
- package/dist/cli/commands/har.d.ts +2 -0
- package/dist/cli/commands/har.js +171 -0
- package/dist/cli/commands/hls.d.ts +2 -0
- package/dist/cli/commands/hls.js +192 -0
- package/dist/cli/commands/network.d.ts +2 -0
- package/dist/cli/commands/network.js +288 -0
- package/dist/cli/commands/protocols.d.ts +2 -0
- package/dist/cli/commands/protocols.js +344 -0
- package/dist/cli/commands/scrape.d.ts +2 -0
- package/dist/cli/commands/scrape.js +176 -0
- package/dist/cli/commands/security.d.ts +2 -0
- package/dist/cli/commands/security.js +57 -0
- package/dist/cli/commands/seo.d.ts +2 -0
- package/dist/cli/commands/seo.js +125 -0
- package/dist/cli/commands/serve.d.ts +2 -0
- package/dist/cli/commands/serve.js +531 -0
- package/dist/cli/commands/spider.d.ts +3 -0
- package/dist/cli/commands/spider.js +456 -0
- package/dist/cli/commands/utils.d.ts +2 -0
- package/dist/cli/commands/utils.js +176 -0
- package/dist/cli/commands/vector.d.ts +2 -0
- package/dist/cli/commands/vector.js +158 -0
- package/dist/cli/handler.d.ts +2 -2
- package/dist/cli/handler.js +6 -6
- package/dist/cli/helpers.d.ts +7 -0
- package/dist/cli/helpers.js +128 -0
- package/dist/cli/index.js +96 -5228
- package/dist/cli/parser/help.d.ts +2 -0
- package/dist/cli/parser/help.js +52 -0
- package/dist/cli/parser/index.d.ts +3 -0
- package/dist/cli/parser/index.js +3 -0
- package/dist/cli/parser/parser.d.ts +4 -0
- package/dist/cli/parser/parser.js +146 -0
- package/dist/cli/parser/types.d.ts +41 -0
- package/dist/cli/parser/types.js +1 -0
- package/dist/cli/presets.d.ts +1 -1
- package/dist/cli/presets.js +1 -1
- package/dist/cli/router.d.ts +36 -0
- package/dist/cli/router.js +195 -0
- package/dist/cli/tui/ai-chat.js +1 -1
- package/dist/cli/tui/commands/context.d.ts +9 -0
- package/dist/cli/tui/commands/context.js +1 -0
- package/dist/cli/tui/commands/dns.d.ts +10 -0
- package/dist/cli/tui/commands/dns.js +461 -0
- package/dist/cli/tui/commands/hls.d.ts +2 -0
- package/dist/cli/tui/commands/hls.js +162 -0
- package/dist/cli/tui/commands/ip.d.ts +2 -0
- package/dist/cli/tui/commands/ip.js +45 -0
- package/dist/cli/tui/commands/network.d.ts +3 -0
- package/dist/cli/tui/commands/network.js +81 -0
- package/dist/cli/tui/commands/protocols.d.ts +6 -0
- package/dist/cli/tui/commands/protocols.js +531 -0
- package/dist/cli/tui/commands/security.d.ts +2 -0
- package/dist/cli/tui/commands/security.js +48 -0
- package/dist/cli/tui/commands/seo.d.ts +2 -0
- package/dist/cli/tui/commands/seo.js +74 -0
- package/dist/cli/tui/context.d.ts +12 -0
- package/dist/cli/tui/context.js +1 -0
- package/dist/cli/tui/shell.d.ts +11 -20
- package/dist/cli/tui/shell.js +216 -1873
- package/dist/constants/user-agents.d.ts +7 -0
- package/dist/constants/user-agents.js +7 -0
- package/dist/core/client.d.ts +2 -0
- package/dist/core/client.js +19 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.js +1 -0
- package/dist/mcp/cli.js +2 -3
- package/dist/mcp/data/embeddings.json +1 -1
- package/dist/mcp/tools/network.js +298 -158
- package/dist/plugins/har-player.d.ts +23 -0
- package/dist/plugins/har-player.js +49 -0
- package/dist/plugins/har-recorder.d.ts +37 -3
- package/dist/plugins/har-recorder.js +116 -63
- package/dist/plugins/network-simulation.d.ts +7 -0
- package/dist/plugins/network-simulation.js +13 -0
- package/dist/presets/android.d.ts +2 -0
- package/dist/presets/android.js +16 -0
- package/dist/presets/chaturbate.d.ts +2 -0
- package/dist/presets/chaturbate.js +17 -0
- package/dist/presets/elevenlabs.d.ts +6 -0
- package/dist/presets/elevenlabs.js +20 -0
- package/dist/presets/enhancers.d.ts +20 -0
- package/dist/presets/enhancers.js +85 -0
- package/dist/presets/hubspot.d.ts +9 -0
- package/dist/presets/hubspot.js +28 -0
- package/dist/presets/index.d.ts +10 -0
- package/dist/presets/index.js +10 -0
- package/dist/presets/ios.d.ts +2 -0
- package/dist/presets/ios.js +13 -0
- package/dist/presets/pinecone.d.ts +8 -0
- package/dist/presets/pinecone.js +42 -0
- package/dist/presets/registry.js +60 -0
- package/dist/presets/sendgrid.d.ts +6 -0
- package/dist/presets/sendgrid.js +20 -0
- package/dist/presets/sentry.d.ts +11 -0
- package/dist/presets/sentry.js +48 -0
- package/dist/presets/square.d.ts +10 -0
- package/dist/presets/square.js +33 -0
- package/dist/recker.d.ts +3 -0
- package/dist/recker.js +4 -0
- package/dist/scrape/document.d.ts +5 -4
- package/dist/scrape/document.js +89 -76
- package/dist/scrape/element.d.ts +10 -8
- package/dist/scrape/element.js +295 -81
- package/dist/scrape/extractors.d.ts +11 -11
- package/dist/scrape/extractors.js +145 -113
- package/dist/scrape/index.d.ts +2 -0
- package/dist/scrape/index.js +1 -0
- package/dist/scrape/parser/back.d.ts +1 -0
- package/dist/scrape/parser/back.js +3 -0
- package/dist/scrape/parser/index.d.ts +20 -0
- package/dist/scrape/parser/index.js +19 -0
- package/dist/scrape/parser/matcher.d.ts +30 -0
- package/dist/scrape/parser/matcher.js +99 -0
- package/dist/scrape/parser/nodes/comment.d.ts +12 -0
- package/dist/scrape/parser/nodes/comment.js +21 -0
- package/dist/scrape/parser/nodes/html.d.ts +110 -0
- package/dist/scrape/parser/nodes/html.js +978 -0
- package/dist/scrape/parser/nodes/node.d.ts +18 -0
- package/dist/scrape/parser/nodes/node.js +31 -0
- package/dist/scrape/parser/nodes/text.d.ts +14 -0
- package/dist/scrape/parser/nodes/text.js +30 -0
- package/dist/scrape/parser/nodes/type.d.ts +6 -0
- package/dist/scrape/parser/nodes/type.js +7 -0
- package/dist/scrape/parser/parse.d.ts +1 -0
- package/dist/scrape/parser/parse.js +1 -0
- package/dist/scrape/parser/valid.d.ts +2 -0
- package/dist/scrape/parser/valid.js +5 -0
- package/dist/scrape/parser/void-tag.d.ts +7 -0
- package/dist/scrape/parser/void-tag.js +43 -0
- package/dist/scrape/spider.d.ts +19 -0
- package/dist/scrape/spider.js +28 -3
- package/dist/scrape/types.d.ts +7 -0
- package/dist/seo/analyzer.d.ts +15 -5
- package/dist/seo/analyzer.js +636 -175
- package/dist/seo/formatter.d.ts +16 -0
- package/dist/seo/formatter.js +228 -0
- package/dist/seo/index.d.ts +2 -0
- package/dist/seo/index.js +1 -0
- package/dist/seo/keywords.d.ts +16 -0
- package/dist/seo/keywords.js +55 -0
- package/dist/seo/rules/accessibility.js +96 -57
- package/dist/seo/rules/ai-search.js +44 -31
- package/dist/seo/rules/analytics.d.ts +2 -0
- package/dist/seo/rules/analytics.js +306 -0
- package/dist/seo/rules/best-practices.js +21 -14
- package/dist/seo/rules/canonical.js +53 -32
- package/dist/seo/rules/content.js +317 -31
- package/dist/seo/rules/crawl.js +55 -40
- package/dist/seo/rules/cwv.js +21 -15
- package/dist/seo/rules/ecommerce.js +82 -22
- package/dist/seo/rules/i18n.js +75 -36
- package/dist/seo/rules/images.js +109 -30
- package/dist/seo/rules/index.js +2 -0
- package/dist/seo/rules/internal-linking.js +58 -39
- package/dist/seo/rules/links.js +79 -52
- package/dist/seo/rules/local.js +49 -25
- package/dist/seo/rules/meta.js +339 -81
- package/dist/seo/rules/mobile.js +112 -2
- package/dist/seo/rules/performance.js +434 -66
- package/dist/seo/rules/pwa.js +36 -39
- package/dist/seo/rules/readability.js +31 -22
- package/dist/seo/rules/redirects.js +21 -15
- package/dist/seo/rules/resources.js +59 -42
- package/dist/seo/rules/schema.js +333 -8
- package/dist/seo/rules/security.js +142 -80
- package/dist/seo/rules/social.js +277 -47
- package/dist/seo/rules/structural.js +87 -19
- package/dist/seo/rules/technical-advanced.js +30 -24
- package/dist/seo/rules/technical.js +243 -42
- package/dist/seo/rules/types.d.ts +53 -1
- package/dist/seo/seo-spider.d.ts +22 -0
- package/dist/seo/seo-spider.js +77 -13
- package/dist/seo/types.d.ts +8 -1
- package/dist/seo/validators/llms-txt.js +19 -0
- package/dist/seo/validators/rss.d.ts +11 -0
- package/dist/seo/validators/rss.js +93 -0
- package/dist/seo/validators/sitemap.js +36 -26
- package/dist/transport/curl.d.ts +4 -0
- package/dist/transport/curl.js +101 -0
- package/dist/transport/udp.js +0 -1
- package/dist/transport/undici.js +1 -2
- package/dist/transport/worker.d.ts +18 -0
- package/dist/transport/worker.js +278 -0
- package/dist/types/index.d.ts +4 -1
- package/dist/utils/binary-manager.d.ts +4 -0
- package/dist/utils/binary-manager.js +72 -0
- package/dist/utils/optional-require.d.ts +7 -8
- package/dist/utils/optional-require.js +2 -21
- package/dist/utils/upload.d.ts +6 -0
- package/dist/utils/upload.js +11 -0
- package/dist/utils/user-agent.js +2 -13
- package/dist/version.js +1 -1
- package/package.json +12 -6
- package/dist/browser/utils/optional-require.d.ts +0 -19
- package/dist/browser/utils/optional-require.js +0 -105
|
@@ -3,6 +3,23 @@ import type { ExtractedLink } from '../../scrape/types.js';
|
|
|
3
3
|
export type RuleSeverity = 'error' | 'warning' | 'info';
|
|
4
4
|
export type RuleCategory = 'title' | 'meta' | 'og' | 'twitter' | 'headings' | 'images' | 'links' | 'content' | 'technical' | 'security' | 'mobile' | 'structured-data' | 'performance' | 'accessibility' | 'ai-search' | 'resources' | 'crawlability' | 'canonicalization';
|
|
5
5
|
export interface RuleContext {
|
|
6
|
+
keywordsInTitle?: boolean;
|
|
7
|
+
keywordsInDescription?: boolean;
|
|
8
|
+
keywordsInH1?: boolean;
|
|
9
|
+
keywordsInUrl?: boolean;
|
|
10
|
+
keywordsInFirstParagraph?: boolean;
|
|
11
|
+
keywordsInAltText?: boolean;
|
|
12
|
+
keywordConsistencyScore?: number;
|
|
13
|
+
keywordConsistencyDetails?: {
|
|
14
|
+
inTitle: boolean;
|
|
15
|
+
inDescription: boolean;
|
|
16
|
+
inH1: boolean;
|
|
17
|
+
inUrl: boolean;
|
|
18
|
+
inFirstParagraph: boolean;
|
|
19
|
+
inAltText: boolean;
|
|
20
|
+
};
|
|
21
|
+
topKeywords?: string[];
|
|
22
|
+
mainKeyword?: string;
|
|
6
23
|
title?: string;
|
|
7
24
|
titleLength?: number;
|
|
8
25
|
metaDescription?: string;
|
|
@@ -34,9 +51,12 @@ export interface RuleContext {
|
|
|
34
51
|
imagesWithDimensions?: number;
|
|
35
52
|
imagesMissingDimensions?: number;
|
|
36
53
|
imagesWithEmptyAlt?: number;
|
|
54
|
+
imagesWithSrcset?: number;
|
|
55
|
+
largeBase64ImagesCount?: number;
|
|
37
56
|
imagesDecorativeCount?: number;
|
|
38
57
|
imagesUsingModernFormats?: number;
|
|
39
58
|
altTextLengths?: number[];
|
|
59
|
+
imageAltTexts?: string[];
|
|
40
60
|
imageFilenames?: string[];
|
|
41
61
|
imagesWithAsyncDecoding?: number;
|
|
42
62
|
brokenExternalImages?: number;
|
|
@@ -70,6 +90,8 @@ export interface RuleContext {
|
|
|
70
90
|
totalLinks?: number;
|
|
71
91
|
internalLinks?: number;
|
|
72
92
|
externalLinks?: number;
|
|
93
|
+
internalHttpLinks?: number;
|
|
94
|
+
internalHttpLinkUrls?: string[];
|
|
73
95
|
linksWithoutText?: number;
|
|
74
96
|
nofollowLinks?: number;
|
|
75
97
|
sponsoredLinks?: number;
|
|
@@ -89,6 +111,8 @@ export interface RuleContext {
|
|
|
89
111
|
missingNoreferrer?: ExtractedLink[];
|
|
90
112
|
};
|
|
91
113
|
wordCount?: number;
|
|
114
|
+
emailsFound?: string[];
|
|
115
|
+
socialLinksFound?: string[];
|
|
92
116
|
characterCount?: number;
|
|
93
117
|
sentenceCount?: number;
|
|
94
118
|
paragraphCount?: number;
|
|
@@ -102,7 +126,6 @@ export interface RuleContext {
|
|
|
102
126
|
avgSentenceLength?: number;
|
|
103
127
|
faqCount?: number;
|
|
104
128
|
imagePerWordRatio?: number;
|
|
105
|
-
mainKeyword?: string;
|
|
106
129
|
keywordDensity?: number;
|
|
107
130
|
fleschReadingEase?: number;
|
|
108
131
|
hasQuestionHeadings?: boolean;
|
|
@@ -120,6 +143,7 @@ export interface RuleContext {
|
|
|
120
143
|
hasBreadcrumbsSchema?: boolean;
|
|
121
144
|
videoCount?: number;
|
|
122
145
|
audioCount?: number;
|
|
146
|
+
hasAutoplay?: boolean;
|
|
123
147
|
hasCanonical?: boolean;
|
|
124
148
|
canonicalUrl?: string;
|
|
125
149
|
hasViewport?: boolean;
|
|
@@ -134,7 +158,10 @@ export interface RuleContext {
|
|
|
134
158
|
textHtmlRatio?: number;
|
|
135
159
|
hasDeprecatedPlugins?: boolean;
|
|
136
160
|
deprecatedPluginTypes?: string[];
|
|
161
|
+
deprecatedTagsCount?: number;
|
|
162
|
+
deprecatedTagsFound?: string[];
|
|
137
163
|
hasFrameTags?: boolean;
|
|
164
|
+
iframeCount?: number;
|
|
138
165
|
hasFavicon?: boolean;
|
|
139
166
|
faviconUrl?: string;
|
|
140
167
|
hasPreconnect?: boolean;
|
|
@@ -321,6 +348,21 @@ export interface RuleContext {
|
|
|
321
348
|
pinterestRichPinSupport?: boolean;
|
|
322
349
|
hasPinterestNopin?: boolean;
|
|
323
350
|
fbAppId?: string;
|
|
351
|
+
totalSocialLinks?: number;
|
|
352
|
+
socialLinksInHeader?: number;
|
|
353
|
+
socialLinksInFooter?: number;
|
|
354
|
+
socialLinksWithoutAccessibility?: number;
|
|
355
|
+
socialLinksWithoutNewTab?: number;
|
|
356
|
+
socialLinksWithoutNoopener?: number;
|
|
357
|
+
platformsFound?: string[];
|
|
358
|
+
socialLinkDetails?: Array<{
|
|
359
|
+
href: string;
|
|
360
|
+
platform: string;
|
|
361
|
+
hasAccessibility: boolean;
|
|
362
|
+
hasNewTab: boolean;
|
|
363
|
+
hasNoopener: boolean;
|
|
364
|
+
location: 'header' | 'footer' | 'body';
|
|
365
|
+
}>;
|
|
324
366
|
navLinkCount?: number;
|
|
325
367
|
footerLinkCount?: number;
|
|
326
368
|
contextualLinkCount?: number;
|
|
@@ -408,6 +450,16 @@ export interface RuleContext {
|
|
|
408
450
|
tlsVersion?: string;
|
|
409
451
|
hasPasswordField?: boolean;
|
|
410
452
|
formsOnHttp?: number;
|
|
453
|
+
analyticsDetected?: boolean;
|
|
454
|
+
analyticsProviders?: string[];
|
|
455
|
+
hasRssFeed?: boolean;
|
|
456
|
+
rssFeedUrl?: string;
|
|
457
|
+
hasAtomFeed?: boolean;
|
|
458
|
+
atomFeedUrl?: string;
|
|
459
|
+
ctaButtonsCount?: number;
|
|
460
|
+
formCount?: number;
|
|
461
|
+
hasWhatsAppLink?: boolean;
|
|
462
|
+
pageInSitemap?: boolean;
|
|
411
463
|
}
|
|
412
464
|
export interface RuleEvidence {
|
|
413
465
|
found?: string | number | string[];
|
package/dist/seo/seo-spider.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { SpiderOptions, SpiderResult, SpiderPageResult } from '../scrape/spider.js';
|
|
2
2
|
import type { SeoReport } from './types.js';
|
|
3
|
+
import { type SitemapValidationResult } from './validators/sitemap.js';
|
|
3
4
|
export interface SeoSpiderOptions extends SpiderOptions {
|
|
4
5
|
seo?: boolean;
|
|
5
6
|
output?: string;
|
|
@@ -20,6 +21,25 @@ export interface SiteWideIssue {
|
|
|
20
21
|
export interface SeoSpiderResult extends Omit<SpiderResult, 'pages'> {
|
|
21
22
|
pages: SeoPageResult[];
|
|
22
23
|
siteWideIssues: SiteWideIssue[];
|
|
24
|
+
txtFiles?: {
|
|
25
|
+
humans: {
|
|
26
|
+
found: boolean;
|
|
27
|
+
content?: string;
|
|
28
|
+
url: string;
|
|
29
|
+
};
|
|
30
|
+
llms: {
|
|
31
|
+
found: boolean;
|
|
32
|
+
content?: string;
|
|
33
|
+
url: string;
|
|
34
|
+
};
|
|
35
|
+
};
|
|
36
|
+
rssFeeds?: Array<{
|
|
37
|
+
url: string;
|
|
38
|
+
type: 'rss' | 'atom' | 'unknown';
|
|
39
|
+
title?: string;
|
|
40
|
+
itemCount: number;
|
|
41
|
+
}>;
|
|
42
|
+
sitemapValidation?: SitemapValidationResult;
|
|
23
43
|
summary: {
|
|
24
44
|
totalPages: number;
|
|
25
45
|
pagesWithErrors: number;
|
|
@@ -37,6 +57,8 @@ export declare class SeoSpider {
|
|
|
37
57
|
private seoResults;
|
|
38
58
|
constructor(options?: SeoSpiderOptions);
|
|
39
59
|
crawl(startUrl: string): Promise<SeoSpiderResult>;
|
|
60
|
+
private checkTextFiles;
|
|
61
|
+
private validateSitemap;
|
|
40
62
|
private analyzePages;
|
|
41
63
|
private createReportFromPageData;
|
|
42
64
|
private detectSiteWideIssues;
|
package/dist/seo/seo-spider.js
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import { Spider } from '../scrape/spider.js';
|
|
2
2
|
import { analyzeSeo } from './analyzer.js';
|
|
3
3
|
import { createClient } from '../core/client.js';
|
|
4
|
+
import { discoverFeeds } from './validators/rss.js';
|
|
5
|
+
import { fetchAndValidateSitemap } from './validators/sitemap.js';
|
|
4
6
|
import * as fs from 'fs/promises';
|
|
5
7
|
export class SeoSpider {
|
|
6
8
|
spider;
|
|
@@ -32,10 +34,23 @@ export class SeoSpider {
|
|
|
32
34
|
const seoPages = await this.analyzePages(result.pages);
|
|
33
35
|
const siteWideIssues = this.detectSiteWideIssues(seoPages);
|
|
34
36
|
const summary = this.calculateSummary(seoPages, siteWideIssues);
|
|
37
|
+
const txtFiles = await this.checkTextFiles(startUrl);
|
|
38
|
+
let homeHtml = '';
|
|
39
|
+
try {
|
|
40
|
+
const client = createClient({ timeout: 10000 });
|
|
41
|
+
const res = await client.get(startUrl);
|
|
42
|
+
homeHtml = await res.text();
|
|
43
|
+
}
|
|
44
|
+
catch { }
|
|
45
|
+
const rssFeeds = await discoverFeeds(new URL(startUrl).origin, homeHtml);
|
|
46
|
+
const sitemapValidation = await this.validateSitemap(startUrl);
|
|
35
47
|
const seoResult = {
|
|
36
48
|
...result,
|
|
37
49
|
pages: seoPages,
|
|
38
50
|
siteWideIssues,
|
|
51
|
+
txtFiles,
|
|
52
|
+
rssFeeds,
|
|
53
|
+
sitemapValidation,
|
|
39
54
|
summary,
|
|
40
55
|
};
|
|
41
56
|
if (this.options.output) {
|
|
@@ -43,6 +58,57 @@ export class SeoSpider {
|
|
|
43
58
|
}
|
|
44
59
|
return seoResult;
|
|
45
60
|
}
|
|
61
|
+
async checkTextFiles(startUrl) {
|
|
62
|
+
try {
|
|
63
|
+
const baseUrl = new URL(startUrl).origin;
|
|
64
|
+
const client = createClient({ timeout: 5000 });
|
|
65
|
+
const results = {
|
|
66
|
+
humans: { found: false, content: undefined, url: `${baseUrl}/humans.txt` },
|
|
67
|
+
llms: { found: false, content: undefined, url: `${baseUrl}/llms.txt` },
|
|
68
|
+
};
|
|
69
|
+
try {
|
|
70
|
+
const res = await client.get(results.humans.url);
|
|
71
|
+
if (res.status === 200) {
|
|
72
|
+
results.humans.found = true;
|
|
73
|
+
results.humans.content = await res.text();
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
catch { }
|
|
77
|
+
try {
|
|
78
|
+
const res = await client.get(results.llms.url);
|
|
79
|
+
if (res.status === 200) {
|
|
80
|
+
results.llms.found = true;
|
|
81
|
+
results.llms.content = await res.text();
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
catch { }
|
|
85
|
+
return results;
|
|
86
|
+
}
|
|
87
|
+
catch {
|
|
88
|
+
return undefined;
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
async validateSitemap(startUrl) {
|
|
92
|
+
try {
|
|
93
|
+
const baseUrl = new URL(startUrl).origin;
|
|
94
|
+
const sitemapUrl = `${baseUrl}/sitemap.xml`;
|
|
95
|
+
const client = createClient({ timeout: this.options.timeout || 15000 });
|
|
96
|
+
const fetcher = async (url) => {
|
|
97
|
+
const res = await client.get(url);
|
|
98
|
+
const text = await res.text();
|
|
99
|
+
return {
|
|
100
|
+
status: res.status,
|
|
101
|
+
text,
|
|
102
|
+
headers: Object.fromEntries([...res.headers.entries()]),
|
|
103
|
+
};
|
|
104
|
+
};
|
|
105
|
+
const result = await fetchAndValidateSitemap(sitemapUrl, fetcher);
|
|
106
|
+
return result;
|
|
107
|
+
}
|
|
108
|
+
catch {
|
|
109
|
+
return undefined;
|
|
110
|
+
}
|
|
111
|
+
}
|
|
46
112
|
async analyzePages(pages) {
|
|
47
113
|
const results = [];
|
|
48
114
|
const client = createClient({
|
|
@@ -93,8 +159,9 @@ export class SeoSpider {
|
|
|
93
159
|
if (titleLength < 30) {
|
|
94
160
|
checks.push({
|
|
95
161
|
name: 'Title Length',
|
|
162
|
+
category: 'title',
|
|
96
163
|
status: 'warn',
|
|
97
|
-
message: `Title is
|
|
164
|
+
message: `Title is ${titleLength} characters`,
|
|
98
165
|
value: titleLength,
|
|
99
166
|
recommendation: 'Title should be 50-60 characters',
|
|
100
167
|
});
|
|
@@ -102,6 +169,7 @@ export class SeoSpider {
|
|
|
102
169
|
else if (titleLength > 60) {
|
|
103
170
|
checks.push({
|
|
104
171
|
name: 'Title Length',
|
|
172
|
+
category: 'title',
|
|
105
173
|
status: 'warn',
|
|
106
174
|
message: `Title is too long (${titleLength} chars)`,
|
|
107
175
|
value: titleLength,
|
|
@@ -111,6 +179,7 @@ export class SeoSpider {
|
|
|
111
179
|
else {
|
|
112
180
|
checks.push({
|
|
113
181
|
name: 'Title Length',
|
|
182
|
+
category: 'title',
|
|
114
183
|
status: 'pass',
|
|
115
184
|
message: `Good title length (${titleLength} chars)`,
|
|
116
185
|
value: titleLength,
|
|
@@ -120,6 +189,7 @@ export class SeoSpider {
|
|
|
120
189
|
else {
|
|
121
190
|
checks.push({
|
|
122
191
|
name: 'Title',
|
|
192
|
+
category: 'title',
|
|
123
193
|
status: 'fail',
|
|
124
194
|
message: 'Page has no title',
|
|
125
195
|
recommendation: 'Add a descriptive <title> tag',
|
|
@@ -130,6 +200,7 @@ export class SeoSpider {
|
|
|
130
200
|
if (internalLinks === 0) {
|
|
131
201
|
checks.push({
|
|
132
202
|
name: 'Internal Links',
|
|
203
|
+
category: 'links',
|
|
133
204
|
status: 'warn',
|
|
134
205
|
message: 'No internal links found',
|
|
135
206
|
recommendation: 'Add internal links to improve site structure',
|
|
@@ -138,6 +209,7 @@ export class SeoSpider {
|
|
|
138
209
|
else {
|
|
139
210
|
checks.push({
|
|
140
211
|
name: 'Internal Links',
|
|
212
|
+
category: 'links',
|
|
141
213
|
status: 'pass',
|
|
142
214
|
message: `${internalLinks} internal links found`,
|
|
143
215
|
value: internalLinks,
|
|
@@ -232,27 +304,19 @@ export class SeoSpider {
|
|
|
232
304
|
missingDimensions: 0,
|
|
233
305
|
modernFormats: 0,
|
|
234
306
|
altTextLengths: [],
|
|
307
|
+
imageAltTexts: [],
|
|
235
308
|
imageFilenames: [],
|
|
236
309
|
imagesWithAsyncDecoding: 0,
|
|
237
310
|
},
|
|
238
311
|
social: {
|
|
239
312
|
openGraph: {
|
|
240
|
-
present: false,
|
|
241
|
-
hasTitle: false,
|
|
242
|
-
hasDescription: false,
|
|
243
|
-
hasImage: false,
|
|
244
|
-
hasUrl: false,
|
|
245
|
-
issues: [],
|
|
313
|
+
present: false, hasTitle: false, hasDescription: false, hasImage: false, hasUrl: false, issues: []
|
|
246
314
|
},
|
|
247
315
|
twitterCard: {
|
|
248
|
-
present: false,
|
|
249
|
-
hasCard: false,
|
|
250
|
-
hasTitle: false,
|
|
251
|
-
hasDescription: false,
|
|
252
|
-
hasImage: false,
|
|
253
|
-
issues: [],
|
|
316
|
+
present: false, hasCard: false, hasTitle: false, hasDescription: false, hasImage: false, issues: []
|
|
254
317
|
},
|
|
255
318
|
},
|
|
319
|
+
keywords: { totalWords: 0, uniqueWords: 0, topKeywords: [] },
|
|
256
320
|
technical: {
|
|
257
321
|
hasCanonical: false,
|
|
258
322
|
hasRobotsMeta: false,
|
package/dist/seo/types.d.ts
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import { KeywordCloud } from './keywords.js';
|
|
2
|
+
export type { KeywordCloud, KeywordItem } from './keywords.js';
|
|
1
3
|
export type SeoStatus = 'pass' | 'warn' | 'fail' | 'info';
|
|
2
4
|
export interface SeoCheckEvidence {
|
|
3
5
|
found?: string | number | string[];
|
|
@@ -10,6 +12,7 @@ export interface SeoCheckEvidence {
|
|
|
10
12
|
}
|
|
11
13
|
export interface SeoCheckResult {
|
|
12
14
|
name: string;
|
|
15
|
+
category: string;
|
|
13
16
|
status: SeoStatus;
|
|
14
17
|
message: string;
|
|
15
18
|
value?: string | number;
|
|
@@ -50,6 +53,8 @@ export interface LinkAnalysis {
|
|
|
50
53
|
withoutText: number;
|
|
51
54
|
sponsoredLinks: number;
|
|
52
55
|
ugcLinks: number;
|
|
56
|
+
internalHttpLinks?: number;
|
|
57
|
+
internalHttpLinkUrls?: string[];
|
|
53
58
|
}
|
|
54
59
|
export interface ImageAnalysis {
|
|
55
60
|
total: number;
|
|
@@ -59,6 +64,7 @@ export interface ImageAnalysis {
|
|
|
59
64
|
missingDimensions: number;
|
|
60
65
|
modernFormats: number;
|
|
61
66
|
altTextLengths: number[];
|
|
67
|
+
imageAltTexts: string[];
|
|
62
68
|
imageFilenames: string[];
|
|
63
69
|
imagesWithAsyncDecoding: number;
|
|
64
70
|
}
|
|
@@ -172,8 +178,9 @@ export interface SeoReport {
|
|
|
172
178
|
types: string[];
|
|
173
179
|
items: Record<string, unknown>[];
|
|
174
180
|
};
|
|
175
|
-
headings: HeadingAnalysis;
|
|
176
181
|
content: ContentMetrics;
|
|
182
|
+
headings: HeadingAnalysis;
|
|
183
|
+
keywords: KeywordCloud;
|
|
177
184
|
links: LinkAnalysis;
|
|
178
185
|
images: ImageAnalysis;
|
|
179
186
|
social: SocialMetaAnalysis;
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
const MAX_FILE_SIZE = 100 * 1024;
|
|
2
2
|
const MIN_DESCRIPTION_LENGTH = 50;
|
|
3
3
|
const MAX_DESCRIPTION_LENGTH = 500;
|
|
4
|
+
const OPTIMAL_MIN_LINKS = 10;
|
|
5
|
+
const OPTIMAL_MAX_LINKS = 30;
|
|
4
6
|
export function parseLlmsTxt(content) {
|
|
5
7
|
const errors = [];
|
|
6
8
|
const warnings = [];
|
|
@@ -181,6 +183,23 @@ export function validateLlmsTxt(content, baseUrl) {
|
|
|
181
183
|
seenUrls.add(normalized);
|
|
182
184
|
}
|
|
183
185
|
}
|
|
186
|
+
const linkCount = parseResult.links.length;
|
|
187
|
+
if (linkCount > 0 && linkCount < OPTIMAL_MIN_LINKS) {
|
|
188
|
+
issues.push({
|
|
189
|
+
type: 'info',
|
|
190
|
+
code: 'FEW_LINKS',
|
|
191
|
+
message: `Only ${linkCount} link(s) found in llms.txt`,
|
|
192
|
+
recommendation: `Consider adding ${OPTIMAL_MIN_LINKS}-${OPTIMAL_MAX_LINKS} of your most valuable pages for better AI coverage`,
|
|
193
|
+
});
|
|
194
|
+
}
|
|
195
|
+
else if (linkCount > OPTIMAL_MAX_LINKS) {
|
|
196
|
+
issues.push({
|
|
197
|
+
type: 'info',
|
|
198
|
+
code: 'MANY_LINKS',
|
|
199
|
+
message: `${linkCount} links found in llms.txt`,
|
|
200
|
+
recommendation: `Focus on quality over quantity. ${OPTIMAL_MIN_LINKS}-${OPTIMAL_MAX_LINKS} high-value links are recommended to help AI systems identify your truly important content`,
|
|
201
|
+
});
|
|
202
|
+
}
|
|
184
203
|
return {
|
|
185
204
|
valid: issues.filter(i => i.type === 'error').length === 0,
|
|
186
205
|
issues,
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/** One syndication feed reported by `discoverFeeds`. */
export interface RssFeed {
    /** URL the feed document was requested from. */
    url: string;
    /** Detected feed format. */
    type: 'rss' | 'atom' | 'unknown';
    /** Feed title, when one could be extracted. */
    title?: string;
    /** Feed description, when one could be extracted. */
    description?: string;
    /** Number of items/entries counted in the feed document. */
    itemCount: number;
    /** Raw `<lastBuildDate>` text, when present. */
    lastBuildDate?: string;
    /** Whether the document was recognised as a feed. */
    isValid: boolean;
    /** Failure description. NOTE(review): not populated by the implementation visible alongside this declaration; confirm intended use. */
    error?: string;
}
|
|
11
|
+
/**
 * Discover the RSS/Atom feeds of a site.
 *
 * Candidate URLs come from `<link type="application/rss+xml|atom+xml">` tags
 * in `html` (resolved against `baseUrl`); when none are found, a fixed list of
 * common feed paths under `baseUrl` is tried instead. Each candidate is
 * fetched, and only documents recognised as RSS 2.0 or Atom are returned.
 * Candidates that fail to load are skipped.
 *
 * @param baseUrl Base URL used to resolve relative feed links and fallback paths.
 * @param html Optional page HTML to scan for feed `<link>` tags.
 * @returns The feeds that were fetched and recognised.
 */
export declare function discoverFeeds(baseUrl: string, html?: string): Promise<RssFeed[]>;
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import { createClient } from '../../core/client.js';
|
|
2
|
+
/** Well-known feed locations probed when the page HTML advertises no feed. */
const COMMON_PATHS = [
    '/rss.xml',
    '/feed.xml',
    '/rss',
    '/feed',
    '/atom.xml',
    '/feeds/posts/default',
    '/index.xml'
];
/**
 * Unwrap CDATA sections (including multi-line ones) and trim the result.
 *
 * @param {string} raw - Raw element text.
 * @returns {string} Text with CDATA wrappers removed.
 */
function stripCdata(raw) {
    return raw.replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g, '$1').trim();
}
/**
 * Classify a fetched document as RSS 2.0 or Atom and extract its metadata.
 *
 * @param {string} url - URL the document was fetched from.
 * @param {string} text - Document body.
 * @returns {object|null} A feed record, or `null` when the document is not a
 *   recognised feed.
 */
function parseFeedDocument(url, text) {
    // The version / namespace attribute must sit on the root tag; both quote
    // styles are accepted.
    if (/<rss\b[^>]*\bversion\s*=\s*["']2\.0["']/.test(text)) {
        const titleMatch = text.match(/<channel\b[^>]*>[\s\S]*?<title\b[^>]*>([\s\S]*?)<\/title>/);
        const descMatch = text.match(/<channel\b[^>]*>[\s\S]*?<description\b[^>]*>([\s\S]*?)<\/description>/);
        const dateMatch = text.match(/<lastBuildDate>([\s\S]*?)<\/lastBuildDate>/);
        return {
            url,
            type: 'rss',
            isValid: true,
            title: titleMatch ? stripCdata(titleMatch[1]) : undefined,
            description: descMatch ? stripCdata(descMatch[1]) : undefined,
            // `[\s>]` also counts `<item attr="...">` without matching `<items>`.
            itemCount: (text.match(/<item[\s>]/g) || []).length,
            lastBuildDate: dateMatch ? dateMatch[1].trim() : undefined,
        };
    }
    if (/<feed\b[^>]*\bxmlns\s*=\s*["']http:\/\/www\.w3\.org\/2005\/Atom["']/.test(text)) {
        // Atom titles commonly carry a `type` attribute.
        const titleMatch = text.match(/<title\b[^>]*>([\s\S]*?)<\/title>/);
        return {
            url,
            type: 'atom',
            isValid: true,
            title: titleMatch ? stripCdata(titleMatch[1]) : undefined,
            description: undefined,
            itemCount: (text.match(/<entry[\s>]/g) || []).length,
            lastBuildDate: undefined,
        };
    }
    return null;
}
/**
 * Discover the RSS/Atom feeds of a site.
 *
 * Candidates are taken from `<link type="application/rss+xml|atom+xml">` tags
 * in `html`; when none are found, `COMMON_PATHS` are tried under `baseUrl`.
 * All candidates are fetched concurrently (8 s timeout). The result keeps
 * candidate order, so it does not depend on which response arrives first.
 *
 * @param {string} baseUrl - Base URL for resolving relative links and fallback paths.
 * @param {string} [html] - Page HTML to scan for feed `<link>` tags.
 * @returns {Promise<object[]>} Recognised feeds; unreachable, non-200 and
 *   non-feed candidates are omitted.
 */
export async function discoverFeeds(baseUrl, html) {
    const candidateUrls = new Set();
    if (html) {
        const linkRegex = /<link[^>]+?type=["']application\/(rss\+xml|atom\+xml)["'][^>]*?>/gi;
        const hrefRegex = /href=["']([^"']+)["']/;
        let match;
        while ((match = linkRegex.exec(html)) !== null) {
            const hrefMatch = hrefRegex.exec(match[0]);
            if (!hrefMatch) {
                continue;
            }
            try {
                candidateUrls.add(new URL(hrefMatch[1], baseUrl).toString());
            }
            catch {
                // Unresolvable href: ignore this tag.
            }
        }
    }
    if (candidateUrls.size === 0) {
        for (const path of COMMON_PATHS) {
            try {
                candidateUrls.add(new URL(path, baseUrl).toString());
            }
            catch {
                // Invalid baseUrl: no fallback candidate can be built.
            }
        }
    }
    const client = createClient({ timeout: 8000 });
    const inspected = await Promise.all(Array.from(candidateUrls).map(async (url) => {
        try {
            const response = await client.get(url);
            if (response.status !== 200) {
                return null;
            }
            return parseFeedDocument(url, await response.text());
        }
        catch {
            // Best-effort discovery: a failing candidate is simply not a feed.
            return null;
        }
    }));
    return inspected.filter((feed) => feed !== null);
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import
|
|
1
|
+
import { parse } from '../../scrape/parser/index.js';
|
|
2
2
|
const VALID_CHANGEFREQ = ['always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never'];
|
|
3
3
|
const MAX_URLS_PER_SITEMAP = 50000;
|
|
4
4
|
const MAX_SITEMAP_SIZE = 50 * 1024 * 1024;
|
|
@@ -9,12 +9,12 @@ export function parseSitemap(content, compressed = false) {
|
|
|
9
9
|
const sitemaps = [];
|
|
10
10
|
let type = 'unknown';
|
|
11
11
|
try {
|
|
12
|
-
const
|
|
13
|
-
if (
|
|
12
|
+
const root = parse(content, { lowerCaseTagName: false });
|
|
13
|
+
if (root.querySelectorAll('urlset').length > 0) {
|
|
14
14
|
type = 'urlset';
|
|
15
|
-
|
|
16
|
-
const
|
|
17
|
-
const loc =
|
|
15
|
+
root.querySelectorAll('url').forEach((urlElem) => {
|
|
16
|
+
const locElem = urlElem.querySelector('loc');
|
|
17
|
+
const loc = locElem ? locElem.text.trim() : '';
|
|
18
18
|
if (!loc) {
|
|
19
19
|
errors.push('URL entry missing <loc> element');
|
|
20
20
|
return;
|
|
@@ -27,7 +27,8 @@ export function parseSitemap(content, compressed = false) {
|
|
|
27
27
|
return;
|
|
28
28
|
}
|
|
29
29
|
const url = { loc };
|
|
30
|
-
const
|
|
30
|
+
const lastmodElem = urlElem.querySelector('lastmod');
|
|
31
|
+
const lastmod = lastmodElem ? lastmodElem.text.trim() : '';
|
|
31
32
|
if (lastmod) {
|
|
32
33
|
if (isValidDate(lastmod)) {
|
|
33
34
|
url.lastmod = lastmod;
|
|
@@ -36,7 +37,8 @@ export function parseSitemap(content, compressed = false) {
|
|
|
36
37
|
warnings.push(`Invalid lastmod date for ${loc}: ${lastmod}`);
|
|
37
38
|
}
|
|
38
39
|
}
|
|
39
|
-
const
|
|
40
|
+
const changefreqElem = urlElem.querySelector('changefreq');
|
|
41
|
+
const changefreq = changefreqElem ? changefreqElem.text.trim().toLowerCase() : '';
|
|
40
42
|
if (changefreq) {
|
|
41
43
|
if (VALID_CHANGEFREQ.includes(changefreq)) {
|
|
42
44
|
url.changefreq = changefreq;
|
|
@@ -45,7 +47,8 @@ export function parseSitemap(content, compressed = false) {
|
|
|
45
47
|
warnings.push(`Invalid changefreq for ${loc}: ${changefreq}`);
|
|
46
48
|
}
|
|
47
49
|
}
|
|
48
|
-
const
|
|
50
|
+
const priorityElem = urlElem.querySelector('priority');
|
|
51
|
+
const priority = priorityElem ? priorityElem.text.trim() : '';
|
|
49
52
|
if (priority) {
|
|
50
53
|
const p = parseFloat(priority);
|
|
51
54
|
if (isNaN(p) || p < 0 || p > 1) {
|
|
@@ -56,14 +59,17 @@ export function parseSitemap(content, compressed = false) {
|
|
|
56
59
|
}
|
|
57
60
|
}
|
|
58
61
|
const images = [];
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
const
|
|
62
|
+
const imageElems = urlElem.querySelectorAll('image\:image, image');
|
|
63
|
+
imageElems.forEach((imgElem) => {
|
|
64
|
+
const locEl = imgElem.querySelector('image\:loc, loc');
|
|
65
|
+
const imgLoc = locEl ? locEl.text.trim() : '';
|
|
62
66
|
if (imgLoc) {
|
|
67
|
+
const captionEl = imgElem.querySelector('image\:caption, caption');
|
|
68
|
+
const titleEl = imgElem.querySelector('image\:title, title');
|
|
63
69
|
images.push({
|
|
64
70
|
loc: imgLoc,
|
|
65
|
-
caption:
|
|
66
|
-
title:
|
|
71
|
+
caption: captionEl ? captionEl.text.trim() || undefined : undefined,
|
|
72
|
+
title: titleEl ? titleEl.text.trim() || undefined : undefined,
|
|
67
73
|
});
|
|
68
74
|
}
|
|
69
75
|
});
|
|
@@ -71,10 +77,10 @@ export function parseSitemap(content, compressed = false) {
|
|
|
71
77
|
url.images = images;
|
|
72
78
|
}
|
|
73
79
|
const alternates = [];
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
const hreflang =
|
|
77
|
-
const href =
|
|
80
|
+
const linkElems = urlElem.querySelectorAll('xhtml\:link[rel="alternate"], link[rel="alternate"]');
|
|
81
|
+
linkElems.forEach((linkElem) => {
|
|
82
|
+
const hreflang = linkElem.getAttribute('hreflang');
|
|
83
|
+
const href = linkElem.getAttribute('href');
|
|
78
84
|
if (hreflang && href) {
|
|
79
85
|
alternates.push({ hreflang, href });
|
|
80
86
|
}
|
|
@@ -85,17 +91,18 @@ export function parseSitemap(content, compressed = false) {
|
|
|
85
91
|
urls.push(url);
|
|
86
92
|
});
|
|
87
93
|
}
|
|
88
|
-
else if (
|
|
94
|
+
else if (root.querySelectorAll('sitemapindex').length > 0) {
|
|
89
95
|
type = 'sitemapindex';
|
|
90
|
-
|
|
91
|
-
const
|
|
92
|
-
const loc =
|
|
96
|
+
root.querySelectorAll('sitemap').forEach((sitemapElem) => {
|
|
97
|
+
const locElem = sitemapElem.querySelector('loc');
|
|
98
|
+
const loc = locElem ? locElem.text.trim() : '';
|
|
93
99
|
if (!loc) {
|
|
94
100
|
errors.push('Sitemap entry missing <loc> element');
|
|
95
101
|
return;
|
|
96
102
|
}
|
|
97
103
|
const sitemap = { loc };
|
|
98
|
-
const
|
|
104
|
+
const lastmodElem = sitemapElem.querySelector('lastmod');
|
|
105
|
+
const lastmod = lastmodElem ? lastmodElem.text.trim() : '';
|
|
99
106
|
if (lastmod) {
|
|
100
107
|
if (isValidDate(lastmod)) {
|
|
101
108
|
sitemap.lastmod = lastmod;
|
|
@@ -285,6 +292,9 @@ export async function discoverSitemaps(baseUrl, robotsTxtContent, fetcher) {
|
|
|
285
292
|
}
|
|
286
293
|
}
|
|
287
294
|
}
|
|
295
|
+
if (discovered.size > 0) {
|
|
296
|
+
return Array.from(discovered);
|
|
297
|
+
}
|
|
288
298
|
for (const path of commonLocations) {
|
|
289
299
|
const url = new URL(path, base).href;
|
|
290
300
|
try {
|
|
@@ -399,9 +409,9 @@ export async function fetchAndValidateSitemap(url, fetcher) {
|
|
|
399
409
|
}
|
|
400
410
|
function isValidDate(dateString) {
|
|
401
411
|
const patterns = [
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
412
|
+
/\d{4}-\d{2}-\d{2}$/,
|
|
413
|
+
/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}([+-]\d{2}:\d{2}|Z)$/,
|
|
414
|
+
/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}([+-]\d{2}:\d{2}|Z)$/,
|
|
405
415
|
];
|
|
406
416
|
if (!patterns.some(p => p.test(dateString))) {
|
|
407
417
|
return false;
|