recker 1.0.42 → 1.0.43-next.0b080a5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -0
- package/dist/bin/recker-linux-x64 +0 -0
- package/dist/bin/recker-macos-x64 +0 -0
- package/dist/bin/recker-win-x64.exe +0 -0
- package/dist/bin/rek.cjs +94465 -0
- package/dist/browser/ai/adaptive-timeout.d.ts +50 -0
- package/dist/browser/ai/adaptive-timeout.js +208 -0
- package/dist/browser/ai/client.d.ts +22 -0
- package/dist/browser/ai/client.js +294 -0
- package/dist/browser/ai/index.d.ts +14 -0
- package/dist/browser/ai/index.js +11 -0
- package/dist/browser/ai/providers/anthropic.d.ts +63 -0
- package/dist/browser/ai/providers/anthropic.js +370 -0
- package/dist/browser/ai/providers/base.d.ts +48 -0
- package/dist/browser/ai/providers/base.js +150 -0
- package/dist/browser/ai/providers/google.d.ts +59 -0
- package/dist/browser/ai/providers/google.js +305 -0
- package/dist/browser/ai/providers/ollama.d.ts +44 -0
- package/dist/browser/ai/providers/ollama.js +240 -0
- package/dist/browser/ai/providers/openai.d.ts +64 -0
- package/dist/browser/ai/providers/openai.js +298 -0
- package/dist/browser/ai/rate-limiter.d.ts +43 -0
- package/dist/browser/ai/rate-limiter.js +215 -0
- package/dist/browser/ai/vector/index.d.ts +2 -0
- package/dist/browser/ai/vector/index.js +2 -0
- package/dist/browser/ai/vector/similarity.d.ts +2 -0
- package/dist/browser/ai/vector/similarity.js +27 -0
- package/dist/browser/ai/vector/store.d.ts +27 -0
- package/dist/browser/ai/vector/store.js +82 -0
- package/dist/browser/browser/cache.d.ts +2 -40
- package/dist/browser/browser/cache.js +2 -199
- package/dist/browser/browser/index.d.ts +8 -0
- package/dist/browser/browser/index.js +8 -0
- package/dist/browser/browser/recker.d.ts +8 -1
- package/dist/browser/browser/recker.js +8 -2
- package/dist/browser/cache/indexed-db.d.ts +10 -0
- package/dist/browser/cache/indexed-db.js +88 -0
- package/dist/browser/cache/service-worker-cache.d.ts +18 -0
- package/dist/browser/cache/service-worker-cache.js +103 -0
- package/dist/browser/cache.d.ts +2 -40
- package/dist/browser/cache.js +2 -199
- package/dist/browser/constants/user-agents.d.ts +7 -0
- package/dist/browser/constants/user-agents.js +7 -0
- package/dist/browser/core/client.d.ts +2 -0
- package/dist/browser/core/client.js +19 -1
- package/dist/browser/index.d.ts +8 -0
- package/dist/browser/index.js +8 -0
- package/dist/browser/plugins/har-recorder.d.ts +40 -0
- package/dist/browser/plugins/har-recorder.js +120 -0
- package/dist/browser/plugins/network-simulation.d.ts +7 -0
- package/dist/browser/plugins/network-simulation.js +13 -0
- package/dist/browser/presets/android.d.ts +2 -0
- package/dist/browser/presets/android.js +16 -0
- package/dist/browser/presets/anthropic.d.ts +8 -0
- package/dist/browser/presets/anthropic.js +27 -0
- package/dist/browser/presets/aws.d.ts +19 -0
- package/dist/browser/presets/aws.js +68 -0
- package/dist/browser/presets/azure-openai.d.ts +10 -0
- package/dist/browser/presets/azure-openai.js +35 -0
- package/dist/browser/presets/azure.d.ts +41 -0
- package/dist/browser/presets/azure.js +104 -0
- package/dist/browser/presets/chaturbate.d.ts +2 -0
- package/dist/browser/presets/chaturbate.js +17 -0
- package/dist/browser/presets/cloudflare.d.ts +12 -0
- package/dist/browser/presets/cloudflare.js +39 -0
- package/dist/browser/presets/cohere.d.ts +7 -0
- package/dist/browser/presets/cohere.js +22 -0
- package/dist/browser/presets/deepseek.d.ts +7 -0
- package/dist/browser/presets/deepseek.js +22 -0
- package/dist/browser/presets/digitalocean.d.ts +5 -0
- package/dist/browser/presets/digitalocean.js +16 -0
- package/dist/browser/presets/discord.d.ts +6 -0
- package/dist/browser/presets/discord.js +17 -0
- package/dist/browser/presets/elevenlabs.d.ts +6 -0
- package/dist/browser/presets/elevenlabs.js +20 -0
- package/dist/browser/presets/enhancers.d.ts +20 -0
- package/dist/browser/presets/enhancers.js +85 -0
- package/dist/browser/presets/fireworks.d.ts +7 -0
- package/dist/browser/presets/fireworks.js +22 -0
- package/dist/browser/presets/gcp.d.ts +34 -0
- package/dist/browser/presets/gcp.js +91 -0
- package/dist/browser/presets/gemini.d.ts +7 -0
- package/dist/browser/presets/gemini.js +23 -0
- package/dist/browser/presets/github.d.ts +6 -0
- package/dist/browser/presets/github.js +17 -0
- package/dist/browser/presets/gitlab.d.ts +6 -0
- package/dist/browser/presets/gitlab.js +16 -0
- package/dist/browser/presets/groq.d.ts +7 -0
- package/dist/browser/presets/groq.js +22 -0
- package/dist/browser/presets/hubspot.d.ts +9 -0
- package/dist/browser/presets/hubspot.js +28 -0
- package/dist/browser/presets/huggingface.d.ts +7 -0
- package/dist/browser/presets/huggingface.js +23 -0
- package/dist/browser/presets/index.d.ts +47 -0
- package/dist/browser/presets/index.js +47 -0
- package/dist/browser/presets/ios.d.ts +2 -0
- package/dist/browser/presets/ios.js +13 -0
- package/dist/browser/presets/linear.d.ts +5 -0
- package/dist/browser/presets/linear.js +16 -0
- package/dist/browser/presets/mailgun.d.ts +7 -0
- package/dist/browser/presets/mailgun.js +20 -0
- package/dist/browser/presets/meta.d.ts +10 -0
- package/dist/browser/presets/meta.js +33 -0
- package/dist/browser/presets/mistral.d.ts +7 -0
- package/dist/browser/presets/mistral.js +22 -0
- package/dist/browser/presets/notion.d.ts +6 -0
- package/dist/browser/presets/notion.js +17 -0
- package/dist/browser/presets/openai.d.ts +9 -0
- package/dist/browser/presets/openai.js +30 -0
- package/dist/browser/presets/oracle.d.ts +19 -0
- package/dist/browser/presets/oracle.js +117 -0
- package/dist/browser/presets/perplexity.d.ts +7 -0
- package/dist/browser/presets/perplexity.js +22 -0
- package/dist/browser/presets/pinecone.d.ts +8 -0
- package/dist/browser/presets/pinecone.js +42 -0
- package/dist/browser/presets/registry.d.ts +23 -0
- package/dist/browser/presets/registry.js +519 -0
- package/dist/browser/presets/replicate.d.ts +7 -0
- package/dist/browser/presets/replicate.js +23 -0
- package/dist/browser/presets/sendgrid.d.ts +6 -0
- package/dist/browser/presets/sendgrid.js +20 -0
- package/dist/browser/presets/sentry.d.ts +11 -0
- package/dist/browser/presets/sentry.js +48 -0
- package/dist/browser/presets/sinch.d.ts +9 -0
- package/dist/browser/presets/sinch.js +39 -0
- package/dist/browser/presets/slack.d.ts +5 -0
- package/dist/browser/presets/slack.js +16 -0
- package/dist/browser/presets/square.d.ts +10 -0
- package/dist/browser/presets/square.js +33 -0
- package/dist/browser/presets/stripe.d.ts +7 -0
- package/dist/browser/presets/stripe.js +23 -0
- package/dist/browser/presets/supabase.d.ts +6 -0
- package/dist/browser/presets/supabase.js +18 -0
- package/dist/browser/presets/tiktok.d.ts +10 -0
- package/dist/browser/presets/tiktok.js +38 -0
- package/dist/browser/presets/together.d.ts +7 -0
- package/dist/browser/presets/together.js +22 -0
- package/dist/browser/presets/twilio.d.ts +6 -0
- package/dist/browser/presets/twilio.js +17 -0
- package/dist/browser/presets/vercel.d.ts +6 -0
- package/dist/browser/presets/vercel.js +23 -0
- package/dist/browser/presets/vultr.d.ts +5 -0
- package/dist/browser/presets/vultr.js +16 -0
- package/dist/browser/presets/xai.d.ts +8 -0
- package/dist/browser/presets/xai.js +23 -0
- package/dist/browser/presets/youtube.d.ts +5 -0
- package/dist/browser/presets/youtube.js +20 -0
- package/dist/browser/recker.d.ts +8 -1
- package/dist/browser/recker.js +8 -2
- package/dist/browser/scrape/document.d.ts +5 -4
- package/dist/browser/scrape/document.js +89 -76
- package/dist/browser/scrape/element.d.ts +10 -8
- package/dist/browser/scrape/element.js +295 -81
- package/dist/browser/scrape/extractors.d.ts +11 -11
- package/dist/browser/scrape/extractors.js +145 -113
- package/dist/browser/scrape/parser/back.d.ts +1 -0
- package/dist/browser/scrape/parser/back.js +3 -0
- package/dist/browser/scrape/parser/index.d.ts +20 -0
- package/dist/browser/scrape/parser/index.js +19 -0
- package/dist/browser/scrape/parser/matcher.d.ts +30 -0
- package/dist/browser/scrape/parser/matcher.js +99 -0
- package/dist/browser/scrape/parser/nodes/comment.d.ts +12 -0
- package/dist/browser/scrape/parser/nodes/comment.js +21 -0
- package/dist/browser/scrape/parser/nodes/html.d.ts +110 -0
- package/dist/browser/scrape/parser/nodes/html.js +978 -0
- package/dist/browser/scrape/parser/nodes/node.d.ts +18 -0
- package/dist/browser/scrape/parser/nodes/node.js +31 -0
- package/dist/browser/scrape/parser/nodes/text.d.ts +14 -0
- package/dist/browser/scrape/parser/nodes/text.js +30 -0
- package/dist/browser/scrape/parser/nodes/type.d.ts +6 -0
- package/dist/browser/scrape/parser/nodes/type.js +7 -0
- package/dist/browser/scrape/parser/parse.d.ts +1 -0
- package/dist/browser/scrape/parser/parse.js +1 -0
- package/dist/browser/scrape/parser/valid.d.ts +2 -0
- package/dist/browser/scrape/parser/valid.js +5 -0
- package/dist/browser/scrape/parser/void-tag.d.ts +7 -0
- package/dist/browser/scrape/parser/void-tag.js +43 -0
- package/dist/browser/scrape/types.d.ts +7 -0
- package/dist/browser/seo/analyzer.d.ts +59 -0
- package/dist/browser/seo/analyzer.js +1399 -0
- package/dist/browser/seo/keywords.d.ts +16 -0
- package/dist/browser/seo/keywords.js +55 -0
- package/dist/browser/seo/rules/accessibility.d.ts +2 -0
- package/dist/browser/seo/rules/accessibility.js +733 -0
- package/dist/browser/seo/rules/ai-search.d.ts +2 -0
- package/dist/browser/seo/rules/ai-search.js +436 -0
- package/dist/browser/seo/rules/analytics.d.ts +2 -0
- package/dist/browser/seo/rules/analytics.js +306 -0
- package/dist/browser/seo/rules/best-practices.d.ts +2 -0
- package/dist/browser/seo/rules/best-practices.js +195 -0
- package/dist/browser/seo/rules/canonical.d.ts +12 -0
- package/dist/browser/seo/rules/canonical.js +270 -0
- package/dist/browser/seo/rules/content.d.ts +2 -0
- package/dist/browser/seo/rules/content.js +522 -0
- package/dist/browser/seo/rules/crawl.d.ts +2 -0
- package/dist/browser/seo/rules/crawl.js +435 -0
- package/dist/browser/seo/rules/cwv.d.ts +2 -0
- package/dist/browser/seo/rules/cwv.js +248 -0
- package/dist/browser/seo/rules/ecommerce.d.ts +2 -0
- package/dist/browser/seo/rules/ecommerce.js +312 -0
- package/dist/browser/seo/rules/i18n.d.ts +2 -0
- package/dist/browser/seo/rules/i18n.js +288 -0
- package/dist/browser/seo/rules/images.d.ts +2 -0
- package/dist/browser/seo/rules/images.js +255 -0
- package/dist/browser/seo/rules/index.d.ts +52 -0
- package/dist/browser/seo/rules/index.js +159 -0
- package/dist/browser/seo/rules/internal-linking.d.ts +2 -0
- package/dist/browser/seo/rules/internal-linking.js +394 -0
- package/dist/browser/seo/rules/links.d.ts +2 -0
- package/dist/browser/seo/rules/links.js +498 -0
- package/dist/browser/seo/rules/local.d.ts +2 -0
- package/dist/browser/seo/rules/local.js +289 -0
- package/dist/browser/seo/rules/meta.d.ts +2 -0
- package/dist/browser/seo/rules/meta.js +805 -0
- package/dist/browser/seo/rules/mobile.d.ts +2 -0
- package/dist/browser/seo/rules/mobile.js +161 -0
- package/dist/browser/seo/rules/performance.d.ts +2 -0
- package/dist/browser/seo/rules/performance.js +738 -0
- package/dist/browser/seo/rules/pwa.d.ts +2 -0
- package/dist/browser/seo/rules/pwa.js +299 -0
- package/dist/browser/seo/rules/readability.d.ts +2 -0
- package/dist/browser/seo/rules/readability.js +264 -0
- package/dist/browser/seo/rules/redirects.d.ts +16 -0
- package/dist/browser/seo/rules/redirects.js +199 -0
- package/dist/browser/seo/rules/resources.d.ts +2 -0
- package/dist/browser/seo/rules/resources.js +390 -0
- package/dist/browser/seo/rules/schema.d.ts +2 -0
- package/dist/browser/seo/rules/schema.js +379 -0
- package/dist/browser/seo/rules/security.d.ts +2 -0
- package/dist/browser/seo/rules/security.js +877 -0
- package/dist/browser/seo/rules/social.d.ts +2 -0
- package/dist/browser/seo/rules/social.js +603 -0
- package/dist/browser/seo/rules/structural.d.ts +2 -0
- package/dist/browser/seo/rules/structural.js +223 -0
- package/dist/browser/seo/rules/technical-advanced.d.ts +10 -0
- package/dist/browser/seo/rules/technical-advanced.js +289 -0
- package/dist/browser/seo/rules/technical.d.ts +2 -0
- package/dist/browser/seo/rules/technical.js +480 -0
- package/dist/browser/seo/rules/thresholds.d.ts +196 -0
- package/dist/browser/seo/rules/thresholds.js +118 -0
- package/dist/browser/seo/rules/types.d.ts +498 -0
- package/dist/browser/seo/rules/types.js +11 -0
- package/dist/browser/seo/types.d.ts +211 -0
- package/dist/browser/seo/types.js +1 -0
- package/dist/browser/transport/curl.d.ts +4 -0
- package/dist/browser/transport/curl.js +101 -0
- package/dist/browser/transport/undici.js +1 -2
- package/dist/browser/transport/worker.d.ts +18 -0
- package/dist/browser/transport/worker.js +278 -0
- package/dist/browser/types/index.d.ts +4 -1
- package/dist/browser/utils/binary-manager.d.ts +4 -0
- package/dist/browser/utils/binary-manager.js +72 -0
- package/dist/browser/utils/user-agent.js +2 -13
- package/dist/cache/indexed-db.d.ts +10 -0
- package/dist/cache/indexed-db.js +88 -0
- package/dist/cache/service-worker-cache.d.ts +18 -0
- package/dist/cache/service-worker-cache.js +103 -0
- package/dist/cli/commands/ai.d.ts +2 -0
- package/dist/cli/commands/ai.js +162 -0
- package/dist/cli/commands/bench.d.ts +2 -0
- package/dist/cli/commands/bench.js +51 -0
- package/dist/cli/commands/dns.d.ts +2 -0
- package/dist/cli/commands/dns.js +295 -0
- package/dist/cli/commands/har.d.ts +2 -0
- package/dist/cli/commands/har.js +171 -0
- package/dist/cli/commands/hls.d.ts +2 -0
- package/dist/cli/commands/hls.js +192 -0
- package/dist/cli/commands/network.d.ts +2 -0
- package/dist/cli/commands/network.js +288 -0
- package/dist/cli/commands/protocols.d.ts +2 -0
- package/dist/cli/commands/protocols.js +344 -0
- package/dist/cli/commands/scrape.d.ts +2 -0
- package/dist/cli/commands/scrape.js +176 -0
- package/dist/cli/commands/security.d.ts +2 -0
- package/dist/cli/commands/security.js +57 -0
- package/dist/cli/commands/seo.d.ts +2 -0
- package/dist/cli/commands/seo.js +125 -0
- package/dist/cli/commands/serve.d.ts +2 -0
- package/dist/cli/commands/serve.js +531 -0
- package/dist/cli/commands/spider.d.ts +3 -0
- package/dist/cli/commands/spider.js +456 -0
- package/dist/cli/commands/utils.d.ts +2 -0
- package/dist/cli/commands/utils.js +176 -0
- package/dist/cli/commands/vector.d.ts +2 -0
- package/dist/cli/commands/vector.js +158 -0
- package/dist/cli/handler.d.ts +2 -2
- package/dist/cli/handler.js +6 -6
- package/dist/cli/helpers.d.ts +7 -0
- package/dist/cli/helpers.js +128 -0
- package/dist/cli/index.js +96 -5228
- package/dist/cli/parser/help.d.ts +2 -0
- package/dist/cli/parser/help.js +52 -0
- package/dist/cli/parser/index.d.ts +3 -0
- package/dist/cli/parser/index.js +3 -0
- package/dist/cli/parser/parser.d.ts +4 -0
- package/dist/cli/parser/parser.js +146 -0
- package/dist/cli/parser/types.d.ts +41 -0
- package/dist/cli/parser/types.js +1 -0
- package/dist/cli/presets.d.ts +1 -1
- package/dist/cli/presets.js +1 -1
- package/dist/cli/router.d.ts +36 -0
- package/dist/cli/router.js +195 -0
- package/dist/cli/tui/ai-chat.js +1 -1
- package/dist/cli/tui/commands/context.d.ts +9 -0
- package/dist/cli/tui/commands/context.js +1 -0
- package/dist/cli/tui/commands/dns.d.ts +10 -0
- package/dist/cli/tui/commands/dns.js +461 -0
- package/dist/cli/tui/commands/hls.d.ts +2 -0
- package/dist/cli/tui/commands/hls.js +162 -0
- package/dist/cli/tui/commands/ip.d.ts +2 -0
- package/dist/cli/tui/commands/ip.js +45 -0
- package/dist/cli/tui/commands/network.d.ts +3 -0
- package/dist/cli/tui/commands/network.js +81 -0
- package/dist/cli/tui/commands/protocols.d.ts +6 -0
- package/dist/cli/tui/commands/protocols.js +531 -0
- package/dist/cli/tui/commands/security.d.ts +2 -0
- package/dist/cli/tui/commands/security.js +48 -0
- package/dist/cli/tui/commands/seo.d.ts +2 -0
- package/dist/cli/tui/commands/seo.js +74 -0
- package/dist/cli/tui/context.d.ts +12 -0
- package/dist/cli/tui/context.js +1 -0
- package/dist/cli/tui/shell.d.ts +11 -20
- package/dist/cli/tui/shell.js +216 -1873
- package/dist/constants/user-agents.d.ts +7 -0
- package/dist/constants/user-agents.js +7 -0
- package/dist/core/client.d.ts +2 -0
- package/dist/core/client.js +19 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.js +1 -0
- package/dist/mcp/cli.js +2 -3
- package/dist/mcp/data/embeddings.json +1 -0
- package/dist/mcp/tools/network.js +298 -158
- package/dist/plugins/har-player.d.ts +23 -0
- package/dist/plugins/har-player.js +49 -0
- package/dist/plugins/har-recorder.d.ts +37 -3
- package/dist/plugins/har-recorder.js +116 -63
- package/dist/plugins/network-simulation.d.ts +7 -0
- package/dist/plugins/network-simulation.js +13 -0
- package/dist/presets/android.d.ts +2 -0
- package/dist/presets/android.js +16 -0
- package/dist/presets/chaturbate.d.ts +2 -0
- package/dist/presets/chaturbate.js +17 -0
- package/dist/presets/elevenlabs.d.ts +6 -0
- package/dist/presets/elevenlabs.js +20 -0
- package/dist/presets/enhancers.d.ts +20 -0
- package/dist/presets/enhancers.js +85 -0
- package/dist/presets/hubspot.d.ts +9 -0
- package/dist/presets/hubspot.js +28 -0
- package/dist/presets/index.d.ts +10 -0
- package/dist/presets/index.js +10 -0
- package/dist/presets/ios.d.ts +2 -0
- package/dist/presets/ios.js +13 -0
- package/dist/presets/pinecone.d.ts +8 -0
- package/dist/presets/pinecone.js +42 -0
- package/dist/presets/registry.js +60 -0
- package/dist/presets/sendgrid.d.ts +6 -0
- package/dist/presets/sendgrid.js +20 -0
- package/dist/presets/sentry.d.ts +11 -0
- package/dist/presets/sentry.js +48 -0
- package/dist/presets/square.d.ts +10 -0
- package/dist/presets/square.js +33 -0
- package/dist/recker.d.ts +3 -0
- package/dist/recker.js +4 -0
- package/dist/scrape/document.d.ts +5 -4
- package/dist/scrape/document.js +89 -76
- package/dist/scrape/element.d.ts +10 -8
- package/dist/scrape/element.js +295 -81
- package/dist/scrape/extractors.d.ts +11 -11
- package/dist/scrape/extractors.js +145 -113
- package/dist/scrape/index.d.ts +2 -0
- package/dist/scrape/index.js +1 -0
- package/dist/scrape/parser/back.d.ts +1 -0
- package/dist/scrape/parser/back.js +3 -0
- package/dist/scrape/parser/index.d.ts +20 -0
- package/dist/scrape/parser/index.js +19 -0
- package/dist/scrape/parser/matcher.d.ts +30 -0
- package/dist/scrape/parser/matcher.js +99 -0
- package/dist/scrape/parser/nodes/comment.d.ts +12 -0
- package/dist/scrape/parser/nodes/comment.js +21 -0
- package/dist/scrape/parser/nodes/html.d.ts +110 -0
- package/dist/scrape/parser/nodes/html.js +978 -0
- package/dist/scrape/parser/nodes/node.d.ts +18 -0
- package/dist/scrape/parser/nodes/node.js +31 -0
- package/dist/scrape/parser/nodes/text.d.ts +14 -0
- package/dist/scrape/parser/nodes/text.js +30 -0
- package/dist/scrape/parser/nodes/type.d.ts +6 -0
- package/dist/scrape/parser/nodes/type.js +7 -0
- package/dist/scrape/parser/parse.d.ts +1 -0
- package/dist/scrape/parser/parse.js +1 -0
- package/dist/scrape/parser/valid.d.ts +2 -0
- package/dist/scrape/parser/valid.js +5 -0
- package/dist/scrape/parser/void-tag.d.ts +7 -0
- package/dist/scrape/parser/void-tag.js +43 -0
- package/dist/scrape/spider.d.ts +19 -0
- package/dist/scrape/spider.js +28 -3
- package/dist/scrape/types.d.ts +7 -0
- package/dist/seo/analyzer.d.ts +15 -5
- package/dist/seo/analyzer.js +636 -175
- package/dist/seo/formatter.d.ts +16 -0
- package/dist/seo/formatter.js +228 -0
- package/dist/seo/index.d.ts +2 -0
- package/dist/seo/index.js +1 -0
- package/dist/seo/keywords.d.ts +16 -0
- package/dist/seo/keywords.js +55 -0
- package/dist/seo/rules/accessibility.js +96 -57
- package/dist/seo/rules/ai-search.js +44 -31
- package/dist/seo/rules/analytics.d.ts +2 -0
- package/dist/seo/rules/analytics.js +306 -0
- package/dist/seo/rules/best-practices.js +21 -14
- package/dist/seo/rules/canonical.js +53 -32
- package/dist/seo/rules/content.js +317 -31
- package/dist/seo/rules/crawl.js +55 -40
- package/dist/seo/rules/cwv.js +21 -15
- package/dist/seo/rules/ecommerce.js +82 -22
- package/dist/seo/rules/i18n.js +75 -36
- package/dist/seo/rules/images.js +109 -30
- package/dist/seo/rules/index.js +2 -0
- package/dist/seo/rules/internal-linking.js +58 -39
- package/dist/seo/rules/links.js +79 -52
- package/dist/seo/rules/local.js +49 -25
- package/dist/seo/rules/meta.js +339 -81
- package/dist/seo/rules/mobile.js +112 -2
- package/dist/seo/rules/performance.js +434 -66
- package/dist/seo/rules/pwa.js +36 -39
- package/dist/seo/rules/readability.js +31 -22
- package/dist/seo/rules/redirects.js +21 -15
- package/dist/seo/rules/resources.js +59 -42
- package/dist/seo/rules/schema.js +333 -8
- package/dist/seo/rules/security.js +142 -80
- package/dist/seo/rules/social.js +277 -47
- package/dist/seo/rules/structural.js +87 -19
- package/dist/seo/rules/technical-advanced.js +30 -24
- package/dist/seo/rules/technical.js +243 -42
- package/dist/seo/rules/types.d.ts +53 -1
- package/dist/seo/seo-spider.d.ts +22 -0
- package/dist/seo/seo-spider.js +77 -13
- package/dist/seo/types.d.ts +8 -1
- package/dist/seo/validators/llms-txt.js +19 -0
- package/dist/seo/validators/rss.d.ts +11 -0
- package/dist/seo/validators/rss.js +93 -0
- package/dist/seo/validators/sitemap.js +36 -26
- package/dist/transport/curl.d.ts +4 -0
- package/dist/transport/curl.js +101 -0
- package/dist/transport/udp.js +0 -1
- package/dist/transport/undici.js +1 -2
- package/dist/transport/worker.d.ts +18 -0
- package/dist/transport/worker.js +278 -0
- package/dist/types/index.d.ts +4 -1
- package/dist/utils/binary-manager.d.ts +4 -0
- package/dist/utils/binary-manager.js +72 -0
- package/dist/utils/optional-require.d.ts +7 -8
- package/dist/utils/optional-require.js +2 -21
- package/dist/utils/upload.d.ts +6 -0
- package/dist/utils/upload.js +11 -0
- package/dist/utils/user-agent.js +2 -13
- package/dist/version.js +1 -1
- package/package.json +14 -6
- package/dist/browser/utils/optional-require.d.ts +0 -19
- package/dist/browser/utils/optional-require.js +0 -105
|
@@ -0,0 +1,456 @@
|
|
|
1
|
+
import { promises as fs } from 'node:fs';
|
|
2
|
+
import colors from '../../utils/colors.js';
|
|
3
|
+
import { summarizeErrors, formatErrorSummary } from '../helpers.js';
|
|
4
|
+
import { RekArgs, generateHelp } from '../parser/index.js';
|
|
5
|
+
/**
 * Argument schema for the `spider` CLI command.
 *
 * Passed to `RekArgs.parse` to interpret the raw arguments and to
 * `generateHelp` to render the usage text.
 *
 * - `params`   key=value arguments (e.g. `depth=3`).
 * - `keywords` bare words that switch on a mode (e.g. `seo`).
 * - `flags`    dash-prefixed switches (e.g. `--robots`).
 */
const schema = {
    name: 'spider',
    // robots.txt handling is opt-in (the `robots` flag defaults to false), so the
    // help text must not claim that the crawler respects it unconditionally.
    description: 'Crawl a website and analyze all pages.\nThe crawler optionally respects robots.txt (pass --robots), handles JavaScript-rendered content, and provides detailed reports on site structure, broken links, and SEO issues.',
    params: {
        depth: { type: 'number', default: 5, description: 'Max link depth to follow' },
        limit: { type: 'number', default: 100, description: 'Max pages to crawl' },
        concurrency: { type: 'number', default: 5, description: 'Parallel requests' },
        output: { type: 'string', description: 'Save JSON report to file' },
        focus: {
            type: 'string',
            default: 'all',
            choices: ['all', 'links', 'duplicates', 'security', 'ai', 'resources'],
            description: 'Focus analysis on specific area (requires seo)',
        },
    },
    keywords: {
        seo: { description: 'Enable SEO analysis mode' },
    },
    flags: {
        json: { description: 'Output JSON to stdout', alias: 'j' },
        robots: { description: 'Respect robots.txt rules', default: false },
    },
    examples: [
        { cmd: 'rek spider example.com', desc: 'Crawl (ignores robots.txt)' },
        { cmd: 'rek spider example.com --robots', desc: 'Crawl respecting robots.txt' },
        { cmd: 'rek spider example.com depth=3 limit=50', desc: 'Depth 3, max 50 pages' },
        { cmd: 'rek spider example.com seo focus=security', desc: 'Focus on security issues' },
        { cmd: 'rek spider example.com seo output=report.json', desc: 'SEO with JSON export' },
    ],
};
|
|
30
|
+
export async function runSpider(rawArgs, defaultUrl) {
|
|
31
|
+
const { data, options, args } = RekArgs.parse(rawArgs, schema);
|
|
32
|
+
let url = args[0];
|
|
33
|
+
if (!url && defaultUrl) {
|
|
34
|
+
url = defaultUrl;
|
|
35
|
+
}
|
|
36
|
+
const formatJson = options.json || data.format === 'json';
|
|
37
|
+
const outputFile = data.output;
|
|
38
|
+
const seoEnabled = data.seo;
|
|
39
|
+
const focusMode = data.focus;
|
|
40
|
+
const respectRobotsTxt = !!options.robots;
|
|
41
|
+
const focusCategories = {
|
|
42
|
+
links: ['links'],
|
|
43
|
+
duplicates: ['title', 'meta', 'content'],
|
|
44
|
+
security: ['security'],
|
|
45
|
+
ai: ['ai-search'],
|
|
46
|
+
resources: ['resources', 'performance'],
|
|
47
|
+
all: [],
|
|
48
|
+
};
|
|
49
|
+
if (!url) {
|
|
50
|
+
if (!formatJson) {
|
|
51
|
+
console.log(colors.yellow('Usage: spider <url> [options]'));
|
|
52
|
+
console.log(generateHelp(schema));
|
|
53
|
+
}
|
|
54
|
+
return;
|
|
55
|
+
}
|
|
56
|
+
if (!url.startsWith('http'))
|
|
57
|
+
url = `https://${url}`;
|
|
58
|
+
if (!formatJson) {
|
|
59
|
+
const modeLabel = seoEnabled ? colors.magenta(' + SEO') : '';
|
|
60
|
+
const focusLabel = focusMode !== 'all' ? colors.cyan(` [focus: ${focusMode}]`) : '';
|
|
61
|
+
console.log(colors.cyan(`
|
|
62
|
+
Spider starting: ${url}`));
|
|
63
|
+
console.log(colors.gray(` Depth: ${data.depth} | Limit: ${data.limit} | Concurrency: ${data.concurrency}${modeLabel}${focusLabel}`));
|
|
64
|
+
if (outputFile) {
|
|
65
|
+
console.log(colors.gray(` Output: ${outputFile}`));
|
|
66
|
+
}
|
|
67
|
+
console.log('');
|
|
68
|
+
}
|
|
69
|
+
try {
|
|
70
|
+
if (seoEnabled) {
|
|
71
|
+
const { SeoSpider } = await import('../../seo/index.js');
|
|
72
|
+
const seoSpider = new SeoSpider({
|
|
73
|
+
maxDepth: data.depth,
|
|
74
|
+
maxPages: data.limit,
|
|
75
|
+
concurrency: data.concurrency,
|
|
76
|
+
sameDomain: true,
|
|
77
|
+
delay: 100,
|
|
78
|
+
seo: true,
|
|
79
|
+
respectRobotsTxt,
|
|
80
|
+
output: outputFile || undefined,
|
|
81
|
+
focusCategories: focusCategories[focusMode],
|
|
82
|
+
focusMode: focusMode,
|
|
83
|
+
onProgress: formatJson ? undefined : (progress) => {
|
|
84
|
+
process.stdout.write(`\r${colors.gray(' Crawling:')} ${colors.cyan(progress.crawled.toString())} pages | ${colors.gray('Queue:')} ${progress.queued} | ${colors.gray('Depth:')} ${progress.depth} `);
|
|
85
|
+
},
|
|
86
|
+
});
|
|
87
|
+
const result = await seoSpider.crawl(url);
|
|
88
|
+
if (formatJson) {
|
|
89
|
+
const responseTimes = result.pages.filter(p => p.duration > 0).map(p => p.duration);
|
|
90
|
+
const avgResponseTime = responseTimes.length > 0
|
|
91
|
+
? Math.round(responseTimes.reduce((a, b) => a + b, 0) / responseTimes.length)
|
|
92
|
+
: 0;
|
|
93
|
+
const statusCounts = {};
|
|
94
|
+
for (const page of result.pages) {
|
|
95
|
+
const key = page.status?.toString() || 'error';
|
|
96
|
+
statusCounts[key] = (statusCounts[key] || 0) + 1;
|
|
97
|
+
}
|
|
98
|
+
let totalInternalLinks = 0;
|
|
99
|
+
let totalExternalLinks = 0;
|
|
100
|
+
let totalImages = 0;
|
|
101
|
+
let imagesWithoutAlt = 0;
|
|
102
|
+
for (const page of result.pages) {
|
|
103
|
+
if (page.seoReport) {
|
|
104
|
+
totalInternalLinks += page.seoReport.links?.internal || 0;
|
|
105
|
+
totalExternalLinks += page.seoReport.links?.external || 0;
|
|
106
|
+
totalImages += page.seoReport.images?.total || 0;
|
|
107
|
+
imagesWithoutAlt += page.seoReport.images?.withoutAlt || 0;
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
const jsonOutput = {
|
|
111
|
+
startUrl: url,
|
|
112
|
+
crawledAt: new Date().toISOString(),
|
|
113
|
+
duration: result.duration,
|
|
114
|
+
config: {
|
|
115
|
+
maxDepth: data.depth,
|
|
116
|
+
maxPages: data.limit,
|
|
117
|
+
concurrency: data.concurrency,
|
|
118
|
+
focusMode,
|
|
119
|
+
},
|
|
120
|
+
summary: {
|
|
121
|
+
totalPages: result.pages.length,
|
|
122
|
+
uniqueUrls: result.visited.size,
|
|
123
|
+
avgSeoScore: result.summary.avgScore,
|
|
124
|
+
avgResponseTime,
|
|
125
|
+
pagesWithErrors: result.summary.pagesWithErrors,
|
|
126
|
+
pagesWithWarnings: result.summary.pagesWithWarnings,
|
|
127
|
+
duplicateTitles: result.summary.duplicateTitles,
|
|
128
|
+
duplicateDescriptions: result.summary.duplicateDescriptions,
|
|
129
|
+
duplicateH1s: result.summary.duplicateH1s,
|
|
130
|
+
orphanPages: result.summary.orphanPages,
|
|
131
|
+
},
|
|
132
|
+
discovery: result.txtFiles ? {
|
|
133
|
+
humans: result.txtFiles.humans.found,
|
|
134
|
+
llms: result.txtFiles.llms.found
|
|
135
|
+
} : undefined,
|
|
136
|
+
rssFeeds: result.rssFeeds,
|
|
137
|
+
content: {
|
|
138
|
+
totalInternalLinks,
|
|
139
|
+
totalExternalLinks,
|
|
140
|
+
totalImages,
|
|
141
|
+
imagesWithoutAlt,
|
|
142
|
+
},
|
|
143
|
+
httpStatus: statusCounts,
|
|
144
|
+
siteWideIssues: result.siteWideIssues.map(issue => ({
|
|
145
|
+
type: issue.type,
|
|
146
|
+
severity: issue.severity,
|
|
147
|
+
message: issue.message,
|
|
148
|
+
value: issue.value,
|
|
149
|
+
affectedUrls: issue.affectedUrls,
|
|
150
|
+
})),
|
|
151
|
+
pages: result.pages.map(page => ({
|
|
152
|
+
url: page.url,
|
|
153
|
+
status: page.status,
|
|
154
|
+
depth: page.depth,
|
|
155
|
+
duration: page.duration,
|
|
156
|
+
title: page.title,
|
|
157
|
+
error: page.error,
|
|
158
|
+
seo: page.seoReport ? {
|
|
159
|
+
score: page.seoReport.score,
|
|
160
|
+
grade: page.seoReport.grade,
|
|
161
|
+
title: page.seoReport.title,
|
|
162
|
+
metaDescription: page.seoReport.metaDescription,
|
|
163
|
+
headings: page.seoReport.headings,
|
|
164
|
+
links: page.seoReport.links,
|
|
165
|
+
images: page.seoReport.images,
|
|
166
|
+
checks: page.seoReport.checks,
|
|
167
|
+
} : null,
|
|
168
|
+
})),
|
|
169
|
+
errors: result.pages.filter(p => p.error).map(p => ({
|
|
170
|
+
url: p.url,
|
|
171
|
+
status: p.status,
|
|
172
|
+
error: p.error
|
|
173
|
+
})),
|
|
174
|
+
};
|
|
175
|
+
console.log(JSON.stringify(jsonOutput, null, 2));
|
|
176
|
+
return;
|
|
177
|
+
}
|
|
178
|
+
process.stdout.write('\r' + ' '.repeat(80) + '\r');
|
|
179
|
+
console.log(colors.green(`\n✔ SEO Spider complete`) + colors.gray(` (${(result.duration / 1000).toFixed(1)}s)`));
|
|
180
|
+
console.log(` ${colors.cyan('Pages crawled')}: ${result.pages.length}`);
|
|
181
|
+
console.log(` ${colors.cyan('Unique URLs')}: ${result.visited.size}`);
|
|
182
|
+
console.log(` ${colors.cyan('Avg SEO Score')}: ${result.summary.avgScore}/100`);
|
|
183
|
+
const responseTimes = result.pages.filter(p => p.duration > 0).map(p => p.duration);
|
|
184
|
+
const avgResponseTime = responseTimes.length > 0
|
|
185
|
+
? Math.round(responseTimes.reduce((a, b) => a + b, 0) / responseTimes.length)
|
|
186
|
+
: 0;
|
|
187
|
+
const minResponseTime = responseTimes.length > 0 ? Math.min(...responseTimes) : 0;
|
|
188
|
+
const maxResponseTime = responseTimes.length > 0 ? Math.max(...responseTimes) : 0;
|
|
189
|
+
const reqPerSec = result.duration > 0 ? (result.pages.length / (result.duration / 1000)).toFixed(1) : '0';
|
|
190
|
+
const statusCounts = new Map();
|
|
191
|
+
for (const page of result.pages) {
|
|
192
|
+
const key = page.status ? page.status.toString() : 'error';
|
|
193
|
+
statusCounts.set(key, (statusCounts.get(key) || 0) + 1);
|
|
194
|
+
}
|
|
195
|
+
let totalInternalLinks = 0;
|
|
196
|
+
let totalExternalLinks = 0;
|
|
197
|
+
let totalImages = 0;
|
|
198
|
+
let imagesWithoutAlt = 0;
|
|
199
|
+
let pagesWithoutTitle = 0;
|
|
200
|
+
let pagesWithoutDescription = 0;
|
|
201
|
+
for (const page of result.pages) {
|
|
202
|
+
if (page.seoReport) {
|
|
203
|
+
totalInternalLinks += page.seoReport.links?.internal || 0;
|
|
204
|
+
totalExternalLinks += page.seoReport.links?.external || 0;
|
|
205
|
+
totalImages += page.seoReport.images?.total || 0;
|
|
206
|
+
imagesWithoutAlt += page.seoReport.images?.withoutAlt || 0;
|
|
207
|
+
if (!page.seoReport.title?.text)
|
|
208
|
+
pagesWithoutTitle++;
|
|
209
|
+
if (!page.seoReport.metaDescription?.text)
|
|
210
|
+
pagesWithoutDescription++;
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
console.log(colors.bold('\n Performance:'));
|
|
214
|
+
console.log(` ${colors.gray('Avg Response:')} ${avgResponseTime}ms`);
|
|
215
|
+
console.log(` ${colors.gray('Min/Max:')} ${minResponseTime}ms / ${maxResponseTime}ms`);
|
|
216
|
+
console.log(` ${colors.gray('Throughput:')} ${reqPerSec} req/s`);
|
|
217
|
+
console.log(colors.bold('\n HTTP Status:'));
|
|
218
|
+
const sortedStatuses = Array.from(statusCounts.entries()).sort((a, b) => b[1] - a[1]);
|
|
219
|
+
for (const [statusKey, count] of sortedStatuses.slice(0, 5)) {
|
|
220
|
+
const statusNum = parseInt(statusKey);
|
|
221
|
+
const statusColor = statusNum >= 400 ? colors.red :
|
|
222
|
+
statusNum >= 300 ? colors.yellow :
|
|
223
|
+
statusKey === 'error' ? colors.red : colors.green;
|
|
224
|
+
const pct = ((count / result.pages.length) * 100).toFixed(0);
|
|
225
|
+
console.log(` ${statusColor(statusKey.padEnd(5))} ${count.toString().padStart(3)} (${pct}%)`);
|
|
226
|
+
}
|
|
227
|
+
console.log(colors.bold('\n Content:'));
|
|
228
|
+
console.log(` ${colors.gray('Internal links:')} ${totalInternalLinks.toLocaleString()}`);
|
|
229
|
+
console.log(` ${colors.gray('External links:')} ${totalExternalLinks.toLocaleString()}`);
|
|
230
|
+
console.log(` ${colors.gray('Images:')} ${totalImages.toLocaleString()} (${imagesWithoutAlt} missing alt)`);
|
|
231
|
+
console.log(` ${colors.gray('Missing title:')} ${pagesWithoutTitle}`);
|
|
232
|
+
console.log(` ${colors.gray('Missing desc:')} ${pagesWithoutDescription}`);
|
|
233
|
+
console.log(colors.bold('\n SEO Summary:'));
|
|
234
|
+
const { summary } = result;
|
|
235
|
+
console.log(` ${colors.red('✗')} Pages with errors: ${summary.pagesWithErrors}`);
|
|
236
|
+
console.log(` ${colors.yellow('⚠')} Pages with warnings: ${summary.pagesWithWarnings}`);
|
|
237
|
+
console.log(` ${colors.magenta('⚐')} Duplicate titles: ${summary.duplicateTitles}`);
|
|
238
|
+
console.log(` ${colors.magenta('⚐')} Duplicate descriptions:${summary.duplicateDescriptions}`);
|
|
239
|
+
console.log(` ${colors.magenta('⚐')} Duplicate H1s: ${summary.duplicateH1s}`);
|
|
240
|
+
console.log(` ${colors.gray('○')} Orphan pages: ${summary.orphanPages}`);
|
|
241
|
+
if (result.txtFiles) {
|
|
242
|
+
console.log(colors.bold('\n Discovery:'));
|
|
243
|
+
const { humans, llms } = result.txtFiles;
|
|
244
|
+
if (humans.found)
|
|
245
|
+
console.log(` ${colors.green('✔')} humans.txt found`);
|
|
246
|
+
if (llms.found)
|
|
247
|
+
console.log(` ${colors.green('✔')} llms.txt found`);
|
|
248
|
+
if (!humans.found && !llms.found)
|
|
249
|
+
console.log(` ${colors.gray('○')} No special text files found`);
|
|
250
|
+
}
|
|
251
|
+
if (result.rssFeeds && result.rssFeeds.length > 0) {
|
|
252
|
+
console.log(colors.bold('\n RSS/Atom Feeds:'));
|
|
253
|
+
result.rssFeeds.forEach(feed => {
|
|
254
|
+
const typeLabel = feed.type === 'rss' ? 'RSS' : 'Atom';
|
|
255
|
+
const title = feed.title ? `"${feed.title}"` : 'Untitled';
|
|
256
|
+
const count = feed.itemCount > 0 ? `(${feed.itemCount} items)` : '';
|
|
257
|
+
console.log(` ${colors.green('✔')} ${colors.cyan(typeLabel)} ${title} ${colors.gray(count)}`);
|
|
258
|
+
console.log(` ${colors.gray(feed.url)}`);
|
|
259
|
+
});
|
|
260
|
+
}
|
|
261
|
+
if (result.siteWideIssues.length > 0) {
|
|
262
|
+
console.log(colors.bold('\n Site-Wide Issues:'));
|
|
263
|
+
const sortedIssues = [...result.siteWideIssues].sort((a, b) => {
|
|
264
|
+
const priority = { error: 0, warning: 1, info: 2 };
|
|
265
|
+
return priority[a.severity] - priority[b.severity];
|
|
266
|
+
});
|
|
267
|
+
for (const issue of sortedIssues.slice(0, 10)) {
|
|
268
|
+
const icon = issue.severity === 'error' ? colors.red('✗') :
|
|
269
|
+
issue.severity === 'warning' ? colors.yellow('⚠') : colors.gray('○');
|
|
270
|
+
const colorFn = issue.severity === 'error' ? colors.red :
|
|
271
|
+
issue.severity === 'warning' ? colors.yellow : colors.gray;
|
|
272
|
+
console.log(` ${icon} ${colorFn(issue.message)}`);
|
|
273
|
+
if (issue.value) {
|
|
274
|
+
const truncatedValue = issue.value.length > 50 ? issue.value.slice(0, 47) + '...' : issue.value;
|
|
275
|
+
console.log(` ${colors.gray(`"${truncatedValue}"`)}`);
|
|
276
|
+
}
|
|
277
|
+
const uniquePaths = [...new Set(issue.affectedUrls.map(u => {
|
|
278
|
+
try {
|
|
279
|
+
return new URL(u).pathname;
|
|
280
|
+
}
|
|
281
|
+
catch {
|
|
282
|
+
return u;
|
|
283
|
+
}
|
|
284
|
+
}))];
|
|
285
|
+
if (uniquePaths.length <= 3) {
|
|
286
|
+
for (const path of uniquePaths) {
|
|
287
|
+
console.log(` ${colors.gray('→')} ${path}`);
|
|
288
|
+
}
|
|
289
|
+
}
|
|
290
|
+
else {
|
|
291
|
+
console.log(` ${colors.gray(`→ ${uniquePaths.length} pages affected`)}`);
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
if (result.siteWideIssues.length > 10) {
|
|
295
|
+
console.log(colors.gray(` ... and ${result.siteWideIssues.length - 10} more issues`));
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
const pagesWithScores = result.pages
|
|
299
|
+
.filter(p => p.seoReport)
|
|
300
|
+
.sort((a, b) => (a.seoReport?.score || 0) - (b.seoReport?.score || 0));
|
|
301
|
+
const seenPaths = new Set();
|
|
302
|
+
const uniquePages = pagesWithScores.filter(page => {
|
|
303
|
+
const path = new URL(page.url).pathname;
|
|
304
|
+
if (seenPaths.has(path))
|
|
305
|
+
return false;
|
|
306
|
+
seenPaths.add(path);
|
|
307
|
+
return true;
|
|
308
|
+
});
|
|
309
|
+
if (uniquePages.length > 0) {
|
|
310
|
+
console.log(colors.bold('\n Pages by SEO Score:'));
|
|
311
|
+
const worstPages = uniquePages.slice(0, 5);
|
|
312
|
+
for (const page of worstPages) {
|
|
313
|
+
const score = page.seoReport?.score || 0;
|
|
314
|
+
const grade = page.seoReport?.grade || '?';
|
|
315
|
+
const path = new URL(page.url).pathname;
|
|
316
|
+
const scoreColor = score >= 80 ? colors.green : score >= 60 ? colors.yellow : colors.red;
|
|
317
|
+
console.log(` ${scoreColor(`${score.toString().padStart(3)}`)} ${colors.gray(`[${grade}]`)} ${path.slice(0, 50)}`);
|
|
318
|
+
}
|
|
319
|
+
if (uniquePages.length > 5) {
|
|
320
|
+
console.log(colors.gray(` ... and ${uniquePages.length - 5} more pages`));
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
if (outputFile) {
|
|
324
|
+
console.log(colors.green(`\n Report saved to: ${outputFile}`));
|
|
325
|
+
}
|
|
326
|
+
}
|
|
327
|
+
else {
|
|
328
|
+
const { Spider } = await import('../../scrape/spider.js');
|
|
329
|
+
const spider = new Spider({
|
|
330
|
+
maxDepth: data.depth,
|
|
331
|
+
maxPages: data.limit,
|
|
332
|
+
concurrency: data.concurrency,
|
|
333
|
+
sameDomain: true,
|
|
334
|
+
delay: 100,
|
|
335
|
+
respectRobotsTxt,
|
|
336
|
+
onProgress: formatJson ? undefined : (progress) => {
|
|
337
|
+
process.stdout.write(`\r${colors.gray(' Crawling:')} ${colors.cyan(progress.crawled.toString())} pages | ${colors.gray('Queue:')} ${progress.queued} | ${colors.gray('Depth:')} ${progress.depth} `);
|
|
338
|
+
},
|
|
339
|
+
});
|
|
340
|
+
const result = await spider.crawl(url);
|
|
341
|
+
if (formatJson) {
|
|
342
|
+
const jsonOutput = {
|
|
343
|
+
startUrl: result.startUrl,
|
|
344
|
+
crawledAt: new Date().toISOString(),
|
|
345
|
+
duration: result.duration,
|
|
346
|
+
config: {
|
|
347
|
+
maxDepth: data.depth,
|
|
348
|
+
maxPages: data.limit,
|
|
349
|
+
concurrency: data.concurrency,
|
|
350
|
+
},
|
|
351
|
+
summary: {
|
|
352
|
+
totalPages: result.pages.length,
|
|
353
|
+
successCount: result.pages.filter(p => !p.error).length,
|
|
354
|
+
errorCount: result.errors.length,
|
|
355
|
+
uniqueUrls: result.visited.size,
|
|
356
|
+
},
|
|
357
|
+
pages: result.pages.map(p => ({
|
|
358
|
+
url: p.url,
|
|
359
|
+
status: p.status,
|
|
360
|
+
title: p.title,
|
|
361
|
+
depth: p.depth,
|
|
362
|
+
linksCount: p.links.length,
|
|
363
|
+
duration: p.duration,
|
|
364
|
+
error: p.error,
|
|
365
|
+
meta: p.meta,
|
|
366
|
+
metrics: p.metrics,
|
|
367
|
+
social: p.social,
|
|
368
|
+
})), errors: result.pages.filter(p => p.error).map(p => ({
|
|
369
|
+
url: p.url,
|
|
370
|
+
status: p.status,
|
|
371
|
+
error: p.error
|
|
372
|
+
})),
|
|
373
|
+
};
|
|
374
|
+
console.log(JSON.stringify(jsonOutput, null, 2));
|
|
375
|
+
return;
|
|
376
|
+
}
|
|
377
|
+
process.stdout.write('\r' + ' '.repeat(80) + '\r');
|
|
378
|
+
console.log(colors.green(`\n✔ Spider complete`) + colors.gray(` (${(result.duration / 1000).toFixed(1)}s)`));
|
|
379
|
+
console.log(` ${colors.cyan('Pages crawled')}: ${result.pages.length}`);
|
|
380
|
+
console.log(` ${colors.cyan('Unique URLs')}: ${result.visited.size}`);
|
|
381
|
+
console.log(` ${colors.cyan('Errors')}: ${result.errors.length}`);
|
|
382
|
+
const byDepth = new Map();
|
|
383
|
+
for (const page of result.pages) {
|
|
384
|
+
byDepth.set(page.depth, (byDepth.get(page.depth) || 0) + 1);
|
|
385
|
+
}
|
|
386
|
+
console.log(colors.bold('\n Pages by depth:'));
|
|
387
|
+
for (const [depth, count] of Array.from(byDepth.entries()).sort((a, b) => a[0] - b[0])) {
|
|
388
|
+
const bar = '█'.repeat(Math.min(count, 40));
|
|
389
|
+
console.log(` ${colors.gray(`d${depth}:`)} ${bar} ${count}`);
|
|
390
|
+
}
|
|
391
|
+
const topPages = [...result.pages]
|
|
392
|
+
.filter(p => !p.error)
|
|
393
|
+
.sort((a, b) => b.links.length - a.links.length)
|
|
394
|
+
.slice(0, 10);
|
|
395
|
+
if (topPages.length > 0) {
|
|
396
|
+
console.log(colors.bold('\n Top pages by outgoing links:'));
|
|
397
|
+
for (const page of topPages) {
|
|
398
|
+
const title = page.title.slice(0, 40) || new URL(page.url).pathname;
|
|
399
|
+
console.log(` ${colors.cyan(page.links.length.toString().padStart(3))} ${title}`);
|
|
400
|
+
}
|
|
401
|
+
}
|
|
402
|
+
if (result.errors.length > 0) {
|
|
403
|
+
const errorSummary = summarizeErrors(result.errors);
|
|
404
|
+
console.log(formatErrorSummary(errorSummary));
|
|
405
|
+
}
|
|
406
|
+
if (outputFile) {
|
|
407
|
+
const jsonOutput = {
|
|
408
|
+
startUrl: result.startUrl,
|
|
409
|
+
crawledAt: new Date().toISOString(),
|
|
410
|
+
duration: result.duration,
|
|
411
|
+
summary: {
|
|
412
|
+
totalPages: result.pages.length,
|
|
413
|
+
successCount: result.pages.filter(p => !p.error).length,
|
|
414
|
+
errorCount: result.errors.length,
|
|
415
|
+
uniqueUrls: result.visited.size,
|
|
416
|
+
},
|
|
417
|
+
pages: result.pages.map(p => ({
|
|
418
|
+
url: p.url,
|
|
419
|
+
status: p.status,
|
|
420
|
+
title: p.title,
|
|
421
|
+
depth: p.depth,
|
|
422
|
+
linksCount: p.links.length,
|
|
423
|
+
duration: p.duration,
|
|
424
|
+
error: p.error,
|
|
425
|
+
meta: p.meta,
|
|
426
|
+
metrics: p.metrics,
|
|
427
|
+
social: p.social,
|
|
428
|
+
})), errors: result.pages.filter(p => p.error).map(p => ({
|
|
429
|
+
url: p.url,
|
|
430
|
+
status: p.status,
|
|
431
|
+
error: p.error
|
|
432
|
+
})),
|
|
433
|
+
};
|
|
434
|
+
await fs.writeFile(outputFile, JSON.stringify(jsonOutput, null, 2));
|
|
435
|
+
console.log(colors.green(`\n Report saved to: ${outputFile}`));
|
|
436
|
+
}
|
|
437
|
+
}
|
|
438
|
+
console.log('');
|
|
439
|
+
}
|
|
440
|
+
catch (error) {
|
|
441
|
+
console.error(colors.red(`\nSpider failed: ${error.message}`));
|
|
442
|
+
process.exit(1);
|
|
443
|
+
}
|
|
444
|
+
}
|
|
445
|
+
/**
 * Registers the `spider` command (alias `crawl`) on the CLI program.
 *
 * The command takes a required start URL followed by a variadic list of
 * option tokens; both are forwarded to `runSpider` as one flat argument list
 * with the URL first.
 *
 * @param {object} program - CLI program object exposing a chainable
 *   `command()` builder.
 * @returns {void}
 */
export function registerSpiderCommand(program) {
    // The handler receives the URL and the remaining raw tokens separately;
    // runSpider expects them merged into a single array.
    const handleSpider = async (startUrl, extraArgs) => {
        const spiderArgs = [startUrl, ...extraArgs];
        await runSpider(spiderArgs);
    };

    program
        .command('spider')
        .alias('crawl')
        .description('Crawl a website and analyze all pages')
        .argument('<url>', 'Starting URL to crawl')
        .argument('[args...]', 'Options: depth=N limit=N concurrency=N seo focus=MODE...')
        .addHelpText('after', generateHelp(schema))
        .action(handleSpider);
}
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
import colors from '../../utils/colors.js';
|
|
2
|
+
import { RekArgs, generateHelp } from '../parser/index.js';
|
|
3
|
+
import { promises as fs } from 'node:fs';
|
|
4
|
+
import pathMod from 'node:path';
|
|
5
|
+
import { installCurlImpersonate, hasImpersonate, getCurlPath } from '../../utils/binary-manager.js';
|
|
6
|
+
// Argument schema for `rek upload`; it is handed to generateHelp() for the
// help text and to RekArgs.parse() for the trailing command-line tokens.
const uploadSchema = {
    name: 'upload',
    description: 'Upload a file to a URL',

    // key=value parameters.
    params: {
        field: {
            type: 'string',
            default: 'file',
            description: 'Form field name',
        },
    },

    // Bare-word switches.
    flags: {
        progress: {
            description: 'Show upload progress',
            default: true,
        },
        'no-progress': {
            description: 'Disable progress bar',
        },
    },

    examples: [
        {
            cmd: 'rek upload api.com/files ./image.png',
            desc: 'Simple upload',
        },
        {
            cmd: 'rek upload api.com/files data.json field=doc',
            desc: 'Custom field',
        },
    ],
};
|
|
21
|
+
// Argument schema for `rek download`; it is handed to generateHelp() for the
// help text and to RekArgs.parse() for the trailing command-line tokens.
const downloadSchema = {
    name: 'download',
    description: 'Download a file from a URL',

    // Bare-word switches.
    flags: {
        resume: {
            description: 'Resume partial download',
            default: false,
        },
        progress: {
            description: 'Show progress',
            default: true,
        },
        'no-progress': {
            description: 'Disable progress',
        },
    },

    examples: [
        {
            cmd: 'rek download example.com/file.zip',
            desc: 'Download',
        },
        {
            cmd: 'rek download example.com/large.iso resume',
            desc: 'Resume download',
        },
    ],
};
|
|
34
|
+
// Argument schema for `rek proxy`; it is handed to generateHelp() for the
// help text and to RekArgs.parse() for the trailing command-line tokens.
const proxySchema = {
    name: 'proxy',
    description: 'Route requests through a proxy',

    // key=value parameters.
    params: {
        method: {
            type: 'string',
            default: 'GET',
            description: 'HTTP Method',
        },
    },

    examples: [
        {
            cmd: 'rek proxy http://127.0.0.1:8080 api.com/get',
            desc: 'Proxy GET',
        },
        {
            cmd: 'rek proxy socks5://127.0.0.1:9050 api.com/post method=POST',
            desc: 'SOCKS5 POST',
        },
    ],
};
|
|
45
|
+
/**
 * Returns `url` unchanged when it already carries an explicit `http://` or
 * `https://` scheme (case-insensitive); otherwise prefixes `https://`.
 *
 * A plain `startsWith('http')` test is not sufficient: host names such as
 * `httpbin.org` begin with "http" yet still lack a scheme.
 *
 * @param {string} url - URL or bare host/path typed on the command line.
 * @returns {string} The URL with a scheme.
 */
function withHttpScheme(url) {
    return /^https?:\/\//i.test(url) ? url : `https://${url}`;
}

/**
 * Escapes a value for use inside a quoted `name="..."` / `filename="..."`
 * parameter of a multipart/form-data part header.
 *
 * Double quotes, CR and LF are percent-encoded so the value can neither end
 * the quoted string early nor introduce extra header lines.
 *
 * @param {*} value - Field name or file name.
 * @returns {string} The escaped value.
 */
function escapeMultipartParam(value) {
    return String(value)
        .replace(/"/g, '%22')
        .replace(/\r/g, '%0D')
        .replace(/\n/g, '%0A');
}

/**
 * Registers the `upload`, `download`, `proxy` and `setup` commands on the
 * CLI program.
 *
 * Every action reports its outcome on the console and calls
 * `process.exit(1)` when the operation fails.
 *
 * @param {object} program - CLI program object exposing a chainable
 *   `command()` builder.
 * @returns {void}
 */
export function registerUtilsCommands(program) {
    program.command('upload')
        .description(uploadSchema.description)
        .argument('<url>', 'Target URL')
        .argument('<file>', 'File path')
        .argument('[args...]', 'Options')
        .addHelpText('after', generateHelp(uploadSchema))
        .action(async (url, file, rawArgs) => {
            // The schema also declares progress / no-progress, but this action
            // has no progress reporting, so those flags are not read here.
            const { data, headers } = RekArgs.parse(rawArgs, uploadSchema);
            const targetUrl = withHttpScheme(url);
            const { createClient } = await import('../../core/client.js');
            try {
                // stat() already rejects for a missing path, so no separate
                // access() probe is needed.
                const stats = await fs.stat(file);
                const filename = pathMod.basename(file);
                console.log(colors.gray(`Uploading ${filename} (${(stats.size / 1024).toFixed(1)} KB)...`));
                const client = createClient();
                const fileContent = await fs.readFile(file);
                const boundary = `----ReckerBoundary${Date.now()}`;
                // Part header; the two trailing empty entries produce the
                // blank line that separates the headers from the file bytes.
                const partHeader = [
                    `--${boundary}`,
                    `Content-Disposition: form-data; name="${escapeMultipartParam(data.field)}"; filename="${escapeMultipartParam(filename)}"`,
                    'Content-Type: application/octet-stream',
                    '',
                    '',
                ].join('\r\n');
                const body = Buffer.concat([
                    Buffer.from(partHeader),
                    fileContent,
                    Buffer.from(`\r\n--${boundary}--\r\n`),
                ]);
                const response = await client.post(targetUrl, body, {
                    headers: {
                        ...headers,
                        // Set last so a user-supplied Content-Type cannot
                        // override the multipart boundary.
                        'Content-Type': `multipart/form-data; boundary=${boundary}`,
                    },
                });
                console.log(colors.green(`✔ Upload complete: ${response.status} ${response.statusText}`));
                const text = await response.text();
                if (text) {
                    console.log(text);
                }
            }
            catch (err) {
                console.error(colors.red(`Upload Error: ${err.message}`));
                process.exit(1);
            }
        });

    program.command('download')
        .description(downloadSchema.description)
        .argument('<url>', 'Source URL')
        .argument('[args...]', 'Output path and options')
        .addHelpText('after', generateHelp(downloadSchema))
        .action(async (url, rawArgs) => {
            const { options, headers, args } = RekArgs.parse(rawArgs, downloadSchema);
            // First positional token after the URL, if any, is the output path.
            const output = args[0];
            const showProgress = options['no-progress'] ? false : (options.progress !== false);
            const resume = !!options.resume;
            const sourceUrl = withHttpScheme(url);
            const { downloadToFile } = await import('../../utils/download.js');
            const { createClient } = await import('../../core/client.js');
            try {
                // Parsed inside the try block so a malformed URL is reported as
                // a download error instead of an unhandled rejection.
                const urlPath = new URL(sourceUrl).pathname;
                const filename = output || pathMod.basename(urlPath) || 'download';
                console.log(colors.gray(`Downloading to ${filename}...`));
                const client = createClient();
                await downloadToFile(client, sourceUrl, filename, {
                    resume,
                    headers,
                    onProgress: showProgress ? (p) => {
                        const total = p.total || 0;
                        // Percentage stays at 0 when the total size is unknown.
                        const pct = total > 0 ? Math.round((p.loaded / total) * 100) : 0;
                        const mb = (p.loaded / 1024 / 1024).toFixed(1);
                        process.stdout.write(`\r  ${pct}% (${mb} MB)`);
                    } : undefined,
                });
                if (showProgress) {
                    // Finish the in-place progress line.
                    process.stdout.write('\n');
                }
                console.log(colors.green(`✔ Download complete`));
            }
            catch (err) {
                console.error(colors.red(`Download Error: ${err.message}`));
                process.exit(1);
            }
        });

    program.command('proxy')
        .description(proxySchema.description)
        .argument('<proxy>', 'Proxy URL')
        .argument('<target>', 'Target URL')
        .argument('[args...]', 'Request options')
        .addHelpText('after', generateHelp(proxySchema))
        .action(async (proxy, target, rawArgs) => {
            const { data, headers } = RekArgs.parse(rawArgs, proxySchema);
            const targetUrl = withHttpScheme(target);
            console.log(colors.gray(`Proxy: ${proxy}`));
            console.log(colors.gray(`Target: ${targetUrl}`));
            const { createClient } = await import('../../core/client.js');
            try {
                const client = createClient({ proxy: { url: proxy } });
                // Every key=value argument other than `method` becomes part of
                // the JSON request body.
                const { method: requestedMethod, ...body } = data;
                const method = requestedMethod.toLowerCase();
                if (typeof client[method] !== 'function') {
                    throw new Error(`Unsupported HTTP method: ${requestedMethod}`);
                }
                const hasBody = Object.keys(body).length > 0;
                const options = hasBody ? { json: body, headers } : { headers };
                const response = await client[method](targetUrl, options);
                console.log(colors.green(`✔ ${response.status} ${response.statusText}`));
                console.log(await response.text());
            }
            catch (err) {
                console.error(colors.red(`Proxy Error: ${err.message}`));
                process.exit(1);
            }
        });

    program.command('setup')
        .description('Install external dependencies (curl-impersonate) for advanced features')
        .action(async () => {
            // Nothing to install when the binary is already present.
            if (await hasImpersonate()) {
                console.log(colors.green(`✔ curl-impersonate is already installed at:`));
                console.log(colors.gray(getCurlPath()));
                return;
            }
            try {
                console.log(colors.cyan('Installing curl-impersonate...'));
                await installCurlImpersonate(console);
            }
            catch (e) {
                console.error(colors.red(`Installation failed: ${e.message}`));
                process.exit(1);
            }
        });
}
|