webpeel 0.19.4 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/cache.d.ts +0 -1
- package/dist/cache.js +0 -1
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +476 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1015 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +839 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +273 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +524 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +686 -0
- package/dist/cli-auth.d.ts +0 -1
- package/dist/cli-auth.js +0 -1
- package/dist/cli.d.ts +7 -6
- package/dist/cli.js +35 -4698
- package/dist/core/actions.d.ts +0 -1
- package/dist/core/actions.js +0 -1
- package/dist/core/agent.d.ts +0 -1
- package/dist/core/agent.js +9 -12
- package/dist/core/answer.d.ts +0 -1
- package/dist/core/answer.js +0 -1
- package/dist/core/application-tracker.d.ts +0 -1
- package/dist/core/application-tracker.js +0 -1
- package/dist/core/apply.d.ts +0 -1
- package/dist/core/apply.js +0 -1
- package/dist/core/auto-extract.d.ts +0 -1
- package/dist/core/auto-extract.js +0 -1
- package/dist/core/auto-interact.d.ts +0 -1
- package/dist/core/auto-interact.js +0 -1
- package/dist/core/bm25-filter.d.ts +0 -1
- package/dist/core/bm25-filter.js +0 -1
- package/dist/core/branding.d.ts +0 -1
- package/dist/core/branding.js +0 -1
- package/dist/core/browser-fetch.d.ts +0 -1
- package/dist/core/browser-fetch.js +17 -10
- package/dist/core/browser-pool.d.ts +0 -1
- package/dist/core/browser-pool.js +0 -1
- package/dist/core/budget.d.ts +0 -1
- package/dist/core/budget.js +0 -1
- package/dist/core/cache.d.ts +0 -1
- package/dist/core/cache.js +0 -1
- package/dist/core/cf-worker-proxy.d.ts +0 -1
- package/dist/core/cf-worker-proxy.js +0 -1
- package/dist/core/challenge-detection.d.ts +0 -1
- package/dist/core/challenge-detection.js +0 -1
- package/dist/core/change-tracking.d.ts +0 -1
- package/dist/core/change-tracking.js +0 -1
- package/dist/core/chunker.d.ts +0 -1
- package/dist/core/chunker.js +0 -1
- package/dist/core/chunking.d.ts +0 -1
- package/dist/core/chunking.js +0 -1
- package/dist/core/cloak-fetch.d.ts +0 -1
- package/dist/core/cloak-fetch.js +0 -1
- package/dist/core/content-pruner.d.ts +0 -1
- package/dist/core/content-pruner.js +0 -1
- package/dist/core/crawl-checkpoint.d.ts +0 -1
- package/dist/core/crawl-checkpoint.js +0 -1
- package/dist/core/crawler.d.ts +0 -1
- package/dist/core/crawler.js +6 -5
- package/dist/core/cycle-fetch.d.ts +0 -1
- package/dist/core/cycle-fetch.js +0 -1
- package/dist/core/deep-fetch.d.ts +0 -1
- package/dist/core/deep-fetch.js +0 -1
- package/dist/core/design-analysis.d.ts +0 -1
- package/dist/core/design-analysis.js +0 -1
- package/dist/core/design-compare.d.ts +0 -1
- package/dist/core/design-compare.js +0 -1
- package/dist/core/diff.d.ts +0 -1
- package/dist/core/diff.js +0 -1
- package/dist/core/dns-cache.d.ts +0 -1
- package/dist/core/dns-cache.js +0 -1
- package/dist/core/documents.d.ts +0 -1
- package/dist/core/documents.js +0 -1
- package/dist/core/domain-extractors.d.ts +0 -1
- package/dist/core/domain-extractors.js +0 -1
- package/dist/core/extract-inline.d.ts +0 -1
- package/dist/core/extract-inline.js +0 -1
- package/dist/core/extract-listings.d.ts +0 -1
- package/dist/core/extract-listings.js +0 -1
- package/dist/core/extract.d.ts +0 -1
- package/dist/core/extract.js +0 -1
- package/dist/core/fetcher.d.ts +0 -1
- package/dist/core/fetcher.js +0 -1
- package/dist/core/google-cache.d.ts +0 -1
- package/dist/core/google-cache.js +0 -1
- package/dist/core/hotel-search.d.ts +0 -1
- package/dist/core/hotel-search.js +0 -1
- package/dist/core/http-fetch.d.ts +0 -1
- package/dist/core/http-fetch.js +5 -7
- package/dist/core/human.d.ts +0 -1
- package/dist/core/human.js +0 -1
- package/dist/core/jobs.d.ts +0 -1
- package/dist/core/jobs.js +0 -1
- package/dist/core/json-ld.d.ts +0 -1
- package/dist/core/json-ld.js +0 -1
- package/dist/core/llm-extract.d.ts +0 -1
- package/dist/core/llm-extract.js +0 -1
- package/dist/core/logger.d.ts +17 -0
- package/dist/core/logger.js +44 -0
- package/dist/core/map.d.ts +0 -1
- package/dist/core/map.js +0 -1
- package/dist/core/markdown.d.ts +0 -1
- package/dist/core/markdown.js +0 -1
- package/dist/core/metadata.d.ts +0 -1
- package/dist/core/metadata.js +0 -1
- package/dist/core/paginate.d.ts +0 -1
- package/dist/core/paginate.js +0 -1
- package/dist/core/pdf.d.ts +0 -1
- package/dist/core/pdf.js +0 -1
- package/dist/core/peel-tls.d.ts +0 -1
- package/dist/core/peel-tls.js +0 -1
- package/dist/core/pipeline.d.ts +0 -1
- package/dist/core/pipeline.js +22 -25
- package/dist/core/profiles.d.ts +0 -1
- package/dist/core/profiles.js +0 -1
- package/dist/core/quick-answer.d.ts +0 -1
- package/dist/core/quick-answer.js +0 -1
- package/dist/core/rate-governor.d.ts +0 -1
- package/dist/core/rate-governor.js +0 -1
- package/dist/core/readability.d.ts +0 -1
- package/dist/core/readability.js +0 -1
- package/dist/core/research.d.ts +0 -1
- package/dist/core/research.js +0 -1
- package/dist/core/schema-extraction.d.ts +0 -1
- package/dist/core/schema-extraction.js +0 -1
- package/dist/core/schema-postprocess.d.ts +0 -1
- package/dist/core/schema-postprocess.js +0 -1
- package/dist/core/schema-templates.d.ts +0 -1
- package/dist/core/schema-templates.js +0 -1
- package/dist/core/screenshot.d.ts +0 -1
- package/dist/core/screenshot.js +0 -1
- package/dist/core/search-fallback.d.ts +0 -1
- package/dist/core/search-fallback.js +0 -1
- package/dist/core/search-provider.d.ts +0 -1
- package/dist/core/search-provider.js +18 -21
- package/dist/core/site-search.d.ts +0 -1
- package/dist/core/site-search.js +0 -1
- package/dist/core/sitemap.d.ts +0 -1
- package/dist/core/sitemap.js +0 -1
- package/dist/core/stealth-patches.d.ts +0 -1
- package/dist/core/stealth-patches.js +0 -1
- package/dist/core/stemmer.d.ts +0 -1
- package/dist/core/stemmer.js +0 -1
- package/dist/core/strategies.d.ts +6 -1
- package/dist/core/strategies.js +29 -41
- package/dist/core/strategy-hooks.d.ts +0 -1
- package/dist/core/strategy-hooks.js +0 -1
- package/dist/core/summarize.d.ts +0 -1
- package/dist/core/summarize.js +0 -1
- package/dist/core/synonyms.d.ts +0 -1
- package/dist/core/synonyms.js +0 -1
- package/dist/core/table-format.d.ts +0 -1
- package/dist/core/table-format.js +0 -1
- package/dist/core/timing.d.ts +0 -1
- package/dist/core/timing.js +0 -1
- package/dist/core/user-agents.d.ts +0 -1
- package/dist/core/user-agents.js +0 -1
- package/dist/core/watch-manager.d.ts +0 -1
- package/dist/core/watch-manager.js +0 -1
- package/dist/core/watch.d.ts +0 -1
- package/dist/core/watch.js +0 -1
- package/dist/core/youtube.d.ts +0 -1
- package/dist/core/youtube.js +0 -1
- package/dist/index.d.ts +8 -3
- package/dist/index.js +27 -3
- package/dist/integrations/index.d.ts +0 -1
- package/dist/integrations/index.js +0 -1
- package/dist/integrations/langchain.d.ts +0 -1
- package/dist/integrations/langchain.js +0 -1
- package/dist/integrations/llamaindex.d.ts +0 -1
- package/dist/integrations/llamaindex.js +0 -1
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +266 -0
- package/dist/mcp/handlers/extract.d.ts +6 -0
- package/dist/mcp/handlers/extract.js +102 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +61 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +31 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +63 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +3 -4
- package/dist/mcp/server.js +35 -1101
- package/dist/mcp/smart-router.d.ts +0 -1
- package/dist/mcp/smart-router.js +3 -1
- package/dist/types.d.ts +6 -1
- package/dist/types.js +0 -1
- package/package.json +3 -13
- package/dist/cache.d.ts.map +0 -1
- package/dist/cache.js.map +0 -1
- package/dist/cli-auth.d.ts.map +0 -1
- package/dist/cli-auth.js.map +0 -1
- package/dist/cli.bundle.cjs +0 -159248
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/core/actions.d.ts.map +0 -1
- package/dist/core/actions.js.map +0 -1
- package/dist/core/agent.d.ts.map +0 -1
- package/dist/core/agent.js.map +0 -1
- package/dist/core/answer.d.ts.map +0 -1
- package/dist/core/answer.js.map +0 -1
- package/dist/core/application-tracker.d.ts.map +0 -1
- package/dist/core/application-tracker.js.map +0 -1
- package/dist/core/apply.d.ts.map +0 -1
- package/dist/core/apply.js.map +0 -1
- package/dist/core/auto-extract.d.ts.map +0 -1
- package/dist/core/auto-extract.js.map +0 -1
- package/dist/core/auto-interact.d.ts.map +0 -1
- package/dist/core/auto-interact.js.map +0 -1
- package/dist/core/bm25-filter.d.ts.map +0 -1
- package/dist/core/bm25-filter.js.map +0 -1
- package/dist/core/branding.d.ts.map +0 -1
- package/dist/core/branding.js.map +0 -1
- package/dist/core/browser-fetch.d.ts.map +0 -1
- package/dist/core/browser-fetch.js.map +0 -1
- package/dist/core/browser-pool.d.ts.map +0 -1
- package/dist/core/browser-pool.js.map +0 -1
- package/dist/core/budget.d.ts.map +0 -1
- package/dist/core/budget.js.map +0 -1
- package/dist/core/cache.d.ts.map +0 -1
- package/dist/core/cache.js.map +0 -1
- package/dist/core/cf-worker-proxy.d.ts.map +0 -1
- package/dist/core/cf-worker-proxy.js.map +0 -1
- package/dist/core/challenge-detection.d.ts.map +0 -1
- package/dist/core/challenge-detection.js.map +0 -1
- package/dist/core/change-tracking.d.ts.map +0 -1
- package/dist/core/change-tracking.js.map +0 -1
- package/dist/core/chunker.d.ts.map +0 -1
- package/dist/core/chunker.js.map +0 -1
- package/dist/core/chunking.d.ts.map +0 -1
- package/dist/core/chunking.js.map +0 -1
- package/dist/core/cloak-fetch.d.ts.map +0 -1
- package/dist/core/cloak-fetch.js.map +0 -1
- package/dist/core/content-pruner.d.ts.map +0 -1
- package/dist/core/content-pruner.js.map +0 -1
- package/dist/core/crawl-checkpoint.d.ts.map +0 -1
- package/dist/core/crawl-checkpoint.js.map +0 -1
- package/dist/core/crawler.d.ts.map +0 -1
- package/dist/core/crawler.js.map +0 -1
- package/dist/core/cycle-fetch.d.ts.map +0 -1
- package/dist/core/cycle-fetch.js.map +0 -1
- package/dist/core/deep-fetch.d.ts.map +0 -1
- package/dist/core/deep-fetch.js.map +0 -1
- package/dist/core/design-analysis.d.ts.map +0 -1
- package/dist/core/design-analysis.js.map +0 -1
- package/dist/core/design-compare.d.ts.map +0 -1
- package/dist/core/design-compare.js.map +0 -1
- package/dist/core/diff.d.ts.map +0 -1
- package/dist/core/diff.js.map +0 -1
- package/dist/core/dns-cache.d.ts.map +0 -1
- package/dist/core/dns-cache.js.map +0 -1
- package/dist/core/documents.d.ts.map +0 -1
- package/dist/core/documents.js.map +0 -1
- package/dist/core/domain-extractors.d.ts.map +0 -1
- package/dist/core/domain-extractors.js.map +0 -1
- package/dist/core/extract-inline.d.ts.map +0 -1
- package/dist/core/extract-inline.js.map +0 -1
- package/dist/core/extract-listings.d.ts.map +0 -1
- package/dist/core/extract-listings.js.map +0 -1
- package/dist/core/extract.d.ts.map +0 -1
- package/dist/core/extract.js.map +0 -1
- package/dist/core/fetcher.d.ts.map +0 -1
- package/dist/core/fetcher.js.map +0 -1
- package/dist/core/google-cache.d.ts.map +0 -1
- package/dist/core/google-cache.js.map +0 -1
- package/dist/core/hotel-search.d.ts.map +0 -1
- package/dist/core/hotel-search.js.map +0 -1
- package/dist/core/http-fetch.d.ts.map +0 -1
- package/dist/core/http-fetch.js.map +0 -1
- package/dist/core/human.d.ts.map +0 -1
- package/dist/core/human.js.map +0 -1
- package/dist/core/jobs.d.ts.map +0 -1
- package/dist/core/jobs.js.map +0 -1
- package/dist/core/json-ld.d.ts.map +0 -1
- package/dist/core/json-ld.js.map +0 -1
- package/dist/core/llm-extract.d.ts.map +0 -1
- package/dist/core/llm-extract.js.map +0 -1
- package/dist/core/map.d.ts.map +0 -1
- package/dist/core/map.js.map +0 -1
- package/dist/core/markdown.d.ts.map +0 -1
- package/dist/core/markdown.js.map +0 -1
- package/dist/core/metadata.d.ts.map +0 -1
- package/dist/core/metadata.js.map +0 -1
- package/dist/core/paginate.d.ts.map +0 -1
- package/dist/core/paginate.js.map +0 -1
- package/dist/core/pdf.d.ts.map +0 -1
- package/dist/core/pdf.js.map +0 -1
- package/dist/core/peel-tls.d.ts.map +0 -1
- package/dist/core/peel-tls.js.map +0 -1
- package/dist/core/pipeline.d.ts.map +0 -1
- package/dist/core/pipeline.js.map +0 -1
- package/dist/core/profiles.d.ts.map +0 -1
- package/dist/core/profiles.js.map +0 -1
- package/dist/core/quick-answer.d.ts.map +0 -1
- package/dist/core/quick-answer.js.map +0 -1
- package/dist/core/rate-governor.d.ts.map +0 -1
- package/dist/core/rate-governor.js.map +0 -1
- package/dist/core/readability.d.ts.map +0 -1
- package/dist/core/readability.js.map +0 -1
- package/dist/core/research.d.ts.map +0 -1
- package/dist/core/research.js.map +0 -1
- package/dist/core/schema-extraction.d.ts.map +0 -1
- package/dist/core/schema-extraction.js.map +0 -1
- package/dist/core/schema-postprocess.d.ts.map +0 -1
- package/dist/core/schema-postprocess.js.map +0 -1
- package/dist/core/schema-templates.d.ts.map +0 -1
- package/dist/core/schema-templates.js.map +0 -1
- package/dist/core/screenshot.d.ts.map +0 -1
- package/dist/core/screenshot.js.map +0 -1
- package/dist/core/search-fallback.d.ts.map +0 -1
- package/dist/core/search-fallback.js.map +0 -1
- package/dist/core/search-provider.d.ts.map +0 -1
- package/dist/core/search-provider.js.map +0 -1
- package/dist/core/site-search.d.ts.map +0 -1
- package/dist/core/site-search.js.map +0 -1
- package/dist/core/sitemap.d.ts.map +0 -1
- package/dist/core/sitemap.js.map +0 -1
- package/dist/core/stealth-patches.d.ts.map +0 -1
- package/dist/core/stealth-patches.js.map +0 -1
- package/dist/core/stemmer.d.ts.map +0 -1
- package/dist/core/stemmer.js.map +0 -1
- package/dist/core/strategies.d.ts.map +0 -1
- package/dist/core/strategies.js.map +0 -1
- package/dist/core/strategy-hooks.d.ts.map +0 -1
- package/dist/core/strategy-hooks.js.map +0 -1
- package/dist/core/summarize.d.ts.map +0 -1
- package/dist/core/summarize.js.map +0 -1
- package/dist/core/synonyms.d.ts.map +0 -1
- package/dist/core/synonyms.js.map +0 -1
- package/dist/core/table-format.d.ts.map +0 -1
- package/dist/core/table-format.js.map +0 -1
- package/dist/core/timing.d.ts.map +0 -1
- package/dist/core/timing.js.map +0 -1
- package/dist/core/user-agents.d.ts.map +0 -1
- package/dist/core/user-agents.js.map +0 -1
- package/dist/core/watch-manager.d.ts.map +0 -1
- package/dist/core/watch-manager.js.map +0 -1
- package/dist/core/watch.d.ts.map +0 -1
- package/dist/core/watch.js.map +0 -1
- package/dist/core/youtube.d.ts.map +0 -1
- package/dist/core/youtube.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/integrations/index.d.ts.map +0 -1
- package/dist/integrations/index.js.map +0 -1
- package/dist/integrations/langchain.d.ts.map +0 -1
- package/dist/integrations/langchain.js.map +0 -1
- package/dist/integrations/llamaindex.d.ts.map +0 -1
- package/dist/integrations/llamaindex.js.map +0 -1
- package/dist/mcp/server.d.ts.map +0 -1
- package/dist/mcp/server.js.map +0 -1
- package/dist/mcp/smart-router.d.ts.map +0 -1
- package/dist/mcp/smart-router.js.map +0 -1
- package/dist/server/app.d.ts +0 -15
- package/dist/server/app.d.ts.map +0 -1
- package/dist/server/app.js +0 -350
- package/dist/server/app.js.map +0 -1
- package/dist/server/auth-store.d.ts +0 -28
- package/dist/server/auth-store.d.ts.map +0 -1
- package/dist/server/auth-store.js +0 -89
- package/dist/server/auth-store.js.map +0 -1
- package/dist/server/email-service.d.ts +0 -22
- package/dist/server/email-service.d.ts.map +0 -1
- package/dist/server/email-service.js +0 -80
- package/dist/server/email-service.js.map +0 -1
- package/dist/server/job-queue.d.ts +0 -93
- package/dist/server/job-queue.d.ts.map +0 -1
- package/dist/server/job-queue.js +0 -146
- package/dist/server/job-queue.js.map +0 -1
- package/dist/server/logger.d.ts +0 -11
- package/dist/server/logger.d.ts.map +0 -1
- package/dist/server/logger.js +0 -38
- package/dist/server/logger.js.map +0 -1
- package/dist/server/middleware/auth.d.ts +0 -29
- package/dist/server/middleware/auth.d.ts.map +0 -1
- package/dist/server/middleware/auth.js +0 -222
- package/dist/server/middleware/auth.js.map +0 -1
- package/dist/server/middleware/rate-limit.d.ts +0 -25
- package/dist/server/middleware/rate-limit.d.ts.map +0 -1
- package/dist/server/middleware/rate-limit.js +0 -168
- package/dist/server/middleware/rate-limit.js.map +0 -1
- package/dist/server/middleware/url-validator.d.ts +0 -16
- package/dist/server/middleware/url-validator.d.ts.map +0 -1
- package/dist/server/middleware/url-validator.js +0 -187
- package/dist/server/middleware/url-validator.js.map +0 -1
- package/dist/server/openapi.yaml +0 -4944
- package/dist/server/pg-auth-store.d.ts +0 -133
- package/dist/server/pg-auth-store.d.ts.map +0 -1
- package/dist/server/pg-auth-store.js +0 -473
- package/dist/server/pg-auth-store.js.map +0 -1
- package/dist/server/pg-job-queue.d.ts +0 -60
- package/dist/server/pg-job-queue.d.ts.map +0 -1
- package/dist/server/pg-job-queue.js +0 -365
- package/dist/server/pg-job-queue.js.map +0 -1
- package/dist/server/premium/domain-intel.d.ts +0 -17
- package/dist/server/premium/domain-intel.d.ts.map +0 -1
- package/dist/server/premium/domain-intel.js +0 -134
- package/dist/server/premium/domain-intel.js.map +0 -1
- package/dist/server/premium/index.d.ts +0 -18
- package/dist/server/premium/index.d.ts.map +0 -1
- package/dist/server/premium/index.js +0 -36
- package/dist/server/premium/index.js.map +0 -1
- package/dist/server/premium/swr-cache.d.ts +0 -15
- package/dist/server/premium/swr-cache.d.ts.map +0 -1
- package/dist/server/premium/swr-cache.js +0 -35
- package/dist/server/premium/swr-cache.js.map +0 -1
- package/dist/server/routes/activity.d.ts +0 -7
- package/dist/server/routes/activity.d.ts.map +0 -1
- package/dist/server/routes/activity.js +0 -68
- package/dist/server/routes/activity.js.map +0 -1
- package/dist/server/routes/agent.d.ts +0 -16
- package/dist/server/routes/agent.d.ts.map +0 -1
- package/dist/server/routes/agent.js +0 -247
- package/dist/server/routes/agent.js.map +0 -1
- package/dist/server/routes/answer.d.ts +0 -6
- package/dist/server/routes/answer.d.ts.map +0 -1
- package/dist/server/routes/answer.js +0 -133
- package/dist/server/routes/answer.js.map +0 -1
- package/dist/server/routes/ask.d.ts +0 -23
- package/dist/server/routes/ask.d.ts.map +0 -1
- package/dist/server/routes/ask.js +0 -119
- package/dist/server/routes/ask.js.map +0 -1
- package/dist/server/routes/batch.d.ts +0 -7
- package/dist/server/routes/batch.d.ts.map +0 -1
- package/dist/server/routes/batch.js +0 -412
- package/dist/server/routes/batch.js.map +0 -1
- package/dist/server/routes/cli-usage.d.ts +0 -7
- package/dist/server/routes/cli-usage.d.ts.map +0 -1
- package/dist/server/routes/cli-usage.js +0 -121
- package/dist/server/routes/cli-usage.js.map +0 -1
- package/dist/server/routes/compat.d.ts +0 -24
- package/dist/server/routes/compat.d.ts.map +0 -1
- package/dist/server/routes/compat.js +0 -653
- package/dist/server/routes/compat.js.map +0 -1
- package/dist/server/routes/deep-fetch.d.ts +0 -9
- package/dist/server/routes/deep-fetch.d.ts.map +0 -1
- package/dist/server/routes/deep-fetch.js +0 -50
- package/dist/server/routes/deep-fetch.js.map +0 -1
- package/dist/server/routes/demo.d.ts +0 -25
- package/dist/server/routes/demo.d.ts.map +0 -1
- package/dist/server/routes/demo.js +0 -434
- package/dist/server/routes/demo.js.map +0 -1
- package/dist/server/routes/extract.d.ts +0 -9
- package/dist/server/routes/extract.d.ts.map +0 -1
- package/dist/server/routes/extract.js +0 -150
- package/dist/server/routes/extract.js.map +0 -1
- package/dist/server/routes/fetch.d.ts +0 -8
- package/dist/server/routes/fetch.d.ts.map +0 -1
- package/dist/server/routes/fetch.js +0 -988
- package/dist/server/routes/fetch.js.map +0 -1
- package/dist/server/routes/health.d.ts +0 -8
- package/dist/server/routes/health.d.ts.map +0 -1
- package/dist/server/routes/health.js +0 -20
- package/dist/server/routes/health.js.map +0 -1
- package/dist/server/routes/jobs.d.ts +0 -8
- package/dist/server/routes/jobs.d.ts.map +0 -1
- package/dist/server/routes/jobs.js +0 -487
- package/dist/server/routes/jobs.js.map +0 -1
- package/dist/server/routes/mcp.d.ts +0 -18
- package/dist/server/routes/mcp.d.ts.map +0 -1
- package/dist/server/routes/mcp.js +0 -1260
- package/dist/server/routes/mcp.js.map +0 -1
- package/dist/server/routes/oauth.d.ts +0 -10
- package/dist/server/routes/oauth.d.ts.map +0 -1
- package/dist/server/routes/oauth.js +0 -334
- package/dist/server/routes/oauth.js.map +0 -1
- package/dist/server/routes/quick-answer.d.ts +0 -9
- package/dist/server/routes/quick-answer.d.ts.map +0 -1
- package/dist/server/routes/quick-answer.js +0 -93
- package/dist/server/routes/quick-answer.js.map +0 -1
- package/dist/server/routes/screenshot.d.ts +0 -23
- package/dist/server/routes/screenshot.d.ts.map +0 -1
- package/dist/server/routes/screenshot.js +0 -819
- package/dist/server/routes/screenshot.js.map +0 -1
- package/dist/server/routes/search.d.ts +0 -7
- package/dist/server/routes/search.d.ts.map +0 -1
- package/dist/server/routes/search.js +0 -312
- package/dist/server/routes/search.js.map +0 -1
- package/dist/server/routes/session.d.ts +0 -16
- package/dist/server/routes/session.d.ts.map +0 -1
- package/dist/server/routes/session.js +0 -278
- package/dist/server/routes/session.js.map +0 -1
- package/dist/server/routes/stats.d.ts +0 -7
- package/dist/server/routes/stats.d.ts.map +0 -1
- package/dist/server/routes/stats.js +0 -65
- package/dist/server/routes/stats.js.map +0 -1
- package/dist/server/routes/stripe.d.ts +0 -16
- package/dist/server/routes/stripe.d.ts.map +0 -1
- package/dist/server/routes/stripe.js +0 -283
- package/dist/server/routes/stripe.js.map +0 -1
- package/dist/server/routes/users.d.ts +0 -9
- package/dist/server/routes/users.d.ts.map +0 -1
- package/dist/server/routes/users.js +0 -1211
- package/dist/server/routes/users.js.map +0 -1
- package/dist/server/routes/watch.d.ts +0 -16
- package/dist/server/routes/watch.d.ts.map +0 -1
- package/dist/server/routes/watch.js +0 -257
- package/dist/server/routes/watch.js.map +0 -1
- package/dist/server/routes/webhooks.d.ts +0 -16
- package/dist/server/routes/webhooks.d.ts.map +0 -1
- package/dist/server/routes/webhooks.js +0 -74
- package/dist/server/routes/webhooks.js.map +0 -1
- package/dist/server/routes/youtube.d.ts +0 -7
- package/dist/server/routes/youtube.d.ts.map +0 -1
- package/dist/server/routes/youtube.js +0 -93
- package/dist/server/routes/youtube.js.map +0 -1
- package/dist/server/sentry.d.ts +0 -14
- package/dist/server/sentry.d.ts.map +0 -1
- package/dist/server/sentry.js +0 -39
- package/dist/server/sentry.js.map +0 -1
- package/dist/server/types.d.ts +0 -16
- package/dist/server/types.d.ts.map +0 -1
- package/dist/server/types.js +0 -8
- package/dist/server/types.js.map +0 -1
- package/dist/server/utils/response.d.ts +0 -45
- package/dist/server/utils/response.d.ts.map +0 -1
- package/dist/server/utils/response.js +0 -70
- package/dist/server/utils/response.js.map +0 -1
- package/dist/server/utils/sse.d.ts +0 -23
- package/dist/server/utils/sse.d.ts.map +0 -1
- package/dist/server/utils/sse.js +0 -39
- package/dist/server/utils/sse.js.map +0 -1
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js.map +0 -1
|
@@ -1,653 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Firecrawl API Compatibility Layer
|
|
3
|
-
*
|
|
4
|
-
* Drop-in replacement for Firecrawl's API - users can switch by ONLY changing the base URL.
|
|
5
|
-
* This is our killer acquisition feature.
|
|
6
|
-
*
|
|
7
|
-
* NOTE: Error responses in this file intentionally use Firecrawl's format:
|
|
8
|
-
* { success: false, error: "Human-readable message" }
|
|
9
|
-
* This is required for Firecrawl drop-in compatibility and differs from the
|
|
10
|
-
* standard WebPeel API error format: { error: "error_code", message: "description" }.
|
|
11
|
-
* Do NOT change this format — it would break Firecrawl-compatible integrations.
|
|
12
|
-
*
|
|
13
|
-
* Implements Firecrawl endpoints:
|
|
14
|
-
* - POST /v1/scrape
|
|
15
|
-
* - POST /v2/scrape (v2 with formats: ["screenshot"] support)
|
|
16
|
-
* - POST /v1/crawl
|
|
17
|
-
* - GET /v1/crawl/:id
|
|
18
|
-
* - POST /v1/search
|
|
19
|
-
* - POST /v1/map
|
|
20
|
-
*/
|
|
21
|
-
import { Router } from 'express';
|
|
22
|
-
import { peel } from '../../index.js';
|
|
23
|
-
import { crawl } from '../../core/crawler.js';
|
|
24
|
-
import { mapDomain } from '../../core/map.js';
|
|
25
|
-
import { takeScreenshot } from '../../core/screenshot.js';
|
|
26
|
-
import { normalizeActions } from '../../core/actions.js';
|
|
27
|
-
import { extractInlineJson } from '../../core/extract-inline.js';
|
|
28
|
-
import { validateUrlForSSRF, SSRFError } from '../middleware/url-validator.js';
|
|
29
|
-
// LLM providers accepted for inline (BYOK) JSON extraction in the /v1/scrape
// handler; a request naming any other provider skips the LLM extraction step.
const VALID_LLM_PROVIDERS = ['openai', 'anthropic', 'google'];
|
|
30
|
-
/**
 * Convert a Firecrawl-style `actions` payload into our PageAction format.
 *
 * The conversion itself is done by the shared normalizeActions helper so
 * that every API surface normalizes actions the same way.
 *
 * @param {*} actions - Value taken from the request body.
 * @returns {*} The result of normalizeActions(actions) when `actions` is an
 *   array; `undefined` for anything else (missing, null, non-array).
 */
function mapFirecrawlActions(actions) {
    // Array.isArray already rejects null/undefined, so one check covers both guards.
    return Array.isArray(actions) ? normalizeActions(actions) : undefined;
}
|
|
40
|
-
export function createCompatRouter(jobQueue) {
|
|
41
|
-
const router = Router();
|
|
42
|
-
/**
 * POST /v1/scrape - Firecrawl's main scrape endpoint
 *
 * Maps the Firecrawl request body onto peel() options and answers with
 * `{ success: true, data }`. `data` always carries `markdown` and `metadata`,
 * plus one key per additional entry requested in `formats` (html, rawHtml,
 * links, screenshot, images, json, branding, summary).
 *
 * Responses:
 * - 400 when `url` is missing or not a string, when `formats` is not an
 *   array, or when validateUrlForSSRF rejects the URL with an SSRFError.
 * - 500 for any other failure; the error is logged and a generic message is
 *   returned.
 *
 * When `stream: true` is requested the headers are flushed before peel() runs,
 * so the status line is already sent; a later failure is then reported only
 * through `success: false` in the body.
 */
router.post('/v1/scrape', async (req, res) => {
    // Express's res.status()/res.json() set headers, which Node rejects once
    // the headers have been flushed (streaming branch below). In that case
    // write the serialized body directly instead.
    const sendJson = (statusCode, body) => {
        if (res.headersSent) {
            res.end(JSON.stringify(body));
            return;
        }
        res.status(statusCode).json(body);
    };
    try {
        const {
            url,
            formats = ['markdown'],
            onlyMainContent = true, // Firecrawl defaults to true
            includeTags,
            excludeTags,
            waitFor,
            timeout,
            actions,
            headers,
            location,
            // Inline extraction (BYOK)
            extract: extractParam,
            llmProvider,
            llmApiKey,
            llmModel,
            stream,
        } = req.body;
        // Validate URL
        if (!url || typeof url !== 'string') {
            res.status(400).json({
                success: false,
                error: 'Missing or invalid "url" parameter',
            });
            return;
        }
        // SECURITY: Validate URL to prevent SSRF attacks
        try {
            validateUrlForSSRF(url);
        }
        catch (error) {
            if (error instanceof SSRFError) {
                // NOTE(review): this payload puts an error code in `error` and adds
                // `message`, which differs from the Firecrawl format described in
                // this file's header comment. Left unchanged because clients may
                // already match on 'blocked_url' — confirm before aligning.
                res.status(400).json({
                    success: false,
                    error: 'blocked_url',
                    message: 'Cannot fetch localhost, private networks, or non-HTTP URLs',
                });
                return;
            }
            throw error;
        }
        // `formats` is used with .includes()/.find() below; reject anything that
        // is not an array up front instead of failing with a 500 after scraping.
        if (!Array.isArray(formats)) {
            res.status(400).json({
                success: false,
                error: 'Invalid "formats" parameter: expected an array',
            });
            return;
        }
        // Determine if we need to render based on Firecrawl params
        const needsRender = waitFor !== undefined || actions !== undefined;
        // Map Firecrawl parameters to our PeelOptions
        // onlyMainContent=true (default) → raw=false (use smart extraction)
        // onlyMainContent=false → raw=true (return everything)
        const options = {
            render: needsRender,
            wait: waitFor,
            timeout: timeout || 30000,
            stream: stream === true,
            includeTags: Array.isArray(includeTags) ? includeTags : undefined,
            excludeTags: Array.isArray(excludeTags) ? excludeTags : undefined,
            raw: onlyMainContent === false,
            actions: mapFirecrawlActions(actions),
            headers,
            screenshot: formats.includes('screenshot'),
            images: formats.includes('images'),
            format: 'markdown', // Always use markdown as base
        };
        // If location is provided, map it
        if (location) {
            options.location = {
                country: location.country,
                languages: location.languages,
            };
        }
        if (options.stream) {
            res.setHeader('X-Stream', 'true');
            // The content type can no longer be set after the flush, so declare it now.
            res.setHeader('Content-Type', 'application/json; charset=utf-8');
            if (typeof res.flushHeaders === 'function') {
                res.flushHeaders();
            }
        }
        // Execute peel
        const result = await peel(url, options);
        // Build Firecrawl-compatible response
        const data = {
            markdown: result.content,
            metadata: {
                ...result.metadata,
                title: result.title,
                description: result.metadata?.description || '',
                language: result.metadata?.language || 'en',
                sourceURL: result.url,
                statusCode: 200,
            },
        };
        // Add optional formats
        if (formats.includes('html')) {
            // Re-fetch with HTML format if requested
            const htmlResult = await peel(url, { ...options, format: 'html' });
            data.html = htmlResult.content;
        }
        if (formats.includes('rawHtml')) {
            const rawResult = await peel(url, { ...options, format: 'html', raw: true });
            data.rawHtml = rawResult.content;
        }
        if (formats.includes('links')) {
            data.links = result.links;
        }
        if (formats.includes('screenshot') && result.screenshot) {
            data.screenshot = `data:image/png;base64,${result.screenshot}`;
        }
        if (formats.includes('images') && result.images) {
            data.images = result.images;
        }
        // --- Inline JSON extraction via LLM (BYOK) ---
        // Resolve extract from: (1) top-level extract param, (2) formats array object
        let resolvedExtract;
        if (extractParam && typeof extractParam === 'object' && (extractParam.schema || extractParam.prompt)) {
            resolvedExtract = extractParam;
        }
        if (!resolvedExtract) {
            const jsonFormatObj = formats.find((f) => typeof f === 'object' && f !== null && f.type === 'json' && (f.schema || f.prompt));
            if (jsonFormatObj) {
                resolvedExtract = { schema: jsonFormatObj.schema, prompt: jsonFormatObj.prompt };
            }
        }
        // Only a non-empty string key is usable; any other value would crash on
        // .trim(), so it is treated like a missing key.
        const hasLlmKey = typeof llmApiKey === 'string' && llmApiKey !== '';
        if (resolvedExtract && hasLlmKey && llmProvider && VALID_LLM_PROVIDERS.includes(llmProvider)) {
            const extractResult = await extractInlineJson(result.content, {
                schema: resolvedExtract.schema,
                prompt: resolvedExtract.prompt,
                llmProvider: llmProvider,
                llmApiKey: llmApiKey.trim(),
                llmModel,
            });
            data.json = extractResult.data;
            data.extractTokensUsed = extractResult.tokensUsed;
        }
        else if (formats.includes('json')) {
            // Fallback: return structured metadata as JSON (no LLM)
            data.json = result.extracted || result.metadata;
        }
        if (formats.includes('branding')) {
            data.branding = result.branding;
        }
        if (formats.includes('summary')) {
            data.summary = result.summary;
        }
        sendJson(200, {
            success: true,
            data,
        });
    }
    catch (error) {
        console.error('Firecrawl /v1/scrape error:', error);
        sendJson(500, {
            success: false,
            error: 'An unexpected error occurred. Please try again.',
        });
    }
});
|
|
187
|
-
/**
 * POST /v1/crawl - Firecrawl-compatible crawl endpoint (asynchronous).
 *
 * Validates the target URL (presence, syntax, SSRF guard), creates a 'crawl'
 * job in the job queue, schedules the crawl with setImmediate(), and responds
 * right away with `{ success: true, id }`. Progress, results and failures are
 * written to the job through jobQueue.updateJob().
 *
 * Body fields read: url, limit (default 100), maxDepth (default 3),
 * includePaths, excludePaths, scrapeOptions, webhook.
 *
 * Responses: 400 for a missing, malformed or blocked URL; 500 for any
 * unexpected error raised before the job is scheduled.
 */
router.post('/v1/crawl', async (req, res) => {
    try {
        const { url, limit = 100, maxDepth = 3, includePaths = [], excludePaths = [], scrapeOptions = {}, webhook, } = req.body;
        // Validate URL
        if (!url || typeof url !== 'string') {
            res.status(400).json({
                success: false,
                error: 'Missing or invalid "url" parameter',
            });
            return;
        }
        try {
            new URL(url);
        }
        catch {
            res.status(400).json({
                success: false,
                error: 'Invalid URL format',
            });
            return;
        }
        // SECURITY: Validate URL to prevent SSRF attacks
        try {
            validateUrlForSSRF(url);
        }
        catch (error) {
            if (error instanceof SSRFError) {
                res.status(400).json({
                    success: false,
                    error: 'blocked_url',
                    message: 'Cannot fetch localhost, private networks, or non-HTTP URLs',
                });
                return;
            }
            throw error;
        }
        // Create job (with owner for authorization)
        const ownerId = req.auth?.keyInfo?.accountId;
        const job = await jobQueue.createJob('crawl', webhook, ownerId);
        // Only a plain object is meaningful as scrape options; anything else
        // (null, string, array) is ignored instead of being spread into the options.
        const callerScrapeOptions = scrapeOptions !== null && typeof scrapeOptions === 'object' && !Array.isArray(scrapeOptions)
            ? scrapeOptions
            : {};
        // Start crawl in background
        setImmediate(async () => {
            try {
                jobQueue.updateJob(job.id, { status: 'processing' });
                // Build crawl options
                const crawlOptions = {
                    // SECURITY: caller-supplied options are spread FIRST so they can
                    // never override the server-controlled fields below (notably
                    // `tier`, which comes from the authenticated request).
                    ...callerScrapeOptions,
                    maxPages: limit,
                    maxDepth,
                    tier: req.auth?.tier,
                    onProgress: (progress) => {
                        const total = progress.crawled + progress.queued;
                        jobQueue.updateJob(job.id, {
                            total,
                            completed: progress.crawled,
                            creditsUsed: progress.crawled,
                        });
                    },
                };
                // Add path filters if provided (non-array values are ignored)
                if (Array.isArray(includePaths) && includePaths.length > 0) {
                    crawlOptions.includePatterns = includePaths;
                }
                if (Array.isArray(excludePaths) && excludePaths.length > 0) {
                    crawlOptions.excludePatterns = excludePaths;
                }
                // Run crawl
                const results = await crawl(url, crawlOptions);
                // Map results to Firecrawl format
                const firecrawlResults = results.map((r) => ({
                    url: r.url,
                    markdown: r.markdown,
                    metadata: {
                        title: r.title,
                        description: '',
                        sourceURL: r.url,
                        statusCode: 200,
                    },
                    links: r.links,
                }));
                // Update job with results
                jobQueue.updateJob(job.id, {
                    status: 'completed',
                    data: firecrawlResults,
                    total: results.length,
                    completed: results.length,
                    creditsUsed: results.length,
                });
            }
            catch (error) {
                // `error` may be any thrown value; avoid a second throw on null/undefined.
                jobQueue.updateJob(job.id, {
                    status: 'failed',
                    error: error?.message || 'Unknown error',
                });
            }
        });
        // Return job ID immediately (Firecrawl format)
        res.json({
            success: true,
            id: job.id,
        });
    }
    catch (error) {
        console.error('Firecrawl /v1/crawl error:', error);
        res.status(500).json({
            success: false,
            error: 'An unexpected error occurred. Please try again.',
        });
    }
});
|
|
302
|
-
/**
 * GET /v1/crawl/:id - Get crawl job status (Firecrawl format).
 *
 * Looks the job up in the job queue and returns its status, counters,
 * expiry and collected data. The internal 'processing' status is reported
 * as 'scraping'; every other status is passed through unchanged.
 *
 * Responses: 404 when the job does not exist or belongs to a different
 * account (the same payload is used for both so job ids are not disclosed),
 * 500 for unexpected errors.
 */
router.get('/v1/crawl/:id', async (req, res) => {
    try {
        const id = req.params.id;
        const job = await jobQueue.getJob(id);
        if (!job) {
            res.status(404).json({
                success: false,
                error: 'Job not found',
            });
            return;
        }
        // SECURITY: Verify the requester owns this job.
        // A job that has an owner is only visible to that owner; a requester
        // without an account id must not bypass the check. Jobs created
        // without an owner remain readable by anyone who knows the id.
        const requestOwnerId = req.auth?.keyInfo?.accountId;
        if (job.ownerId && job.ownerId !== requestOwnerId) {
            res.status(404).json({
                success: false,
                error: 'Job not found',
            });
            return;
        }
        // Map our job status to Firecrawl's status format
        const firecrawlStatus = job.status === 'processing' ? 'scraping' : job.status;
        res.json({
            success: true,
            status: firecrawlStatus,
            completed: job.completed || 0,
            total: job.total || 0,
            creditsUsed: job.creditsUsed || 0,
            expiresAt: job.expiresAt,
            data: job.data || [],
        });
    }
    catch (error) {
        console.error('Firecrawl GET /v1/crawl/:id error:', error);
        res.status(500).json({
            success: false,
            error: 'An unexpected error occurred. Please try again.',
        });
    }
});
|
|
345
|
-
/**
 * POST /v1/search - Firecrawl-compatible search endpoint.
 *
 * Fetches the DuckDuckGo HTML results page for `query`, extracts up to
 * `limit` (default 5) results, then scrapes every result with peel() in
 * parallel. A result whose scrape fails is still returned, with empty
 * markdown and the error message in its metadata.
 *
 * Body fields read: query, limit, scrapeOptions (spread into the peel()
 * options after the `format`/`timeout` defaults, so it can override them).
 *
 * Responses: 400 for a missing/non-string query, 500 for unexpected errors
 * (including a non-OK response from the search request).
 */
router.post('/v1/search', async (req, res) => {
    try {
        const { query, limit = 5, scrapeOptions = {}, } = req.body;
        // Validate query
        if (!query || typeof query !== 'string') {
            res.status(400).json({
                success: false,
                error: 'Missing or invalid "query" parameter',
            });
            return;
        }
        // Use our search route logic (DuckDuckGo HTML scraping)
        const { fetch: undiciFetch } = await import('undici');
        const { load } = await import('cheerio');
        const searchUrl = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`;
        const response = await undiciFetch(searchUrl, {
            headers: {
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
            },
        });
        if (!response.ok) {
            throw new Error(`Search failed: HTTP ${response.status}`);
        }
        const html = await response.text();
        const $ = load(html);
        const results = [];
        $('.result').each((_i, elem) => {
            if (results.length >= limit) {
                // Returning false stops the cheerio iteration early.
                return false;
            }
            const $result = $(elem);
            const title = $result.find('.result__title').text().trim();
            const rawUrl = $result.find('.result__a').attr('href') || '';
            const snippet = $result.find('.result__snippet').text().trim();
            if (!title || !rawUrl) {
                return;
            }
            // Extract actual URL from DuckDuckGo redirect
            let url = rawUrl;
            try {
                const ddgUrl = new URL(rawUrl, 'https://duckduckgo.com');
                const uddg = ddgUrl.searchParams.get('uddg');
                if (uddg) {
                    // searchParams.get() already percent-decodes the value; decoding
                    // it a second time would corrupt targets that contain escaped
                    // reserved characters (%2F, %3F, %26, ...).
                    url = uddg;
                }
            }
            catch (e) {
                if (process.env.DEBUG)
                    console.debug('[webpeel]', 'ddg url parse failed:', e instanceof Error ? e.message : e);
            }
            // Validate URL
            try {
                const parsed = new URL(url);
                if (!['http:', 'https:'].includes(parsed.protocol)) {
                    return;
                }
                // SECURITY: apply the same SSRF guard the other endpoints use
                // before this URL is handed to peel(); blocked results are skipped.
                validateUrlForSSRF(parsed.href);
                url = parsed.href;
            }
            catch (e) {
                if (process.env.DEBUG)
                    console.debug('[webpeel]', 'search result skipped:', e instanceof Error ? e.message : e);
                return;
            }
            results.push({ title, url, snippet });
        });
        // Scrape every result; a failed scrape degrades to a basic entry
        const firecrawlResults = await Promise.all(results.map(async (result) => {
            try {
                // Scrape the URL with provided options
                const peelResult = await peel(result.url, {
                    format: 'markdown',
                    timeout: 10000,
                    ...scrapeOptions,
                });
                return {
                    url: result.url,
                    markdown: peelResult.content,
                    metadata: {
                        title: peelResult.title || result.title,
                        description: result.snippet,
                        sourceURL: result.url,
                        statusCode: 200,
                        ...peelResult.metadata,
                    },
                };
            }
            catch (error) {
                // Return basic result if scraping fails
                return {
                    url: result.url,
                    markdown: '',
                    metadata: {
                        title: result.title,
                        description: result.snippet,
                        sourceURL: result.url,
                        error: error.message,
                    },
                };
            }
        }));
        res.json({
            success: true,
            data: firecrawlResults,
        });
    }
    catch (error) {
        console.error('Firecrawl /v1/search error:', error);
        res.status(500).json({
            success: false,
            error: 'An unexpected error occurred. Please try again.',
        });
    }
});
|
|
459
|
-
/**
 * POST /v1/map - Firecrawl-compatible map endpoint.
 *
 * Checks that `url` is a non-empty string, parses as a URL and passes the
 * SSRF guard, then calls mapDomain() with `limit` (default 5000) as
 * `maxUrls` and the optional `search` value. The discovered URLs are
 * returned as `links`.
 *
 * Responses: 400 for a missing, malformed or blocked URL; 500 for any
 * other error.
 */
router.post('/v1/map', async (req, res) => {
    // Sends a failure payload; `success: false` is always the first key.
    const reject = (status, details) => {
        res.status(status).json({ success: false, ...details });
    };
    try {
        const { url: target, limit: maxUrls = 5000, search: filter, } = req.body;
        // Presence / type check
        if (!(target && typeof target === 'string')) {
            reject(400, { error: 'Missing or invalid "url" parameter' });
            return;
        }
        // Syntax check
        let parseable = true;
        try {
            new URL(target);
        }
        catch {
            parseable = false;
        }
        if (!parseable) {
            reject(400, { error: 'Invalid URL format' });
            return;
        }
        // SECURITY: SSRF guard - only SSRFError maps to a 400, anything else
        // is rethrown and handled by the outer catch.
        try {
            validateUrlForSSRF(target);
        }
        catch (ssrfFailure) {
            if (!(ssrfFailure instanceof SSRFError)) {
                throw ssrfFailure;
            }
            reject(400, {
                error: 'blocked_url',
                message: 'Cannot fetch localhost, private networks, or non-HTTP URLs',
            });
            return;
        }
        // Discover URLs and answer in Firecrawl format
        const { urls } = await mapDomain(target, { maxUrls, search: filter });
        res.json({ success: true, links: urls });
    }
    catch (failure) {
        console.error('Firecrawl /v1/map error:', failure);
        reject(500, { error: 'An unexpected error occurred. Please try again.' });
    }
});
|
|
518
|
-
/**
 * POST /v2/scrape - Firecrawl v2-compatible scrape with screenshot support.
 *
 * When `formats` contains exactly one entry and it is "screenshot" or
 * "screenshot@fullPage", the request is served by takeScreenshot() and only
 * the screenshot (as a data URI) plus minimal metadata is returned.
 * Otherwise the page is fetched with peel() as markdown and the additional
 * formats requested (html, rawHtml, links, screenshot, images) are attached
 * to the response.
 *
 * Full-page capture is requested either with `fullPage: true` or with the
 * "screenshot@fullPage" format, in both the screenshot-only and the
 * mixed-format path.
 *
 * Responses: 400 for a missing, malformed or blocked URL; 500 for any
 * other error.
 */
router.post('/v2/scrape', async (req, res) => {
    try {
        const { url, formats = ['markdown'], onlyMainContent = true, includeTags, excludeTags, waitFor, timeout, actions, headers, location,
        // Screenshot-specific v2 options
        fullPage, width, height, screenshotFormat, quality, stream, } = req.body;
        // Validate URL
        if (!url || typeof url !== 'string') {
            res.status(400).json({
                success: false,
                error: 'Missing or invalid "url" parameter',
            });
            return;
        }
        // Reject syntactically invalid URLs with a 400, consistent with
        // /v1/crawl and /v1/map.
        try {
            new URL(url);
        }
        catch {
            res.status(400).json({
                success: false,
                error: 'Invalid URL format',
            });
            return;
        }
        // SECURITY: Validate URL to prevent SSRF attacks
        try {
            validateUrlForSSRF(url);
        }
        catch (error) {
            if (error instanceof SSRFError) {
                res.status(400).json({
                    success: false,
                    error: 'blocked_url',
                    message: 'Cannot fetch localhost, private networks, or non-HTTP URLs',
                });
                return;
            }
            throw error;
        }
        const wantsFullPageFormat = formats.includes('screenshot@fullPage');
        const wantsScreenshot = formats.includes('screenshot') || wantsFullPageFormat;
        // Full-page capture can be requested via the option or via the format name.
        const captureFullPage = fullPage === true || wantsFullPageFormat;
        // If screenshot-only request, use the dedicated screenshot function
        if (wantsScreenshot && formats.length === 1) {
            const result = await takeScreenshot(url, {
                fullPage: captureFullPage,
                width: typeof width === 'number' ? width : undefined,
                height: typeof height === 'number' ? height : undefined,
                format: screenshotFormat || 'png',
                quality: typeof quality === 'number' ? quality : undefined,
                waitFor: typeof waitFor === 'number' ? waitFor : undefined,
                timeout: typeof timeout === 'number' ? timeout : 30000,
                actions: mapFirecrawlActions(actions),
                headers,
            });
            res.json({
                success: true,
                data: {
                    screenshot: `data:${result.contentType};base64,${result.screenshot}`,
                    metadata: {
                        sourceURL: result.url,
                        statusCode: 200,
                        format: result.format,
                    },
                },
            });
            return;
        }
        // Otherwise, fall through to peel() like v1/scrape
        const needsRender = waitFor !== undefined || actions !== undefined || wantsScreenshot;
        const options = {
            render: needsRender,
            wait: waitFor,
            timeout: timeout || 30000,
            stream: stream === true,
            includeTags: Array.isArray(includeTags) ? includeTags : undefined,
            excludeTags: Array.isArray(excludeTags) ? excludeTags : undefined,
            raw: onlyMainContent === false,
            actions: mapFirecrawlActions(actions),
            headers,
            screenshot: wantsScreenshot,
            // Previously only `fullPage: true` was honored here, so a
            // "screenshot@fullPage" format mixed with other formats silently
            // produced a viewport-sized capture.
            screenshotFullPage: captureFullPage,
            images: formats.includes('images'),
            format: 'markdown',
        };
        if (location) {
            options.location = {
                country: location.country,
                languages: location.languages,
            };
        }
        if (options.stream) {
            // NOTE(review): headers are flushed here, before res.json()/res.status()
            // run below - confirm the framework allows writing the JSON body (and
            // the 500 fallback) once headers have already been sent.
            res.setHeader('X-Stream', 'true');
            if (typeof res.flushHeaders === 'function') {
                res.flushHeaders();
            }
        }
        const result = await peel(url, options);
        const data = {
            markdown: result.content,
            metadata: {
                title: result.title,
                description: result.metadata.description || '',
                language: 'en',
                sourceURL: result.url,
                statusCode: 200,
                // Scraped metadata is spread last and overrides the defaults above.
                ...result.metadata,
            },
        };
        // Re-fetch with HTML format if requested
        if (formats.includes('html')) {
            const htmlResult = await peel(url, { ...options, format: 'html' });
            data.html = htmlResult.content;
        }
        if (formats.includes('rawHtml')) {
            const rawResult = await peel(url, { ...options, format: 'html', raw: true });
            data.rawHtml = rawResult.content;
        }
        if (formats.includes('links')) {
            data.links = result.links;
        }
        if (wantsScreenshot && result.screenshot) {
            data.screenshot = `data:image/png;base64,${result.screenshot}`;
        }
        if (formats.includes('images') && result.images) {
            data.images = result.images;
        }
        res.json({
            success: true,
            data,
        });
    }
    catch (error) {
        console.error('Firecrawl /v2/scrape error:', error);
        res.status(500).json({
            success: false,
            error: 'An unexpected error occurred. Please try again.',
        });
    }
});
|
|
651
|
-
return router;
|
|
652
|
-
}
|
|
653
|
-
//# sourceMappingURL=compat.js.map
|