@iflow-mcp/jakeliume-webpeel 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +313 -0
- package/dist/cache.d.ts +30 -0
- package/dist/cache.js +139 -0
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +411 -0
- package/dist/cli/commands/doctor.d.ts +37 -0
- package/dist/cli/commands/doctor.js +371 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1345 -0
- package/dist/cli/commands/guide.d.ts +2 -0
- package/dist/cli/commands/guide.js +183 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +840 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/monitor.d.ts +12 -0
- package/dist/cli/commands/monitor.js +197 -0
- package/dist/cli/commands/observe.d.ts +12 -0
- package/dist/cli/commands/observe.js +158 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +282 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +1021 -0
- package/dist/cli/commands/setup.d.ts +13 -0
- package/dist/cli/commands/setup.js +244 -0
- package/dist/cli/commands/skill.d.ts +15 -0
- package/dist/cli/commands/skill.js +195 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +806 -0
- package/dist/cli-auth.d.ts +75 -0
- package/dist/cli-auth.js +369 -0
- package/dist/cli.d.ts +17 -0
- package/dist/cli.js +99 -0
- package/dist/core/actions.d.ts +69 -0
- package/dist/core/actions.js +495 -0
- package/dist/core/agent.d.ts +98 -0
- package/dist/core/agent.js +558 -0
- package/dist/core/answer.d.ts +42 -0
- package/dist/core/answer.js +395 -0
- package/dist/core/application-tracker.d.ts +84 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/apply.d.ts +162 -0
- package/dist/core/apply.js +816 -0
- package/dist/core/auth-detection.d.ts +35 -0
- package/dist/core/auth-detection.js +358 -0
- package/dist/core/auto-extract.d.ts +82 -0
- package/dist/core/auto-extract.js +604 -0
- package/dist/core/auto-interact.d.ts +23 -0
- package/dist/core/auto-interact.js +246 -0
- package/dist/core/bm25-filter.d.ts +66 -0
- package/dist/core/bm25-filter.js +288 -0
- package/dist/core/branding.d.ts +54 -0
- package/dist/core/branding.js +234 -0
- package/dist/core/browser-fetch.d.ts +323 -0
- package/dist/core/browser-fetch.js +1600 -0
- package/dist/core/browser-pool.d.ts +91 -0
- package/dist/core/browser-pool.js +550 -0
- package/dist/core/budget.d.ts +42 -0
- package/dist/core/budget.js +324 -0
- package/dist/core/business-intel.d.ts +47 -0
- package/dist/core/business-intel.js +279 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +121 -0
- package/dist/core/cf-worker-proxy.d.ts +32 -0
- package/dist/core/cf-worker-proxy.js +87 -0
- package/dist/core/challenge-detection.d.ts +26 -0
- package/dist/core/challenge-detection.js +468 -0
- package/dist/core/change-tracking.d.ts +75 -0
- package/dist/core/change-tracking.js +276 -0
- package/dist/core/chunker.d.ts +46 -0
- package/dist/core/chunker.js +249 -0
- package/dist/core/chunking.d.ts +42 -0
- package/dist/core/chunking.js +181 -0
- package/dist/core/circuit-breaker.d.ts +44 -0
- package/dist/core/circuit-breaker.js +85 -0
- package/dist/core/content-pruner.d.ts +47 -0
- package/dist/core/content-pruner.js +425 -0
- package/dist/core/cookie-cache.d.ts +60 -0
- package/dist/core/cookie-cache.js +163 -0
- package/dist/core/crawl-checkpoint.d.ts +54 -0
- package/dist/core/crawl-checkpoint.js +104 -0
- package/dist/core/crawler.d.ts +84 -0
- package/dist/core/crawler.js +349 -0
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/deep-fetch.d.ts +74 -0
- package/dist/core/deep-fetch.js +405 -0
- package/dist/core/deep-research.d.ts +141 -0
- package/dist/core/deep-research.js +972 -0
- package/dist/core/design-analysis.d.ts +70 -0
- package/dist/core/design-analysis.js +490 -0
- package/dist/core/design-compare.d.ts +38 -0
- package/dist/core/design-compare.js +264 -0
- package/dist/core/diff.d.ts +61 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/dns-cache.d.ts +20 -0
- package/dist/core/dns-cache.js +198 -0
- package/dist/core/documents.d.ts +23 -0
- package/dist/core/documents.js +123 -0
- package/dist/core/domain-memory.d.ts +66 -0
- package/dist/core/domain-memory.js +163 -0
- package/dist/core/domain-verify.d.ts +40 -0
- package/dist/core/domain-verify.js +379 -0
- package/dist/core/engine-ranker.d.ts +112 -0
- package/dist/core/engine-ranker.js +395 -0
- package/dist/core/extract-inline.d.ts +38 -0
- package/dist/core/extract-inline.js +215 -0
- package/dist/core/extract-listings.d.ts +38 -0
- package/dist/core/extract-listings.js +461 -0
- package/dist/core/extract.d.ts +9 -0
- package/dist/core/extract.js +139 -0
- package/dist/core/fetch-cache.d.ts +57 -0
- package/dist/core/fetch-cache.js +95 -0
- package/dist/core/fetcher.d.ts +13 -0
- package/dist/core/fetcher.js +12 -0
- package/dist/core/google-cache.d.ts +29 -0
- package/dist/core/google-cache.js +180 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/hotel-search.d.ts +122 -0
- package/dist/core/hotel-search.js +382 -0
- package/dist/core/http-fetch.d.ts +72 -0
- package/dist/core/http-fetch.js +820 -0
- package/dist/core/human.d.ts +175 -0
- package/dist/core/human.js +680 -0
- package/dist/core/image-caption.d.ts +44 -0
- package/dist/core/image-caption.js +271 -0
- package/dist/core/jobs.d.ts +75 -0
- package/dist/core/jobs.js +634 -0
- package/dist/core/json-ld.d.ts +15 -0
- package/dist/core/json-ld.js +617 -0
- package/dist/core/language-detect.d.ts +18 -0
- package/dist/core/language-detect.js +135 -0
- package/dist/core/links.d.ts +10 -0
- package/dist/core/links.js +44 -0
- package/dist/core/llm-extract.d.ts +71 -0
- package/dist/core/llm-extract.js +507 -0
- package/dist/core/llm-provider.d.ts +100 -0
- package/dist/core/llm-provider.js +702 -0
- package/dist/core/local-search.d.ts +60 -0
- package/dist/core/local-search.js +308 -0
- package/dist/core/logger.d.ts +28 -0
- package/dist/core/logger.js +104 -0
- package/dist/core/map.d.ts +33 -0
- package/dist/core/map.js +127 -0
- package/dist/core/markdown.d.ts +92 -0
- package/dist/core/markdown.js +809 -0
- package/dist/core/metadata.d.ts +34 -0
- package/dist/core/metadata.js +422 -0
- package/dist/core/observe.d.ts +113 -0
- package/dist/core/observe.js +395 -0
- package/dist/core/ocr.d.ts +12 -0
- package/dist/core/ocr.js +33 -0
- package/dist/core/paginate.d.ts +31 -0
- package/dist/core/paginate.js +106 -0
- package/dist/core/pdf.d.ts +8 -0
- package/dist/core/pdf.js +25 -0
- package/dist/core/peel-tls.d.ts +25 -0
- package/dist/core/peel-tls.js +220 -0
- package/dist/core/pipeline.d.ts +132 -0
- package/dist/core/pipeline.js +1666 -0
- package/dist/core/profiles.d.ts +61 -0
- package/dist/core/profiles.js +350 -0
- package/dist/core/prompt-guard.d.ts +30 -0
- package/dist/core/prompt-guard.js +119 -0
- package/dist/core/proxy-config.d.ts +90 -0
- package/dist/core/proxy-config.js +172 -0
- package/dist/core/quick-answer.d.ts +53 -0
- package/dist/core/quick-answer.js +833 -0
- package/dist/core/rate-governor.d.ts +80 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/readability.d.ts +57 -0
- package/dist/core/readability.js +533 -0
- package/dist/core/research.d.ts +66 -0
- package/dist/core/research.js +270 -0
- package/dist/core/retry.d.ts +60 -0
- package/dist/core/retry.js +119 -0
- package/dist/core/safe-browsing.d.ts +30 -0
- package/dist/core/safe-browsing.js +206 -0
- package/dist/core/schema-extraction.d.ts +66 -0
- package/dist/core/schema-extraction.js +352 -0
- package/dist/core/schema-postprocess.d.ts +32 -0
- package/dist/core/schema-postprocess.js +469 -0
- package/dist/core/schema-templates.d.ts +19 -0
- package/dist/core/schema-templates.js +143 -0
- package/dist/core/screenshot.d.ts +224 -0
- package/dist/core/screenshot.js +207 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +243 -0
- package/dist/core/search-provider.js +1629 -0
- package/dist/core/searxng-provider.d.ts +35 -0
- package/dist/core/searxng-provider.js +105 -0
- package/dist/core/selective-evidence.d.ts +151 -0
- package/dist/core/selective-evidence.js +389 -0
- package/dist/core/site-search.d.ts +44 -0
- package/dist/core/site-search.js +252 -0
- package/dist/core/sitemap.d.ts +23 -0
- package/dist/core/sitemap.js +105 -0
- package/dist/core/source-credibility.d.ts +29 -0
- package/dist/core/source-credibility.js +584 -0
- package/dist/core/source-scoring.d.ts +166 -0
- package/dist/core/source-scoring.js +396 -0
- package/dist/core/stemmer.d.ts +38 -0
- package/dist/core/stemmer.js +509 -0
- package/dist/core/strategies.d.ts +104 -0
- package/dist/core/strategies.js +1044 -0
- package/dist/core/strategy-hooks.d.ts +145 -0
- package/dist/core/strategy-hooks.js +74 -0
- package/dist/core/structured-extract.d.ts +43 -0
- package/dist/core/structured-extract.js +550 -0
- package/dist/core/summarize.d.ts +17 -0
- package/dist/core/summarize.js +78 -0
- package/dist/core/synonyms.d.ts +42 -0
- package/dist/core/synonyms.js +184 -0
- package/dist/core/system-monitor.d.ts +61 -0
- package/dist/core/system-monitor.js +133 -0
- package/dist/core/table-format.d.ts +30 -0
- package/dist/core/table-format.js +146 -0
- package/dist/core/threat-feeds.d.ts +23 -0
- package/dist/core/threat-feeds.js +104 -0
- package/dist/core/timing.d.ts +21 -0
- package/dist/core/timing.js +33 -0
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/core/user-agents.d.ts +82 -0
- package/dist/core/user-agents.js +239 -0
- package/dist/core/vertical-search.d.ts +54 -0
- package/dist/core/vertical-search.js +158 -0
- package/dist/core/watch-manager.d.ts +175 -0
- package/dist/core/watch-manager.js +416 -0
- package/dist/core/watch.d.ts +101 -0
- package/dist/core/watch.js +389 -0
- package/dist/core/youtube.d.ts +130 -0
- package/dist/core/youtube.js +1175 -0
- package/dist/ee/challenge-re-export.d.ts +1 -0
- package/dist/ee/challenge-re-export.js +1 -0
- package/dist/ee/challenge-solver.d.ts +72 -0
- package/dist/ee/challenge-solver.js +720 -0
- package/dist/ee/domain-extractors.d.ts +8 -0
- package/dist/ee/domain-extractors.js +8 -0
- package/dist/ee/domain-intel.d.ts +16 -0
- package/dist/ee/domain-intel.js +133 -0
- package/dist/ee/extractors/allrecipes.d.ts +2 -0
- package/dist/ee/extractors/allrecipes.js +120 -0
- package/dist/ee/extractors/amazon.d.ts +2 -0
- package/dist/ee/extractors/amazon.js +78 -0
- package/dist/ee/extractors/arxiv.d.ts +2 -0
- package/dist/ee/extractors/arxiv.js +137 -0
- package/dist/ee/extractors/bestbuy.d.ts +2 -0
- package/dist/ee/extractors/bestbuy.js +78 -0
- package/dist/ee/extractors/carscom.d.ts +2 -0
- package/dist/ee/extractors/carscom.js +121 -0
- package/dist/ee/extractors/coingecko.d.ts +2 -0
- package/dist/ee/extractors/coingecko.js +134 -0
- package/dist/ee/extractors/craigslist.d.ts +2 -0
- package/dist/ee/extractors/craigslist.js +92 -0
- package/dist/ee/extractors/devto.d.ts +2 -0
- package/dist/ee/extractors/devto.js +135 -0
- package/dist/ee/extractors/ebay.d.ts +2 -0
- package/dist/ee/extractors/ebay.js +90 -0
- package/dist/ee/extractors/espn.d.ts +2 -0
- package/dist/ee/extractors/espn.js +260 -0
- package/dist/ee/extractors/etsy.d.ts +2 -0
- package/dist/ee/extractors/etsy.js +52 -0
- package/dist/ee/extractors/facebook.d.ts +2 -0
- package/dist/ee/extractors/facebook.js +46 -0
- package/dist/ee/extractors/github.d.ts +2 -0
- package/dist/ee/extractors/github.js +196 -0
- package/dist/ee/extractors/google-flights.d.ts +2 -0
- package/dist/ee/extractors/google-flights.js +176 -0
- package/dist/ee/extractors/hackernews.d.ts +2 -0
- package/dist/ee/extractors/hackernews.js +147 -0
- package/dist/ee/extractors/imdb.d.ts +2 -0
- package/dist/ee/extractors/imdb.js +172 -0
- package/dist/ee/extractors/index.d.ts +26 -0
- package/dist/ee/extractors/index.js +247 -0
- package/dist/ee/extractors/instagram.d.ts +2 -0
- package/dist/ee/extractors/instagram.js +102 -0
- package/dist/ee/extractors/kalshi.d.ts +2 -0
- package/dist/ee/extractors/kalshi.js +121 -0
- package/dist/ee/extractors/kayak-cars.d.ts +2 -0
- package/dist/ee/extractors/kayak-cars.js +270 -0
- package/dist/ee/extractors/linkedin.d.ts +2 -0
- package/dist/ee/extractors/linkedin.js +113 -0
- package/dist/ee/extractors/medium.d.ts +2 -0
- package/dist/ee/extractors/medium.js +130 -0
- package/dist/ee/extractors/news.d.ts +4 -0
- package/dist/ee/extractors/news.js +173 -0
- package/dist/ee/extractors/npm.d.ts +2 -0
- package/dist/ee/extractors/npm.js +86 -0
- package/dist/ee/extractors/pdf.d.ts +2 -0
- package/dist/ee/extractors/pdf.js +108 -0
- package/dist/ee/extractors/pinterest.d.ts +2 -0
- package/dist/ee/extractors/pinterest.js +34 -0
- package/dist/ee/extractors/polymarket.d.ts +2 -0
- package/dist/ee/extractors/polymarket.js +358 -0
- package/dist/ee/extractors/producthunt.d.ts +2 -0
- package/dist/ee/extractors/producthunt.js +88 -0
- package/dist/ee/extractors/pubmed.d.ts +2 -0
- package/dist/ee/extractors/pubmed.js +162 -0
- package/dist/ee/extractors/pypi.d.ts +2 -0
- package/dist/ee/extractors/pypi.js +80 -0
- package/dist/ee/extractors/reddit.d.ts +2 -0
- package/dist/ee/extractors/reddit.js +438 -0
- package/dist/ee/extractors/redfin.d.ts +2 -0
- package/dist/ee/extractors/redfin.js +156 -0
- package/dist/ee/extractors/semanticscholar.d.ts +2 -0
- package/dist/ee/extractors/semanticscholar.js +131 -0
- package/dist/ee/extractors/shared.d.ts +12 -0
- package/dist/ee/extractors/shared.js +76 -0
- package/dist/ee/extractors/soundcloud.d.ts +2 -0
- package/dist/ee/extractors/soundcloud.js +34 -0
- package/dist/ee/extractors/sportsbetting.d.ts +2 -0
- package/dist/ee/extractors/sportsbetting.js +37 -0
- package/dist/ee/extractors/spotify.d.ts +2 -0
- package/dist/ee/extractors/spotify.js +34 -0
- package/dist/ee/extractors/stackoverflow.d.ts +2 -0
- package/dist/ee/extractors/stackoverflow.js +61 -0
- package/dist/ee/extractors/substack.d.ts +2 -0
- package/dist/ee/extractors/substack.js +115 -0
- package/dist/ee/extractors/substackroot.d.ts +2 -0
- package/dist/ee/extractors/substackroot.js +46 -0
- package/dist/ee/extractors/tiktok.d.ts +2 -0
- package/dist/ee/extractors/tiktok.js +29 -0
- package/dist/ee/extractors/tradingview.d.ts +2 -0
- package/dist/ee/extractors/tradingview.js +182 -0
- package/dist/ee/extractors/twitch.d.ts +2 -0
- package/dist/ee/extractors/twitch.js +36 -0
- package/dist/ee/extractors/twitter.d.ts +2 -0
- package/dist/ee/extractors/twitter.js +327 -0
- package/dist/ee/extractors/types.d.ts +14 -0
- package/dist/ee/extractors/types.js +1 -0
- package/dist/ee/extractors/walmart.d.ts +2 -0
- package/dist/ee/extractors/walmart.js +50 -0
- package/dist/ee/extractors/weather.d.ts +2 -0
- package/dist/ee/extractors/weather.js +133 -0
- package/dist/ee/extractors/wikipedia.d.ts +4 -0
- package/dist/ee/extractors/wikipedia.js +235 -0
- package/dist/ee/extractors/yelp.d.ts +2 -0
- package/dist/ee/extractors/yelp.js +216 -0
- package/dist/ee/extractors/youtube.d.ts +2 -0
- package/dist/ee/extractors/youtube.js +189 -0
- package/dist/ee/extractors/zillow.d.ts +54 -0
- package/dist/ee/extractors/zillow.js +247 -0
- package/dist/ee/extractors-re-export.d.ts +1 -0
- package/dist/ee/extractors-re-export.js +1 -0
- package/dist/ee/premium-hooks.d.ts +20 -0
- package/dist/ee/premium-hooks.js +50 -0
- package/dist/ee/spa-detection.d.ts +2 -0
- package/dist/ee/spa-detection.js +2 -0
- package/dist/ee/stability.d.ts +4 -0
- package/dist/ee/stability.js +29 -0
- package/dist/ee/swr-cache.d.ts +14 -0
- package/dist/ee/swr-cache.js +34 -0
- package/dist/index.d.ts +143 -0
- package/dist/index.js +291 -0
- package/dist/integrations/index.d.ts +2 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/langchain.d.ts +64 -0
- package/dist/integrations/langchain.js +115 -0
- package/dist/integrations/llamaindex.d.ts +50 -0
- package/dist/integrations/llamaindex.js +91 -0
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +395 -0
- package/dist/mcp/handlers/extract.d.ts +7 -0
- package/dist/mcp/handlers/extract.js +135 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +63 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +40 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/observe.d.ts +8 -0
- package/dist/mcp/handlers/observe.js +37 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +78 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +7 -0
- package/dist/mcp/server.js +108 -0
- package/dist/mcp/smart-router.d.ts +23 -0
- package/dist/mcp/smart-router.js +178 -0
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +632 -0
- package/dist/server/auth-store.d.ts +28 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/bull-queues.d.ts +60 -0
- package/dist/server/bull-queues.js +90 -0
- package/dist/server/email-service.d.ts +55 -0
- package/dist/server/email-service.js +291 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/audit-log.d.ts +14 -0
- package/dist/server/middleware/audit-log.js +73 -0
- package/dist/server/middleware/auth.d.ts +35 -0
- package/dist/server/middleware/auth.js +225 -0
- package/dist/server/middleware/rate-limit.d.ts +50 -0
- package/dist/server/middleware/rate-limit.js +270 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +201 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +146 -0
- package/dist/server/pg-auth-store.js +576 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +79 -0
- package/dist/server/routes/admin-active.d.ts +7 -0
- package/dist/server/routes/admin-active.js +120 -0
- package/dist/server/routes/admin-stats.d.ts +7 -0
- package/dist/server/routes/admin-stats.js +176 -0
- package/dist/server/routes/agent.d.ts +24 -0
- package/dist/server/routes/agent.js +480 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +295 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cache-warm.d.ts +25 -0
- package/dist/server/routes/cache-warm.js +212 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/crawl.d.ts +13 -0
- package/dist/server/routes/crawl.js +287 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/deep-research.d.ts +11 -0
- package/dist/server/routes/deep-research.js +232 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +14 -0
- package/dist/server/routes/extract.js +325 -0
- package/dist/server/routes/feed.d.ts +15 -0
- package/dist/server/routes/feed.js +311 -0
- package/dist/server/routes/fetch-queue.d.ts +13 -0
- package/dist/server/routes/fetch-queue.js +357 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +1274 -0
- package/dist/server/routes/go.d.ts +14 -0
- package/dist/server/routes/go.js +81 -0
- package/dist/server/routes/health.d.ts +11 -0
- package/dist/server/routes/health.js +141 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +574 -0
- package/dist/server/routes/map.d.ts +11 -0
- package/dist/server/routes/map.js +116 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +197 -0
- package/dist/server/routes/metrics.d.ts +37 -0
- package/dist/server/routes/metrics.js +149 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/reader.d.ts +18 -0
- package/dist/server/routes/reader.js +192 -0
- package/dist/server/routes/research.d.ts +14 -0
- package/dist/server/routes/research.js +482 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +820 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +874 -0
- package/dist/server/routes/session.d.ts +17 -0
- package/dist/server/routes/session.js +548 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +102 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +72 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
- package/dist/server/routes/smart-search/handlers/general.js +717 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +1309 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +154 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +546 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +264 -0
- package/dist/server/routes/smart-search/llm.d.ts +16 -0
- package/dist/server/routes/smart-search/llm.js +70 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +81 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +20 -0
- package/dist/server/routes/smart-search/utils.js +146 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +296 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/usage.d.ts +9 -0
- package/dist/server/routes/usage.js +279 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1867 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +14 -0
- package/dist/server/sentry.js +104 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/dist/types.d.ts +552 -0
- package/dist/types.js +39 -0
- package/llms.txt +105 -0
- package/package.json +189 -0
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* POST /v1/extract — Structured JSON Schema extraction endpoint.
|
|
3
|
+
*
|
|
4
|
+
* Firecrawl-compatible: pass a URL + JSON schema, get structured data back.
|
|
5
|
+
*
|
|
6
|
+
* Auth: API key required (full or read scope)
|
|
7
|
+
* Body: { url, schema, prompt?, llm?, render? }
|
|
8
|
+
*
|
|
9
|
+
* Also exposes:
|
|
10
|
+
* GET /v1/extract/auto — Auto-extract known structured types from a URL
|
|
11
|
+
* POST /v1/extract/auto — Same but via POST body
|
|
12
|
+
*/
|
|
13
|
+
import { Router } from 'express';
|
|
14
|
+
import crypto from 'crypto';
|
|
15
|
+
import { peel } from '../../index.js';
|
|
16
|
+
import { extractStructured, } from '../../core/structured-extract.js';
|
|
17
|
+
import { getDefaultLLMConfig, isFreeTierLimitError, } from '../../core/llm-provider.js';
|
|
18
|
+
/**
 * LLM provider identifiers accepted by this route.
 *
 * Checked against the `llm.provider` request field (and the legacy top-level
 * `llmProvider` field) and listed verbatim in the "Invalid \"llm.provider\""
 * error message. Frozen because the list is only ever read.
 */
const VALID_PROVIDERS = Object.freeze([
    'cloudflare',
    'openai',
    'anthropic',
    'google',
    'ollama',
    'cerebras',
]);
|
|
26
|
+
/**
 * Resolve the identifier echoed back in a response's `requestId` field.
 *
 * @param {object} req - Incoming request object; only `req.requestId` is read.
 *   NOTE(review): `requestId` is presumably attached by upstream middleware —
 *   that code is not visible here, confirm before relying on it.
 * @returns {*} `req.requestId` when it is truthy, otherwise a freshly
 *   generated random UUID string.
 */
function reqId(req) {
    // `||` (not `??`) is deliberate: an empty-string id also falls back to a UUID.
    return req.requestId || crypto.randomUUID();
}
|
|
29
|
+
export function createExtractRouter() {
|
|
30
|
+
const router = Router();
|
|
31
|
+
// ── POST /v1/extract ─────────────────────────────────────────────────────
|
|
32
|
+
router.post('/v1/extract', async (req, res) => {
|
|
33
|
+
try {
|
|
34
|
+
const { url, schema: schemaRaw, prompt, llm: llmRaw, render,
|
|
35
|
+
// Legacy fields for backward compat
|
|
36
|
+
llmApiKey, llmProvider, model: legacyModel, } = req.body;
|
|
37
|
+
// ── Validate URL ────────────────────────────────────────────────────
|
|
38
|
+
if (!url || typeof url !== 'string') {
|
|
39
|
+
res.status(400).json({
|
|
40
|
+
success: false,
|
|
41
|
+
error: {
|
|
42
|
+
type: 'invalid_request',
|
|
43
|
+
message: 'Missing or invalid "url" field in request body.',
|
|
44
|
+
hint: 'Pass a URL: { "url": "https://example.com", "schema": { ... } }',
|
|
45
|
+
docs: 'https://webpeel.dev/docs/errors#invalid-request',
|
|
46
|
+
},
|
|
47
|
+
requestId: reqId(req),
|
|
48
|
+
});
|
|
49
|
+
return;
|
|
50
|
+
}
|
|
51
|
+
if (url.length > 2048) {
|
|
52
|
+
res.status(400).json({
|
|
53
|
+
success: false,
|
|
54
|
+
error: {
|
|
55
|
+
type: 'invalid_url',
|
|
56
|
+
message: 'URL too long (max 2048 characters)',
|
|
57
|
+
docs: 'https://webpeel.dev/docs/errors#invalid-url',
|
|
58
|
+
},
|
|
59
|
+
requestId: reqId(req),
|
|
60
|
+
});
|
|
61
|
+
return;
|
|
62
|
+
}
|
|
63
|
+
try {
|
|
64
|
+
const parsed = new URL(url);
|
|
65
|
+
if (!['http:', 'https:'].includes(parsed.protocol)) {
|
|
66
|
+
res.status(400).json({
|
|
67
|
+
success: false,
|
|
68
|
+
error: {
|
|
69
|
+
type: 'invalid_url',
|
|
70
|
+
message: 'Only HTTP and HTTPS URLs are supported',
|
|
71
|
+
docs: 'https://webpeel.dev/docs/errors#invalid-url',
|
|
72
|
+
},
|
|
73
|
+
requestId: reqId(req),
|
|
74
|
+
});
|
|
75
|
+
return;
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
catch {
|
|
79
|
+
res.status(400).json({
|
|
80
|
+
success: false,
|
|
81
|
+
error: {
|
|
82
|
+
type: 'invalid_url',
|
|
83
|
+
message: `Invalid URL format: ${url}`,
|
|
84
|
+
hint: 'Ensure the URL is well-formed: https://example.com',
|
|
85
|
+
docs: 'https://webpeel.dev/docs/errors#invalid-url',
|
|
86
|
+
},
|
|
87
|
+
requestId: reqId(req),
|
|
88
|
+
});
|
|
89
|
+
return;
|
|
90
|
+
}
|
|
91
|
+
// ── Validate schema ─────────────────────────────────────────────────
|
|
92
|
+
if (!schemaRaw && !prompt) {
|
|
93
|
+
res.status(400).json({
|
|
94
|
+
success: false,
|
|
95
|
+
error: {
|
|
96
|
+
type: 'invalid_request',
|
|
97
|
+
message: 'Either "schema" or "prompt" is required for structured extraction.',
|
|
98
|
+
hint: 'Include a JSON schema in the request body: { "schema": { "type": "object", "properties": { ... } } }',
|
|
99
|
+
docs: 'https://webpeel.dev/docs/errors#invalid-request',
|
|
100
|
+
},
|
|
101
|
+
requestId: reqId(req),
|
|
102
|
+
});
|
|
103
|
+
return;
|
|
104
|
+
}
|
|
105
|
+
// Build or validate schema
|
|
106
|
+
let schema;
|
|
107
|
+
if (schemaRaw) {
|
|
108
|
+
if (typeof schemaRaw !== 'object' || schemaRaw === null || Array.isArray(schemaRaw)) {
|
|
109
|
+
res.status(400).json({
|
|
110
|
+
success: false,
|
|
111
|
+
error: {
|
|
112
|
+
type: 'invalid_request',
|
|
113
|
+
message: '"schema" must be a JSON object',
|
|
114
|
+
hint: '{ "type": "object", "properties": { "field": { "type": "string" } } }',
|
|
115
|
+
docs: 'https://webpeel.dev/docs/errors#invalid-request',
|
|
116
|
+
},
|
|
117
|
+
requestId: reqId(req),
|
|
118
|
+
});
|
|
119
|
+
return;
|
|
120
|
+
}
|
|
121
|
+
const schemaObj = schemaRaw;
|
|
122
|
+
// Accept both full JSON Schema and shorthand { field: "type" }
|
|
123
|
+
if (schemaObj.type === 'object' && schemaObj.properties) {
|
|
124
|
+
schema = schemaObj;
|
|
125
|
+
}
|
|
126
|
+
else {
|
|
127
|
+
// Shorthand: { "company_mission": "string", "is_open_source": "boolean" }
|
|
128
|
+
const props = {};
|
|
129
|
+
for (const [k, v] of Object.entries(schemaObj)) {
|
|
130
|
+
props[k] = { type: typeof v === 'string' ? v : 'string' };
|
|
131
|
+
}
|
|
132
|
+
schema = { type: 'object', properties: props };
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
else {
|
|
136
|
+
// No schema provided but prompt is — create a minimal schema
|
|
137
|
+
schema = { type: 'object', properties: { result: { type: 'string', description: prompt } } };
|
|
138
|
+
}
|
|
139
|
+
// ── Resolve LLM config ──────────────────────────────────────────────
|
|
140
|
+
let llmConfig;
|
|
141
|
+
if (llmRaw && typeof llmRaw === 'object' && !Array.isArray(llmRaw)) {
|
|
142
|
+
// New format: { "provider": "openai", "apiKey": "sk-...", "model": "..." }
|
|
143
|
+
const llmObj = llmRaw;
|
|
144
|
+
const provider = typeof llmObj.provider === 'string' ? llmObj.provider : 'openai';
|
|
145
|
+
if (!VALID_PROVIDERS.includes(provider)) {
|
|
146
|
+
res.status(400).json({
|
|
147
|
+
success: false,
|
|
148
|
+
error: {
|
|
149
|
+
type: 'invalid_request',
|
|
150
|
+
message: `Invalid "llm.provider". Must be one of: ${VALID_PROVIDERS.join(', ')}`,
|
|
151
|
+
docs: 'https://webpeel.dev/docs/errors#invalid-request',
|
|
152
|
+
},
|
|
153
|
+
requestId: reqId(req),
|
|
154
|
+
});
|
|
155
|
+
return;
|
|
156
|
+
}
|
|
157
|
+
llmConfig = {
|
|
158
|
+
provider: provider,
|
|
159
|
+
apiKey: typeof llmObj.apiKey === 'string' ? llmObj.apiKey : undefined,
|
|
160
|
+
model: typeof llmObj.model === 'string' ? llmObj.model : undefined,
|
|
161
|
+
endpoint: typeof llmObj.endpoint === 'string' ? llmObj.endpoint : undefined,
|
|
162
|
+
};
|
|
163
|
+
}
|
|
164
|
+
else if (typeof llmApiKey === 'string' && llmApiKey) {
|
|
165
|
+
// Legacy format: llmApiKey + llmProvider at top level
|
|
166
|
+
const provider = (typeof llmProvider === 'string' && VALID_PROVIDERS.includes(llmProvider))
|
|
167
|
+
? llmProvider
|
|
168
|
+
: 'openai';
|
|
169
|
+
llmConfig = {
|
|
170
|
+
provider,
|
|
171
|
+
apiKey: llmApiKey,
|
|
172
|
+
model: typeof legacyModel === 'string' ? legacyModel : undefined,
|
|
173
|
+
};
|
|
174
|
+
}
|
|
175
|
+
else {
|
|
176
|
+
// Try server-side default (env vars)
|
|
177
|
+
const defaultCfg = getDefaultLLMConfig();
|
|
178
|
+
// Only use server default if it has a real key (not bare cloudflare)
|
|
179
|
+
if (defaultCfg.provider !== 'cloudflare' || (process.env.CLOUDFLARE_ACCOUNT_ID && process.env.CLOUDFLARE_API_TOKEN)) {
|
|
180
|
+
llmConfig = defaultCfg;
|
|
181
|
+
}
|
|
182
|
+
// If still no config, we'll use heuristic extraction
|
|
183
|
+
}
|
|
184
|
+
// ── Fetch page content ──────────────────────────────────────────────
|
|
185
|
+
const useRender = render === true || render === 'true';
|
|
186
|
+
const peelResult = await peel(url, {
|
|
187
|
+
format: 'markdown',
|
|
188
|
+
render: useRender,
|
|
189
|
+
noEscalate: !useRender, // prevent OOM: only browser when render=true explicitly
|
|
190
|
+
timeout: 30000,
|
|
191
|
+
readable: true,
|
|
192
|
+
});
|
|
193
|
+
const content = peelResult.content || '';
|
|
194
|
+
// ── Extract structured data ─────────────────────────────────────────
|
|
195
|
+
// Seed hints from domain-api structured data (GitHub stars/language, etc.)
|
|
196
|
+
// This lets heuristic extraction use pre-parsed structured fields as ground truth.
|
|
197
|
+
const domainHints = {};
|
|
198
|
+
const rawDomainData = peelResult.domainData?.structured;
|
|
199
|
+
if (rawDomainData && typeof rawDomainData === 'object') {
|
|
200
|
+
for (const [k, v] of Object.entries(rawDomainData)) {
|
|
201
|
+
if (v !== null && v !== undefined && v !== '') {
|
|
202
|
+
domainHints[k] = v;
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
const extractResult = await extractStructured(content, schema, llmConfig, typeof prompt === 'string' ? prompt : undefined, Object.keys(domainHints).length > 0 ? domainHints : undefined);
|
|
207
|
+
const method = llmConfig ? 'llm' : 'heuristic';
|
|
208
|
+
res.json({
|
|
209
|
+
success: true,
|
|
210
|
+
data: {
|
|
211
|
+
url: peelResult.url || url,
|
|
212
|
+
extracted: extractResult.data,
|
|
213
|
+
confidence: extractResult.confidence,
|
|
214
|
+
tokensUsed: extractResult.tokensUsed,
|
|
215
|
+
method,
|
|
216
|
+
},
|
|
217
|
+
});
|
|
218
|
+
}
|
|
219
|
+
catch (error) {
|
|
220
|
+
const msg = error instanceof Error ? error.message : String(error);
|
|
221
|
+
console.error('[/v1/extract] Error:', msg);
|
|
222
|
+
if (isFreeTierLimitError(error)) {
|
|
223
|
+
res.status(429).json({
|
|
224
|
+
success: false,
|
|
225
|
+
error: {
|
|
226
|
+
type: 'free_tier_limit',
|
|
227
|
+
message: error.message,
|
|
228
|
+
hint: 'Provide your own API key in the "llm" config object for unlimited use.',
|
|
229
|
+
docs: 'https://webpeel.dev/docs/extract#free-tier',
|
|
230
|
+
},
|
|
231
|
+
requestId: reqId(req),
|
|
232
|
+
});
|
|
233
|
+
return;
|
|
234
|
+
}
|
|
235
|
+
if (msg.includes('401') || msg.includes('Unauthorized') || msg.includes('authentication failed')) {
|
|
236
|
+
res.status(401).json({
|
|
237
|
+
success: false,
|
|
238
|
+
error: { type: 'llm_auth_failed', message: msg },
|
|
239
|
+
requestId: reqId(req),
|
|
240
|
+
});
|
|
241
|
+
return;
|
|
242
|
+
}
|
|
243
|
+
if (msg.includes('429') || msg.includes('rate limit')) {
|
|
244
|
+
res.status(429).json({
|
|
245
|
+
success: false,
|
|
246
|
+
error: {
|
|
247
|
+
type: 'llm_rate_limited',
|
|
248
|
+
message: msg,
|
|
249
|
+
hint: 'Try again in a moment or use a different LLM provider.',
|
|
250
|
+
docs: 'https://webpeel.dev/docs/errors#llm-rate-limited',
|
|
251
|
+
},
|
|
252
|
+
requestId: reqId(req),
|
|
253
|
+
});
|
|
254
|
+
return;
|
|
255
|
+
}
|
|
256
|
+
res.status(500).json({
|
|
257
|
+
success: false,
|
|
258
|
+
error: {
|
|
259
|
+
type: 'extraction_failed',
|
|
260
|
+
message: msg,
|
|
261
|
+
docs: 'https://webpeel.dev/docs/errors#extraction-failed',
|
|
262
|
+
},
|
|
263
|
+
requestId: reqId(req),
|
|
264
|
+
});
|
|
265
|
+
}
|
|
266
|
+
});
|
|
267
|
+
// ── GET /v1/extract/auto ─────────────────────────────────────────────────
|
|
268
|
+
// Fetches the page named by the `url` query parameter as HTML, runs autoExtract()
// on it, and responds with { url, pageType, structured }.
router.get('/v1/extract/auto', async (req, res) => {
    const url = req.query.url;
    // Repeated or bracketed query keys (?url=a&url=b, ?url[x]=y) are parsed into
    // arrays/objects, so a truthiness check alone is not enough to guarantee a string.
    if (!url || typeof url !== 'string') {
        res.status(400).json({
            success: false,
            error: {
                type: 'missing_url',
                message: 'Missing url parameter',
                hint: 'Pass a URL: GET /v1/extract/auto?url=https://example.com',
                docs: 'https://webpeel.dev/docs/errors#missing-url',
            },
            requestId: reqId(req),
        });
        return;
    }
    // Without this try/catch a rejected import()/peel() promise would escape the
    // async handler and the client would never receive a response. The error
    // envelope mirrors the POST variant of this route.
    try {
        const { autoExtract } = await import('../../core/auto-extract.js');
        const result = await peel(url, { format: 'html' });
        const extracted = autoExtract(result.content || '', url);
        res.json({ url, pageType: extracted.type, structured: extracted });
    }
    catch (error) {
        const msg = error instanceof Error ? error.message : 'Unknown error';
        console.error('[/v1/extract/auto GET] Error:', msg);
        res.status(500).json({
            success: false,
            error: {
                type: 'extraction_failed',
                message: msg,
                docs: 'https://webpeel.dev/docs/errors#extraction-failed',
            },
            requestId: reqId(req),
        });
    }
});
|
|
288
|
+
// ── POST /v1/extract/auto ────────────────────────────────────────────────
|
|
289
|
+
// Body: { url, ...peelOptions }. Fetches `url` as HTML (extra body fields are
// forwarded to peel() as options), runs autoExtract() on the result, and responds
// with { url, pageType, structured }.
router.post('/v1/extract/auto', async (req, res) => {
    // req.body may be undefined (no body sent / not parsed); destructuring
    // undefined would throw before any response is written.
    const { url, ...rest } = req.body ?? {};
    if (!url || typeof url !== 'string') {
        res.status(400).json({
            success: false,
            error: {
                type: 'missing_url',
                message: 'Missing or invalid url field in request body',
                hint: 'Pass a URL in the request body: { "url": "https://example.com" }',
                docs: 'https://webpeel.dev/docs/errors#missing-url',
            },
            requestId: reqId(req),
        });
        return;
    }
    try {
        const { autoExtract } = await import('../../core/auto-extract.js');
        // `format` goes last so a caller-supplied "format" cannot override it:
        // autoExtract() is fed the fetched content as HTML, exactly as in the GET route.
        const result = await peel(url, { ...rest, format: 'html' });
        const extracted = autoExtract(result.content || '', url);
        res.json({ url, pageType: extracted.type, structured: extracted });
    }
    catch (error) {
        const msg = error instanceof Error ? error.message : 'Unknown error';
        console.error('[/v1/extract/auto POST] Error:', msg);
        res.status(500).json({
            success: false,
            error: {
                type: 'extraction_failed',
                message: msg,
                docs: 'https://webpeel.dev/docs/errors#extraction-failed',
            },
            requestId: reqId(req),
        });
    }
});
|
|
324
|
+
return router;
|
|
325
|
+
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Feed discovery and parsing endpoint — GET /v1/feed
|
|
3
|
+
*
|
|
4
|
+
* Discovers and fetches RSS/Atom feeds for any website URL.
|
|
5
|
+
* Supports direct feed URLs as well as HTML pages (auto-discovers via <link> tags
|
|
6
|
+
* or probes common feed paths like /feed, /rss.xml, etc.).
|
|
7
|
+
*
|
|
8
|
+
* Query params:
|
|
9
|
+
* - url (required) — website URL or direct feed URL
|
|
10
|
+
* - limit (optional) — max items to return (default 20, max 100)
|
|
11
|
+
* - format (optional) — "json" (default) or "markdown"
|
|
12
|
+
*/
|
|
13
|
+
import { Router } from 'express';
|
|
14
|
+
import { AuthStore } from '../auth-store.js';
|
|
15
|
+
export declare function createFeedRouter(_authStore: AuthStore): Router;
|
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Feed discovery and parsing endpoint — GET /v1/feed
|
|
3
|
+
*
|
|
4
|
+
* Discovers and fetches RSS/Atom feeds for any website URL.
|
|
5
|
+
* Supports direct feed URLs as well as HTML pages (auto-discovers via <link> tags
|
|
6
|
+
* or probes common feed paths like /feed, /rss.xml, etc.).
|
|
7
|
+
*
|
|
8
|
+
* Query params:
|
|
9
|
+
* - url (required) — website URL or direct feed URL
|
|
10
|
+
* - limit (optional) — max items to return (default 20, max 100)
|
|
11
|
+
* - format (optional) — "json" (default) or "markdown"
|
|
12
|
+
*/
|
|
13
|
+
import { Router } from 'express';
|
|
14
|
+
import { validateUrlForSSRF, SSRFError } from '../middleware/url-validator.js';
|
|
15
|
+
// ── Helpers: XML text extraction ──────────────────────────────────────────────
|
|
16
|
+
/**
 * Extract the trimmed inner text of the first `<tag>…</tag>` element in `xml`.
 *
 * The tag name is matched exactly (case-insensitively), so asking for `id` does
 * not match `<identifier>`; namespaced names such as `dc:creator` are passed as-is.
 * A single CDATA section filling the element is unwrapped, with or without
 * surrounding whitespace.
 *
 * @param {string} xml - XML fragment to search.
 * @param {string} tag - Element name, optionally with a namespace prefix.
 * @returns {string} Trimmed inner text, or '' when the element is not found.
 */
function extractTag(xml, tag) {
    // Escape regex metacharacters so the tag name is always matched literally.
    const name = tag.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // One regex with two alternatives keeps "first element in document order wins":
    //   1. a CDATA-wrapped body (the tempered `(?!]]>)` stops at the first `]]>`,
    //      so the match cannot run on into a later element), or
    //   2. a plain body up to the nearest closing tag.
    // `(?:\s[^>]*)?` after the name allows attributes but forbids a longer tag name.
    const re = new RegExp(`<${name}(?:\\s[^>]*)?>` +
        `(?:\\s*<!\\[CDATA\\[((?:(?!\\]\\]>)[\\s\\S])*)\\]\\]>\\s*|([\\s\\S]*?))` +
        `</${name}\\s*>`, 'i');
    const m = xml.match(re);
    if (!m) {
        return '';
    }
    return (m[1] ?? m[2]).trim();
}
|
|
30
|
+
/**
 * Extract a quoted attribute value from the attribute portion of an XML/HTML tag.
 *
 * The attribute name is matched exactly (case-insensitively): `href` does not
 * match `data-href`. The value ends at the matching closing quote, so a
 * double-quoted value may contain apostrophes and vice versa. Unquoted values
 * are not supported.
 *
 * @param {string} tag - Tag text or its attribute list, e.g. ` rel="alternate" href="/feed"`.
 * @param {string} attr - Attribute name to look up.
 * @returns {string} Trimmed attribute value, or '' when absent or empty.
 */
function extractAttr(tag, attr) {
    // Escape regex metacharacters so the attribute name is matched literally.
    const name = attr.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // The lookbehind rejects matches that are merely the tail of a longer name
    // (data-href, xlink:href). Each quote style gets its own alternative so the
    // value is terminated only by the quote character that opened it.
    const re = new RegExp(`(?<![\\w:.-])${name}\\s*=\\s*(?:"([^"]*)"|'([^']*)')`, 'i');
    const m = tag.match(re);
    return m ? (m[1] ?? m[2]).trim() : '';
}
|
|
36
|
+
/**
 * Strip HTML tags from a string and decode the basic character entities
 * (for description cleanup).
 *
 * Anything matching `<…>` is removed first; then `&lt;`, `&gt;`, `&quot;`,
 * `&#39;` / `&apos;` and `&amp;` are decoded, and the result is trimmed.
 *
 * @param {string} html - Text that may contain markup and entities.
 * @returns {string} Plain text.
 */
function stripHtml(html) {
    return html
        .replace(/<[^>]+>/g, '')
        .replace(/&lt;/g, '<')
        .replace(/&gt;/g, '>')
        .replace(/&quot;/g, '"')
        .replace(/&#39;|&apos;/g, "'")
        // Decode &amp; last so an escaped entity such as "&amp;lt;" becomes the
        // literal text "&lt;" instead of being decoded twice into "<".
        .replace(/&amp;/g, '&')
        .trim();
}
|
|
40
|
+
// ── RSS/Atom parser ───────────────────────────────────────────────────────────
|
|
41
|
+
/**
 * Parse an RSS 2.0 or Atom feed XML string into a flat array of feed items.
 * Uses regex — no external dependencies.
 *
 * Every item has the shape { title, link, description, pubDate, author, guid }.
 * Missing fields are empty strings, `description` is capped at 500 characters,
 * and `guid` falls back to `link`. Text fields other than `pubDate` go through
 * stripHtml() so markup and entity handling is the same for all of them
 * (feed URLs routinely carry `&amp;` in their query strings).
 *
 * @param {string} xml - Raw feed document.
 * @returns {Array<{title: string, link: string, description: string, pubDate: string, author: string, guid: string}>}
 */
function parseRSSFeed(xml) {
    const items = [];
    // Any <feed …> tag marks the document as Atom; everything else is parsed as RSS.
    const isAtom = /<feed[\s>]/i.test(xml);
    if (isAtom) {
        // ── Atom ───────────────────────────────────────────────────────────────
        const entryRe = /<entry[\s>]([\s\S]*?)<\/entry>/gi;
        let m;
        while ((m = entryRe.exec(xml)) !== null) {
            const entry = m[1];
            const title = stripHtml(extractTag(entry, 'title'));
            // link — the first <link> whose rel is "alternate" (the default when rel
            // is absent) wins; otherwise fall back to the first <link> that has an href.
            let link = '';
            const linkTagRe = /<link([^>]*)\/?>/gi;
            let lt;
            while ((lt = linkTagRe.exec(entry)) !== null) {
                const attrs = lt[1];
                const rel = extractAttr(attrs, 'rel') || 'alternate';
                const href = extractAttr(attrs, 'href');
                if (!href) {
                    continue;
                }
                if (rel === 'alternate') {
                    link = href;
                    break;
                }
                if (!link) {
                    link = href; // fallback
                }
            }
            link = stripHtml(link);
            // description — prefer <content>, fallback <summary>
            const content = extractTag(entry, 'content') || extractTag(entry, 'summary');
            const description = stripHtml(content).substring(0, 500);
            // date — prefer <published>, fallback <updated>
            const pubDate = extractTag(entry, 'published') || extractTag(entry, 'updated');
            // author — the <name> child of the first <author> element
            const authorBlock = entry.match(/<author[\s>]([\s\S]*?)<\/author>/i);
            const author = authorBlock ? stripHtml(extractTag(authorBlock[1], 'name')) : '';
            const guid = stripHtml(extractTag(entry, 'id')) || link;
            items.push({ title, link, description, pubDate, author, guid });
        }
    }
    else {
        // ── RSS 2.0 ────────────────────────────────────────────────────────────
        const itemRe = /<item[\s>]([\s\S]*?)<\/item>/gi;
        let m;
        while ((m = itemRe.exec(xml)) !== null) {
            const item = m[1];
            const title = stripHtml(extractTag(item, 'title'));
            const link = stripHtml(extractTag(item, 'link') || extractTag(item, 'feedburner:origLink'));
            const rawDesc = extractTag(item, 'description') || extractTag(item, 'content:encoded');
            const description = stripHtml(rawDesc).substring(0, 500);
            const pubDate = extractTag(item, 'pubDate') || extractTag(item, 'dc:date');
            const author = stripHtml(extractTag(item, 'author') || extractTag(item, 'dc:creator'));
            const guid = stripHtml(extractTag(item, 'guid')) || link;
            items.push({ title, link, description, pubDate, author, guid });
        }
    }
    return items;
}
|
|
103
|
+
// ── HTML feed discovery ───────────────────────────────────────────────────────
|
|
104
|
+
/**
 * Scan an HTML document for <link rel="alternate" type="application/rss+xml"> tags
 * and similar, returning discovered feed URLs resolved against the page URL.
 *
 * A link qualifies when its `rel` token list contains "alternate", it has an
 * `href`, and its `type` mentions rss, atom, application/xml or text/xml.
 * Results keep document order and are de-duplicated by resolved URL.
 *
 * @param {string} html - HTML source of the page.
 * @param {string} pageUrl - URL the page was fetched from; base for relative hrefs.
 * @returns {Array<{url: string, type: string, title: string}>} Discovered feeds;
 *   `title` defaults to 'Feed' when the tag has none.
 */
function discoverFeeds(html, pageUrl) {
    const feeds = [];
    const seen = new Set();
    // Matches every <link ...> tag anywhere in the document (not only inside <head>).
    const linkRe = /<link([^>]+)>/gi;
    let m;
    while ((m = linkRe.exec(html)) !== null) {
        const attrs = m[1];
        // rel is a space-separated token list in HTML, e.g. rel="alternate feed".
        const relTokens = extractAttr(attrs, 'rel').toLowerCase().split(/\s+/);
        const type = extractAttr(attrs, 'type').toLowerCase();
        const href = extractAttr(attrs, 'href');
        if (!relTokens.includes('alternate') || !href) {
            continue;
        }
        // Accept RSS, Atom, and generic XML feed types
        const isFeed = type.includes('rss') ||
            type.includes('atom') ||
            type.includes('application/xml') ||
            type.includes('text/xml');
        if (!isFeed) {
            continue;
        }
        // Resolve relative URLs; an href that cannot be parsed is skipped.
        let resolvedUrl;
        try {
            resolvedUrl = new URL(href, pageUrl).toString();
        }
        catch {
            continue;
        }
        if (seen.has(resolvedUrl)) {
            continue;
        }
        seen.add(resolvedUrl);
        const title = extractAttr(attrs, 'title') || 'Feed';
        feeds.push({ url: resolvedUrl, type, title });
    }
    return feeds;
}
|
|
144
|
+
// ── Router factory ────────────────────────────────────────────────────────────
|
|
145
|
+
/**
 * Fetch `targetUrl`, following redirects manually so that every hop is passed
 * through validateUrlForSSRF() before it is requested. With automatic redirect
 * following, only the first URL would be checked and a redirect could lead the
 * server to an address the validator would have rejected.
 *
 * @param {string} targetUrl - URL to request.
 * @param {Record<string, string>} headers - Request headers sent on every hop.
 * @param {number} timeoutMs - Deadline for the whole redirect chain.
 * @param {number} [maxRedirects=5] - Maximum number of redirects to follow.
 * @returns {Promise<Response>} The first non-redirect response.
 * @throws {SSRFError} When any URL in the chain is rejected by validateUrlForSSRF
 *   (assuming the validator signals rejection with SSRFError, as the route's error handling expects).
 * @throws {Error} On network failure, timeout, an unparsable Location, or too many redirects.
 */
async function fetchWithSsrfGuard(targetUrl, headers, timeoutMs, maxRedirects = 5) {
    const signal = AbortSignal.timeout(timeoutMs);
    let currentUrl = targetUrl;
    for (let hop = 0; hop <= maxRedirects; hop += 1) {
        validateUrlForSSRF(currentUrl);
        const response = await fetch(currentUrl, { headers, signal, redirect: 'manual' });
        const isRedirect = response.status >= 300 && response.status < 400;
        const location = isRedirect ? response.headers.get('location') : null;
        if (!location) {
            return response;
        }
        // The redirect body is never read; cancel it so the connection is released.
        await response.body?.cancel();
        currentUrl = new URL(location, currentUrl).toString();
    }
    throw new Error(`Too many redirects while fetching ${targetUrl}`);
}
/**
 * Build the router exposing GET /v1/feed.
 *
 * The handler fetches `url`; if the response looks like a feed it is parsed
 * directly, otherwise the HTML is scanned for a feed <link> and, failing that,
 * a list of common feed paths on the same origin is probed. Up to `limit` items
 * are returned as JSON, or as a markdown list when `format=markdown`.
 *
 * @param {unknown} _authStore - Accepted for signature parity with other router factories; not used here.
 * @returns {import('express').Router} Router with the /v1/feed route registered.
 */
export function createFeedRouter(_authStore) {
    const router = Router();
    const userAgent = 'WebPeel/0.21 (+https://webpeel.dev/bot)';
    router.get('/v1/feed', async (req, res) => {
        try {
            const url = req.query.url;
            const limitRaw = parseInt(req.query.limit || '20', 10);
            // Unparsable or negative limits fall back to the default; a negative value
            // would otherwise reach slice(0, -n) and silently drop trailing items.
            const limit = Number.isNaN(limitRaw) || limitRaw < 0 ? 20 : Math.min(limitRaw, 100);
            const format = req.query.format || 'json';
            // ── Validate required param ──────────────────────────────────────────
            if (!url) {
                res.status(400).json({
                    success: false,
                    error: { type: 'invalid_request', message: 'Missing required parameter: "url"' },
                });
                return;
            }
            // Repeated/bracketed query keys arrive as arrays or objects, not strings.
            if (typeof url !== 'string') {
                res.status(400).json({
                    success: false,
                    error: { type: 'invalid_request', message: 'Parameter "url" must be a single string value' },
                });
                return;
            }
            let feedUrl = url;
            let feedItems = [];
            // ── Fetch the URL (SSRF-guarded on every redirect hop) ───────────────
            let response;
            try {
                response = await fetchWithSsrfGuard(url, {
                    'User-Agent': userAgent,
                    'Accept': 'application/rss+xml, application/atom+xml, application/xml, text/xml, text/html, */*',
                }, 10000);
            }
            catch (e) {
                if (e instanceof SSRFError) {
                    res.status(400).json({
                        success: false,
                        error: { type: 'invalid_request', message: e.message },
                    });
                    return;
                }
                throw e;
            }
            if (!response.ok) {
                res.status(502).json({
                    success: false,
                    error: { type: 'fetch_error', message: `Failed to fetch URL: HTTP ${response.status}` },
                });
                return;
            }
            const contentType = response.headers.get('content-type') || '';
            const text = await response.text();
            const trimmed = text.trimStart();
            const looksLikeFeed = contentType.includes('xml') ||
                contentType.includes('rss') ||
                contentType.includes('atom') ||
                trimmed.startsWith('<?xml') ||
                trimmed.startsWith('<rss') ||
                trimmed.startsWith('<feed');
            if (looksLikeFeed) {
                // ── Direct feed URL ────────────────────────────────────────────────
                feedItems = parseRSSFeed(text);
            }
            else {
                // ── HTML page — discover feeds ─────────────────────────────────────
                const feedLinks = discoverFeeds(text, url);
                // Only the first discovered feed is tried; a link pointing back at the
                // page itself is skipped because that page was already fetched above.
                if (feedLinks.length > 0 && feedLinks[0].url !== url) {
                    const discoveredUrl = feedLinks[0].url;
                    try {
                        const feedRes = await fetchWithSsrfGuard(discoveredUrl, { 'User-Agent': userAgent }, 10000);
                        feedUrl = discoveredUrl;
                        if (feedRes.ok) {
                            feedItems = parseRSSFeed(await feedRes.text());
                        }
                    }
                    catch {
                        // Best effort: a blocked, unreachable or timed-out discovered feed
                        // must not fail the request — fall through to probing common paths.
                    }
                }
                // If still no items, probe common feed paths
                if (feedItems.length === 0) {
                    const baseUrl = new URL(url).origin;
                    const commonPaths = [
                        '/feed',
                        '/rss',
                        '/rss.xml',
                        '/feed.xml',
                        '/atom.xml',
                        '/feed/rss',
                        '/blog/feed',
                        '/blog/rss',
                        '/index.xml',
                    ];
                    for (const path of commonPaths) {
                        const candidateUrl = baseUrl + path;
                        try {
                            const probeRes = await fetchWithSsrfGuard(candidateUrl, { 'User-Agent': userAgent }, 3000);
                            if (!probeRes.ok) {
                                continue;
                            }
                            const probeText = await probeRes.text();
                            const probeTrimmed = probeText.trimStart();
                            if (probeTrimmed.startsWith('<?xml') ||
                                probeTrimmed.startsWith('<rss') ||
                                probeTrimmed.startsWith('<feed')) {
                                feedItems = parseRSSFeed(probeText);
                                feedUrl = candidateUrl;
                                break;
                            }
                        }
                        catch {
                            // Blocked or failed candidate — continue to the next one
                        }
                    }
                }
            }
            // ── Trim to limit ────────────────────────────────────────────────────
            feedItems = feedItems.slice(0, limit);
            // ── Format response ──────────────────────────────────────────────────
            if (format === 'markdown') {
                const md = feedItems
                    .map((item, i) => `${i + 1}. **${item.title || '(no title)'}**\n   ${item.link || ''}\n   ${item.pubDate || ''}\n   ${item.description?.substring(0, 200) || ''}`)
                    .join('\n\n');
                res.json({
                    success: true,
                    data: {
                        feedUrl,
                        format: 'markdown',
                        content: md,
                        itemCount: feedItems.length,
                    },
                });
            }
            else {
                res.json({
                    success: true,
                    data: {
                        feedUrl,
                        items: feedItems,
                        itemCount: feedItems.length,
                    },
                });
            }
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            res.status(500).json({
                success: false,
                error: { type: 'internal', message },
            });
        }
    });
    return router;
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Queue-backed /v1/fetch and /v1/render endpoints.
|
|
3
|
+
*
|
|
4
|
+
* Used when API_MODE=queue (microservices mode).
|
|
5
|
+
* Instead of calling peel() directly, jobs are enqueued in Bull
|
|
6
|
+
* and results are polled from Redis via GET /v1/jobs/:id.
|
|
7
|
+
*
|
|
8
|
+
* POST /v1/fetch → enqueue in webpeel:fetch queue → return { jobId, status }
|
|
9
|
+
* POST /v1/render → enqueue in webpeel:render queue → return { jobId, status }
|
|
10
|
+
* GET /v1/jobs/:id → return job status + result from Redis
|
|
11
|
+
*/
|
|
12
|
+
import { Router } from 'express';
|
|
13
|
+
export declare function createQueueFetchRouter(): Router;
|