@iflow-mcp/jakeliume-webpeel 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +313 -0
- package/dist/cache.d.ts +30 -0
- package/dist/cache.js +139 -0
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +411 -0
- package/dist/cli/commands/doctor.d.ts +37 -0
- package/dist/cli/commands/doctor.js +371 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1345 -0
- package/dist/cli/commands/guide.d.ts +2 -0
- package/dist/cli/commands/guide.js +183 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +840 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/monitor.d.ts +12 -0
- package/dist/cli/commands/monitor.js +197 -0
- package/dist/cli/commands/observe.d.ts +12 -0
- package/dist/cli/commands/observe.js +158 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +282 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +1021 -0
- package/dist/cli/commands/setup.d.ts +13 -0
- package/dist/cli/commands/setup.js +244 -0
- package/dist/cli/commands/skill.d.ts +15 -0
- package/dist/cli/commands/skill.js +195 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +806 -0
- package/dist/cli-auth.d.ts +75 -0
- package/dist/cli-auth.js +369 -0
- package/dist/cli.d.ts +17 -0
- package/dist/cli.js +99 -0
- package/dist/core/actions.d.ts +69 -0
- package/dist/core/actions.js +495 -0
- package/dist/core/agent.d.ts +98 -0
- package/dist/core/agent.js +558 -0
- package/dist/core/answer.d.ts +42 -0
- package/dist/core/answer.js +395 -0
- package/dist/core/application-tracker.d.ts +84 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/apply.d.ts +162 -0
- package/dist/core/apply.js +816 -0
- package/dist/core/auth-detection.d.ts +35 -0
- package/dist/core/auth-detection.js +358 -0
- package/dist/core/auto-extract.d.ts +82 -0
- package/dist/core/auto-extract.js +604 -0
- package/dist/core/auto-interact.d.ts +23 -0
- package/dist/core/auto-interact.js +246 -0
- package/dist/core/bm25-filter.d.ts +66 -0
- package/dist/core/bm25-filter.js +288 -0
- package/dist/core/branding.d.ts +54 -0
- package/dist/core/branding.js +234 -0
- package/dist/core/browser-fetch.d.ts +323 -0
- package/dist/core/browser-fetch.js +1600 -0
- package/dist/core/browser-pool.d.ts +91 -0
- package/dist/core/browser-pool.js +550 -0
- package/dist/core/budget.d.ts +42 -0
- package/dist/core/budget.js +324 -0
- package/dist/core/business-intel.d.ts +47 -0
- package/dist/core/business-intel.js +279 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +121 -0
- package/dist/core/cf-worker-proxy.d.ts +32 -0
- package/dist/core/cf-worker-proxy.js +87 -0
- package/dist/core/challenge-detection.d.ts +26 -0
- package/dist/core/challenge-detection.js +468 -0
- package/dist/core/change-tracking.d.ts +75 -0
- package/dist/core/change-tracking.js +276 -0
- package/dist/core/chunker.d.ts +46 -0
- package/dist/core/chunker.js +249 -0
- package/dist/core/chunking.d.ts +42 -0
- package/dist/core/chunking.js +181 -0
- package/dist/core/circuit-breaker.d.ts +44 -0
- package/dist/core/circuit-breaker.js +85 -0
- package/dist/core/content-pruner.d.ts +47 -0
- package/dist/core/content-pruner.js +425 -0
- package/dist/core/cookie-cache.d.ts +60 -0
- package/dist/core/cookie-cache.js +163 -0
- package/dist/core/crawl-checkpoint.d.ts +54 -0
- package/dist/core/crawl-checkpoint.js +104 -0
- package/dist/core/crawler.d.ts +84 -0
- package/dist/core/crawler.js +349 -0
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/deep-fetch.d.ts +74 -0
- package/dist/core/deep-fetch.js +405 -0
- package/dist/core/deep-research.d.ts +141 -0
- package/dist/core/deep-research.js +972 -0
- package/dist/core/design-analysis.d.ts +70 -0
- package/dist/core/design-analysis.js +490 -0
- package/dist/core/design-compare.d.ts +38 -0
- package/dist/core/design-compare.js +264 -0
- package/dist/core/diff.d.ts +61 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/dns-cache.d.ts +20 -0
- package/dist/core/dns-cache.js +198 -0
- package/dist/core/documents.d.ts +23 -0
- package/dist/core/documents.js +123 -0
- package/dist/core/domain-memory.d.ts +66 -0
- package/dist/core/domain-memory.js +163 -0
- package/dist/core/domain-verify.d.ts +40 -0
- package/dist/core/domain-verify.js +379 -0
- package/dist/core/engine-ranker.d.ts +112 -0
- package/dist/core/engine-ranker.js +395 -0
- package/dist/core/extract-inline.d.ts +38 -0
- package/dist/core/extract-inline.js +215 -0
- package/dist/core/extract-listings.d.ts +38 -0
- package/dist/core/extract-listings.js +461 -0
- package/dist/core/extract.d.ts +9 -0
- package/dist/core/extract.js +139 -0
- package/dist/core/fetch-cache.d.ts +57 -0
- package/dist/core/fetch-cache.js +95 -0
- package/dist/core/fetcher.d.ts +13 -0
- package/dist/core/fetcher.js +12 -0
- package/dist/core/google-cache.d.ts +29 -0
- package/dist/core/google-cache.js +180 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/hotel-search.d.ts +122 -0
- package/dist/core/hotel-search.js +382 -0
- package/dist/core/http-fetch.d.ts +72 -0
- package/dist/core/http-fetch.js +820 -0
- package/dist/core/human.d.ts +175 -0
- package/dist/core/human.js +680 -0
- package/dist/core/image-caption.d.ts +44 -0
- package/dist/core/image-caption.js +271 -0
- package/dist/core/jobs.d.ts +75 -0
- package/dist/core/jobs.js +634 -0
- package/dist/core/json-ld.d.ts +15 -0
- package/dist/core/json-ld.js +617 -0
- package/dist/core/language-detect.d.ts +18 -0
- package/dist/core/language-detect.js +135 -0
- package/dist/core/links.d.ts +10 -0
- package/dist/core/links.js +44 -0
- package/dist/core/llm-extract.d.ts +71 -0
- package/dist/core/llm-extract.js +507 -0
- package/dist/core/llm-provider.d.ts +100 -0
- package/dist/core/llm-provider.js +702 -0
- package/dist/core/local-search.d.ts +60 -0
- package/dist/core/local-search.js +308 -0
- package/dist/core/logger.d.ts +28 -0
- package/dist/core/logger.js +104 -0
- package/dist/core/map.d.ts +33 -0
- package/dist/core/map.js +127 -0
- package/dist/core/markdown.d.ts +92 -0
- package/dist/core/markdown.js +809 -0
- package/dist/core/metadata.d.ts +34 -0
- package/dist/core/metadata.js +422 -0
- package/dist/core/observe.d.ts +113 -0
- package/dist/core/observe.js +395 -0
- package/dist/core/ocr.d.ts +12 -0
- package/dist/core/ocr.js +33 -0
- package/dist/core/paginate.d.ts +31 -0
- package/dist/core/paginate.js +106 -0
- package/dist/core/pdf.d.ts +8 -0
- package/dist/core/pdf.js +25 -0
- package/dist/core/peel-tls.d.ts +25 -0
- package/dist/core/peel-tls.js +220 -0
- package/dist/core/pipeline.d.ts +132 -0
- package/dist/core/pipeline.js +1666 -0
- package/dist/core/profiles.d.ts +61 -0
- package/dist/core/profiles.js +350 -0
- package/dist/core/prompt-guard.d.ts +30 -0
- package/dist/core/prompt-guard.js +119 -0
- package/dist/core/proxy-config.d.ts +90 -0
- package/dist/core/proxy-config.js +172 -0
- package/dist/core/quick-answer.d.ts +53 -0
- package/dist/core/quick-answer.js +833 -0
- package/dist/core/rate-governor.d.ts +80 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/readability.d.ts +57 -0
- package/dist/core/readability.js +533 -0
- package/dist/core/research.d.ts +66 -0
- package/dist/core/research.js +270 -0
- package/dist/core/retry.d.ts +60 -0
- package/dist/core/retry.js +119 -0
- package/dist/core/safe-browsing.d.ts +30 -0
- package/dist/core/safe-browsing.js +206 -0
- package/dist/core/schema-extraction.d.ts +66 -0
- package/dist/core/schema-extraction.js +352 -0
- package/dist/core/schema-postprocess.d.ts +32 -0
- package/dist/core/schema-postprocess.js +469 -0
- package/dist/core/schema-templates.d.ts +19 -0
- package/dist/core/schema-templates.js +143 -0
- package/dist/core/screenshot.d.ts +224 -0
- package/dist/core/screenshot.js +207 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +243 -0
- package/dist/core/search-provider.js +1629 -0
- package/dist/core/searxng-provider.d.ts +35 -0
- package/dist/core/searxng-provider.js +105 -0
- package/dist/core/selective-evidence.d.ts +151 -0
- package/dist/core/selective-evidence.js +389 -0
- package/dist/core/site-search.d.ts +44 -0
- package/dist/core/site-search.js +252 -0
- package/dist/core/sitemap.d.ts +23 -0
- package/dist/core/sitemap.js +105 -0
- package/dist/core/source-credibility.d.ts +29 -0
- package/dist/core/source-credibility.js +584 -0
- package/dist/core/source-scoring.d.ts +166 -0
- package/dist/core/source-scoring.js +396 -0
- package/dist/core/stemmer.d.ts +38 -0
- package/dist/core/stemmer.js +509 -0
- package/dist/core/strategies.d.ts +104 -0
- package/dist/core/strategies.js +1044 -0
- package/dist/core/strategy-hooks.d.ts +145 -0
- package/dist/core/strategy-hooks.js +74 -0
- package/dist/core/structured-extract.d.ts +43 -0
- package/dist/core/structured-extract.js +550 -0
- package/dist/core/summarize.d.ts +17 -0
- package/dist/core/summarize.js +78 -0
- package/dist/core/synonyms.d.ts +42 -0
- package/dist/core/synonyms.js +184 -0
- package/dist/core/system-monitor.d.ts +61 -0
- package/dist/core/system-monitor.js +133 -0
- package/dist/core/table-format.d.ts +30 -0
- package/dist/core/table-format.js +146 -0
- package/dist/core/threat-feeds.d.ts +23 -0
- package/dist/core/threat-feeds.js +104 -0
- package/dist/core/timing.d.ts +21 -0
- package/dist/core/timing.js +33 -0
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/core/user-agents.d.ts +82 -0
- package/dist/core/user-agents.js +239 -0
- package/dist/core/vertical-search.d.ts +54 -0
- package/dist/core/vertical-search.js +158 -0
- package/dist/core/watch-manager.d.ts +175 -0
- package/dist/core/watch-manager.js +416 -0
- package/dist/core/watch.d.ts +101 -0
- package/dist/core/watch.js +389 -0
- package/dist/core/youtube.d.ts +130 -0
- package/dist/core/youtube.js +1175 -0
- package/dist/ee/challenge-re-export.d.ts +1 -0
- package/dist/ee/challenge-re-export.js +1 -0
- package/dist/ee/challenge-solver.d.ts +72 -0
- package/dist/ee/challenge-solver.js +720 -0
- package/dist/ee/domain-extractors.d.ts +8 -0
- package/dist/ee/domain-extractors.js +8 -0
- package/dist/ee/domain-intel.d.ts +16 -0
- package/dist/ee/domain-intel.js +133 -0
- package/dist/ee/extractors/allrecipes.d.ts +2 -0
- package/dist/ee/extractors/allrecipes.js +120 -0
- package/dist/ee/extractors/amazon.d.ts +2 -0
- package/dist/ee/extractors/amazon.js +78 -0
- package/dist/ee/extractors/arxiv.d.ts +2 -0
- package/dist/ee/extractors/arxiv.js +137 -0
- package/dist/ee/extractors/bestbuy.d.ts +2 -0
- package/dist/ee/extractors/bestbuy.js +78 -0
- package/dist/ee/extractors/carscom.d.ts +2 -0
- package/dist/ee/extractors/carscom.js +121 -0
- package/dist/ee/extractors/coingecko.d.ts +2 -0
- package/dist/ee/extractors/coingecko.js +134 -0
- package/dist/ee/extractors/craigslist.d.ts +2 -0
- package/dist/ee/extractors/craigslist.js +92 -0
- package/dist/ee/extractors/devto.d.ts +2 -0
- package/dist/ee/extractors/devto.js +135 -0
- package/dist/ee/extractors/ebay.d.ts +2 -0
- package/dist/ee/extractors/ebay.js +90 -0
- package/dist/ee/extractors/espn.d.ts +2 -0
- package/dist/ee/extractors/espn.js +260 -0
- package/dist/ee/extractors/etsy.d.ts +2 -0
- package/dist/ee/extractors/etsy.js +52 -0
- package/dist/ee/extractors/facebook.d.ts +2 -0
- package/dist/ee/extractors/facebook.js +46 -0
- package/dist/ee/extractors/github.d.ts +2 -0
- package/dist/ee/extractors/github.js +196 -0
- package/dist/ee/extractors/google-flights.d.ts +2 -0
- package/dist/ee/extractors/google-flights.js +176 -0
- package/dist/ee/extractors/hackernews.d.ts +2 -0
- package/dist/ee/extractors/hackernews.js +147 -0
- package/dist/ee/extractors/imdb.d.ts +2 -0
- package/dist/ee/extractors/imdb.js +172 -0
- package/dist/ee/extractors/index.d.ts +26 -0
- package/dist/ee/extractors/index.js +247 -0
- package/dist/ee/extractors/instagram.d.ts +2 -0
- package/dist/ee/extractors/instagram.js +102 -0
- package/dist/ee/extractors/kalshi.d.ts +2 -0
- package/dist/ee/extractors/kalshi.js +121 -0
- package/dist/ee/extractors/kayak-cars.d.ts +2 -0
- package/dist/ee/extractors/kayak-cars.js +270 -0
- package/dist/ee/extractors/linkedin.d.ts +2 -0
- package/dist/ee/extractors/linkedin.js +113 -0
- package/dist/ee/extractors/medium.d.ts +2 -0
- package/dist/ee/extractors/medium.js +130 -0
- package/dist/ee/extractors/news.d.ts +4 -0
- package/dist/ee/extractors/news.js +173 -0
- package/dist/ee/extractors/npm.d.ts +2 -0
- package/dist/ee/extractors/npm.js +86 -0
- package/dist/ee/extractors/pdf.d.ts +2 -0
- package/dist/ee/extractors/pdf.js +108 -0
- package/dist/ee/extractors/pinterest.d.ts +2 -0
- package/dist/ee/extractors/pinterest.js +34 -0
- package/dist/ee/extractors/polymarket.d.ts +2 -0
- package/dist/ee/extractors/polymarket.js +358 -0
- package/dist/ee/extractors/producthunt.d.ts +2 -0
- package/dist/ee/extractors/producthunt.js +88 -0
- package/dist/ee/extractors/pubmed.d.ts +2 -0
- package/dist/ee/extractors/pubmed.js +162 -0
- package/dist/ee/extractors/pypi.d.ts +2 -0
- package/dist/ee/extractors/pypi.js +80 -0
- package/dist/ee/extractors/reddit.d.ts +2 -0
- package/dist/ee/extractors/reddit.js +438 -0
- package/dist/ee/extractors/redfin.d.ts +2 -0
- package/dist/ee/extractors/redfin.js +156 -0
- package/dist/ee/extractors/semanticscholar.d.ts +2 -0
- package/dist/ee/extractors/semanticscholar.js +131 -0
- package/dist/ee/extractors/shared.d.ts +12 -0
- package/dist/ee/extractors/shared.js +76 -0
- package/dist/ee/extractors/soundcloud.d.ts +2 -0
- package/dist/ee/extractors/soundcloud.js +34 -0
- package/dist/ee/extractors/sportsbetting.d.ts +2 -0
- package/dist/ee/extractors/sportsbetting.js +37 -0
- package/dist/ee/extractors/spotify.d.ts +2 -0
- package/dist/ee/extractors/spotify.js +34 -0
- package/dist/ee/extractors/stackoverflow.d.ts +2 -0
- package/dist/ee/extractors/stackoverflow.js +61 -0
- package/dist/ee/extractors/substack.d.ts +2 -0
- package/dist/ee/extractors/substack.js +115 -0
- package/dist/ee/extractors/substackroot.d.ts +2 -0
- package/dist/ee/extractors/substackroot.js +46 -0
- package/dist/ee/extractors/tiktok.d.ts +2 -0
- package/dist/ee/extractors/tiktok.js +29 -0
- package/dist/ee/extractors/tradingview.d.ts +2 -0
- package/dist/ee/extractors/tradingview.js +182 -0
- package/dist/ee/extractors/twitch.d.ts +2 -0
- package/dist/ee/extractors/twitch.js +36 -0
- package/dist/ee/extractors/twitter.d.ts +2 -0
- package/dist/ee/extractors/twitter.js +327 -0
- package/dist/ee/extractors/types.d.ts +14 -0
- package/dist/ee/extractors/types.js +1 -0
- package/dist/ee/extractors/walmart.d.ts +2 -0
- package/dist/ee/extractors/walmart.js +50 -0
- package/dist/ee/extractors/weather.d.ts +2 -0
- package/dist/ee/extractors/weather.js +133 -0
- package/dist/ee/extractors/wikipedia.d.ts +4 -0
- package/dist/ee/extractors/wikipedia.js +235 -0
- package/dist/ee/extractors/yelp.d.ts +2 -0
- package/dist/ee/extractors/yelp.js +216 -0
- package/dist/ee/extractors/youtube.d.ts +2 -0
- package/dist/ee/extractors/youtube.js +189 -0
- package/dist/ee/extractors/zillow.d.ts +54 -0
- package/dist/ee/extractors/zillow.js +247 -0
- package/dist/ee/extractors-re-export.d.ts +1 -0
- package/dist/ee/extractors-re-export.js +1 -0
- package/dist/ee/premium-hooks.d.ts +20 -0
- package/dist/ee/premium-hooks.js +50 -0
- package/dist/ee/spa-detection.d.ts +2 -0
- package/dist/ee/spa-detection.js +2 -0
- package/dist/ee/stability.d.ts +4 -0
- package/dist/ee/stability.js +29 -0
- package/dist/ee/swr-cache.d.ts +14 -0
- package/dist/ee/swr-cache.js +34 -0
- package/dist/index.d.ts +143 -0
- package/dist/index.js +291 -0
- package/dist/integrations/index.d.ts +2 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/langchain.d.ts +64 -0
- package/dist/integrations/langchain.js +115 -0
- package/dist/integrations/llamaindex.d.ts +50 -0
- package/dist/integrations/llamaindex.js +91 -0
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +395 -0
- package/dist/mcp/handlers/extract.d.ts +7 -0
- package/dist/mcp/handlers/extract.js +135 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +63 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +40 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/observe.d.ts +8 -0
- package/dist/mcp/handlers/observe.js +37 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +78 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +7 -0
- package/dist/mcp/server.js +108 -0
- package/dist/mcp/smart-router.d.ts +23 -0
- package/dist/mcp/smart-router.js +178 -0
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +632 -0
- package/dist/server/auth-store.d.ts +28 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/bull-queues.d.ts +60 -0
- package/dist/server/bull-queues.js +90 -0
- package/dist/server/email-service.d.ts +55 -0
- package/dist/server/email-service.js +291 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/audit-log.d.ts +14 -0
- package/dist/server/middleware/audit-log.js +73 -0
- package/dist/server/middleware/auth.d.ts +35 -0
- package/dist/server/middleware/auth.js +225 -0
- package/dist/server/middleware/rate-limit.d.ts +50 -0
- package/dist/server/middleware/rate-limit.js +270 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +201 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +146 -0
- package/dist/server/pg-auth-store.js +576 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +79 -0
- package/dist/server/routes/admin-active.d.ts +7 -0
- package/dist/server/routes/admin-active.js +120 -0
- package/dist/server/routes/admin-stats.d.ts +7 -0
- package/dist/server/routes/admin-stats.js +176 -0
- package/dist/server/routes/agent.d.ts +24 -0
- package/dist/server/routes/agent.js +480 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +295 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cache-warm.d.ts +25 -0
- package/dist/server/routes/cache-warm.js +212 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/crawl.d.ts +13 -0
- package/dist/server/routes/crawl.js +287 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/deep-research.d.ts +11 -0
- package/dist/server/routes/deep-research.js +232 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +14 -0
- package/dist/server/routes/extract.js +325 -0
- package/dist/server/routes/feed.d.ts +15 -0
- package/dist/server/routes/feed.js +311 -0
- package/dist/server/routes/fetch-queue.d.ts +13 -0
- package/dist/server/routes/fetch-queue.js +357 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +1274 -0
- package/dist/server/routes/go.d.ts +14 -0
- package/dist/server/routes/go.js +81 -0
- package/dist/server/routes/health.d.ts +11 -0
- package/dist/server/routes/health.js +141 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +574 -0
- package/dist/server/routes/map.d.ts +11 -0
- package/dist/server/routes/map.js +116 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +197 -0
- package/dist/server/routes/metrics.d.ts +37 -0
- package/dist/server/routes/metrics.js +149 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/reader.d.ts +18 -0
- package/dist/server/routes/reader.js +192 -0
- package/dist/server/routes/research.d.ts +14 -0
- package/dist/server/routes/research.js +482 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +820 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +874 -0
- package/dist/server/routes/session.d.ts +17 -0
- package/dist/server/routes/session.js +548 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +102 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +72 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
- package/dist/server/routes/smart-search/handlers/general.js +717 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +1309 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +154 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +546 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +264 -0
- package/dist/server/routes/smart-search/llm.d.ts +16 -0
- package/dist/server/routes/smart-search/llm.js +70 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +81 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +20 -0
- package/dist/server/routes/smart-search/utils.js +146 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +296 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/usage.d.ts +9 -0
- package/dist/server/routes/usage.js +279 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1867 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +14 -0
- package/dist/server/sentry.js +104 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/dist/types.d.ts +552 -0
- package/dist/types.js +39 -0
- package/llms.txt +105 -0
- package/package.json +189 -0
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Domain-aware structured extractors for WebPeel.
|
|
3
|
+
*
|
|
4
|
+
* This file re-exports from individual extractor files for backward compatibility.
|
|
5
|
+
* Each extractor now lives in its own file under src/ee/extractors/.
|
|
6
|
+
*/
|
|
7
|
+
export { getDomainExtractor, hasDomainExtractor, extractDomainData, clearExtractorCache, setExtractorRedis, } from './extractors/index.js';
|
|
8
|
+
export type { DomainExtractResult, DomainExtractor } from './extractors/index.js';
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Domain-aware structured extractors for WebPeel.
|
|
3
|
+
*
|
|
4
|
+
* This file re-exports from individual extractor files for backward compatibility.
|
|
5
|
+
* Each extractor now lives in its own file under src/ee/extractors/.
|
|
6
|
+
*/
|
|
7
|
+
// Re-exported from individual extractor files for backward compatibility
|
|
8
|
+
export { getDomainExtractor, hasDomainExtractor, extractDomainData, clearExtractorCache, setExtractorRedis, } from './extractors/index.js';
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Domain Intelligence — premium server-only optimisation.
|
|
3
|
+
*
|
|
4
|
+
* Learns from historical fetch outcomes which domains require browser or
|
|
5
|
+
* stealth mode, so subsequent requests skip the slow simple→browser
|
|
6
|
+
* escalation path and go straight to the right strategy.
|
|
7
|
+
*
|
|
8
|
+
* Uses an exponential moving average for latency tracking and requires a
|
|
9
|
+
* minimum sample count before issuing recommendations to avoid false
|
|
10
|
+
* positives from one-off failures.
|
|
11
|
+
*
|
|
12
|
+
* This module is NOT shipped in the npm package.
|
|
13
|
+
*/
|
|
14
|
+
import type { StrategyHooks } from '../core/strategy-hooks.js';
|
|
15
|
+
export declare function clearDomainIntel(): void;
|
|
16
|
+
export declare function createDomainIntelHooks(): Pick<StrategyHooks, 'getDomainRecommendation' | 'recordDomainResult'>;
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Domain Intelligence — premium server-only optimisation.
|
|
3
|
+
*
|
|
4
|
+
* Learns from historical fetch outcomes which domains require browser or
|
|
5
|
+
* stealth mode, so subsequent requests skip the slow simple→browser
|
|
6
|
+
* escalation path and go straight to the right strategy.
|
|
7
|
+
*
|
|
8
|
+
* Uses an exponential moving average for latency tracking and requires a
|
|
9
|
+
* minimum sample count before issuing recommendations to avoid false
|
|
10
|
+
* positives from one-off failures.
|
|
11
|
+
*
|
|
12
|
+
* This module is NOT shipped in the npm package.
|
|
13
|
+
*/
|
|
14
|
+
/* ---------- configuration ----------------------------------------------- */
|
|
15
|
+
/** Upper bound on tracked domains; recordDomainResult evicts the oldest entries beyond it. */
const MAX_DOMAINS = 500;
/** Age in milliseconds (one hour) after which an entry counts as expired. */
const TTL_MS = 1000 * 60 * 60;
/** Weight given to the newest latency sample in the exponential moving average. */
const EMA_ALPHA = 0.3;
/** Number of samples a domain needs before a recommendation is issued. */
const MIN_SAMPLES = 3;
/* ---------- state ------------------------------------------------------- */
/** Per-hostname intel records; insertion order is used as recency order. */
const domainIntel = new Map();
/** Per-hostname tallies of how often each fetch method was recorded. */
const methodCounts = new Map();
|
|
22
|
+
/* ---------- internals --------------------------------------------------- */
|
|
23
|
+
/**
 * Derive the map key for a URL: its hostname in lower case.
 *
 * @param {string} url - URL to parse.
 * @returns {string} The lower-cased hostname, or '' when the URL cannot be parsed.
 */
function domainKey(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        // Unparseable input is reported as an empty key; callers treat '' as "skip".
        return '';
    }
    return parsed.hostname.toLowerCase();
}
|
|
31
|
+
/**
 * Drop every domain whose record was last seen more than TTL_MS before `now`.
 * The matching entry in methodCounts is removed as well.
 *
 * @param {number} now - Current timestamp in milliseconds.
 * @returns {void}
 */
function prune(now) {
    const expiredKeys = [];
    domainIntel.forEach((record, host) => {
        if (now - record.lastSeen > TTL_MS) {
            expiredKeys.push(host);
        }
    });
    for (const host of expiredKeys) {
        domainIntel.delete(host);
        methodCounts.delete(host);
    }
}
|
|
39
|
+
/* ---------- hook implementations ---------------------------------------- */
|
|
40
|
+
/**
 * Suggest a fetch mode for a URL based on what has been recorded for its hostname.
 *
 * Returns null when the URL has no usable hostname, when nothing is recorded,
 * when the record is older than TTL_MS (the record is deleted in that case),
 * when fewer than MIN_SAMPLES samples exist, or when the samples do not point
 * unanimously at browser/stealth.
 *
 * @param {string} url - URL about to be fetched.
 * @returns {{ mode: 'stealth' } | { mode: 'browser' } | null} Recommendation, or null.
 */
function getDomainRecommendation(url) {
    const host = domainKey(url);
    const record = host ? domainIntel.get(host) : undefined;
    if (!record) {
        return null;
    }
    const age = Date.now() - record.lastSeen;
    if (age > TTL_MS) {
        // Expired: forget the domain entirely instead of recommending from old data.
        domainIntel.delete(host);
        methodCounts.delete(host);
        return null;
    }
    if (record.sampleCount < MIN_SAMPLES) {
        return null;
    }
    const tally = methodCounts.get(host);
    if (!tally) {
        return null;
    }
    // Re-insert so this host becomes the most recently used entry of the Map.
    domainIntel.delete(host);
    domainIntel.set(host, record);
    const { sampleCount, needsStealth, needsBrowser } = record;
    // Every recorded sample used stealth.
    const stealthEveryTime = tally.stealth === sampleCount && needsStealth;
    if (stealthEveryTime) {
        return { mode: 'stealth' };
    }
    // No sample used the simple method; all of them were browser or stealth.
    const neverSimple = tally.simple === 0 &&
        tally.browser + tally.stealth === sampleCount &&
        needsBrowser;
    return neverSimple ? { mode: 'browser' } : null;
}
|
|
73
|
+
/**
 * Record the outcome of one fetch so later requests to the same hostname can
 * be given a recommendation.
 *
 * Updates the hostname's intel record (browser/stealth flags, latency moving
 * average, last-seen time, sample count) and its per-method tally, then evicts
 * the oldest entries while more than MAX_DOMAINS hostnames are tracked.
 * URLs without a usable hostname are ignored.
 *
 * @param {string} url - URL that was fetched.
 * @param {string} method - Fetch method that was used ('simple', 'browser' or
 *   'stealth'; any other name is tallied under its own key).
 * @param {number} latencyMs - Observed latency; non-finite or non-positive
 *   values fall back to the current average (or 0 for a new hostname).
 * @returns {void}
 */
function recordDomainResult(url, method, latencyMs) {
    const key = domainKey(url);
    if (!key)
        return;
    const now = Date.now();
    prune(now);
    const existing = domainIntel.get(key);
    const sanitizedLatency = Number.isFinite(latencyMs) && latencyMs > 0
        ? latencyMs
        : (existing?.avgLatencyMs ?? 0);
    const usedStealth = method === 'stealth';
    const usedBrowser = method === 'browser' || usedStealth;
    const next = existing
        ? {
            // Flags are sticky: once a domain needed browser/stealth it stays flagged.
            needsBrowser: existing.needsBrowser || usedBrowser,
            needsStealth: existing.needsStealth || usedStealth,
            // An average of 0 means "no valid latency yet", so seed it directly.
            avgLatencyMs: existing.avgLatencyMs === 0
                ? sanitizedLatency
                : existing.avgLatencyMs * (1 - EMA_ALPHA) +
                    sanitizedLatency * EMA_ALPHA,
            lastSeen: now,
            sampleCount: existing.sampleCount + 1,
        }
        : {
            needsBrowser: usedBrowser,
            needsStealth: usedStealth,
            avgLatencyMs: sanitizedLatency,
            lastSeen: now,
            sampleCount: 1,
        };
    const existingCounts = methodCounts.get(key) ?? {
        simple: 0,
        browser: 0,
        stealth: 0,
    };
    // Start from 0 for a method that has no own tally yet. A plain `+= 1`
    // would store NaN for an unknown method name (undefined + 1) and a string
    // for inherited names such as 'constructor'.
    const previousCount = Object.prototype.hasOwnProperty.call(existingCounts, method)
        ? existingCounts[method]
        : 0;
    existingCounts[method] = previousCount + 1;
    // Delete-then-set for LRU ordering
    domainIntel.delete(key);
    domainIntel.set(key, next);
    methodCounts.set(key, existingCounts);
    // Evict oldest when over capacity
    while (domainIntel.size > MAX_DOMAINS) {
        const oldest = domainIntel.keys().next().value;
        if (!oldest)
            break;
        domainIntel.delete(oldest);
        methodCounts.delete(oldest);
    }
}
|
|
122
|
+
/* ---------- cleanup ----------------------------------------------------- */
|
|
123
|
+
/**
 * Empty both module-level stores: the per-domain intel map and the
 * per-domain method tally.
 *
 * @returns {void}
 */
export function clearDomainIntel() {
    for (const store of [domainIntel, methodCounts]) {
        store.clear();
    }
}
|
|
127
|
+
/* ---------- public export ----------------------------------------------- */
|
|
128
|
+
/**
 * Bundle this module's two domain-intel functions into a single hooks object.
 *
 * @returns {{getDomainRecommendation: Function, recordDomainResult: Function}}
 *   A fresh object exposing the recommendation lookup and the result recorder.
 */
export function createDomainIntelHooks() {
    const hooks = {
        getDomainRecommendation,
        recordDomainResult,
    };
    return hooks;
}
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import { tryParseJson } from './shared.js';
|
|
2
|
+
// ---------------------------------------------------------------------------
|
|
3
|
+
// 15. Allrecipes (Recipe Sites) extractor
|
|
4
|
+
// ---------------------------------------------------------------------------
|
|
5
|
+
/**
 * Extract a recipe from an Allrecipes-style page.
 *
 * Looks for a Schema.org `Recipe` node in the page's JSON-LD (as the root
 * object, inside a root array, or inside an `@graph` list). When none is
 * found, falls back to scraping title, description, ingredients and steps
 * from the HTML.
 *
 * @param {string} html - Raw page HTML.
 * @param {string} url - Page URL; copied into the structured result.
 * @returns {Promise<{domain: string, type: string, structured: object, cleanContent: string} | null>}
 *   The extracted recipe, or `null` when no title is found or extraction throws.
 */
export async function allrecipesExtractor(html, url) {
    try {
        const { load } = await import('cheerio');
        const $ = load(html);
        // `@type` may be a single string or a list of types.
        const isRecipeNode = (node) => {
            const type = node?.['@type'];
            return type === 'Recipe' || (Array.isArray(type) && type.includes('Recipe'));
        };
        // Schema.org accepts a bare value wherever a list is allowed, so
        // normalise to an array before iterating (a bare string must not be
        // walked character by character).
        const toList = (value) => {
            if (Array.isArray(value))
                return value;
            return value == null || value === '' ? [] : [value];
        };
        // Try Schema.org Recipe JSON-LD first
        let recipe = null;
        $('script[type="application/ld+json"]').each((_, el) => {
            if (recipe)
                return;
            const parsed = tryParseJson($(el).html() || '');
            for (const item of toList(parsed)) {
                if (isRecipeNode(item)) {
                    recipe = item;
                    break;
                }
                // Sometimes it's nested in @graph
                const graph = item?.['@graph'];
                const graphRecipe = Array.isArray(graph) ? graph.find(isRecipeNode) : undefined;
                if (graphRecipe) {
                    recipe = graphRecipe;
                    break;
                }
            }
        });
        let title;
        let ingredients = [];
        const instructions = [];
        let prepTime = '';
        let cookTime = '';
        let totalTime = '';
        let servings = '';
        let rating = '';
        let reviewCount = '';
        let description = '';
        if (recipe) {
            title = recipe.name || '';
            description = recipe.description || '';
            ingredients = toList(recipe.recipeIngredient)
                .filter((entry) => typeof entry === 'string')
                .map((entry) => entry.trim());
            // Instructions can be a single string, a list of strings, HowToStep
            // objects, or HowToSection objects wrapping further steps.
            for (const step of toList(recipe.recipeInstructions)) {
                if (typeof step === 'string') {
                    instructions.push(step.trim());
                }
                else if (typeof step?.text === 'string' && step.text) {
                    instructions.push(step.text.trim());
                }
                else if (step?.['@type'] === 'HowToSection') {
                    for (const sub of toList(step.itemListElement)) {
                        if (typeof sub?.text === 'string' && sub.text)
                            instructions.push(sub.text.trim());
                    }
                }
            }
            // Parse ISO 8601 duration (PT30M, PT1H30M) into "1h 30m"
            const parseDuration = (d) => {
                if (typeof d !== 'string' || !d)
                    return '';
                const h = d.match(/(\d+)H/)?.[1];
                const m = d.match(/(\d+)M/)?.[1];
                return [h ? `${h}h` : '', m ? `${m}m` : ''].filter(Boolean).join(' ');
            };
            prepTime = parseDuration(recipe.prepTime || '');
            cookTime = parseDuration(recipe.cookTime || '');
            totalTime = parseDuration(recipe.totalTime || '');
            servings = String(recipe.recipeYield || '');
            rating = recipe.aggregateRating?.ratingValue ? String(recipe.aggregateRating.ratingValue) : '';
            reviewCount = recipe.aggregateRating?.reviewCount ? String(recipe.aggregateRating.reviewCount) : '';
        }
        else {
            // HTML fallback
            title = $('h1').first().text().trim() ||
                $('meta[property="og:title"]').attr('content') || '';
            description = $('meta[property="og:description"]').attr('content') || '';
            $('[class*="ingredient"]').each((_, el) => {
                const text = $(el).text().trim();
                // Long matches are treated as containers/paragraphs, not single ingredients.
                if (text && text.length < 200)
                    ingredients.push(text);
            });
            $('[class*="instruction"] li, [class*="step"] li').each((_, el) => {
                const text = $(el).text().trim();
                if (text)
                    instructions.push(text);
            });
        }
        if (!title)
            return null;
        const structured = {
            title, description, ingredients, instructions,
            prepTime, cookTime, totalTime, servings, rating, reviewCount, url,
        };
        const timeParts = [
            prepTime ? `Prep: ${prepTime}` : '',
            cookTime ? `Cook: ${cookTime}` : '',
            totalTime ? `Total: ${totalTime}` : '',
        ].filter(Boolean).join(' | ');
        const metaLine = [
            timeParts,
            servings ? `Servings: ${servings}` : '',
            rating ? `Rating: ${rating}${reviewCount ? ` (${reviewCount} reviews)` : ''}` : '',
        ].filter(Boolean).join(' | ');
        const ingredientsMd = ingredients.length
            ? `## Ingredients\n\n${ingredients.map(i => `- ${i}`).join('\n')}`
            : '';
        const instructionsMd = instructions.length
            ? `## Instructions\n\n${instructions.map((s, i) => `${i + 1}. ${s}`).join('\n')}`
            : '';
        const cleanContent = `# 🍽️ ${title}\n\n${metaLine ? `*${metaLine}*\n\n` : ''}${description ? description + '\n\n' : ''}${ingredientsMd}\n\n${instructionsMd}`.trim();
        return { domain: 'allrecipes.com', type: 'recipe', structured, cleanContent };
    }
    catch {
        // Best-effort extractor: any failure means "no structured result".
        return null;
    }
}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import { tryParseJson } from './shared.js';
|
|
2
|
+
// ---------------------------------------------------------------------------
|
|
3
|
+
// 12. Amazon Products extractor
|
|
4
|
+
// ---------------------------------------------------------------------------
|
|
5
|
+
/**
 * Extract product details from an Amazon product page.
 *
 * Prefers a Schema.org `Product` node from the page's JSON-LD (root object,
 * root array, or `@graph` list) and falls back to Amazon's HTML selectors
 * and Open Graph meta tags for each field.
 *
 * @param {string} html - Raw page HTML.
 * @param {string} url - Page URL; used to derive the ASIN and copied into the result.
 * @returns {Promise<{domain: string, type: string, structured: object, cleanContent: string} | null>}
 *   The extracted product, or `null` when no title is found or extraction throws.
 */
export async function amazonExtractor(html, url) {
    try {
        const { load } = await import('cheerio');
        const $ = load(html);
        // `@type` may be a single string or a list of types.
        const isProductNode = (node) => {
            const type = node?.['@type'];
            return type === 'Product' || (Array.isArray(type) && type.includes('Product'));
        };
        // Extract from JSON-LD first
        let jsonLdData = null;
        $('script[type="application/ld+json"]').each((_, el) => {
            if (jsonLdData)
                return;
            const parsed = tryParseJson($(el).html() || '');
            // The script body can be a single node or an array of nodes.
            const candidates = Array.isArray(parsed) ? parsed : [parsed];
            for (const item of candidates) {
                if (isProductNode(item)) {
                    jsonLdData = item;
                    break;
                }
                const graph = item?.['@graph'];
                const graphProduct = Array.isArray(graph) ? graph.find(isProductNode) : undefined;
                if (graphProduct) {
                    jsonLdData = graphProduct;
                    break;
                }
            }
        });
        // Meta tag fallbacks
        const ogTitle = $('meta[property="og:title"]').attr('content') || '';
        const ogDescription = $('meta[property="og:description"]').attr('content') || '';
        const ogImage = $('meta[property="og:image"]').attr('content') || '';
        // HTML selectors
        const title = jsonLdData?.name ||
            $('#productTitle').text().trim() ||
            $('#title').text().trim() ||
            ogTitle;
        if (!title)
            return null;
        const priceWhole = $('#priceblock_ourprice').text().trim() ||
            $('.a-price .a-offscreen').first().text().trim() ||
            $('[data-asin-price]').first().attr('data-asin-price') || '';
        const rating = jsonLdData?.aggregateRating?.ratingValue ||
            $('#acrPopover .a-size-base.a-color-base').first().text().trim() ||
            $('span[data-hook="rating-out-of-text"]').text().trim() || '';
        const reviewCount = jsonLdData?.aggregateRating?.reviewCount ||
            $('#acrCustomerReviewText').text().replace(/[^0-9,]/g, '').trim() || '';
        // `offers` may be a single Offer or a list; availability URLs appear
        // with either the http or https schema.org prefix.
        const offers = jsonLdData?.offers;
        const offer = Array.isArray(offers) ? offers[0] : offers;
        const ldAvailability = typeof offer?.availability === 'string'
            ? offer.availability.replace(/^https?:\/\/schema\.org\//, '')
            : '';
        const availability = ldAvailability ||
            $('#availability span').first().text().trim() || '';
        const description = jsonLdData?.description ||
            $('#feature-bullets .a-list-item').map((_, el) => $(el).text().trim()).get().join('\n') ||
            $('#productDescription p').text().trim() ||
            ogDescription;
        const features = [];
        $('#feature-bullets li').each((_, el) => {
            const text = $(el).text().trim();
            // Skip Amazon's "Make sure this fits" widget, which is not a feature bullet.
            if (text && !text.includes('Make sure this fits'))
                features.push(text);
        });
        // ASIN from URL (/dp/<ASIN> or /gp/product/<ASIN>)
        const asinMatch = url.match(/\/(?:dp|gp\/product)\/([A-Z0-9]{10})/i);
        const asin = asinMatch?.[1] || '';
        const structured = {
            title,
            price: priceWhole,
            rating,
            reviewCount,
            availability,
            description,
            features,
            asin,
            image: ogImage,
            url,
        };
        const ratingLine = rating ? `\n**Rating:** ${rating}${reviewCount ? ` (${reviewCount} reviews)` : ''}` : '';
        const priceLine = priceWhole ? `\n**Price:** ${priceWhole}` : '';
        const availLine = availability ? `\n**Availability:** ${availability}` : '';
        const featuresSection = features.length
            ? `\n\n## Features\n\n${features.map(f => `- ${f}`).join('\n')}`
            : '';
        // Descriptions are capped at 1000 characters in the markdown output only.
        const descSection = description ? `\n\n## Description\n\n${description.substring(0, 1000)}` : '';
        const cleanContent = `# 🛒 ${title}${priceLine}${ratingLine}${availLine}${descSection}${featuresSection}`;
        return { domain: 'amazon.com', type: 'product', structured, cleanContent };
    }
    catch {
        // Best-effort extractor: any failure means "no structured result".
        return null;
    }
}
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import { simpleFetch } from '../../core/fetcher.js';
|
|
2
|
+
import { stripHtml } from './shared.js';
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
// 7. ArXiv extractor (ArXiv API)
|
|
5
|
+
// ---------------------------------------------------------------------------
|
|
6
|
+
/**
 * Extract arXiv content through the arXiv export API instead of the page HTML.
 *
 * Two URL shapes are handled:
 *  - `/search...?query=...` → a markdown table of up to 10 matching papers;
 *  - `/abs|pdf|html/<id>`   → metadata and abstract for a single paper.
 *
 * @param {string} _html - Unused; all data comes from the API.
 * @param {string} url - The arXiv URL being extracted. Must be a valid absolute
 *   URL (an invalid one makes `new URL` throw, rejecting the returned promise).
 * @returns {Promise<{domain: string, type: string, structured: object, cleanContent: string} | null>}
 *   The extracted result, or `null` when the URL is not recognised, the API
 *   returns nothing usable, or the request fails.
 */
export async function arxivExtractor(_html, url) {
    const urlObj = new URL(url);
    const path = urlObj.pathname;
    // The category pattern also matches <arxiv:primary_category>, whose term
    // is repeated by a regular <category> element, so de-duplicate while
    // keeping first-seen order.
    const extractCategories = (source) => [
        ...new Set([...source.matchAll(/category[^>]*term="([^"]+)"/g)].map(m => m[1])),
    ];
    // --- Search page: /search/?query=... or /search/?searchtype=all&query=... ---
    if (path.startsWith('/search')) {
        const rawQuery = urlObj.searchParams.get('query') || '';
        if (!rawQuery)
            return null;
        try {
            const searchQuery = encodeURIComponent(`all:${rawQuery}`);
            const apiUrl = `https://export.arxiv.org/api/query?search_query=${searchQuery}&max_results=10&sortBy=relevance`;
            const result = await simpleFetch(apiUrl, 'WebPeel/0.21', 20000, { Accept: 'application/xml' });
            if (!result?.html)
                return null;
            const xml = result.html;
            // Parse total results count from opensearch:totalResults
            const totalMatch = xml.match(/<opensearch:totalResults[^>]*>(\d+)<\/opensearch:totalResults>/);
            const total = totalMatch ? parseInt(totalMatch[1], 10) : 0;
            // Parse all entries
            const entries = [...xml.matchAll(/<entry[\s\S]*?<\/entry>/g)].map(m => m[0]);
            const papers = entries.map(entryXml => {
                const getTag = (tag) => {
                    const match = entryXml.match(new RegExp(`<${tag}[^>]*>([\\s\\S]*?)</${tag}>`));
                    return match ? stripHtml(match[1]).trim() : '';
                };
                const getAllTags = (tag) => {
                    const matches = [...entryXml.matchAll(new RegExp(`<${tag}[^>]*>([\\s\\S]*?)</${tag}>`, 'g'))];
                    return matches.map(m => stripHtml(m[1]).trim()).filter(Boolean);
                };
                const title = getTag('title');
                const published = getTag('published');
                const authors = getAllTags('name');
                const summary = getTag('summary');
                // Extract arXiv ID from <id> tag
                const idTag = getTag('id');
                const idMatch2 = idTag.match(/abs\/(\d{4}\.\d{4,5}(?:v\d+)?)/);
                const paperId2 = idMatch2 ? idMatch2[1] : '';
                // Categories
                const cats = extractCategories(entryXml);
                return { title, published: published?.split('T')[0], authors, summary, paperId: paperId2, categories: cats };
            }).filter(p => p.title);
            if (papers.length === 0)
                return null;
            const rows = papers.map((p, i) => {
                const authorLine = p.authors.length === 0 ? '—'
                    : p.authors.length === 1 ? p.authors[0]
                        : `${p.authors[0]} et al.`;
                const pdfLink = p.paperId ? ` [[PDF](https://arxiv.org/pdf/${p.paperId})]` : '';
                return `| ${i + 1} | [${p.title}](https://arxiv.org/abs/${p.paperId}) | ${p.published || '?'} | ${authorLine} |${pdfLink}`;
            }).join('\n');
            const cleanContent = `# 🔍 arXiv Search — "${rawQuery}"\n\n| # | Paper | Published | Authors |\n|---|-------|-----------|--------|\n${rows}\n\n*Source: arXiv API · Total results: ${total.toLocaleString()}*`;
            return {
                domain: 'arxiv.org',
                type: 'search',
                structured: { query: rawQuery, total, papers },
                cleanContent,
            };
        }
        catch (e) {
            if (process.env.DEBUG)
                console.debug('[webpeel]', 'ArXiv search failed:', e instanceof Error ? e.message : e);
            return null;
        }
    }
    // Extract paper ID from URL patterns:
    // /abs/2501.12948, /pdf/2501.12948, /abs/2501.12948v2
    const idMatch = path.match(/\/(abs|pdf|html)\/(\d{4}\.\d{4,5}(?:v\d+)?)/);
    if (!idMatch)
        return null;
    const paperId = idMatch[2];
    try {
        // Use ArXiv API
        const apiUrl = `https://export.arxiv.org/api/query?id_list=${paperId}`;
        const result = await simpleFetch(apiUrl, 'WebPeel/0.17.1', 15000, { Accept: 'application/xml' });
        if (!result?.html)
            return null;
        const xml = result.html;
        // Parse XML (simple regex-based for these known fields)
        const getTag = (tag) => {
            const match = xml.match(new RegExp(`<${tag}[^>]*>([\\s\\S]*?)</${tag}>`));
            return match ? stripHtml(match[1]).trim() : '';
        };
        // ArXiv Atom feed: <feed><title>query URL</title> ... <entry><title>Paper Title</title>...
        // We must grab the entry title, not the feed title.
        const entryMatch = xml.match(/<entry[\s\S]*?<\/entry>/);
        const entryXml = entryMatch ? entryMatch[0] : xml;
        const getEntryTag = (tag) => {
            const match = entryXml.match(new RegExp(`<${tag}[^>]*>([\\s\\S]*?)</${tag}>`));
            return match ? stripHtml(match[1]).trim() : '';
        };
        const getAllEntryTags = (tag) => {
            const matches = [...entryXml.matchAll(new RegExp(`<${tag}[^>]*>([\\s\\S]*?)</${tag}>`, 'g'))];
            return matches.map(m => stripHtml(m[1]).trim()).filter(Boolean);
        };
        // Prefer values from inside <entry>; fall back to the whole feed.
        const title = getEntryTag('title') || getTag('title');
        const summary = getEntryTag('summary') || getTag('summary');
        const published = getEntryTag('published') || getTag('published');
        const updated = getEntryTag('updated') || getTag('updated');
        const authors = getAllEntryTags('name');
        // Extract categories
        const categories = extractCategories(xml);
        // Extract DOI and journal ref if available
        const doi = getTag('arxiv:doi');
        const journalRef = getTag('arxiv:journal_ref');
        if (!title)
            return null;
        const structured = {
            title,
            authors,
            abstract: summary,
            published: published || undefined,
            updated: updated || undefined,
            categories,
            doi: doi || undefined,
            journalRef: journalRef || undefined,
            paperId,
            pdfUrl: `https://arxiv.org/pdf/${paperId}`,
            absUrl: `https://arxiv.org/abs/${paperId}`,
        };
        // Long author lists are truncated to the first five names.
        const authorLine = authors.length <= 5
            ? authors.join(', ')
            : `${authors.slice(0, 5).join(', ')} et al. (${authors.length} authors)`;
        const cleanContent = `# 📄 arXiv: ${title} (${paperId})\n\n**Authors:** ${authorLine}\n**Submitted:** ${published?.split('T')[0] || 'N/A'}${categories.length ? `\n**Categories:** ${categories.join(', ')}` : ''}${doi ? `\n**DOI:** ${doi}` : ''}${journalRef ? `\n**Journal:** ${journalRef}` : ''}\n\n## Abstract\n\n${summary}\n\n**PDF:** [Download](${structured.pdfUrl}) | **HTML:** [View](https://arxiv.org/html/${paperId})`;
        return { domain: 'arxiv.org', type: 'paper', structured, cleanContent };
    }
    catch (e) {
        if (process.env.DEBUG)
            console.debug('[webpeel]', 'ArXiv API failed:', e instanceof Error ? e.message : e);
        return null;
    }
}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import { fetchJson } from './shared.js';
|
|
2
|
+
// ---------------------------------------------------------------------------
|
|
3
|
+
// 10. Best Buy extractor (Best Buy Products API)
|
|
4
|
+
// ---------------------------------------------------------------------------
|
|
5
|
+
/**
 * Extract Best Buy product details through the Best Buy Products API.
 *
 * Requires the `BESTBUY_API_KEY` environment variable; without it the
 * extractor is skipped. The SKU is taken from the product URL
 * (`/site/.../6587822.p` → `6587822`).
 *
 * @param {string} _html - Unused; all data comes from the API.
 * @param {string} url - Best Buy product URL containing `/<sku>.p`.
 * @returns {Promise<{domain: string, type: string, structured: object, cleanContent: string} | null>}
 *   The extracted product, or `null` when there is no API key, no SKU in the
 *   URL, the API reports an error, or the request fails.
 */
export async function bestBuyExtractor(_html, url) {
    const apiKey = process.env.BESTBUY_API_KEY;
    if (!apiKey)
        return null; // No API key, skip
    // Extract SKU from URL: /site/.../6587822.p → 6587822
    const skuMatch = url.match(/\/(\d{7,})\.p/);
    if (!skuMatch)
        return null;
    const sku = skuMatch[1];
    // The key comes from the environment, so encode it before placing it in the query string.
    const apiUrl = `https://api.bestbuy.com/v1/products/${sku}.json?apiKey=${encodeURIComponent(apiKey)}&show=sku,name,salePrice,regularPrice,onSale,shortDescription,longDescription,image,largeFrontImage,url,customerReviewAverage,customerReviewCount,categoryPath,manufacturer,modelNumber,upc,freeShipping,inStoreAvailability,onlineAvailability,condition,features.feature`;
    try {
        const data = await fetchJson(apiUrl);
        if (!data || data.error)
            return null;
        // `features` is accepted either as a list of `{ feature }` objects (the
        // same list-of-objects shape this function assumes for `categoryPath`)
        // or as `{ feature: [...] }`; normalise both to a list of strings.
        const rawFeatures = Array.isArray(data.features) ? data.features : data.features?.feature;
        const featureList = Array.isArray(rawFeatures)
            ? rawFeatures
            : (typeof rawFeatures === 'string' ? [rawFeatures] : []);
        const features = featureList
            .map((f) => (typeof f === 'string' ? f : f?.feature))
            .filter((f) => typeof f === 'string' && f);
        // Build clean markdown
        const lines = [];
        lines.push(`# ${data.name}`);
        lines.push('');
        if (data.onSale) {
            lines.push(`**Sale Price:** $${data.salePrice} (was $${data.regularPrice})`);
        }
        else {
            lines.push(`**Price:** $${data.regularPrice}`);
        }
        lines.push(`**SKU:** ${data.sku}`);
        if (data.manufacturer)
            lines.push(`**Brand:** ${data.manufacturer}`);
        if (data.modelNumber)
            lines.push(`**Model:** ${data.modelNumber}`);
        if (data.customerReviewAverage) {
            lines.push(`**Rating:** ${data.customerReviewAverage}/5 (${data.customerReviewCount} reviews)`);
        }
        lines.push(`**Availability:** ${data.onlineAvailability ? 'In Stock Online' : 'Out of Stock Online'} | ${data.inStoreAvailability ? 'Available In Store' : 'Not Available In Store'}`);
        if (data.freeShipping)
            lines.push('**Free Shipping:** Yes');
        lines.push('');
        if (data.shortDescription)
            lines.push(data.shortDescription);
        lines.push('');
        if (data.longDescription)
            lines.push(data.longDescription);
        if (features.length) {
            lines.push('');
            lines.push('## Features');
            for (const f of features) {
                lines.push(`- ${f}`);
            }
        }
        const structured = {
            sku: data.sku,
            name: data.name,
            price: data.salePrice || data.regularPrice,
            regularPrice: data.regularPrice,
            onSale: data.onSale,
            brand: data.manufacturer,
            model: data.modelNumber,
            upc: data.upc,
            rating: data.customerReviewAverage,
            reviewCount: data.customerReviewCount,
            image: data.largeFrontImage || data.image,
            url: data.url,
            inStock: data.onlineAvailability,
            freeShipping: data.freeShipping,
            condition: data.condition,
            category: data.categoryPath?.map((c) => c.name).join(' > '),
        };
        return { domain: 'bestbuy.com', type: 'product', structured, cleanContent: lines.join('\n') };
    }
    catch (e) {
        if (process.env.DEBUG)
            console.debug('[webpeel]', 'Best Buy API failed:', e instanceof Error ? e.message : e);
        return null;
    }
}
|