@iflow-mcp/jakeliume-webpeel 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +313 -0
- package/dist/cache.d.ts +30 -0
- package/dist/cache.js +139 -0
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +411 -0
- package/dist/cli/commands/doctor.d.ts +37 -0
- package/dist/cli/commands/doctor.js +371 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1345 -0
- package/dist/cli/commands/guide.d.ts +2 -0
- package/dist/cli/commands/guide.js +183 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +840 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/monitor.d.ts +12 -0
- package/dist/cli/commands/monitor.js +197 -0
- package/dist/cli/commands/observe.d.ts +12 -0
- package/dist/cli/commands/observe.js +158 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +282 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +1021 -0
- package/dist/cli/commands/setup.d.ts +13 -0
- package/dist/cli/commands/setup.js +244 -0
- package/dist/cli/commands/skill.d.ts +15 -0
- package/dist/cli/commands/skill.js +195 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +806 -0
- package/dist/cli-auth.d.ts +75 -0
- package/dist/cli-auth.js +369 -0
- package/dist/cli.d.ts +17 -0
- package/dist/cli.js +99 -0
- package/dist/core/actions.d.ts +69 -0
- package/dist/core/actions.js +495 -0
- package/dist/core/agent.d.ts +98 -0
- package/dist/core/agent.js +558 -0
- package/dist/core/answer.d.ts +42 -0
- package/dist/core/answer.js +395 -0
- package/dist/core/application-tracker.d.ts +84 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/apply.d.ts +162 -0
- package/dist/core/apply.js +816 -0
- package/dist/core/auth-detection.d.ts +35 -0
- package/dist/core/auth-detection.js +358 -0
- package/dist/core/auto-extract.d.ts +82 -0
- package/dist/core/auto-extract.js +604 -0
- package/dist/core/auto-interact.d.ts +23 -0
- package/dist/core/auto-interact.js +246 -0
- package/dist/core/bm25-filter.d.ts +66 -0
- package/dist/core/bm25-filter.js +288 -0
- package/dist/core/branding.d.ts +54 -0
- package/dist/core/branding.js +234 -0
- package/dist/core/browser-fetch.d.ts +323 -0
- package/dist/core/browser-fetch.js +1600 -0
- package/dist/core/browser-pool.d.ts +91 -0
- package/dist/core/browser-pool.js +550 -0
- package/dist/core/budget.d.ts +42 -0
- package/dist/core/budget.js +324 -0
- package/dist/core/business-intel.d.ts +47 -0
- package/dist/core/business-intel.js +279 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +121 -0
- package/dist/core/cf-worker-proxy.d.ts +32 -0
- package/dist/core/cf-worker-proxy.js +87 -0
- package/dist/core/challenge-detection.d.ts +26 -0
- package/dist/core/challenge-detection.js +468 -0
- package/dist/core/change-tracking.d.ts +75 -0
- package/dist/core/change-tracking.js +276 -0
- package/dist/core/chunker.d.ts +46 -0
- package/dist/core/chunker.js +249 -0
- package/dist/core/chunking.d.ts +42 -0
- package/dist/core/chunking.js +181 -0
- package/dist/core/circuit-breaker.d.ts +44 -0
- package/dist/core/circuit-breaker.js +85 -0
- package/dist/core/content-pruner.d.ts +47 -0
- package/dist/core/content-pruner.js +425 -0
- package/dist/core/cookie-cache.d.ts +60 -0
- package/dist/core/cookie-cache.js +163 -0
- package/dist/core/crawl-checkpoint.d.ts +54 -0
- package/dist/core/crawl-checkpoint.js +104 -0
- package/dist/core/crawler.d.ts +84 -0
- package/dist/core/crawler.js +349 -0
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/deep-fetch.d.ts +74 -0
- package/dist/core/deep-fetch.js +405 -0
- package/dist/core/deep-research.d.ts +141 -0
- package/dist/core/deep-research.js +972 -0
- package/dist/core/design-analysis.d.ts +70 -0
- package/dist/core/design-analysis.js +490 -0
- package/dist/core/design-compare.d.ts +38 -0
- package/dist/core/design-compare.js +264 -0
- package/dist/core/diff.d.ts +61 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/dns-cache.d.ts +20 -0
- package/dist/core/dns-cache.js +198 -0
- package/dist/core/documents.d.ts +23 -0
- package/dist/core/documents.js +123 -0
- package/dist/core/domain-memory.d.ts +66 -0
- package/dist/core/domain-memory.js +163 -0
- package/dist/core/domain-verify.d.ts +40 -0
- package/dist/core/domain-verify.js +379 -0
- package/dist/core/engine-ranker.d.ts +112 -0
- package/dist/core/engine-ranker.js +395 -0
- package/dist/core/extract-inline.d.ts +38 -0
- package/dist/core/extract-inline.js +215 -0
- package/dist/core/extract-listings.d.ts +38 -0
- package/dist/core/extract-listings.js +461 -0
- package/dist/core/extract.d.ts +9 -0
- package/dist/core/extract.js +139 -0
- package/dist/core/fetch-cache.d.ts +57 -0
- package/dist/core/fetch-cache.js +95 -0
- package/dist/core/fetcher.d.ts +13 -0
- package/dist/core/fetcher.js +12 -0
- package/dist/core/google-cache.d.ts +29 -0
- package/dist/core/google-cache.js +180 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/hotel-search.d.ts +122 -0
- package/dist/core/hotel-search.js +382 -0
- package/dist/core/http-fetch.d.ts +72 -0
- package/dist/core/http-fetch.js +820 -0
- package/dist/core/human.d.ts +175 -0
- package/dist/core/human.js +680 -0
- package/dist/core/image-caption.d.ts +44 -0
- package/dist/core/image-caption.js +271 -0
- package/dist/core/jobs.d.ts +75 -0
- package/dist/core/jobs.js +634 -0
- package/dist/core/json-ld.d.ts +15 -0
- package/dist/core/json-ld.js +617 -0
- package/dist/core/language-detect.d.ts +18 -0
- package/dist/core/language-detect.js +135 -0
- package/dist/core/links.d.ts +10 -0
- package/dist/core/links.js +44 -0
- package/dist/core/llm-extract.d.ts +71 -0
- package/dist/core/llm-extract.js +507 -0
- package/dist/core/llm-provider.d.ts +100 -0
- package/dist/core/llm-provider.js +702 -0
- package/dist/core/local-search.d.ts +60 -0
- package/dist/core/local-search.js +308 -0
- package/dist/core/logger.d.ts +28 -0
- package/dist/core/logger.js +104 -0
- package/dist/core/map.d.ts +33 -0
- package/dist/core/map.js +127 -0
- package/dist/core/markdown.d.ts +92 -0
- package/dist/core/markdown.js +809 -0
- package/dist/core/metadata.d.ts +34 -0
- package/dist/core/metadata.js +422 -0
- package/dist/core/observe.d.ts +113 -0
- package/dist/core/observe.js +395 -0
- package/dist/core/ocr.d.ts +12 -0
- package/dist/core/ocr.js +33 -0
- package/dist/core/paginate.d.ts +31 -0
- package/dist/core/paginate.js +106 -0
- package/dist/core/pdf.d.ts +8 -0
- package/dist/core/pdf.js +25 -0
- package/dist/core/peel-tls.d.ts +25 -0
- package/dist/core/peel-tls.js +220 -0
- package/dist/core/pipeline.d.ts +132 -0
- package/dist/core/pipeline.js +1666 -0
- package/dist/core/profiles.d.ts +61 -0
- package/dist/core/profiles.js +350 -0
- package/dist/core/prompt-guard.d.ts +30 -0
- package/dist/core/prompt-guard.js +119 -0
- package/dist/core/proxy-config.d.ts +90 -0
- package/dist/core/proxy-config.js +172 -0
- package/dist/core/quick-answer.d.ts +53 -0
- package/dist/core/quick-answer.js +833 -0
- package/dist/core/rate-governor.d.ts +80 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/readability.d.ts +57 -0
- package/dist/core/readability.js +533 -0
- package/dist/core/research.d.ts +66 -0
- package/dist/core/research.js +270 -0
- package/dist/core/retry.d.ts +60 -0
- package/dist/core/retry.js +119 -0
- package/dist/core/safe-browsing.d.ts +30 -0
- package/dist/core/safe-browsing.js +206 -0
- package/dist/core/schema-extraction.d.ts +66 -0
- package/dist/core/schema-extraction.js +352 -0
- package/dist/core/schema-postprocess.d.ts +32 -0
- package/dist/core/schema-postprocess.js +469 -0
- package/dist/core/schema-templates.d.ts +19 -0
- package/dist/core/schema-templates.js +143 -0
- package/dist/core/screenshot.d.ts +224 -0
- package/dist/core/screenshot.js +207 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +243 -0
- package/dist/core/search-provider.js +1629 -0
- package/dist/core/searxng-provider.d.ts +35 -0
- package/dist/core/searxng-provider.js +105 -0
- package/dist/core/selective-evidence.d.ts +151 -0
- package/dist/core/selective-evidence.js +389 -0
- package/dist/core/site-search.d.ts +44 -0
- package/dist/core/site-search.js +252 -0
- package/dist/core/sitemap.d.ts +23 -0
- package/dist/core/sitemap.js +105 -0
- package/dist/core/source-credibility.d.ts +29 -0
- package/dist/core/source-credibility.js +584 -0
- package/dist/core/source-scoring.d.ts +166 -0
- package/dist/core/source-scoring.js +396 -0
- package/dist/core/stemmer.d.ts +38 -0
- package/dist/core/stemmer.js +509 -0
- package/dist/core/strategies.d.ts +104 -0
- package/dist/core/strategies.js +1044 -0
- package/dist/core/strategy-hooks.d.ts +145 -0
- package/dist/core/strategy-hooks.js +74 -0
- package/dist/core/structured-extract.d.ts +43 -0
- package/dist/core/structured-extract.js +550 -0
- package/dist/core/summarize.d.ts +17 -0
- package/dist/core/summarize.js +78 -0
- package/dist/core/synonyms.d.ts +42 -0
- package/dist/core/synonyms.js +184 -0
- package/dist/core/system-monitor.d.ts +61 -0
- package/dist/core/system-monitor.js +133 -0
- package/dist/core/table-format.d.ts +30 -0
- package/dist/core/table-format.js +146 -0
- package/dist/core/threat-feeds.d.ts +23 -0
- package/dist/core/threat-feeds.js +104 -0
- package/dist/core/timing.d.ts +21 -0
- package/dist/core/timing.js +33 -0
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/core/user-agents.d.ts +82 -0
- package/dist/core/user-agents.js +239 -0
- package/dist/core/vertical-search.d.ts +54 -0
- package/dist/core/vertical-search.js +158 -0
- package/dist/core/watch-manager.d.ts +175 -0
- package/dist/core/watch-manager.js +416 -0
- package/dist/core/watch.d.ts +101 -0
- package/dist/core/watch.js +389 -0
- package/dist/core/youtube.d.ts +130 -0
- package/dist/core/youtube.js +1175 -0
- package/dist/ee/challenge-re-export.d.ts +1 -0
- package/dist/ee/challenge-re-export.js +1 -0
- package/dist/ee/challenge-solver.d.ts +72 -0
- package/dist/ee/challenge-solver.js +720 -0
- package/dist/ee/domain-extractors.d.ts +8 -0
- package/dist/ee/domain-extractors.js +8 -0
- package/dist/ee/domain-intel.d.ts +16 -0
- package/dist/ee/domain-intel.js +133 -0
- package/dist/ee/extractors/allrecipes.d.ts +2 -0
- package/dist/ee/extractors/allrecipes.js +120 -0
- package/dist/ee/extractors/amazon.d.ts +2 -0
- package/dist/ee/extractors/amazon.js +78 -0
- package/dist/ee/extractors/arxiv.d.ts +2 -0
- package/dist/ee/extractors/arxiv.js +137 -0
- package/dist/ee/extractors/bestbuy.d.ts +2 -0
- package/dist/ee/extractors/bestbuy.js +78 -0
- package/dist/ee/extractors/carscom.d.ts +2 -0
- package/dist/ee/extractors/carscom.js +121 -0
- package/dist/ee/extractors/coingecko.d.ts +2 -0
- package/dist/ee/extractors/coingecko.js +134 -0
- package/dist/ee/extractors/craigslist.d.ts +2 -0
- package/dist/ee/extractors/craigslist.js +92 -0
- package/dist/ee/extractors/devto.d.ts +2 -0
- package/dist/ee/extractors/devto.js +135 -0
- package/dist/ee/extractors/ebay.d.ts +2 -0
- package/dist/ee/extractors/ebay.js +90 -0
- package/dist/ee/extractors/espn.d.ts +2 -0
- package/dist/ee/extractors/espn.js +260 -0
- package/dist/ee/extractors/etsy.d.ts +2 -0
- package/dist/ee/extractors/etsy.js +52 -0
- package/dist/ee/extractors/facebook.d.ts +2 -0
- package/dist/ee/extractors/facebook.js +46 -0
- package/dist/ee/extractors/github.d.ts +2 -0
- package/dist/ee/extractors/github.js +196 -0
- package/dist/ee/extractors/google-flights.d.ts +2 -0
- package/dist/ee/extractors/google-flights.js +176 -0
- package/dist/ee/extractors/hackernews.d.ts +2 -0
- package/dist/ee/extractors/hackernews.js +147 -0
- package/dist/ee/extractors/imdb.d.ts +2 -0
- package/dist/ee/extractors/imdb.js +172 -0
- package/dist/ee/extractors/index.d.ts +26 -0
- package/dist/ee/extractors/index.js +247 -0
- package/dist/ee/extractors/instagram.d.ts +2 -0
- package/dist/ee/extractors/instagram.js +102 -0
- package/dist/ee/extractors/kalshi.d.ts +2 -0
- package/dist/ee/extractors/kalshi.js +121 -0
- package/dist/ee/extractors/kayak-cars.d.ts +2 -0
- package/dist/ee/extractors/kayak-cars.js +270 -0
- package/dist/ee/extractors/linkedin.d.ts +2 -0
- package/dist/ee/extractors/linkedin.js +113 -0
- package/dist/ee/extractors/medium.d.ts +2 -0
- package/dist/ee/extractors/medium.js +130 -0
- package/dist/ee/extractors/news.d.ts +4 -0
- package/dist/ee/extractors/news.js +173 -0
- package/dist/ee/extractors/npm.d.ts +2 -0
- package/dist/ee/extractors/npm.js +86 -0
- package/dist/ee/extractors/pdf.d.ts +2 -0
- package/dist/ee/extractors/pdf.js +108 -0
- package/dist/ee/extractors/pinterest.d.ts +2 -0
- package/dist/ee/extractors/pinterest.js +34 -0
- package/dist/ee/extractors/polymarket.d.ts +2 -0
- package/dist/ee/extractors/polymarket.js +358 -0
- package/dist/ee/extractors/producthunt.d.ts +2 -0
- package/dist/ee/extractors/producthunt.js +88 -0
- package/dist/ee/extractors/pubmed.d.ts +2 -0
- package/dist/ee/extractors/pubmed.js +162 -0
- package/dist/ee/extractors/pypi.d.ts +2 -0
- package/dist/ee/extractors/pypi.js +80 -0
- package/dist/ee/extractors/reddit.d.ts +2 -0
- package/dist/ee/extractors/reddit.js +438 -0
- package/dist/ee/extractors/redfin.d.ts +2 -0
- package/dist/ee/extractors/redfin.js +156 -0
- package/dist/ee/extractors/semanticscholar.d.ts +2 -0
- package/dist/ee/extractors/semanticscholar.js +131 -0
- package/dist/ee/extractors/shared.d.ts +12 -0
- package/dist/ee/extractors/shared.js +76 -0
- package/dist/ee/extractors/soundcloud.d.ts +2 -0
- package/dist/ee/extractors/soundcloud.js +34 -0
- package/dist/ee/extractors/sportsbetting.d.ts +2 -0
- package/dist/ee/extractors/sportsbetting.js +37 -0
- package/dist/ee/extractors/spotify.d.ts +2 -0
- package/dist/ee/extractors/spotify.js +34 -0
- package/dist/ee/extractors/stackoverflow.d.ts +2 -0
- package/dist/ee/extractors/stackoverflow.js +61 -0
- package/dist/ee/extractors/substack.d.ts +2 -0
- package/dist/ee/extractors/substack.js +115 -0
- package/dist/ee/extractors/substackroot.d.ts +2 -0
- package/dist/ee/extractors/substackroot.js +46 -0
- package/dist/ee/extractors/tiktok.d.ts +2 -0
- package/dist/ee/extractors/tiktok.js +29 -0
- package/dist/ee/extractors/tradingview.d.ts +2 -0
- package/dist/ee/extractors/tradingview.js +182 -0
- package/dist/ee/extractors/twitch.d.ts +2 -0
- package/dist/ee/extractors/twitch.js +36 -0
- package/dist/ee/extractors/twitter.d.ts +2 -0
- package/dist/ee/extractors/twitter.js +327 -0
- package/dist/ee/extractors/types.d.ts +14 -0
- package/dist/ee/extractors/types.js +1 -0
- package/dist/ee/extractors/walmart.d.ts +2 -0
- package/dist/ee/extractors/walmart.js +50 -0
- package/dist/ee/extractors/weather.d.ts +2 -0
- package/dist/ee/extractors/weather.js +133 -0
- package/dist/ee/extractors/wikipedia.d.ts +4 -0
- package/dist/ee/extractors/wikipedia.js +235 -0
- package/dist/ee/extractors/yelp.d.ts +2 -0
- package/dist/ee/extractors/yelp.js +216 -0
- package/dist/ee/extractors/youtube.d.ts +2 -0
- package/dist/ee/extractors/youtube.js +189 -0
- package/dist/ee/extractors/zillow.d.ts +54 -0
- package/dist/ee/extractors/zillow.js +247 -0
- package/dist/ee/extractors-re-export.d.ts +1 -0
- package/dist/ee/extractors-re-export.js +1 -0
- package/dist/ee/premium-hooks.d.ts +20 -0
- package/dist/ee/premium-hooks.js +50 -0
- package/dist/ee/spa-detection.d.ts +2 -0
- package/dist/ee/spa-detection.js +2 -0
- package/dist/ee/stability.d.ts +4 -0
- package/dist/ee/stability.js +29 -0
- package/dist/ee/swr-cache.d.ts +14 -0
- package/dist/ee/swr-cache.js +34 -0
- package/dist/index.d.ts +143 -0
- package/dist/index.js +291 -0
- package/dist/integrations/index.d.ts +2 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/langchain.d.ts +64 -0
- package/dist/integrations/langchain.js +115 -0
- package/dist/integrations/llamaindex.d.ts +50 -0
- package/dist/integrations/llamaindex.js +91 -0
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +395 -0
- package/dist/mcp/handlers/extract.d.ts +7 -0
- package/dist/mcp/handlers/extract.js +135 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +63 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +40 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/observe.d.ts +8 -0
- package/dist/mcp/handlers/observe.js +37 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +78 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +7 -0
- package/dist/mcp/server.js +108 -0
- package/dist/mcp/smart-router.d.ts +23 -0
- package/dist/mcp/smart-router.js +178 -0
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +632 -0
- package/dist/server/auth-store.d.ts +28 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/bull-queues.d.ts +60 -0
- package/dist/server/bull-queues.js +90 -0
- package/dist/server/email-service.d.ts +55 -0
- package/dist/server/email-service.js +291 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/audit-log.d.ts +14 -0
- package/dist/server/middleware/audit-log.js +73 -0
- package/dist/server/middleware/auth.d.ts +35 -0
- package/dist/server/middleware/auth.js +225 -0
- package/dist/server/middleware/rate-limit.d.ts +50 -0
- package/dist/server/middleware/rate-limit.js +270 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +201 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +146 -0
- package/dist/server/pg-auth-store.js +576 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +79 -0
- package/dist/server/routes/admin-active.d.ts +7 -0
- package/dist/server/routes/admin-active.js +120 -0
- package/dist/server/routes/admin-stats.d.ts +7 -0
- package/dist/server/routes/admin-stats.js +176 -0
- package/dist/server/routes/agent.d.ts +24 -0
- package/dist/server/routes/agent.js +480 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +295 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cache-warm.d.ts +25 -0
- package/dist/server/routes/cache-warm.js +212 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/crawl.d.ts +13 -0
- package/dist/server/routes/crawl.js +287 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/deep-research.d.ts +11 -0
- package/dist/server/routes/deep-research.js +232 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +14 -0
- package/dist/server/routes/extract.js +325 -0
- package/dist/server/routes/feed.d.ts +15 -0
- package/dist/server/routes/feed.js +311 -0
- package/dist/server/routes/fetch-queue.d.ts +13 -0
- package/dist/server/routes/fetch-queue.js +357 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +1274 -0
- package/dist/server/routes/go.d.ts +14 -0
- package/dist/server/routes/go.js +81 -0
- package/dist/server/routes/health.d.ts +11 -0
- package/dist/server/routes/health.js +141 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +574 -0
- package/dist/server/routes/map.d.ts +11 -0
- package/dist/server/routes/map.js +116 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +197 -0
- package/dist/server/routes/metrics.d.ts +37 -0
- package/dist/server/routes/metrics.js +149 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/reader.d.ts +18 -0
- package/dist/server/routes/reader.js +192 -0
- package/dist/server/routes/research.d.ts +14 -0
- package/dist/server/routes/research.js +482 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +820 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +874 -0
- package/dist/server/routes/session.d.ts +17 -0
- package/dist/server/routes/session.js +548 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +102 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +72 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
- package/dist/server/routes/smart-search/handlers/general.js +717 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +1309 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +154 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +546 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +264 -0
- package/dist/server/routes/smart-search/llm.d.ts +16 -0
- package/dist/server/routes/smart-search/llm.js +70 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +81 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +20 -0
- package/dist/server/routes/smart-search/utils.js +146 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +296 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/usage.d.ts +9 -0
- package/dist/server/routes/usage.js +279 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1867 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +14 -0
- package/dist/server/sentry.js +104 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/dist/types.d.ts +552 -0
- package/dist/types.js +39 -0
- package/llms.txt +105 -0
- package/package.json +189 -0
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import { fetchJson } from './shared.js';
|
|
2
|
+
// ---------------------------------------------------------------------------
|
|
3
|
+
// 18. PyPI extractor
|
|
4
|
+
// ---------------------------------------------------------------------------
|
|
5
|
+
/**
 * Build structured metadata and a Markdown summary for a PyPI project page
 * by querying the PyPI JSON API.
 *
 * The project name (and, when present, the release version) is taken from a
 * `/project/<name>[/<version>]` segment of the URL path. When a version is
 * present the version-specific JSON endpoint is tried first so the result
 * describes the release the page shows; if that lookup fails or has no
 * `info`, the latest-release endpoint is used instead.
 *
 * @param {string} _html - Page HTML (unused; all data comes from the JSON API).
 * @param {string} url - Absolute URL of the PyPI page.
 * @returns {Promise<{domain: string, type: string, structured: object, cleanContent: string} | null>}
 *   The extraction result, or `null` when the URL has no `/project/<name>`
 *   segment, the API response has no `info`, or anything throws while
 *   fetching/formatting.
 * @throws {TypeError} If `url` cannot be parsed by `new URL()` (parsing happens
 *   before the try/catch).
 */
export async function pypiExtractor(_html, url) {
    const urlObj = new URL(url);
    const path = urlObj.pathname;
    // Match /project/name or /project/name/version/
    const packageMatch = path.match(/\/project\/([^/]+)(?:\/([^/]+))?/);
    if (!packageMatch)
        return null;
    const packageName = packageMatch[1];
    const requestedVersion = packageMatch[2];
    try {
        // NOTE(review): fetchJson comes from ./shared.js; assumed to resolve to the
        // parsed JSON body (or a nullish value) — confirm against its definition.
        const projectApiBase = `https://pypi.org/pypi/${encodeURIComponent(packageName)}`;
        let data = null;
        if (requestedVersion) {
            // Version-pinned page: ask for that exact release first.
            try {
                data = await fetchJson(`${projectApiBase}/${encodeURIComponent(requestedVersion)}/json`);
            }
            catch (e) {
                if (process.env.DEBUG)
                    console.debug('[webpeel]', 'PyPI versioned API failed, falling back to latest:', e instanceof Error ? e.message : e);
                data = null;
            }
        }
        // No version in the URL, or the second path segment was not a real release.
        if (!data?.info)
            data = await fetchJson(`${projectApiBase}/json`);
        if (!data?.info)
            return null;
        const info = data.info;
        const allClassifiers = Array.isArray(info.classifiers) ? info.classifiers : [];
        let keywords = [];
        if (typeof info.keywords === 'string') {
            keywords = info.keywords.split(/[,\s]+/).filter(Boolean);
        }
        else if (Array.isArray(info.keywords)) {
            keywords = info.keywords.filter((k) => typeof k === 'string' && k.length > 0);
        }
        // Prefer the SPDX expression when the API provides one; fall back to the free-text field.
        const license = info.license_expression || info.license || 'N/A';
        const structured = {
            title: `${info.name} ${info.version}`,
            name: info.name,
            version: info.version,
            description: info.summary || '',
            author: info.author || '',
            authorEmail: info.author_email || '',
            license,
            homepage: info.home_page || info.project_url || null,
            projectUrls: info.project_urls || {},
            keywords,
            requiresPython: info.requires_python || '',
            requiresDist: (info.requires_dist || []).slice(0, 20),
            classifiers: allClassifiers.slice(0, 10),
        };
        // Long description from the API; only kept when it is longer than 100
        // characters and is neither the 'UNKNOWN' placeholder nor a copy of the
        // summary. Capped at 8000 characters.
        const fullDescription = info.description && info.description.length > 100 &&
            info.description !== 'UNKNOWN' && info.description !== info.summary
            ? info.description.slice(0, 8000)
            : null;
        structured.fullDescription = fullDescription;
        const installCmd = `pip install ${info.name}`;
        const keywordsLine = structured.keywords.length ? `\n**Keywords:** ${structured.keywords.join(', ')}` : '';
        const pyVersionLine = structured.requiresPython ? `\n**Requires Python:** ${structured.requiresPython}` : '';
        // Dependencies (at most the first 20, as stored in structured.requiresDist)
        const depsLine = structured.requiresDist.length
            ? `\n\n## Dependencies\n\n${structured.requiresDist.map((d) => `- ${d}`).join('\n')}`
            : '';
        // Classifiers — pick the useful ones (license, status, Python versions) from the
        // FULL list before capping; filtering an already-truncated list would drop
        // late-sorted groups such as "Programming Language".
        const usefulClassifiers = allClassifiers
            .filter((c) => typeof c === 'string' &&
            (c.startsWith('Programming Language') || c.startsWith('License') || c.startsWith('Development Status')))
            .slice(0, 10);
        const classifiersSection = usefulClassifiers.length
            ? `\n\n## Classifiers\n\n${usefulClassifiers.map((c) => `- ${c}`).join('\n')}`
            : '';
        // Project URLs, one bullet per label
        const projectUrlLines = Object.entries(structured.projectUrls)
            .map(([label, u]) => `- **${label}:** ${u}`);
        const linksSection = projectUrlLines.length ? `## Links\n\n${projectUrlLines.join('\n')}\n` : '';
        // Full description section (long description from the API)
        const descSection = fullDescription
            ? `\n\n## Description\n\n${fullDescription}`
            : '';
        const cleanContent = [
            `# 📦 ${info.name} ${info.version}`,
            '',
            info.summary || '',
            '',
            '```',
            installCmd,
            '```',
            '',
            `**Author:** ${info.author || 'N/A'} | **License:** ${license}${keywordsLine}${pyVersionLine}`,
            '',
            `${linksSection}${depsLine}${classifiersSection}${descSection}`,
        ].join('\n');
        return { domain: 'pypi.org', type: 'package', structured, cleanContent };
    }
    catch (e) {
        // Best-effort extractor: report in debug mode and yield no result.
        if (process.env.DEBUG)
            console.debug('[webpeel]', 'PyPI API failed:', e instanceof Error ? e.message : e);
        return null;
    }
}
|
|
@@ -0,0 +1,438 @@
|
|
|
1
|
+
import { unixToIso, fetchJsonWithRetry } from './shared.js';
|
|
2
|
+
/**
 * Resolve a Reddit share link (a URL whose path contains `/s/`) to the URL it
 * redirects to, following a single redirect hop.
 *
 * This is best-effort: on any failure (non-HTTP(S) scheme, request error,
 * timeout, malformed `Location` header, no redirect) the original `url` is
 * returned unchanged.
 *
 * @param {string} url - Absolute URL to resolve.
 * @returns {Promise<string>} The redirect target as an absolute URL, or `url`
 *   itself when it is not a share link or could not be resolved.
 * @throws {TypeError} If `url` cannot be parsed by `new URL()` (parsing happens
 *   before the try/catch).
 */
async function resolveRedditShareUrl(url) {
    const urlObj = new URL(url);
    // Match /r/subreddit/s/CODE or /s/CODE patterns
    if (!urlObj.pathname.includes('/s/'))
        return url;
    // Only http(s) can be requested with the Node clients below.
    const isHttps = urlObj.protocol === 'https:';
    if (!isHttps && urlObj.protocol !== 'http:')
        return url;
    try {
        const { default: https } = await import('https');
        const { default: http } = await import('http');
        // Choose the client from the parsed protocol (already lower-cased by URL)
        // rather than from a raw string prefix.
        const client = isHttps ? https : http;
        // `await` is required here: without it a rejection caused by a synchronous
        // throw inside the executor would bypass the catch block below.
        return await new Promise((resolve) => {
            const req = client.get(url, {
                headers: { 'User-Agent': 'WebPeel/0.17.1 (web data platform; https://webpeel.dev) Node.js' },
                timeout: 10000,
            }, (res) => {
                let target = url; // No redirect → return original
                const { statusCode, headers } = res;
                // Follow redirect (one hop)
                if (statusCode && statusCode >= 300 && statusCode < 400 && headers.location) {
                    try {
                        // Handles both absolute and relative Location values.
                        target = new URL(headers.location, url).href;
                    }
                    catch {
                        // A throw here would escape as an uncaught exception; keep the original URL.
                        target = url;
                    }
                }
                res.resume(); // Consume response
                resolve(target);
            });
            req.on('error', () => resolve(url));
            req.on('timeout', () => {
                req.destroy();
                resolve(url);
            });
        });
    }
    catch {
        return url; // On any error, return original URL
    }
}
|
|
39
|
+
/**
 * Convert one node of a Reddit comment tree into a compact comment object.
 *
 * @param {object} data - Either a listing child (`{ kind: 't1', data: {...} }`)
 *   or the comment payload itself.
 * @param {number} depth - How many further reply levels to descend into;
 *   replies are only parsed while this is greater than 0.
 * @returns {{author: string, text: string, score: number, replies: Array} | null}
 *   The parsed comment, or `null` for missing nodes, `more` placeholders and
 *   nodes without a body.
 */
function parseRedditComment(data, depth) {
    if (!data || data.kind === 'more') {
        return null;
    }
    // 't1' nodes wrap the payload in `.data`; anything else is used as-is.
    const comment = data.kind === 't1' ? data.data : data;
    if (!comment || !comment.body) {
        return null;
    }
    const children = depth > 0 && comment.replies ? comment.replies.data?.children : undefined;
    let replies = [];
    if (children) {
        // Keep the three highest-scored parseable replies at this level.
        replies = [...children]
            .map((child) => parseRedditComment(child, depth - 1))
            .filter(Boolean)
            .sort((left, right) => right.score - left.score)
            .slice(0, 3);
    }
    const author = `u/${comment.author || '[deleted]'}`;
    const text = comment.body || '';
    const score = comment.score || 0;
    return { author, text, score, replies };
}
|
|
63
|
+
/** Domain label reported on every Reddit extraction result. */
const REDDIT_DOMAIN = 'reddit.com';
/** Request headers sent with every Reddit JSON API call. */
const REDDIT_REQUEST_HEADERS = { 'User-Agent': 'WebPeel/0.17.1 (web data platform; https://webpeel.dev) Node.js' };
/** Matches /r/sub, /r/sub/ and /r/sub/<sort>[/]; captures the subreddit path and the optional sort. */
const REDDIT_SUBREDDIT_PATH = /^(\/r\/[^/]+)(?:\/(hot|new|top|rising|controversial|best))?\/?$/;
/** Matches only real search pages (/search, /r/sub/search), not names that merely start with "search". */
const REDDIT_SEARCH_PATH = /^(?:\/r\/[^/]+)?\/search\/?$/;

/** Upper-case the first character of a sort name for use in headings. */
function redditCapitalize(word) {
    return word.charAt(0).toUpperCase() + word.slice(1);
}

/** Build the "post not found" result returned instead of null, so no browser fallback shows wrong content. */
function redditPostNotFound(requestedSub, error = 'Post not found') {
    return {
        domain: REDDIT_DOMAIN,
        type: 'post',
        structured: { error, subreddit: `r/${requestedSub}` },
        cleanContent: `## ❌ Reddit Post Not Found\n\nThe post at r/${requestedSub} could not be found. It may have been deleted or removed.`,
    };
}

/** Build the error result for a post that was found, but in a different subreddit than the one requested. */
function redditSubredditMismatch(requestedSub, actualSub) {
    return {
        domain: REDDIT_DOMAIN,
        type: 'post',
        structured: { error: 'Post not found in requested subreddit', requestedSubreddit: `r/${requestedSub}`, actualSubreddit: `r/${actualSub}` },
        cleanContent: `## ❌ Reddit Post Not Found\n\nThe post was not found in r/${requestedSub}. It may have been deleted or moved.`,
    };
}

/** Extract a single post (with top comments) or a gallery post (without comments) via the thread JSON endpoint. */
async function extractRedditThread({ url, normalizedUrl, path, isGallery }) {
    const jsonUrl = normalizedUrl.split('?')[0].replace(/\/?$/, '') + '.json?limit=25&sort=top';
    const requestedSub = path.match(/\/r\/([^/]+)/)?.[1] || 'unknown';
    let listings;
    try {
        listings = await fetchJsonWithRetry(jsonUrl, REDDIT_REQUEST_HEADERS);
    }
    catch {
        // Post not found or API error — return a "not found" result
        // instead of null (which would trigger browser fallback with wrong content)
        return redditPostNotFound(requestedSub, 'Post not found or has been deleted');
    }
    // A regular thread needs both the post listing and the comment listing; a gallery only the post.
    const requiredListings = isGallery ? 1 : 2;
    if (!Array.isArray(listings) || listings.length < requiredListings) {
        return redditPostNotFound(requestedSub);
    }
    const postData = listings[0]?.data?.children?.[0]?.data;
    if (!postData) {
        return redditPostNotFound(requestedSub);
    }
    // CRITICAL: Validate subreddit matches the request (prevents cross-subreddit ID reuse exploits)
    const actualSub = postData.subreddit?.toLowerCase();
    if (requestedSub !== 'unknown' && actualSub && requestedSub.toLowerCase() !== actualSub) {
        return redditSubredditMismatch(requestedSub, actualSub);
    }
    const fields = {
        subreddit: `r/${postData.subreddit || ''}`,
        title: postData.title || '',
        author: `u/${postData.author || '[deleted]'}`,
        score: postData.score ?? 0,
        upvoteRatio: postData.upvote_ratio ?? 1,
        url: postData.url || url,
        selftext: postData.selftext || '',
        commentCount: postData.num_comments ?? 0,
        created: unixToIso(postData.created_utc),
        flair: postData.link_flair_text || null,
    };
    if (isGallery) {
        const structured = { ...fields, comments: [], isGallery: true };
        const cleanContent = [
            `## 📋 ${structured.subreddit}: ${structured.title}`,
            '',
            `**Posted by** ${structured.author} | Score: ${structured.score} | ${structured.commentCount} comments`,
            `*${structured.created}*`,
            '',
            '*(Gallery post)*',
        ].join('\n');
        return { domain: REDDIT_DOMAIN, type: 'post', structured, cleanContent };
    }
    // Parse top comments (max 20)
    const comments = [];
    for (const child of listings[1]?.data?.children || []) {
        const comment = parseRedditComment(child, 3);
        if (comment) {
            comments.push(comment);
        }
        if (comments.length >= 20) {
            break;
        }
    }
    comments.sort((a, b) => b.score - a.score);
    const structured = { ...fields, comments };
    // Build clean markdown: the ten best comments, each with up to two replies.
    const commentsMd = comments.slice(0, 10).map((c) => {
        const repliesMd = c.replies.slice(0, 2).map((r) => ` > **${r.author}** (${r.score}): ${r.text.slice(0, 200)}`).join('\n');
        return `**${c.author}** (score: ${c.score})\n${c.text.slice(0, 300)}${repliesMd ? '\n' + repliesMd : ''}`;
    }).join('\n\n---\n\n');
    const selftextSection = structured.selftext
        ? `\n\n${structured.selftext.slice(0, 1000)}`
        : '';
    const cleanContent = [
        `## 📋 ${structured.subreddit}: ${structured.title}`,
        '',
        `**Posted by** ${structured.author} | Score: ${structured.score} (${Math.round(structured.upvoteRatio * 100)}% upvoted) | ${structured.commentCount} comments`,
        structured.flair ? `**Flair:** ${structured.flair}` : '',
        `*${structured.created}*${selftextSection}`,
        '',
        '---',
        '',
        '### Top Comments',
        '',
        commentsMd || '*No comments found.*',
    ].join('\n');
    return { domain: REDDIT_DOMAIN, type: 'post', structured, cleanContent };
}

/** Run a site-wide or subreddit-restricted search (posts, or comments when `type=comment`). */
async function extractRedditSearch({ urlObj, path }) {
    const subredditName = path.match(/\/r\/([^/]+)\/search/)?.[1] ?? null;
    // Extract search params from URL
    const q = urlObj.searchParams.get('q') || '';
    const sort = urlObj.searchParams.get('sort') || 'relevance';
    const t = urlObj.searchParams.get('t') || 'all';
    const after = urlObj.searchParams.get('after') || '';
    const searchType = urlObj.searchParams.get('type') || '';
    if (!q) {
        return {
            domain: REDDIT_DOMAIN,
            type: 'search',
            structured: { error: 'No search query provided' },
            cleanContent: '## ❌ No Search Query\n\nProvide a search query: /r/subreddit/search?q=your+query',
        };
    }
    // Build JSON search URL
    const searchParams = new URLSearchParams({
        q,
        sort,
        t,
        limit: '25',
        raw_json: '1',
    });
    if (subredditName) {
        searchParams.set('restrict_sr', 'on');
    }
    if (after) {
        searchParams.set('after', after);
    }
    if (searchType) {
        searchParams.set('type', searchType);
    }
    const jsonUrl = subredditName
        ? `https://www.reddit.com/r/${subredditName}/search.json?${searchParams}`
        : `https://www.reddit.com/search.json?${searchParams}`;
    let data;
    try {
        data = await fetchJsonWithRetry(jsonUrl, REDDIT_REQUEST_HEADERS);
    }
    catch {
        return {
            domain: REDDIT_DOMAIN,
            type: 'search',
            structured: { error: 'Search failed', query: q },
            cleanContent: `## ❌ Reddit Search Failed\n\nCould not search for "${q}". Reddit may be rate-limiting.`,
        };
    }
    const isCommentSearch = searchType === 'comment';
    const children = data?.data?.children;
    if (!children) {
        return isCommentSearch
            ? {
                domain: REDDIT_DOMAIN,
                type: 'search',
                structured: { query: q, comments: [], resultCount: 0 },
                cleanContent: `## 🔍 Reddit Comment Search: "${q}"\n\nNo results found.`,
            }
            : {
                domain: REDDIT_DOMAIN,
                type: 'search',
                structured: { query: q, posts: [], resultCount: 0 },
                cleanContent: `## 🔍 Reddit Search: "${q}"\n\nNo results found.`,
            };
    }
    const afterCursor = data.data.after || null; // pagination cursor
    const scope = subredditName ? `r/${subredditName}` : 'all of Reddit';
    const paginationNote = afterCursor
        ? `\n\n*Page has more results. Add \`&after=${afterCursor}\` to get the next page.*`
        : '';
    // Handle comment search (type=comment returns t1 children)
    if (isCommentSearch) {
        const comments = children
            .filter((c) => c.kind === 't1')
            .map((c) => parseRedditComment(c, 0))
            .filter(Boolean);
        const structured = {
            query: q,
            scope,
            sort,
            timeFilter: t,
            resultCount: comments.length,
            comments,
            after: afterCursor,
            hasMore: !!afterCursor,
        };
        const commentsMd = comments.map((c, i) => `### ${i + 1}. **${c.author}** (score: ${c.score})\n${c.text.slice(0, 400)}${c.text.length > 400 ? '...' : ''}`).join('\n\n---\n\n');
        const cleanContent = `## 🔍 Reddit Comment Search: "${q}" in ${scope}\n*Sorted by ${sort} | Time: ${t} | ${comments.length} results*\n\n${commentsMd}${paginationNote}`;
        return { domain: REDDIT_DOMAIN, type: 'search', structured, cleanContent };
    }
    const posts = children
        .filter((c) => c.kind === 't3')
        .map(({ data: d }) => ({
            title: d.title || '',
            author: `u/${d.author || '[deleted]'}`,
            score: d.score ?? 0,
            commentCount: d.num_comments ?? 0,
            selftext: d.selftext || '',
            subreddit: `r/${d.subreddit || ''}`,
            url: `https://reddit.com${d.permalink}`,
            created: unixToIso(d.created_utc),
            flair: d.link_flair_text || null,
            isNsfw: d.over_18 || false,
        }));
    const structured = {
        query: q,
        scope,
        sort,
        timeFilter: t,
        resultCount: posts.length,
        posts,
        after: afterCursor,
        hasMore: !!afterCursor,
    };
    // Build clean markdown with the post text (truncated to 500 chars), not just titles.
    const postsMd = posts.map((p, i) => {
        const selftext = p.selftext
            ? `\n${p.selftext.slice(0, 500)}${p.selftext.length > 500 ? '...' : ''}`
            : '';
        return `### ${i + 1}. ${p.title}\n**${p.author}** in ${p.subreddit} | ↑ ${p.score} | 💬 ${p.commentCount} comments${p.flair ? ` | ${p.flair}` : ''}\n*${p.created}*${selftext}\n[Read full thread →](${p.url})`;
    }).join('\n\n---\n\n');
    const cleanContent = `## 🔍 Reddit Search: "${q}" in ${scope}\n*Sorted by ${sort} | Time: ${t} | ${posts.length} results*\n\n${postsMd}${paginationNote}`;
    return { domain: REDDIT_DOMAIN, type: 'search', structured, cleanContent };
}

/** Extract a subreddit listing; `subredditPath` is `/r/<name>` and `sort` the optional sort segment. */
async function extractRedditSubreddit({ urlObj, path, subredditPath, sort }) {
    // Preserve query params (especially t=day, t=week etc. for sorted views)
    const queryString = urlObj.search || '';
    const sortPath = sort ? `/${sort}` : '';
    // The base path comes from the parsed pathname so a query string can never leak into it.
    const jsonUrl = `https://www.reddit.com${subredditPath}${sortPath}.json?limit=15${queryString ? '&' + queryString.slice(1) : ''}`;
    // Not wrapped in try/catch: a rejected fetch propagates to the caller.
    const data = await fetchJsonWithRetry(jsonUrl, REDDIT_REQUEST_HEADERS);
    if (!data?.data?.children) {
        return null;
    }
    const posts = data.data.children
        .filter((c) => c.kind === 't3')
        .map(({ data: d }) => ({
            title: d.title || '',
            author: `u/${d.author || '[deleted]'}`,
            score: d.score ?? 0,
            commentCount: d.num_comments ?? 0,
            url: `https://reddit.com${d.permalink}`,
            flair: d.link_flair_text || null,
        }));
    // Prefer the name as it appears in a returned permalink, falling back to the requested path.
    const subredditName = posts[0]?.url?.match(/\/r\/([^/]+)\//)?.[1] || path.match(/\/r\/([^/]+)/)?.[1] || '';
    // The heading reflects the sort actually requested ("hot" when none is given).
    const heading = `r/${subredditName} — ${redditCapitalize(sort ?? 'hot')} Posts`;
    const structured = { title: heading, subreddit: `r/${subredditName}`, posts };
    const listMd = posts.map((p, i) => `${i + 1}. **${p.title}**\n ${p.author} | ↑ ${p.score} | 💬 ${p.commentCount}${p.flair ? ` | ${p.flair}` : ''}\n ${p.url}`).join('\n\n');
    const cleanContent = `## 📋 ${heading}\n\n${listMd}`;
    return { domain: REDDIT_DOMAIN, type: 'subreddit', structured, cleanContent };
}

/** Extract a site-level listing (/, /hot, /new, /top, /rising, /controversial, /best, /popular, /all). */
async function extractRedditHomeListing({ urlObj, path }) {
    const sortType = path.match(/\/(hot|new|top|rising|controversial|best|popular|all)/)?.[1] ?? 'hot';
    const queryString = urlObj.search || '';
    const jsonUrl = `https://www.reddit.com/${sortType}.json?limit=15${queryString ? '&' + queryString.slice(1) : ''}`;
    // Not wrapped in try/catch: a rejected fetch propagates to the caller.
    const data = await fetchJsonWithRetry(jsonUrl, REDDIT_REQUEST_HEADERS);
    if (!data?.data?.children) {
        return null;
    }
    const posts = data.data.children
        .filter((c) => c.kind === 't3')
        .map(({ data: d }) => ({
            title: d.title || '',
            author: `u/${d.author || '[deleted]'}`,
            score: d.score ?? 0,
            commentCount: d.num_comments ?? 0,
            url: `https://reddit.com${d.permalink}`,
            subreddit: `r/${d.subreddit}`,
            flair: d.link_flair_text || null,
        }));
    const heading = `Reddit — ${redditCapitalize(sortType)} Posts`;
    const structured = { title: heading, sortType, posts, postCount: posts.length };
    const listMd = posts.map((p, i) => {
        const flairTag = p.flair ? ` | ${p.flair}` : '';
        return `${i + 1}. **${p.title}**\n ${p.author} in ${p.subreddit} | ↑ ${p.score} | 💬 ${p.commentCount}${flairTag}\n ${p.url}`;
    }).join('\n\n');
    const cleanContent = `## 📋 ${heading}\n\n${listMd}`;
    return { domain: 'reddit.com', type: 'listing', structured, cleanContent };
}

/**
 * Extract structured data and clean markdown from a Reddit URL using Reddit's
 * JSON endpoints rather than the page HTML.
 *
 * The page type is detected from the URL path and handled in this order:
 * gallery/post, search, subreddit listing, site-level listing. Share links
 * (paths containing `/s/`) are resolved to their target first, and
 * `old.reddit.com` is rewritten to `www.reddit.com`.
 *
 * @param {string} _html - Page HTML; unused, the data comes from the JSON API.
 * @param {string} url - Absolute Reddit URL to extract.
 * @returns {Promise<{domain: string, type: string, structured: object, cleanContent: string}|null>}
 *   The extraction result, or `null` for pages this extractor does not handle
 *   (user pages and anything unrecognised) and for listings whose response
 *   has no children. Post and search failures are reported as results whose
 *   `structured.error` is set.
 * @throws {TypeError} If the (resolved) URL cannot be parsed.
 * @throws Whatever `fetchJsonWithRetry` rejects with for subreddit and
 *   site-level listings; those two fetches are not caught here.
 */
export async function redditExtractor(_html, url) {
    // Resolve Reddit share URLs (/s/CODE) to actual post URLs before any processing
    let workingUrl = url;
    if (url.includes('/s/')) {
        const resolved = await resolveRedditShareUrl(url);
        if (resolved !== url) {
            if (process.env.DEBUG) {
                console.debug('[webpeel]', `Reddit share URL resolved: ${url} → ${resolved}`);
            }
            workingUrl = resolved;
        }
    }
    const urlObj = new URL(workingUrl);
    const path = urlObj.pathname;
    // Normalize old.reddit.com → www.reddit.com for JSON API
    const normalizedUrl = workingUrl.replace(/old\.reddit\.com/, 'www.reddit.com');
    // Detect page type
    const isPost = /\/r\/[^/]+\/comments\//.test(path) || /^\/comments\//.test(path);
    const isGallery = /\/gallery\//.test(path);
    if (isGallery || isPost) {
        // Gallery takes precedence when a path matches both patterns.
        return extractRedditThread({ url, normalizedUrl, path, isGallery });
    }
    // Anchored so that /r/searchengines or /user/searchbot are not mistaken for search pages.
    if (REDDIT_SEARCH_PATH.test(path)) {
        return extractRedditSearch({ urlObj, path });
    }
    // Subreddit with any sort/filter: /r/sub, /r/sub/, /r/sub/hot, /r/sub/top, /r/sub/new, /r/sub/rising
    const subredditMatch = path.match(REDDIT_SUBREDDIT_PATH);
    if (subredditMatch) {
        const [, subredditPath, sort] = subredditMatch;
        return extractRedditSubreddit({ urlObj, path, subredditPath, sort });
    }
    // Home/popular/all pages
    const isHomeListing = /^\/(hot|new|top|rising|controversial|best|popular|all)\/?$/.test(path) || path === '/' || path === '';
    if (isHomeListing) {
        return extractRedditHomeListing({ urlObj, path });
    }
    // User or other — fall back to null (let normal HTML extraction handle it)
    return null;
}
|