@iflow-mcp/jakeliume-webpeel 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +313 -0
- package/dist/cache.d.ts +30 -0
- package/dist/cache.js +139 -0
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +411 -0
- package/dist/cli/commands/doctor.d.ts +37 -0
- package/dist/cli/commands/doctor.js +371 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1345 -0
- package/dist/cli/commands/guide.d.ts +2 -0
- package/dist/cli/commands/guide.js +183 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +840 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/monitor.d.ts +12 -0
- package/dist/cli/commands/monitor.js +197 -0
- package/dist/cli/commands/observe.d.ts +12 -0
- package/dist/cli/commands/observe.js +158 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +282 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +1021 -0
- package/dist/cli/commands/setup.d.ts +13 -0
- package/dist/cli/commands/setup.js +244 -0
- package/dist/cli/commands/skill.d.ts +15 -0
- package/dist/cli/commands/skill.js +195 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +806 -0
- package/dist/cli-auth.d.ts +75 -0
- package/dist/cli-auth.js +369 -0
- package/dist/cli.d.ts +17 -0
- package/dist/cli.js +99 -0
- package/dist/core/actions.d.ts +69 -0
- package/dist/core/actions.js +495 -0
- package/dist/core/agent.d.ts +98 -0
- package/dist/core/agent.js +558 -0
- package/dist/core/answer.d.ts +42 -0
- package/dist/core/answer.js +395 -0
- package/dist/core/application-tracker.d.ts +84 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/apply.d.ts +162 -0
- package/dist/core/apply.js +816 -0
- package/dist/core/auth-detection.d.ts +35 -0
- package/dist/core/auth-detection.js +358 -0
- package/dist/core/auto-extract.d.ts +82 -0
- package/dist/core/auto-extract.js +604 -0
- package/dist/core/auto-interact.d.ts +23 -0
- package/dist/core/auto-interact.js +246 -0
- package/dist/core/bm25-filter.d.ts +66 -0
- package/dist/core/bm25-filter.js +288 -0
- package/dist/core/branding.d.ts +54 -0
- package/dist/core/branding.js +234 -0
- package/dist/core/browser-fetch.d.ts +323 -0
- package/dist/core/browser-fetch.js +1600 -0
- package/dist/core/browser-pool.d.ts +91 -0
- package/dist/core/browser-pool.js +550 -0
- package/dist/core/budget.d.ts +42 -0
- package/dist/core/budget.js +324 -0
- package/dist/core/business-intel.d.ts +47 -0
- package/dist/core/business-intel.js +279 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +121 -0
- package/dist/core/cf-worker-proxy.d.ts +32 -0
- package/dist/core/cf-worker-proxy.js +87 -0
- package/dist/core/challenge-detection.d.ts +26 -0
- package/dist/core/challenge-detection.js +468 -0
- package/dist/core/change-tracking.d.ts +75 -0
- package/dist/core/change-tracking.js +276 -0
- package/dist/core/chunker.d.ts +46 -0
- package/dist/core/chunker.js +249 -0
- package/dist/core/chunking.d.ts +42 -0
- package/dist/core/chunking.js +181 -0
- package/dist/core/circuit-breaker.d.ts +44 -0
- package/dist/core/circuit-breaker.js +85 -0
- package/dist/core/content-pruner.d.ts +47 -0
- package/dist/core/content-pruner.js +425 -0
- package/dist/core/cookie-cache.d.ts +60 -0
- package/dist/core/cookie-cache.js +163 -0
- package/dist/core/crawl-checkpoint.d.ts +54 -0
- package/dist/core/crawl-checkpoint.js +104 -0
- package/dist/core/crawler.d.ts +84 -0
- package/dist/core/crawler.js +349 -0
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/deep-fetch.d.ts +74 -0
- package/dist/core/deep-fetch.js +405 -0
- package/dist/core/deep-research.d.ts +141 -0
- package/dist/core/deep-research.js +972 -0
- package/dist/core/design-analysis.d.ts +70 -0
- package/dist/core/design-analysis.js +490 -0
- package/dist/core/design-compare.d.ts +38 -0
- package/dist/core/design-compare.js +264 -0
- package/dist/core/diff.d.ts +61 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/dns-cache.d.ts +20 -0
- package/dist/core/dns-cache.js +198 -0
- package/dist/core/documents.d.ts +23 -0
- package/dist/core/documents.js +123 -0
- package/dist/core/domain-memory.d.ts +66 -0
- package/dist/core/domain-memory.js +163 -0
- package/dist/core/domain-verify.d.ts +40 -0
- package/dist/core/domain-verify.js +379 -0
- package/dist/core/engine-ranker.d.ts +112 -0
- package/dist/core/engine-ranker.js +395 -0
- package/dist/core/extract-inline.d.ts +38 -0
- package/dist/core/extract-inline.js +215 -0
- package/dist/core/extract-listings.d.ts +38 -0
- package/dist/core/extract-listings.js +461 -0
- package/dist/core/extract.d.ts +9 -0
- package/dist/core/extract.js +139 -0
- package/dist/core/fetch-cache.d.ts +57 -0
- package/dist/core/fetch-cache.js +95 -0
- package/dist/core/fetcher.d.ts +13 -0
- package/dist/core/fetcher.js +12 -0
- package/dist/core/google-cache.d.ts +29 -0
- package/dist/core/google-cache.js +180 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/hotel-search.d.ts +122 -0
- package/dist/core/hotel-search.js +382 -0
- package/dist/core/http-fetch.d.ts +72 -0
- package/dist/core/http-fetch.js +820 -0
- package/dist/core/human.d.ts +175 -0
- package/dist/core/human.js +680 -0
- package/dist/core/image-caption.d.ts +44 -0
- package/dist/core/image-caption.js +271 -0
- package/dist/core/jobs.d.ts +75 -0
- package/dist/core/jobs.js +634 -0
- package/dist/core/json-ld.d.ts +15 -0
- package/dist/core/json-ld.js +617 -0
- package/dist/core/language-detect.d.ts +18 -0
- package/dist/core/language-detect.js +135 -0
- package/dist/core/links.d.ts +10 -0
- package/dist/core/links.js +44 -0
- package/dist/core/llm-extract.d.ts +71 -0
- package/dist/core/llm-extract.js +507 -0
- package/dist/core/llm-provider.d.ts +100 -0
- package/dist/core/llm-provider.js +702 -0
- package/dist/core/local-search.d.ts +60 -0
- package/dist/core/local-search.js +308 -0
- package/dist/core/logger.d.ts +28 -0
- package/dist/core/logger.js +104 -0
- package/dist/core/map.d.ts +33 -0
- package/dist/core/map.js +127 -0
- package/dist/core/markdown.d.ts +92 -0
- package/dist/core/markdown.js +809 -0
- package/dist/core/metadata.d.ts +34 -0
- package/dist/core/metadata.js +422 -0
- package/dist/core/observe.d.ts +113 -0
- package/dist/core/observe.js +395 -0
- package/dist/core/ocr.d.ts +12 -0
- package/dist/core/ocr.js +33 -0
- package/dist/core/paginate.d.ts +31 -0
- package/dist/core/paginate.js +106 -0
- package/dist/core/pdf.d.ts +8 -0
- package/dist/core/pdf.js +25 -0
- package/dist/core/peel-tls.d.ts +25 -0
- package/dist/core/peel-tls.js +220 -0
- package/dist/core/pipeline.d.ts +132 -0
- package/dist/core/pipeline.js +1666 -0
- package/dist/core/profiles.d.ts +61 -0
- package/dist/core/profiles.js +350 -0
- package/dist/core/prompt-guard.d.ts +30 -0
- package/dist/core/prompt-guard.js +119 -0
- package/dist/core/proxy-config.d.ts +90 -0
- package/dist/core/proxy-config.js +172 -0
- package/dist/core/quick-answer.d.ts +53 -0
- package/dist/core/quick-answer.js +833 -0
- package/dist/core/rate-governor.d.ts +80 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/readability.d.ts +57 -0
- package/dist/core/readability.js +533 -0
- package/dist/core/research.d.ts +66 -0
- package/dist/core/research.js +270 -0
- package/dist/core/retry.d.ts +60 -0
- package/dist/core/retry.js +119 -0
- package/dist/core/safe-browsing.d.ts +30 -0
- package/dist/core/safe-browsing.js +206 -0
- package/dist/core/schema-extraction.d.ts +66 -0
- package/dist/core/schema-extraction.js +352 -0
- package/dist/core/schema-postprocess.d.ts +32 -0
- package/dist/core/schema-postprocess.js +469 -0
- package/dist/core/schema-templates.d.ts +19 -0
- package/dist/core/schema-templates.js +143 -0
- package/dist/core/screenshot.d.ts +224 -0
- package/dist/core/screenshot.js +207 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +243 -0
- package/dist/core/search-provider.js +1629 -0
- package/dist/core/searxng-provider.d.ts +35 -0
- package/dist/core/searxng-provider.js +105 -0
- package/dist/core/selective-evidence.d.ts +151 -0
- package/dist/core/selective-evidence.js +389 -0
- package/dist/core/site-search.d.ts +44 -0
- package/dist/core/site-search.js +252 -0
- package/dist/core/sitemap.d.ts +23 -0
- package/dist/core/sitemap.js +105 -0
- package/dist/core/source-credibility.d.ts +29 -0
- package/dist/core/source-credibility.js +584 -0
- package/dist/core/source-scoring.d.ts +166 -0
- package/dist/core/source-scoring.js +396 -0
- package/dist/core/stemmer.d.ts +38 -0
- package/dist/core/stemmer.js +509 -0
- package/dist/core/strategies.d.ts +104 -0
- package/dist/core/strategies.js +1044 -0
- package/dist/core/strategy-hooks.d.ts +145 -0
- package/dist/core/strategy-hooks.js +74 -0
- package/dist/core/structured-extract.d.ts +43 -0
- package/dist/core/structured-extract.js +550 -0
- package/dist/core/summarize.d.ts +17 -0
- package/dist/core/summarize.js +78 -0
- package/dist/core/synonyms.d.ts +42 -0
- package/dist/core/synonyms.js +184 -0
- package/dist/core/system-monitor.d.ts +61 -0
- package/dist/core/system-monitor.js +133 -0
- package/dist/core/table-format.d.ts +30 -0
- package/dist/core/table-format.js +146 -0
- package/dist/core/threat-feeds.d.ts +23 -0
- package/dist/core/threat-feeds.js +104 -0
- package/dist/core/timing.d.ts +21 -0
- package/dist/core/timing.js +33 -0
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/core/user-agents.d.ts +82 -0
- package/dist/core/user-agents.js +239 -0
- package/dist/core/vertical-search.d.ts +54 -0
- package/dist/core/vertical-search.js +158 -0
- package/dist/core/watch-manager.d.ts +175 -0
- package/dist/core/watch-manager.js +416 -0
- package/dist/core/watch.d.ts +101 -0
- package/dist/core/watch.js +389 -0
- package/dist/core/youtube.d.ts +130 -0
- package/dist/core/youtube.js +1175 -0
- package/dist/ee/challenge-re-export.d.ts +1 -0
- package/dist/ee/challenge-re-export.js +1 -0
- package/dist/ee/challenge-solver.d.ts +72 -0
- package/dist/ee/challenge-solver.js +720 -0
- package/dist/ee/domain-extractors.d.ts +8 -0
- package/dist/ee/domain-extractors.js +8 -0
- package/dist/ee/domain-intel.d.ts +16 -0
- package/dist/ee/domain-intel.js +133 -0
- package/dist/ee/extractors/allrecipes.d.ts +2 -0
- package/dist/ee/extractors/allrecipes.js +120 -0
- package/dist/ee/extractors/amazon.d.ts +2 -0
- package/dist/ee/extractors/amazon.js +78 -0
- package/dist/ee/extractors/arxiv.d.ts +2 -0
- package/dist/ee/extractors/arxiv.js +137 -0
- package/dist/ee/extractors/bestbuy.d.ts +2 -0
- package/dist/ee/extractors/bestbuy.js +78 -0
- package/dist/ee/extractors/carscom.d.ts +2 -0
- package/dist/ee/extractors/carscom.js +121 -0
- package/dist/ee/extractors/coingecko.d.ts +2 -0
- package/dist/ee/extractors/coingecko.js +134 -0
- package/dist/ee/extractors/craigslist.d.ts +2 -0
- package/dist/ee/extractors/craigslist.js +92 -0
- package/dist/ee/extractors/devto.d.ts +2 -0
- package/dist/ee/extractors/devto.js +135 -0
- package/dist/ee/extractors/ebay.d.ts +2 -0
- package/dist/ee/extractors/ebay.js +90 -0
- package/dist/ee/extractors/espn.d.ts +2 -0
- package/dist/ee/extractors/espn.js +260 -0
- package/dist/ee/extractors/etsy.d.ts +2 -0
- package/dist/ee/extractors/etsy.js +52 -0
- package/dist/ee/extractors/facebook.d.ts +2 -0
- package/dist/ee/extractors/facebook.js +46 -0
- package/dist/ee/extractors/github.d.ts +2 -0
- package/dist/ee/extractors/github.js +196 -0
- package/dist/ee/extractors/google-flights.d.ts +2 -0
- package/dist/ee/extractors/google-flights.js +176 -0
- package/dist/ee/extractors/hackernews.d.ts +2 -0
- package/dist/ee/extractors/hackernews.js +147 -0
- package/dist/ee/extractors/imdb.d.ts +2 -0
- package/dist/ee/extractors/imdb.js +172 -0
- package/dist/ee/extractors/index.d.ts +26 -0
- package/dist/ee/extractors/index.js +247 -0
- package/dist/ee/extractors/instagram.d.ts +2 -0
- package/dist/ee/extractors/instagram.js +102 -0
- package/dist/ee/extractors/kalshi.d.ts +2 -0
- package/dist/ee/extractors/kalshi.js +121 -0
- package/dist/ee/extractors/kayak-cars.d.ts +2 -0
- package/dist/ee/extractors/kayak-cars.js +270 -0
- package/dist/ee/extractors/linkedin.d.ts +2 -0
- package/dist/ee/extractors/linkedin.js +113 -0
- package/dist/ee/extractors/medium.d.ts +2 -0
- package/dist/ee/extractors/medium.js +130 -0
- package/dist/ee/extractors/news.d.ts +4 -0
- package/dist/ee/extractors/news.js +173 -0
- package/dist/ee/extractors/npm.d.ts +2 -0
- package/dist/ee/extractors/npm.js +86 -0
- package/dist/ee/extractors/pdf.d.ts +2 -0
- package/dist/ee/extractors/pdf.js +108 -0
- package/dist/ee/extractors/pinterest.d.ts +2 -0
- package/dist/ee/extractors/pinterest.js +34 -0
- package/dist/ee/extractors/polymarket.d.ts +2 -0
- package/dist/ee/extractors/polymarket.js +358 -0
- package/dist/ee/extractors/producthunt.d.ts +2 -0
- package/dist/ee/extractors/producthunt.js +88 -0
- package/dist/ee/extractors/pubmed.d.ts +2 -0
- package/dist/ee/extractors/pubmed.js +162 -0
- package/dist/ee/extractors/pypi.d.ts +2 -0
- package/dist/ee/extractors/pypi.js +80 -0
- package/dist/ee/extractors/reddit.d.ts +2 -0
- package/dist/ee/extractors/reddit.js +438 -0
- package/dist/ee/extractors/redfin.d.ts +2 -0
- package/dist/ee/extractors/redfin.js +156 -0
- package/dist/ee/extractors/semanticscholar.d.ts +2 -0
- package/dist/ee/extractors/semanticscholar.js +131 -0
- package/dist/ee/extractors/shared.d.ts +12 -0
- package/dist/ee/extractors/shared.js +76 -0
- package/dist/ee/extractors/soundcloud.d.ts +2 -0
- package/dist/ee/extractors/soundcloud.js +34 -0
- package/dist/ee/extractors/sportsbetting.d.ts +2 -0
- package/dist/ee/extractors/sportsbetting.js +37 -0
- package/dist/ee/extractors/spotify.d.ts +2 -0
- package/dist/ee/extractors/spotify.js +34 -0
- package/dist/ee/extractors/stackoverflow.d.ts +2 -0
- package/dist/ee/extractors/stackoverflow.js +61 -0
- package/dist/ee/extractors/substack.d.ts +2 -0
- package/dist/ee/extractors/substack.js +115 -0
- package/dist/ee/extractors/substackroot.d.ts +2 -0
- package/dist/ee/extractors/substackroot.js +46 -0
- package/dist/ee/extractors/tiktok.d.ts +2 -0
- package/dist/ee/extractors/tiktok.js +29 -0
- package/dist/ee/extractors/tradingview.d.ts +2 -0
- package/dist/ee/extractors/tradingview.js +182 -0
- package/dist/ee/extractors/twitch.d.ts +2 -0
- package/dist/ee/extractors/twitch.js +36 -0
- package/dist/ee/extractors/twitter.d.ts +2 -0
- package/dist/ee/extractors/twitter.js +327 -0
- package/dist/ee/extractors/types.d.ts +14 -0
- package/dist/ee/extractors/types.js +1 -0
- package/dist/ee/extractors/walmart.d.ts +2 -0
- package/dist/ee/extractors/walmart.js +50 -0
- package/dist/ee/extractors/weather.d.ts +2 -0
- package/dist/ee/extractors/weather.js +133 -0
- package/dist/ee/extractors/wikipedia.d.ts +4 -0
- package/dist/ee/extractors/wikipedia.js +235 -0
- package/dist/ee/extractors/yelp.d.ts +2 -0
- package/dist/ee/extractors/yelp.js +216 -0
- package/dist/ee/extractors/youtube.d.ts +2 -0
- package/dist/ee/extractors/youtube.js +189 -0
- package/dist/ee/extractors/zillow.d.ts +54 -0
- package/dist/ee/extractors/zillow.js +247 -0
- package/dist/ee/extractors-re-export.d.ts +1 -0
- package/dist/ee/extractors-re-export.js +1 -0
- package/dist/ee/premium-hooks.d.ts +20 -0
- package/dist/ee/premium-hooks.js +50 -0
- package/dist/ee/spa-detection.d.ts +2 -0
- package/dist/ee/spa-detection.js +2 -0
- package/dist/ee/stability.d.ts +4 -0
- package/dist/ee/stability.js +29 -0
- package/dist/ee/swr-cache.d.ts +14 -0
- package/dist/ee/swr-cache.js +34 -0
- package/dist/index.d.ts +143 -0
- package/dist/index.js +291 -0
- package/dist/integrations/index.d.ts +2 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/langchain.d.ts +64 -0
- package/dist/integrations/langchain.js +115 -0
- package/dist/integrations/llamaindex.d.ts +50 -0
- package/dist/integrations/llamaindex.js +91 -0
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +395 -0
- package/dist/mcp/handlers/extract.d.ts +7 -0
- package/dist/mcp/handlers/extract.js +135 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +63 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +40 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/observe.d.ts +8 -0
- package/dist/mcp/handlers/observe.js +37 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +78 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +7 -0
- package/dist/mcp/server.js +108 -0
- package/dist/mcp/smart-router.d.ts +23 -0
- package/dist/mcp/smart-router.js +178 -0
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +632 -0
- package/dist/server/auth-store.d.ts +28 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/bull-queues.d.ts +60 -0
- package/dist/server/bull-queues.js +90 -0
- package/dist/server/email-service.d.ts +55 -0
- package/dist/server/email-service.js +291 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/audit-log.d.ts +14 -0
- package/dist/server/middleware/audit-log.js +73 -0
- package/dist/server/middleware/auth.d.ts +35 -0
- package/dist/server/middleware/auth.js +225 -0
- package/dist/server/middleware/rate-limit.d.ts +50 -0
- package/dist/server/middleware/rate-limit.js +270 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +201 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +146 -0
- package/dist/server/pg-auth-store.js +576 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +79 -0
- package/dist/server/routes/admin-active.d.ts +7 -0
- package/dist/server/routes/admin-active.js +120 -0
- package/dist/server/routes/admin-stats.d.ts +7 -0
- package/dist/server/routes/admin-stats.js +176 -0
- package/dist/server/routes/agent.d.ts +24 -0
- package/dist/server/routes/agent.js +480 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +295 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cache-warm.d.ts +25 -0
- package/dist/server/routes/cache-warm.js +212 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/crawl.d.ts +13 -0
- package/dist/server/routes/crawl.js +287 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/deep-research.d.ts +11 -0
- package/dist/server/routes/deep-research.js +232 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +14 -0
- package/dist/server/routes/extract.js +325 -0
- package/dist/server/routes/feed.d.ts +15 -0
- package/dist/server/routes/feed.js +311 -0
- package/dist/server/routes/fetch-queue.d.ts +13 -0
- package/dist/server/routes/fetch-queue.js +357 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +1274 -0
- package/dist/server/routes/go.d.ts +14 -0
- package/dist/server/routes/go.js +81 -0
- package/dist/server/routes/health.d.ts +11 -0
- package/dist/server/routes/health.js +141 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +574 -0
- package/dist/server/routes/map.d.ts +11 -0
- package/dist/server/routes/map.js +116 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +197 -0
- package/dist/server/routes/metrics.d.ts +37 -0
- package/dist/server/routes/metrics.js +149 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/reader.d.ts +18 -0
- package/dist/server/routes/reader.js +192 -0
- package/dist/server/routes/research.d.ts +14 -0
- package/dist/server/routes/research.js +482 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +820 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +874 -0
- package/dist/server/routes/session.d.ts +17 -0
- package/dist/server/routes/session.js +548 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +102 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +72 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
- package/dist/server/routes/smart-search/handlers/general.js +717 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +1309 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +154 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +546 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +264 -0
- package/dist/server/routes/smart-search/llm.d.ts +16 -0
- package/dist/server/routes/smart-search/llm.js +70 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +81 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +20 -0
- package/dist/server/routes/smart-search/utils.js +146 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +296 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/usage.d.ts +9 -0
- package/dist/server/routes/usage.js +279 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1867 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +14 -0
- package/dist/server/sentry.js +104 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/dist/types.d.ts +552 -0
- package/dist/types.js +39 -0
- package/llms.txt +105 -0
- package/package.json +189 -0
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Rate Governor — controls application rate to stay within safe limits.
|
|
3
|
+
*
|
|
4
|
+
* Prevents over-application and bot-detection triggers by:
|
|
5
|
+
* - Capping daily application count
|
|
6
|
+
* - Enforcing minimum/maximum delays between applications
|
|
7
|
+
* - Restricting activity to configured active hours
|
|
8
|
+
* - Supporting optional weekday-only mode
|
|
9
|
+
* - Entering cooldown after a CAPTCHA / warning signal is detected
|
|
10
|
+
*
|
|
11
|
+
* State is persisted to ~/.webpeel/rate-state.json.
|
|
12
|
+
*/
|
|
13
|
+
export interface RateConfig {
    /**
     * Upper bound on applications submitted per day.
     * Default: 5
     */
    maxPerDay: number;
    /**
     * Shortest pause between two applications, in milliseconds.
     * Default: 900000 (15 min)
     */
    minDelayMs: number;
    /**
     * Longest pause between two applications, in milliseconds.
     * Default: 2700000 (45 min)
     */
    maxDelayMs: number;
    /**
     * Active-hours window as `[start, end]`, expressed in 24h format.
     * Default: [9, 18]
     */
    activeHours: [number, number];
    /**
     * When true, applications are only allowed on weekdays.
     * Default: true
     */
    weekdaysOnly: boolean;
    /**
     * Length of the cooldown entered after a CAPTCHA/warning is detected,
     * in milliseconds.
     * Default: 172800000 (48h)
     */
    warningCooldownMs: number;
}
|
|
27
|
+
export interface RateState {
    /** Number of applications submitted today (resets at midnight). */
    todayCount: number;
    /** The day `todayCount` refers to, as a YYYY-MM-DD date string. */
    todayDate: string;
    /** Timestamp of the most recent application. */
    lastApplyTimestamp: number;
    /** Timestamp at which the cooldown expires; 0 when no cooldown is set. */
    cooldownUntil: number;
    /** All-time total of applications. */
    totalApplications: number;
}
|
|
39
|
+
export interface CanApplyResult {
    /** Whether an application may be submitted right now. */
    allowed: boolean;
    /** Explanation supplied when the application is not allowed. */
    reason?: string;
    /** How long to wait, in milliseconds, before trying again. */
    waitMs?: number;
}
|
|
45
|
+
export declare class RateGovernor {
    private config;
    private state;
    /**
     * @param config - Optional partial configuration; any field may be omitted.
     */
    constructor(config?: Partial<RateConfig>);
    /**
     * Decide whether an application may be submitted at this moment.
     *
     * @returns `{ allowed: true }` when every check passes; otherwise
     * `{ allowed: false, reason, waitMs }`.
     */
    canApply(): CanApplyResult;
    /** Register that an application was submitted successfully. */
    recordApplication(): void;
    /**
     * Register a warning signal (CAPTCHA detected, rate-limit notice, etc.).
     * This starts a cooldown period to avoid further detection.
     */
    recordWarning(): void;
    /** Return a snapshot of the current rate state. */
    getState(): RateState;
    /** Clear the active cooldown manually. */
    resetCooldown(): void;
    /**
     * Pick a randomised delay for the next application submission.
     *
     * @returns A value uniformly distributed between minDelayMs and maxDelayMs.
     */
    getNextDelay(): number;
    /** Return the resolved config (useful for CLI display). */
    getConfig(): RateConfig;
    /** Resets today's counter when the calendar date has changed. */
    private maybeResetDay;
    private loadState;
    private freshState;
    private saveState;
}
|
|
79
|
+
/**
 * Turn a duration given in milliseconds into a human-readable string.
 *
 * @param ms - Duration in milliseconds.
 * @returns The human-readable duration string.
 */
export declare function formatDuration(ms: number): string;
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Rate Governor — controls application rate to stay within safe limits.
|
|
3
|
+
*
|
|
4
|
+
* Prevents over-application and bot-detection triggers by:
|
|
5
|
+
* - Capping daily application count
|
|
6
|
+
* - Enforcing minimum/maximum delays between applications
|
|
7
|
+
* - Restricting activity to configured active hours
|
|
8
|
+
* - Supporting optional weekday-only mode
|
|
9
|
+
* - Entering cooldown after a CAPTCHA / warning signal is detected
|
|
10
|
+
*
|
|
11
|
+
* State is persisted to ~/.webpeel/rate-state.json.
|
|
12
|
+
*/
|
|
13
|
+
import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'fs';
|
|
14
|
+
import { homedir } from 'os';
|
|
15
|
+
import { join } from 'path';
|
|
16
|
+
// ── Defaults ───────────────────────────────────────────────────────────
|
|
17
|
+
const DEFAULT_CONFIG = {
    // At most five applications per day.
    maxPerDay: 5,
    // Between 15 and 45 minutes of delay from one application to the next.
    minDelayMs: 15 * 60 * 1000,
    maxDelayMs: 45 * 60 * 1000,
    // Active hours range [start, end] in 24h format.
    activeHours: [9, 18],
    weekdaysOnly: true,
    // 48 hours of cooldown after a CAPTCHA/warning.
    warningCooldownMs: 48 * 60 * 60 * 1000,
};
|
|
25
|
+
const WEBPEEL_DIR = join(homedir(), '.webpeel');
|
|
26
|
+
const RATE_STATE_PATH = join(WEBPEEL_DIR, 'rate-state.json');
|
|
27
|
+
// ── Helpers ────────────────────────────────────────────────────────────
|
|
28
|
+
/**
 * Render a Date as a YYYY-MM-DD string using its local-time fields.
 * Month and day are left-padded with zeros to two digits.
 */
function toDateString(d) {
    const twoDigits = (n) => String(n).padStart(2, '0');
    const parts = [
        d.getFullYear(),
        twoDigits(d.getMonth() + 1), // getMonth() is zero-based
        twoDigits(d.getDate()),
    ];
    return parts.join('-');
}
|
|
35
|
+
/**
 * Milliseconds from now until the given hour (local time) next comes around.
 * If that hour's top-of-hour is not in the future today, tomorrow's
 * occurrence is used instead.
 */
function msUntilHour(hour) {
    const startedAt = new Date();
    const nextOccurrence = new Date(startedAt.getTime());
    nextOccurrence.setHours(hour, 0, 0, 0);
    const notInFuture = nextOccurrence.getTime() <= startedAt.getTime();
    if (notInFuture) {
        // Today's slot is now or already behind us: move to the same hour tomorrow.
        nextOccurrence.setDate(nextOccurrence.getDate() + 1);
    }
    return nextOccurrence.getTime() - startedAt.getTime();
}
|
|
45
|
+
/**
 * Milliseconds from now until `startHour`:00 (local time) on the next weekday
 * (Monday–Friday) that falls strictly after today.
 *
 * @param startHour Hour of day, in local time, at which the active window opens.
 * @returns Milliseconds between now and that moment.
 */
function msUntilNextWeekdayMorning(startHour) {
    const now = new Date();
    let target = new Date(now);
    // Step forward day by day until we hit a weekday.
    for (let i = 1; i <= 7; i++) {
        // Build every candidate from a fresh copy of `now`. Reusing one mutated
        // Date breaks at month boundaries: once it has rolled into the next
        // month, `setDate(now.getDate() + i)` is resolved against that new
        // month and lands weeks too late (e.g. Sat Jan 31 -> Mar 5 instead of
        // Mon Feb 2).
        target = new Date(now);
        target.setDate(now.getDate() + i);
        target.setHours(startHour, 0, 0, 0);
        const dow = target.getDay(); // 0=Sun, 6=Sat
        if (dow >= 1 && dow <= 5)
            break;
    }
    return target.getTime() - now.getTime();
}
|
|
59
|
+
// ── RateGovernor class ─────────────────────────────────────────────────
|
|
60
|
+
export class RateGovernor {
    /** Effective configuration: DEFAULT_CONFIG overlaid with caller overrides. */
    config;
    /** Counters and timestamps that are persisted to RATE_STATE_PATH. */
    state;
    /**
     * @param config - Optional overrides; any key present replaces the
     *   corresponding DEFAULT_CONFIG value (shallow merge).
     */
    constructor(config) {
        this.config = { ...DEFAULT_CONFIG, ...config };
        this.state = this.loadState();
        this.maybeResetDay();
    }
    // ── Public API ───────────────────────────────────────────────────────
    /**
     * Check whether we are allowed to submit an application right now.
     *
     * Checks run in this order: warning cooldown, weekday-only, active hours,
     * daily cap, minimum delay since the last application.
     *
     * @returns `{ allowed: true }` if all checks pass, otherwise
     *   `{ allowed: false, reason, waitMs }`.
     */
    canApply() {
        // Roll the daily counter over first; otherwise a long-running process
        // that hit the cap yesterday would stay blocked after midnight.
        this.maybeResetDay();
        const now = Date.now();
        const nowDate = new Date();
        // 1. Active cooldown check
        if (this.state.cooldownUntil > 0 && now < this.state.cooldownUntil) {
            const waitMs = this.state.cooldownUntil - now;
            return {
                allowed: false,
                reason: `Warning cooldown active — resumes in ${formatDuration(waitMs)}`,
                waitMs,
            };
        }
        // 2. Weekday-only check
        if (this.config.weekdaysOnly) {
            const dow = nowDate.getDay(); // 0=Sun, 6=Sat
            if (dow === 0 || dow === 6) {
                const waitMs = msUntilNextWeekdayMorning(this.config.activeHours[0]);
                return {
                    allowed: false,
                    reason: 'Weekday-only mode — today is a weekend',
                    waitMs,
                };
            }
        }
        // 3. Active hours check
        const [startHour, endHour] = this.config.activeHours;
        const currentHour = nowDate.getHours();
        if (currentHour < startHour || currentHour >= endHour) {
            // Before the window msUntilHour resolves to today's start; after the
            // window it resolves to tomorrow's start.
            const waitMs = msUntilHour(startHour);
            return {
                allowed: false,
                reason: `Outside active hours — active between ${startHour}:00 and ${endHour}:00`,
                waitMs,
            };
        }
        // 4. Daily cap check
        if (this.state.todayCount >= this.config.maxPerDay) {
            const waitMs = msUntilHour(startHour); // Try again tomorrow
            return {
                allowed: false,
                reason: `Daily limit reached (${this.config.maxPerDay} applications today)`,
                waitMs,
            };
        }
        // 5. Minimum delay between applications
        if (this.state.lastApplyTimestamp > 0) {
            const elapsed = now - this.state.lastApplyTimestamp;
            if (elapsed < this.config.minDelayMs) {
                const waitMs = this.config.minDelayMs - elapsed;
                return {
                    allowed: false,
                    reason: `Minimum delay not elapsed — please wait ${formatDuration(waitMs)}`,
                    waitMs,
                };
            }
        }
        return { allowed: true };
    }
    /** Record a successful application submission and persist the new counters. */
    recordApplication() {
        this.maybeResetDay();
        this.state.todayCount += 1;
        this.state.lastApplyTimestamp = Date.now();
        this.state.totalApplications += 1;
        this.saveState();
    }
    /**
     * Record a warning signal (CAPTCHA detected, rate-limit notice, etc.).
     * Sets `cooldownUntil` to now + `warningCooldownMs` and persists it.
     */
    recordWarning() {
        this.state.cooldownUntil = Date.now() + this.config.warningCooldownMs;
        this.saveState();
    }
    /** Get a shallow-copied snapshot of the current rate state. */
    getState() {
        this.maybeResetDay();
        return { ...this.state };
    }
    /** Manually clear the active cooldown and persist the change. */
    resetCooldown() {
        this.state.cooldownUntil = 0;
        this.saveState();
    }
    /**
     * Calculate a randomised delay for the next application submission.
     *
     * @returns An integer number of milliseconds drawn uniformly from
     *   [minDelayMs, maxDelayMs). If maxDelayMs is configured below
     *   minDelayMs, minDelayMs is returned so the minimum is never undercut.
     */
    getNextDelay() {
        const range = Math.max(0, this.config.maxDelayMs - this.config.minDelayMs);
        return Math.floor(this.config.minDelayMs + Math.random() * range);
    }
    /** Expose a shallow copy of the resolved config (useful for CLI display). */
    getConfig() {
        return { ...this.config };
    }
    // ── Private helpers ──────────────────────────────────────────────────
    /** Reset today's counter (and persist) if the local calendar date has changed. */
    maybeResetDay() {
        const today = toDateString(new Date());
        if (this.state.todayDate !== today) {
            this.state.todayCount = 0;
            this.state.todayDate = today;
            this.saveState();
        }
    }
    /**
     * Read persisted state from RATE_STATE_PATH.
     *
     * Falls back to a fresh state when the file is missing, unreadable, not
     * valid JSON, or does not contain a plain object. Known fields with a
     * missing or wrongly-typed value are replaced by their fresh defaults so
     * later arithmetic on the counters cannot produce NaN or string
     * concatenation.
     */
    loadState() {
        try {
            if (existsSync(RATE_STATE_PATH)) {
                const raw = readFileSync(RATE_STATE_PATH, 'utf-8');
                const parsed = JSON.parse(raw);
                if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
                    const fresh = this.freshState();
                    const numberOr = (value, fallback) => typeof value === 'number' && Number.isFinite(value) ? value : fallback;
                    return {
                        ...fresh,
                        ...parsed,
                        todayCount: numberOr(parsed.todayCount, fresh.todayCount),
                        // An empty date never equals today, so the constructor's
                        // maybeResetDay() starts a clean day.
                        todayDate: typeof parsed.todayDate === 'string' ? parsed.todayDate : '',
                        lastApplyTimestamp: numberOr(parsed.lastApplyTimestamp, fresh.lastApplyTimestamp),
                        cooldownUntil: numberOr(parsed.cooldownUntil, fresh.cooldownUntil),
                        totalApplications: numberOr(parsed.totalApplications, fresh.totalApplications),
                    };
                }
            }
        }
        catch (e) {
            if (process.env.DEBUG)
                console.debug('[webpeel]', 'rate state load failed:', e instanceof Error ? e.message : e);
        }
        return this.freshState();
    }
    /** Build a zeroed state stamped with today's local date. */
    freshState() {
        return {
            todayCount: 0,
            todayDate: toDateString(new Date()),
            lastApplyTimestamp: 0,
            cooldownUntil: 0,
            totalApplications: 0,
        };
    }
    /** Write the current state to RATE_STATE_PATH, creating the directory if needed. */
    saveState() {
        try {
            mkdirSync(WEBPEEL_DIR, { recursive: true });
            writeFileSync(RATE_STATE_PATH, JSON.stringify(this.state, null, 2), 'utf-8');
        }
        catch (err) {
            // Non-fatal — state will be recomputed on next run
            console.error('[rate-governor] Failed to save state:', err);
        }
    }
}
|
|
222
|
+
// ── Utility ────────────────────────────────────────────────────────────
|
|
223
|
+
/**
 * Turn a millisecond count into a short two-unit label.
 *
 * Output shapes, from smallest to largest: `Ns`, `Nm Ns`, `Nh Nm`, `Nd Nh`.
 * Zero and negative inputs yield `'0s'`; sub-second remainders are dropped.
 */
export function formatDuration(ms) {
    if (ms <= 0) {
        return '0s';
    }
    const totalSeconds = Math.floor(ms / 1000);
    const totalMinutes = Math.floor(totalSeconds / 60);
    const totalHours = Math.floor(totalMinutes / 60);
    if (totalSeconds < 60) {
        return `${totalSeconds}s`;
    }
    if (totalMinutes < 60) {
        return `${totalMinutes}m ${totalSeconds % 60}s`;
    }
    if (totalHours < 24) {
        return `${totalHours}h ${totalMinutes % 60}m`;
    }
    const totalDays = Math.floor(totalHours / 24);
    return `${totalDays}d ${totalHours % 24}h`;
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Readability Engine
|
|
3
|
+
*
|
|
4
|
+
* Extracts the core article content from a web page — like Pocket, Instapaper,
|
|
5
|
+
* or Safari Reader Mode but deterministic, fast, and purpose-built for AI agents.
|
|
6
|
+
*
|
|
7
|
+
* Algorithm:
|
|
8
|
+
* 1. Noise removal — strip nav, footer, aside, ads, cookie banners, etc.
|
|
9
|
+
* 2. Candidate scoring — score block elements by text density, link density,
|
|
10
|
+
* paragraph count, and structural signals.
|
|
11
|
+
* 3. Best candidate selection — prefer <article> > <main> > highest-scoring div.
|
|
12
|
+
* 4. Post-selection cleaning — remove inline noise (share buttons, etc.).
|
|
13
|
+
* 5. Metadata extraction — title, author, date, site name from meta tags / bylines.
|
|
14
|
+
* 6. Markdown output — via existing htmlToMarkdown().
|
|
15
|
+
*/
|
|
16
|
+
/** Optional switches accepted as the `options` argument of `extractReadableContent`. */
export interface ReadabilityOptions {
    /** Keep image references in output (default: true) */
    includeImages?: boolean;
    /** Keep hyperlinks in output (default: true) */
    includeLinks?: boolean;
    /** Keep code blocks in output (default: true) */
    includeCode?: boolean;
    /** Keep tables in output (default: true) */
    includeTables?: boolean;
    /** Maximum characters to return (applied after conversion) */
    maxLength?: number;
}
|
|
28
|
+
/** Value returned by `extractReadableContent`: article metadata plus the cleaned content. */
export interface ReadabilityResult {
    /** Article title */
    title: string;
    /** Author name, or null */
    author: string | null;
    /** Publication date string, or null */
    date: string | null;
    /** Site name, or null */
    siteName: string | null;
    /** Clean article content as markdown */
    content: string;
    /** First 2 complete sentences as excerpt */
    excerpt: string;
    /** Estimated word count */
    wordCount: number;
    /** Human-readable reading time, e.g. "5 min read" */
    readingTime: string;
    /** Language code from <html lang>, or null */
    language: string | null;
}
|
|
48
|
+
/**
 * Extract clean, readable article content from raw HTML.
 *
 * Mimics browser Reader Mode but deterministic and purpose-built for AI agents.
 *
 * @param html - Raw HTML of the page
 * @param _url - Source URL (used for resolving relative links in metadata).
 *   NOTE(review): the leading underscore suggests the implementation may not
 *   currently read this argument — confirm against the implementation.
 * @param options - Extraction options
 * @returns The extracted article metadata and content as a `ReadabilityResult`
 */
export declare function extractReadableContent(html: string, _url: string, options?: ReadabilityOptions): ReadabilityResult;
|