@iflow-mcp/jakeliume-webpeel 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +313 -0
- package/dist/cache.d.ts +30 -0
- package/dist/cache.js +139 -0
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +411 -0
- package/dist/cli/commands/doctor.d.ts +37 -0
- package/dist/cli/commands/doctor.js +371 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1345 -0
- package/dist/cli/commands/guide.d.ts +2 -0
- package/dist/cli/commands/guide.js +183 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +840 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/monitor.d.ts +12 -0
- package/dist/cli/commands/monitor.js +197 -0
- package/dist/cli/commands/observe.d.ts +12 -0
- package/dist/cli/commands/observe.js +158 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +282 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +1021 -0
- package/dist/cli/commands/setup.d.ts +13 -0
- package/dist/cli/commands/setup.js +244 -0
- package/dist/cli/commands/skill.d.ts +15 -0
- package/dist/cli/commands/skill.js +195 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +806 -0
- package/dist/cli-auth.d.ts +75 -0
- package/dist/cli-auth.js +369 -0
- package/dist/cli.d.ts +17 -0
- package/dist/cli.js +99 -0
- package/dist/core/actions.d.ts +69 -0
- package/dist/core/actions.js +495 -0
- package/dist/core/agent.d.ts +98 -0
- package/dist/core/agent.js +558 -0
- package/dist/core/answer.d.ts +42 -0
- package/dist/core/answer.js +395 -0
- package/dist/core/application-tracker.d.ts +84 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/apply.d.ts +162 -0
- package/dist/core/apply.js +816 -0
- package/dist/core/auth-detection.d.ts +35 -0
- package/dist/core/auth-detection.js +358 -0
- package/dist/core/auto-extract.d.ts +82 -0
- package/dist/core/auto-extract.js +604 -0
- package/dist/core/auto-interact.d.ts +23 -0
- package/dist/core/auto-interact.js +246 -0
- package/dist/core/bm25-filter.d.ts +66 -0
- package/dist/core/bm25-filter.js +288 -0
- package/dist/core/branding.d.ts +54 -0
- package/dist/core/branding.js +234 -0
- package/dist/core/browser-fetch.d.ts +323 -0
- package/dist/core/browser-fetch.js +1600 -0
- package/dist/core/browser-pool.d.ts +91 -0
- package/dist/core/browser-pool.js +550 -0
- package/dist/core/budget.d.ts +42 -0
- package/dist/core/budget.js +324 -0
- package/dist/core/business-intel.d.ts +47 -0
- package/dist/core/business-intel.js +279 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +121 -0
- package/dist/core/cf-worker-proxy.d.ts +32 -0
- package/dist/core/cf-worker-proxy.js +87 -0
- package/dist/core/challenge-detection.d.ts +26 -0
- package/dist/core/challenge-detection.js +468 -0
- package/dist/core/change-tracking.d.ts +75 -0
- package/dist/core/change-tracking.js +276 -0
- package/dist/core/chunker.d.ts +46 -0
- package/dist/core/chunker.js +249 -0
- package/dist/core/chunking.d.ts +42 -0
- package/dist/core/chunking.js +181 -0
- package/dist/core/circuit-breaker.d.ts +44 -0
- package/dist/core/circuit-breaker.js +85 -0
- package/dist/core/content-pruner.d.ts +47 -0
- package/dist/core/content-pruner.js +425 -0
- package/dist/core/cookie-cache.d.ts +60 -0
- package/dist/core/cookie-cache.js +163 -0
- package/dist/core/crawl-checkpoint.d.ts +54 -0
- package/dist/core/crawl-checkpoint.js +104 -0
- package/dist/core/crawler.d.ts +84 -0
- package/dist/core/crawler.js +349 -0
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/deep-fetch.d.ts +74 -0
- package/dist/core/deep-fetch.js +405 -0
- package/dist/core/deep-research.d.ts +141 -0
- package/dist/core/deep-research.js +972 -0
- package/dist/core/design-analysis.d.ts +70 -0
- package/dist/core/design-analysis.js +490 -0
- package/dist/core/design-compare.d.ts +38 -0
- package/dist/core/design-compare.js +264 -0
- package/dist/core/diff.d.ts +61 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/dns-cache.d.ts +20 -0
- package/dist/core/dns-cache.js +198 -0
- package/dist/core/documents.d.ts +23 -0
- package/dist/core/documents.js +123 -0
- package/dist/core/domain-memory.d.ts +66 -0
- package/dist/core/domain-memory.js +163 -0
- package/dist/core/domain-verify.d.ts +40 -0
- package/dist/core/domain-verify.js +379 -0
- package/dist/core/engine-ranker.d.ts +112 -0
- package/dist/core/engine-ranker.js +395 -0
- package/dist/core/extract-inline.d.ts +38 -0
- package/dist/core/extract-inline.js +215 -0
- package/dist/core/extract-listings.d.ts +38 -0
- package/dist/core/extract-listings.js +461 -0
- package/dist/core/extract.d.ts +9 -0
- package/dist/core/extract.js +139 -0
- package/dist/core/fetch-cache.d.ts +57 -0
- package/dist/core/fetch-cache.js +95 -0
- package/dist/core/fetcher.d.ts +13 -0
- package/dist/core/fetcher.js +12 -0
- package/dist/core/google-cache.d.ts +29 -0
- package/dist/core/google-cache.js +180 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/hotel-search.d.ts +122 -0
- package/dist/core/hotel-search.js +382 -0
- package/dist/core/http-fetch.d.ts +72 -0
- package/dist/core/http-fetch.js +820 -0
- package/dist/core/human.d.ts +175 -0
- package/dist/core/human.js +680 -0
- package/dist/core/image-caption.d.ts +44 -0
- package/dist/core/image-caption.js +271 -0
- package/dist/core/jobs.d.ts +75 -0
- package/dist/core/jobs.js +634 -0
- package/dist/core/json-ld.d.ts +15 -0
- package/dist/core/json-ld.js +617 -0
- package/dist/core/language-detect.d.ts +18 -0
- package/dist/core/language-detect.js +135 -0
- package/dist/core/links.d.ts +10 -0
- package/dist/core/links.js +44 -0
- package/dist/core/llm-extract.d.ts +71 -0
- package/dist/core/llm-extract.js +507 -0
- package/dist/core/llm-provider.d.ts +100 -0
- package/dist/core/llm-provider.js +702 -0
- package/dist/core/local-search.d.ts +60 -0
- package/dist/core/local-search.js +308 -0
- package/dist/core/logger.d.ts +28 -0
- package/dist/core/logger.js +104 -0
- package/dist/core/map.d.ts +33 -0
- package/dist/core/map.js +127 -0
- package/dist/core/markdown.d.ts +92 -0
- package/dist/core/markdown.js +809 -0
- package/dist/core/metadata.d.ts +34 -0
- package/dist/core/metadata.js +422 -0
- package/dist/core/observe.d.ts +113 -0
- package/dist/core/observe.js +395 -0
- package/dist/core/ocr.d.ts +12 -0
- package/dist/core/ocr.js +33 -0
- package/dist/core/paginate.d.ts +31 -0
- package/dist/core/paginate.js +106 -0
- package/dist/core/pdf.d.ts +8 -0
- package/dist/core/pdf.js +25 -0
- package/dist/core/peel-tls.d.ts +25 -0
- package/dist/core/peel-tls.js +220 -0
- package/dist/core/pipeline.d.ts +132 -0
- package/dist/core/pipeline.js +1666 -0
- package/dist/core/profiles.d.ts +61 -0
- package/dist/core/profiles.js +350 -0
- package/dist/core/prompt-guard.d.ts +30 -0
- package/dist/core/prompt-guard.js +119 -0
- package/dist/core/proxy-config.d.ts +90 -0
- package/dist/core/proxy-config.js +172 -0
- package/dist/core/quick-answer.d.ts +53 -0
- package/dist/core/quick-answer.js +833 -0
- package/dist/core/rate-governor.d.ts +80 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/readability.d.ts +57 -0
- package/dist/core/readability.js +533 -0
- package/dist/core/research.d.ts +66 -0
- package/dist/core/research.js +270 -0
- package/dist/core/retry.d.ts +60 -0
- package/dist/core/retry.js +119 -0
- package/dist/core/safe-browsing.d.ts +30 -0
- package/dist/core/safe-browsing.js +206 -0
- package/dist/core/schema-extraction.d.ts +66 -0
- package/dist/core/schema-extraction.js +352 -0
- package/dist/core/schema-postprocess.d.ts +32 -0
- package/dist/core/schema-postprocess.js +469 -0
- package/dist/core/schema-templates.d.ts +19 -0
- package/dist/core/schema-templates.js +143 -0
- package/dist/core/screenshot.d.ts +224 -0
- package/dist/core/screenshot.js +207 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +243 -0
- package/dist/core/search-provider.js +1629 -0
- package/dist/core/searxng-provider.d.ts +35 -0
- package/dist/core/searxng-provider.js +105 -0
- package/dist/core/selective-evidence.d.ts +151 -0
- package/dist/core/selective-evidence.js +389 -0
- package/dist/core/site-search.d.ts +44 -0
- package/dist/core/site-search.js +252 -0
- package/dist/core/sitemap.d.ts +23 -0
- package/dist/core/sitemap.js +105 -0
- package/dist/core/source-credibility.d.ts +29 -0
- package/dist/core/source-credibility.js +584 -0
- package/dist/core/source-scoring.d.ts +166 -0
- package/dist/core/source-scoring.js +396 -0
- package/dist/core/stemmer.d.ts +38 -0
- package/dist/core/stemmer.js +509 -0
- package/dist/core/strategies.d.ts +104 -0
- package/dist/core/strategies.js +1044 -0
- package/dist/core/strategy-hooks.d.ts +145 -0
- package/dist/core/strategy-hooks.js +74 -0
- package/dist/core/structured-extract.d.ts +43 -0
- package/dist/core/structured-extract.js +550 -0
- package/dist/core/summarize.d.ts +17 -0
- package/dist/core/summarize.js +78 -0
- package/dist/core/synonyms.d.ts +42 -0
- package/dist/core/synonyms.js +184 -0
- package/dist/core/system-monitor.d.ts +61 -0
- package/dist/core/system-monitor.js +133 -0
- package/dist/core/table-format.d.ts +30 -0
- package/dist/core/table-format.js +146 -0
- package/dist/core/threat-feeds.d.ts +23 -0
- package/dist/core/threat-feeds.js +104 -0
- package/dist/core/timing.d.ts +21 -0
- package/dist/core/timing.js +33 -0
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/core/user-agents.d.ts +82 -0
- package/dist/core/user-agents.js +239 -0
- package/dist/core/vertical-search.d.ts +54 -0
- package/dist/core/vertical-search.js +158 -0
- package/dist/core/watch-manager.d.ts +175 -0
- package/dist/core/watch-manager.js +416 -0
- package/dist/core/watch.d.ts +101 -0
- package/dist/core/watch.js +389 -0
- package/dist/core/youtube.d.ts +130 -0
- package/dist/core/youtube.js +1175 -0
- package/dist/ee/challenge-re-export.d.ts +1 -0
- package/dist/ee/challenge-re-export.js +1 -0
- package/dist/ee/challenge-solver.d.ts +72 -0
- package/dist/ee/challenge-solver.js +720 -0
- package/dist/ee/domain-extractors.d.ts +8 -0
- package/dist/ee/domain-extractors.js +8 -0
- package/dist/ee/domain-intel.d.ts +16 -0
- package/dist/ee/domain-intel.js +133 -0
- package/dist/ee/extractors/allrecipes.d.ts +2 -0
- package/dist/ee/extractors/allrecipes.js +120 -0
- package/dist/ee/extractors/amazon.d.ts +2 -0
- package/dist/ee/extractors/amazon.js +78 -0
- package/dist/ee/extractors/arxiv.d.ts +2 -0
- package/dist/ee/extractors/arxiv.js +137 -0
- package/dist/ee/extractors/bestbuy.d.ts +2 -0
- package/dist/ee/extractors/bestbuy.js +78 -0
- package/dist/ee/extractors/carscom.d.ts +2 -0
- package/dist/ee/extractors/carscom.js +121 -0
- package/dist/ee/extractors/coingecko.d.ts +2 -0
- package/dist/ee/extractors/coingecko.js +134 -0
- package/dist/ee/extractors/craigslist.d.ts +2 -0
- package/dist/ee/extractors/craigslist.js +92 -0
- package/dist/ee/extractors/devto.d.ts +2 -0
- package/dist/ee/extractors/devto.js +135 -0
- package/dist/ee/extractors/ebay.d.ts +2 -0
- package/dist/ee/extractors/ebay.js +90 -0
- package/dist/ee/extractors/espn.d.ts +2 -0
- package/dist/ee/extractors/espn.js +260 -0
- package/dist/ee/extractors/etsy.d.ts +2 -0
- package/dist/ee/extractors/etsy.js +52 -0
- package/dist/ee/extractors/facebook.d.ts +2 -0
- package/dist/ee/extractors/facebook.js +46 -0
- package/dist/ee/extractors/github.d.ts +2 -0
- package/dist/ee/extractors/github.js +196 -0
- package/dist/ee/extractors/google-flights.d.ts +2 -0
- package/dist/ee/extractors/google-flights.js +176 -0
- package/dist/ee/extractors/hackernews.d.ts +2 -0
- package/dist/ee/extractors/hackernews.js +147 -0
- package/dist/ee/extractors/imdb.d.ts +2 -0
- package/dist/ee/extractors/imdb.js +172 -0
- package/dist/ee/extractors/index.d.ts +26 -0
- package/dist/ee/extractors/index.js +247 -0
- package/dist/ee/extractors/instagram.d.ts +2 -0
- package/dist/ee/extractors/instagram.js +102 -0
- package/dist/ee/extractors/kalshi.d.ts +2 -0
- package/dist/ee/extractors/kalshi.js +121 -0
- package/dist/ee/extractors/kayak-cars.d.ts +2 -0
- package/dist/ee/extractors/kayak-cars.js +270 -0
- package/dist/ee/extractors/linkedin.d.ts +2 -0
- package/dist/ee/extractors/linkedin.js +113 -0
- package/dist/ee/extractors/medium.d.ts +2 -0
- package/dist/ee/extractors/medium.js +130 -0
- package/dist/ee/extractors/news.d.ts +4 -0
- package/dist/ee/extractors/news.js +173 -0
- package/dist/ee/extractors/npm.d.ts +2 -0
- package/dist/ee/extractors/npm.js +86 -0
- package/dist/ee/extractors/pdf.d.ts +2 -0
- package/dist/ee/extractors/pdf.js +108 -0
- package/dist/ee/extractors/pinterest.d.ts +2 -0
- package/dist/ee/extractors/pinterest.js +34 -0
- package/dist/ee/extractors/polymarket.d.ts +2 -0
- package/dist/ee/extractors/polymarket.js +358 -0
- package/dist/ee/extractors/producthunt.d.ts +2 -0
- package/dist/ee/extractors/producthunt.js +88 -0
- package/dist/ee/extractors/pubmed.d.ts +2 -0
- package/dist/ee/extractors/pubmed.js +162 -0
- package/dist/ee/extractors/pypi.d.ts +2 -0
- package/dist/ee/extractors/pypi.js +80 -0
- package/dist/ee/extractors/reddit.d.ts +2 -0
- package/dist/ee/extractors/reddit.js +438 -0
- package/dist/ee/extractors/redfin.d.ts +2 -0
- package/dist/ee/extractors/redfin.js +156 -0
- package/dist/ee/extractors/semanticscholar.d.ts +2 -0
- package/dist/ee/extractors/semanticscholar.js +131 -0
- package/dist/ee/extractors/shared.d.ts +12 -0
- package/dist/ee/extractors/shared.js +76 -0
- package/dist/ee/extractors/soundcloud.d.ts +2 -0
- package/dist/ee/extractors/soundcloud.js +34 -0
- package/dist/ee/extractors/sportsbetting.d.ts +2 -0
- package/dist/ee/extractors/sportsbetting.js +37 -0
- package/dist/ee/extractors/spotify.d.ts +2 -0
- package/dist/ee/extractors/spotify.js +34 -0
- package/dist/ee/extractors/stackoverflow.d.ts +2 -0
- package/dist/ee/extractors/stackoverflow.js +61 -0
- package/dist/ee/extractors/substack.d.ts +2 -0
- package/dist/ee/extractors/substack.js +115 -0
- package/dist/ee/extractors/substackroot.d.ts +2 -0
- package/dist/ee/extractors/substackroot.js +46 -0
- package/dist/ee/extractors/tiktok.d.ts +2 -0
- package/dist/ee/extractors/tiktok.js +29 -0
- package/dist/ee/extractors/tradingview.d.ts +2 -0
- package/dist/ee/extractors/tradingview.js +182 -0
- package/dist/ee/extractors/twitch.d.ts +2 -0
- package/dist/ee/extractors/twitch.js +36 -0
- package/dist/ee/extractors/twitter.d.ts +2 -0
- package/dist/ee/extractors/twitter.js +327 -0
- package/dist/ee/extractors/types.d.ts +14 -0
- package/dist/ee/extractors/types.js +1 -0
- package/dist/ee/extractors/walmart.d.ts +2 -0
- package/dist/ee/extractors/walmart.js +50 -0
- package/dist/ee/extractors/weather.d.ts +2 -0
- package/dist/ee/extractors/weather.js +133 -0
- package/dist/ee/extractors/wikipedia.d.ts +4 -0
- package/dist/ee/extractors/wikipedia.js +235 -0
- package/dist/ee/extractors/yelp.d.ts +2 -0
- package/dist/ee/extractors/yelp.js +216 -0
- package/dist/ee/extractors/youtube.d.ts +2 -0
- package/dist/ee/extractors/youtube.js +189 -0
- package/dist/ee/extractors/zillow.d.ts +54 -0
- package/dist/ee/extractors/zillow.js +247 -0
- package/dist/ee/extractors-re-export.d.ts +1 -0
- package/dist/ee/extractors-re-export.js +1 -0
- package/dist/ee/premium-hooks.d.ts +20 -0
- package/dist/ee/premium-hooks.js +50 -0
- package/dist/ee/spa-detection.d.ts +2 -0
- package/dist/ee/spa-detection.js +2 -0
- package/dist/ee/stability.d.ts +4 -0
- package/dist/ee/stability.js +29 -0
- package/dist/ee/swr-cache.d.ts +14 -0
- package/dist/ee/swr-cache.js +34 -0
- package/dist/index.d.ts +143 -0
- package/dist/index.js +291 -0
- package/dist/integrations/index.d.ts +2 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/langchain.d.ts +64 -0
- package/dist/integrations/langchain.js +115 -0
- package/dist/integrations/llamaindex.d.ts +50 -0
- package/dist/integrations/llamaindex.js +91 -0
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +395 -0
- package/dist/mcp/handlers/extract.d.ts +7 -0
- package/dist/mcp/handlers/extract.js +135 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +63 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +40 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/observe.d.ts +8 -0
- package/dist/mcp/handlers/observe.js +37 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +78 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +7 -0
- package/dist/mcp/server.js +108 -0
- package/dist/mcp/smart-router.d.ts +23 -0
- package/dist/mcp/smart-router.js +178 -0
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +632 -0
- package/dist/server/auth-store.d.ts +28 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/bull-queues.d.ts +60 -0
- package/dist/server/bull-queues.js +90 -0
- package/dist/server/email-service.d.ts +55 -0
- package/dist/server/email-service.js +291 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/audit-log.d.ts +14 -0
- package/dist/server/middleware/audit-log.js +73 -0
- package/dist/server/middleware/auth.d.ts +35 -0
- package/dist/server/middleware/auth.js +225 -0
- package/dist/server/middleware/rate-limit.d.ts +50 -0
- package/dist/server/middleware/rate-limit.js +270 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +201 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +146 -0
- package/dist/server/pg-auth-store.js +576 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +79 -0
- package/dist/server/routes/admin-active.d.ts +7 -0
- package/dist/server/routes/admin-active.js +120 -0
- package/dist/server/routes/admin-stats.d.ts +7 -0
- package/dist/server/routes/admin-stats.js +176 -0
- package/dist/server/routes/agent.d.ts +24 -0
- package/dist/server/routes/agent.js +480 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +295 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cache-warm.d.ts +25 -0
- package/dist/server/routes/cache-warm.js +212 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/crawl.d.ts +13 -0
- package/dist/server/routes/crawl.js +287 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/deep-research.d.ts +11 -0
- package/dist/server/routes/deep-research.js +232 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +14 -0
- package/dist/server/routes/extract.js +325 -0
- package/dist/server/routes/feed.d.ts +15 -0
- package/dist/server/routes/feed.js +311 -0
- package/dist/server/routes/fetch-queue.d.ts +13 -0
- package/dist/server/routes/fetch-queue.js +357 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +1274 -0
- package/dist/server/routes/go.d.ts +14 -0
- package/dist/server/routes/go.js +81 -0
- package/dist/server/routes/health.d.ts +11 -0
- package/dist/server/routes/health.js +141 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +574 -0
- package/dist/server/routes/map.d.ts +11 -0
- package/dist/server/routes/map.js +116 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +197 -0
- package/dist/server/routes/metrics.d.ts +37 -0
- package/dist/server/routes/metrics.js +149 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/reader.d.ts +18 -0
- package/dist/server/routes/reader.js +192 -0
- package/dist/server/routes/research.d.ts +14 -0
- package/dist/server/routes/research.js +482 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +820 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +874 -0
- package/dist/server/routes/session.d.ts +17 -0
- package/dist/server/routes/session.js +548 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +102 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +72 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
- package/dist/server/routes/smart-search/handlers/general.js +717 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +1309 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +154 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +546 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +264 -0
- package/dist/server/routes/smart-search/llm.d.ts +16 -0
- package/dist/server/routes/smart-search/llm.js +70 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +81 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +20 -0
- package/dist/server/routes/smart-search/utils.js +146 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +296 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/usage.d.ts +9 -0
- package/dist/server/routes/usage.js +279 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1867 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +14 -0
- package/dist/server/sentry.js +104 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/dist/types.d.ts +552 -0
- package/dist/types.js +39 -0
- package/llms.txt +105 -0
- package/package.json +189 -0
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Auto-interact: automatically dismiss cookie banners, consent popups,
|
|
3
|
+
* overlay modals, and optionally click "load more" / "show all" buttons.
|
|
4
|
+
*
|
|
5
|
+
* Runs after page.goto() and before content extraction.
|
|
6
|
+
* Never blocks extraction — each interaction has a tight timeout.
|
|
7
|
+
* Total budget: 3s max.
|
|
8
|
+
*/
|
|
9
|
+
// ── Selector lists ─────────────────────────────────────────────────────────
|
|
10
|
+
/**
 * CSS selectors for "accept / dismiss" buttons on cookie banners.
 *
 * Order matters: the list is tried top to bottom and the first selector whose
 * first match is visible gets clicked. Vendor-specific selectors therefore come
 * first, and within each group the more specific selector precedes the broader
 * one so that the broad fallback cannot shadow it.
 *
 * The trailing ` i` inside an attribute selector makes the match
 * case-insensitive.
 */
const COOKIE_DISMISS_SELECTORS = [
    // OneTrust (very common consent management platform)
    '#onetrust-accept-btn-handler',
    // Cookiebot
    '#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll',
    // Cookie Consent library
    '.cc-btn.cc-dismiss',
    '.cc-btn.cc-allow',
    // Osano
    '.osano-cm-accept',
    '.osano-cm-accept-all',
    // TrustArc
    '#truste-consent-button',
    // Quantcast
    '#qc-cmp2-ui button[mode="primary"]',
    // Didomi
    '#didomi-notice-agree-button',
    // Testing library markers
    '[data-testid="cookie-policy-dialog-accept-button"]',
    '[data-testid="accept-cookies"]',
    '[data-testid="cookie-accept"]',
    // ARIA labels.
    // "accept cookie" must be tried before the bare "cookie" match: the bare
    // match also hits buttons such as "Cookie settings" or "Manage cookies",
    // which would open a preferences dialog instead of dismissing the banner.
    'button[aria-label*="accept cookie" i]',
    'button[aria-label*="cookie" i]',
    'button[aria-label*="agree" i]',
    'button[aria-label*="consent" i]',
    // Class-based matchers (broad)
    '[class*="cookie"] button[class*="accept"]',
    '[class*="cookie"] button[class*="dismiss"]',
    '[class*="cookie"] button[class*="close"]',
    '[class*="cookie"] button[class*="agree"]',
    '[class*="cookie"] button[class*="allow"]',
    '[class*="consent"] button[class*="accept"]',
    '[class*="consent"] button[class*="agree"]',
    '[class*="consent"] button[class*="allow"]',
    '[id*="cookie"] button[class*="accept"]',
    '[id*="cookie"] button[class*="agree"]',
    // Last resort: first button inside a conventionally named banner
    '.cookie-banner button:first-of-type',
    '.cookie-notice button:first-of-type',
    '#cookie-notice button:first-of-type',
];
|
|
51
|
+
/**
 * CSS selectors for confirmation buttons on non-cookie consent prompts:
 * GDPR / privacy notices, consent dialogs, age gates and terms-of-use walls.
 * Listed in the order they are meant to be tried.
 */
const CONSENT_SELECTORS = [
    // Containers whose class mentions GDPR or privacy
    '[class*="gdpr"] button[class*="accept"]',
    '[class*="gdpr"] button[class*="agree"]',
    '[class*="privacy"] button[class*="accept"]',
    '[class*="privacy"] button[class*="agree"]',

    // Consent presented inside a modal, dialog or alert dialog
    '.modal-overlay [class*="accept"]',
    '[role="dialog"] button[class*="accept"]',
    '[role="dialog"] button[class*="agree"]',
    '[role="alertdialog"] button[class*="accept"]',

    // Age verification and terms acceptance
    '[class*="age-gate"] button[class*="confirm"]',
    '[class*="terms"] button[class*="accept"]',
];
|
|
66
|
+
/**
 * CSS selectors for "close" controls on generic overlays that are not consent
 * prompts: modals, popups, newsletter captures, surveys and banners.
 * Listed in the order they are meant to be tried.
 */
const OVERLAY_DISMISS_SELECTORS = [
    // Close controls on modals, dialogs and popups
    '.modal-close',
    '.overlay-close',
    '[class*="modal"] [class*="close"]',
    '[class*="modal"] button[aria-label="Close"]',
    '[role="dialog"] [aria-label="Close"]',
    '[role="dialog"] [aria-label="close"]',
    '[role="dialog"] button[class*="close"]',
    '[class*="popup"] [class*="close"]',
    '[class*="popup"] button[aria-label="Close"]',
    'button[class*="dismiss"]',

    // Newsletter / e-mail capture prompts
    '[class*="newsletter"] [class*="close"]',
    '[class*="subscribe"] [class*="close"]',
    '[class*="signup"] [class*="close"]',

    // Survey and feedback prompts
    '[class*="survey"] [class*="close"]',
    '[class*="feedback"] [class*="close"]',

    // Notification, alert and banner strips
    '[class*="notification"] button[class*="close"]',
    '[class*="alert"] button[class*="close"]',
    '[class*="banner"] button[class*="close"]',
];
|
|
90
|
+
/**
 * CSS selectors for "load more" / "show more" controls that reveal additional
 * content. Listed in the order they are meant to be tried.
 */
const LOAD_MORE_SELECTORS = [
    // "load more" by class name (hyphen, joined and underscore spellings)
    'button[class*="load-more"]',
    'button[class*="loadmore"]',
    'button[class*="load_more"]',
    '[class*="load-more"] button',
    'a[class*="load-more"]',

    // "show more" by class name
    'button[class*="show-more"]',
    'button[class*="show_more"]',
    'button[class*="showmore"]',
    '[class*="show-more"] button',

    // Case-insensitive ARIA label matches
    'button[aria-label*="load more" i]',
    'button[aria-label*="show more" i]',

    // Test-id markers
    '[data-testid*="load-more"]',
    '[data-testid*="show-more"]',
];
|
|
105
|
+
// ── Helpers ────────────────────────────────────────────────────────────────
|
|
106
|
+
/**
 * Report whether the first element matching `selector` is rendered on the page.
 *
 * The probe runs inside the page via `page.evaluate` and treats an element as
 * visible only when it exists, has a non-zero bounding box, and its computed
 * style is not `display: none`, `visibility: hidden` or `opacity: 0`.
 * Only the first match (`document.querySelector`) is inspected.
 *
 * @param page - object exposing `evaluate(fn, arg)`
 * @param selector - CSS selector to probe
 * @returns `true` when the element is visible; `false` when it is missing,
 *          invisible, or the evaluation throws for any reason
 */
async function isVisible(page, selector) {
    // Executed in the page context, so it must not capture outer variables.
    const probe = (sel) => {
        const node = document.querySelector(sel);
        if (!node) {
            return false;
        }
        const { width, height } = node.getBoundingClientRect();
        if (width === 0 || height === 0) {
            return false;
        }
        const { display, visibility, opacity } = window.getComputedStyle(node);
        const hiddenByStyle = display === 'none' || visibility === 'hidden' || opacity === '0';
        return !hiddenByStyle;
    };
    try {
        const outcome = await page.evaluate(probe, selector);
        return Boolean(outcome);
    }
    catch {
        // Best-effort probe: any failure counts as "not visible".
        return false;
    }
}
|
|
130
|
+
/**
 * Click the first element matching `selector`, giving up after one second.
 *
 * The element must first pass the `isVisible` check; nothing is clicked
 * otherwise. The click is raced against a watchdog timer as a second line of
 * defence on top of the `timeout` option passed to `page.click`. After a
 * successful click the function pauses 300 ms so the DOM can settle.
 *
 * Never throws: every failure is reported as `false`.
 *
 * @param page - page object exposing `click`, `waitForTimeout` and `evaluate`
 *               (documented elsewhere in this file as a Playwright page)
 * @param selector - CSS selector of the element to click
 * @returns `true` if the click completed within the time limit; `false` if the
 *          element is not visible, the click failed, or the limit was exceeded
 */
async function tryClick(page, selector) {
    const CLICK_TIMEOUT_MS = 1000;
    let watchdog;
    try {
        const visible = await isVisible(page, selector);
        if (!visible)
            return false;
        try {
            await Promise.race([
                page.click(selector, { timeout: CLICK_TIMEOUT_MS }),
                new Promise((_, reject) => {
                    watchdog = setTimeout(() => reject(new Error('click timeout')), CLICK_TIMEOUT_MS);
                }),
            ]);
        }
        finally {
            // Always cancel the watchdog. Left running, it would keep the event
            // loop alive for up to a second after every click and then fire a
            // rejection nobody is waiting for.
            clearTimeout(watchdog);
        }
        // Brief pause to let DOM settle after click
        await page.waitForTimeout(300).catch(() => { });
        return true;
    }
    catch {
        return false;
    }
}
|
|
151
|
+
/**
 * Walk the selector list in order and click the first one that `tryClick`
 * manages to click.
 *
 * @param page - Page object forwarded to `tryClick`
 * @param selectors - Candidate CSS selectors, tried in order
 * @returns The selector that was clicked, or null when none could be clicked
 */
async function tryClickFirst(page, selectors) {
    for (const candidate of selectors) {
        const succeeded = await tryClick(page, candidate);
        if (!succeeded) {
            continue;
        }
        if (process.env.DEBUG) {
            console.debug('[webpeel:auto-interact]', 'clicked:', candidate);
        }
        return candidate;
    }
    return null;
}
|
|
167
|
+
// ── Main export ────────────────────────────────────────────────────────────
|
|
168
|
+
/**
 * Automatically interact with the page to dismiss common UI overlays before
 * content extraction. Never throws — all errors are swallowed.
 *
 * Runs four phases in order: cookie banners, consent popups, overlays/modals
 * (at most two) and a single "load more" click. A 3000ms budget is checked
 * before each phase (and before each overlay attempt); a phase that has
 * already started is not interrupted when the budget runs out.
 *
 * @param page - Playwright page (already navigated)
 * @returns Summary of what was dismissed
 */
export async function autoInteract(page) {
    const TOTAL_BUDGET_MS = 3000;
    const startTime = Date.now();
    const budgetLeft = () => TOTAL_BUDGET_MS - (Date.now() - startTime);
    const result = {
        cookieBannerDismissed: false,
        consentHandled: false,
        loadMoreClicked: 0,
        overlaysDismissed: 0,
    };
    try {
        // Phase 1: cookie banners (runs whenever any budget is left)
        if (budgetLeft() > 0) {
            const cookieHit = await tryClickFirst(page, COOKIE_DISMISS_SELECTORS);
            if (cookieHit) {
                result.cookieBannerDismissed = true;
                if (process.env.DEBUG)
                    console.debug('[webpeel:auto-interact]', 'cookie banner dismissed');
            }
        }
        // Phase 2: consent popups
        if (budgetLeft() > 500) {
            const consentHit = await tryClickFirst(page, CONSENT_SELECTORS);
            if (consentHit) {
                result.consentHandled = true;
                if (process.env.DEBUG)
                    console.debug('[webpeel:auto-interact]', 'consent handled');
            }
        }
        // Phase 3: overlays / modals, capped at two so persistent UI cannot loop forever
        if (budgetLeft() > 500) {
            let overlayCount = 0;
            while (overlayCount < 2 && budgetLeft() > 300) {
                const overlayHit = await tryClickFirst(page, OVERLAY_DISMISS_SELECTORS);
                if (!overlayHit) {
                    break;
                }
                overlayCount += 1;
            }
            result.overlaysDismissed = overlayCount;
            if (overlayCount > 0 && process.env.DEBUG) {
                console.debug('[webpeel:auto-interact]', `overlays dismissed: ${overlayCount}`);
            }
        }
        // Phase 4: at most one "load more" click, only if budget remains
        if (budgetLeft() > 500) {
            const loadMoreHit = await tryClickFirst(page, LOAD_MORE_SELECTORS);
            let loadMoreCount = 0;
            if (loadMoreHit) {
                loadMoreCount = 1;
                // Give freshly loaded content a moment to render
                await page.waitForTimeout(500).catch(() => { });
            }
            result.loadMoreClicked = loadMoreCount;
            if (loadMoreCount > 0 && process.env.DEBUG) {
                console.debug('[webpeel:auto-interact]', `load-more clicked: ${loadMoreCount}`);
            }
        }
    }
    catch (err) {
        // Extraction must never be blocked by auto-interact failures
        if (process.env.DEBUG) {
            console.debug('[webpeel:auto-interact]', 'error (ignored):', err instanceof Error ? err.message : err);
        }
    }
    const elapsed = Date.now() - startTime;
    if (process.env.DEBUG) {
        console.debug('[webpeel:auto-interact]', 'complete in', elapsed, 'ms', JSON.stringify(result));
    }
    return result;
}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BM25 Query-Focused Content Filter
|
|
3
|
+
*
|
|
4
|
+
* Filters markdown content by BM25 relevance to a query, keeping only the
|
|
5
|
+
* blocks that are most relevant. This can reduce token usage by 70-90% for
|
|
6
|
+
* focused tasks (e.g., "find hotel prices" should not return navigation menus,
|
|
7
|
+
* footer text, or unrelated article sections).
|
|
8
|
+
*
|
|
9
|
+
* Algorithm: BM25 (Best Matching 25) — Okapi BM25
|
|
10
|
+
* score(D, Q) = Σ IDF(qi) * tf(qi,D)*(k1+1) / (tf(qi,D) + k1*(1 - b + b*|D|/avgdl))
|
|
11
|
+
*/
|
|
12
|
+
/** Options accepted by `filterByRelevance`. */
export interface BM25FilterOptions {
    /**
     * Query to rank content against. An empty or whitespace-only query
     * disables filtering and the content is returned unchanged.
     */
    query: string;
    /**
     * BM25 threshold score. Blocks below this are removed.
     * Default: auto-calculated as half of the mean block score.
     */
    threshold?: number;
    /**
     * Whether to return scores in output. When true, each kept block is
     * preceded by a `<!-- BM25: <score> -->` comment line. Default: false
     */
    includeScores?: boolean;
}
|
|
20
|
+
/** Result returned by `filterByRelevance`. */
export interface BM25FilterResult {
    /**
     * Filtered content (relevant paragraphs only): the kept blocks in their
     * original order, joined by blank lines. When no filtering is performed
     * (e.g. empty query) this is the input content unchanged.
     */
    content: string;
    /** Number of blocks kept (0 when the query is empty and no filtering is attempted) */
    kept: number;
    /** Total number of blocks (0 when the query is empty and the content is not split) */
    total: number;
    /** Percentage of content removed, based on character counts and rounded to an integer */
    reductionPercent: number;
}
|
|
30
|
+
/** One scoring unit produced by `splitIntoBlocks`. */
interface ContentBlock {
    /** Original markdown text (preserved verbatim in output), trimmed of surrounding whitespace */
    raw: string;
    /** Index in the original block list (for order preservation) */
    index: number;
}
|
|
36
|
+
/**
 * Split markdown content into logical blocks for scoring:
 * - Code fences (``` ... ```) → single block
 * - Heading + immediately following paragraph → single block
 * - Lists (contiguous lines starting with - / * / + / number.) → single block
 * - Tables → single block
 * - Paragraphs → one block each
 *
 * @param content - Markdown text; CRLF and CR line endings are normalised to LF
 * @returns Blocks in document order; empty when the content has no non-blank text
 */
export declare function splitIntoBlocks(content: string): ContentBlock[];
|
|
45
|
+
/**
 * Calculate BM25 scores for all blocks against a query.
 * Returns array of scores in same order as blocks.
 *
 * @param blocks - Blocks to score; each block acts as one BM25 "document"
 * @param queryTerms - Query terms; they are compared verbatim against the
 *                     lowercased tokens of each block, so pass lowercased terms
 * @returns One score per block; all zeros when `blocks` or `queryTerms` is empty
 */
export declare function scoreBM25(blocks: ContentBlock[], queryTerms: string[]): number[];
|
|
50
|
+
/**
 * Compute a normalized relevance score (0-1) for content against a query.
 * Uses BM25 at the block level and returns the weighted average score,
 * normalized by query term count for comparability across queries.
 *
 * This is more meaningful than `reductionPercent` for ranking search results,
 * because it measures actual term overlap and importance rather than how much
 * content was filtered out.
 *
 * @param content - Markdown content to evaluate
 * @param query - Free-text query
 * @returns A value in [0, 1]; 0 when the content or query is empty or the
 *          query yields no terms
 */
export declare function computeRelevanceScore(content: string, query: string): number;
|
|
60
|
+
/**
 * Filter markdown content by BM25 relevance to a query.
 * Splits content into blocks (paragraphs, headings+body, list items),
 * scores each by BM25, and returns only blocks above threshold.
 *
 * @param content - Markdown content to filter
 * @param options - Query plus optional threshold / score-annotation settings
 * @returns The filtered content together with block counts and the
 *          percentage of characters removed
 */
export declare function filterByRelevance(content: string, options: BM25FilterOptions): BM25FilterResult;
|
|
66
|
+
export {};
|
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BM25 Query-Focused Content Filter
|
|
3
|
+
*
|
|
4
|
+
* Filters markdown content by BM25 relevance to a query, keeping only the
|
|
5
|
+
* blocks that are most relevant. This can reduce token usage by 70-90% for
|
|
6
|
+
* focused tasks (e.g., "find hotel prices" should not return navigation menus,
|
|
7
|
+
* footer text, or unrelated article sections).
|
|
8
|
+
*
|
|
9
|
+
* Algorithm: BM25 (Best Matching 25) — Okapi BM25
|
|
10
|
+
* score(D, Q) = Σ IDF(qi) * tf(qi,D)*(k1+1) / (tf(qi,D) + k1*(1 - b + b*|D|/avgdl))
|
|
11
|
+
*/
|
|
12
|
+
// BM25 tuning parameters (the k1 and b constants of the scoring formula
// used in scoreBM25)
const K1 = 1.5; // term frequency saturation
const B = 0.75; // length normalization (weights the block-length / average-length ratio)
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
// Tokenization
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
/**
 * Tokenize text into lowercase terms, stripping punctuation.
 * Markdown formatting characters are also stripped.
 *
 * Markdown images, links (including their link text) and inline code spans
 * are removed entirely before splitting on whitespace.
 *
 * Letters and digits of every script are kept (Unicode-aware), so accented
 * words such as "café" stay intact and non-Latin text still yields tokens.
 *
 * @param text - Raw text or markdown
 * @returns Lowercase terms in order of appearance; empty array when the
 *          text contains no letters or digits
 */
function tokenize(text) {
    return text
        .toLowerCase()
        // Strip markdown formatting (bold, italic, code, links, images, headings)
        .replace(/!\[.*?\]\(.*?\)/g, ' ') // images
        .replace(/\[.*?\]\(.*?\)/g, ' ') // links
        .replace(/`{1,3}[^`]*`{1,3}/g, ' ') // inline code
        .replace(/[#*_~`>|\\]/g, ' ') // formatting chars
        // Remaining punctuation. \p{L}/\p{M}/\p{N} instead of \w so that
        // non-ASCII letters, combining marks and digits are not discarded.
        .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
        .split(/\s+/)
        .filter(t => t.length > 0);
}
|
|
34
|
+
/**
 * Reduce markdown to plain words for scoring purposes.
 *
 * Fenced code blocks, inline code and images are dropped entirely; links are
 * replaced by their link text; heading markers, emphasis/formatting symbols
 * and list bullets/numbers are removed. Whitespace is collapsed to single
 * spaces and the result is trimmed.
 *
 * @param text - Markdown text
 * @returns Plain text with words preserved and markdown symbols removed
 */
function stripMarkdown(text) {
    let plain = text;
    // Code first, so its contents never reach the later passes
    plain = plain.replace(/```[\s\S]*?```/g, ' ');
    plain = plain.replace(/`[^`]+`/g, ' ');
    // Images must go before links: an image is a link prefixed with "!"
    plain = plain.replace(/!\[.*?\]\(.*?\)/g, ' ');
    plain = plain.replace(/\[([^\]]*)\]\([^)]*\)/g, '$1');
    // Heading markers, then generic formatting characters
    plain = plain.replace(/^#{1,6}\s+/gm, '');
    plain = plain.replace(/[*_~`>|\\]/g, ' ');
    // List markers: bullets, then "1." style numbering
    plain = plain.replace(/^\s*[-*+]\s+/gm, ' ');
    plain = plain.replace(/^\s*\d+\.\s+/gm, ' ');
    // Collapse whitespace runs
    plain = plain.replace(/\s+/g, ' ');
    return plain.trim();
}
|
|
51
|
+
/**
|
|
52
|
+
* Split markdown content into logical blocks for scoring:
|
|
53
|
+
* - Code fences (``` ... ```) → single block
|
|
54
|
+
* - Heading + immediately following paragraph → single block
|
|
55
|
+
* - Lists (contiguous lines starting with - / * / + / number.) → single block
|
|
56
|
+
* - Tables → single block
|
|
57
|
+
* - Paragraphs → one block each
|
|
58
|
+
*/
|
|
59
|
+
export function splitIntoBlocks(content) {
|
|
60
|
+
// Normalise line endings
|
|
61
|
+
const text = content.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
|
|
62
|
+
// First, extract fenced code blocks so they aren't broken apart
|
|
63
|
+
// We'll replace them with placeholders, split, then restore.
|
|
64
|
+
const codeBlocks = [];
|
|
65
|
+
const withPlaceholders = text.replace(/```[\s\S]*?```/g, (match) => {
|
|
66
|
+
const id = codeBlocks.length;
|
|
67
|
+
codeBlocks.push(match);
|
|
68
|
+
return `\x00CODE_BLOCK_${id}\x00`;
|
|
69
|
+
});
|
|
70
|
+
// Split on double newlines
|
|
71
|
+
const rawChunks = withPlaceholders.split(/\n{2,}/);
|
|
72
|
+
// Re-join heading with its following paragraph
|
|
73
|
+
const merged = [];
|
|
74
|
+
for (let i = 0; i < rawChunks.length; i++) {
|
|
75
|
+
const chunk = rawChunks[i].trim();
|
|
76
|
+
if (!chunk)
|
|
77
|
+
continue;
|
|
78
|
+
const isHeading = /^#{1,6}\s/.test(chunk);
|
|
79
|
+
const nextChunk = rawChunks[i + 1]?.trim();
|
|
80
|
+
if (isHeading && nextChunk && !/^#{1,6}\s/.test(nextChunk)) {
|
|
81
|
+
// Merge heading + following paragraph
|
|
82
|
+
merged.push(chunk + '\n\n' + nextChunk);
|
|
83
|
+
i++; // skip next
|
|
84
|
+
}
|
|
85
|
+
else {
|
|
86
|
+
merged.push(chunk);
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
// Now merge contiguous list lines that got split
|
|
90
|
+
const regrouped = [];
|
|
91
|
+
for (const chunk of merged) {
|
|
92
|
+
const lines = chunk.split('\n');
|
|
93
|
+
const isListBlock = lines.every(l => l.trim() === '' || /^\s*[-*+]\s/.test(l) || /^\s*\d+\.\s/.test(l) || /^\s*\d+\)\s/.test(l)) && lines.some(l => /^\s*[-*+]\s/.test(l) || /^\s*\d+[.)]\s/.test(l));
|
|
94
|
+
const isTableBlock = lines.some(l => /^\|/.test(l.trim()));
|
|
95
|
+
if (isListBlock || isTableBlock) {
|
|
96
|
+
// Check if previous block was the same type (adjacent lists should merge)
|
|
97
|
+
const prev = regrouped[regrouped.length - 1];
|
|
98
|
+
if (prev) {
|
|
99
|
+
const prevLines = prev.split('\n');
|
|
100
|
+
const prevIsListOrTable = prevLines.some(l => /^\s*[-*+]\s/.test(l) || /^\s*\d+[.)]\s/.test(l) || /^\|/.test(l.trim()));
|
|
101
|
+
if (prevIsListOrTable && isListBlock === prevIsListOrTable) {
|
|
102
|
+
regrouped[regrouped.length - 1] = prev + '\n' + chunk;
|
|
103
|
+
continue;
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
regrouped.push(chunk);
|
|
108
|
+
}
|
|
109
|
+
// Restore code blocks and build final ContentBlock array
|
|
110
|
+
const blocks = [];
|
|
111
|
+
for (let i = 0; i < regrouped.length; i++) {
|
|
112
|
+
let raw = regrouped[i];
|
|
113
|
+
// Restore code block placeholders
|
|
114
|
+
raw = raw.replace(/\x00CODE_BLOCK_(\d+)\x00/g, (_m, idx) => codeBlocks[Number(idx)]);
|
|
115
|
+
if (raw.trim()) {
|
|
116
|
+
blocks.push({ raw: raw.trim(), index: i });
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
return blocks;
|
|
120
|
+
}
|
|
121
|
+
// ---------------------------------------------------------------------------
|
|
122
|
+
// BM25 Scoring
|
|
123
|
+
// ---------------------------------------------------------------------------
|
|
124
|
+
/**
 * Score every block against the query terms with Okapi BM25, treating each
 * block as one document and the block list as the corpus.
 *
 * @param blocks - Blocks to score; their `raw` markdown is stripped and
 *                 tokenized before counting terms
 * @param queryTerms - Terms to look up in each block's token counts
 * @returns One score per block, in the same order as `blocks`; all zeros
 *          when `blocks` or `queryTerms` is empty
 */
export function scoreBM25(blocks, queryTerms) {
    const docCount = blocks.length;
    if (docCount === 0 || queryTerms.length === 0) {
        return blocks.map(() => 0);
    }
    // Per-block token counts and lengths (markdown stripped for scoring)
    const lengths = [];
    const termCounts = [];
    let totalTokens = 0;
    for (const block of blocks) {
        const tokens = tokenize(stripMarkdown(block.raw));
        const counts = new Map();
        for (const token of tokens) {
            counts.set(token, (counts.get(token) ?? 0) + 1);
        }
        termCounts.push(counts);
        lengths.push(tokens.length);
        totalTokens += tokens.length;
    }
    // Fall back to 1 when every block is empty so the length ratio stays finite
    const avgdl = totalTokens / docCount || 1;
    const scores = new Array(docCount).fill(0);
    for (const term of queryTerms) {
        // n(qi): how many blocks contain the term
        const containing = termCounts.reduce((n, counts) => n + (counts.has(term) ? 1 : 0), 0);
        // IDF(qi) = log((N - n(qi) + 0.5) / (n(qi) + 0.5) + 1)
        const idf = Math.log((docCount - containing + 0.5) / (containing + 0.5) + 1);
        termCounts.forEach((counts, d) => {
            const tf = counts.get(term) ?? 0;
            if (tf === 0) {
                return;
            }
            // Length-normalised saturation term of the BM25 denominator
            const lengthNorm = K1 * (1 - B + B * lengths[d] / avgdl);
            scores[d] += idf * (tf * (K1 + 1)) / (tf + lengthNorm);
        });
    }
    return scores;
}
|
|
168
|
+
// ---------------------------------------------------------------------------
|
|
169
|
+
// Relevance scoring (document-level)
|
|
170
|
+
// ---------------------------------------------------------------------------
|
|
171
|
+
/**
 * Compute a normalized relevance score (0-1) for content against a query.
 *
 * The content is split into blocks and scored with BM25; block scores are
 * averaged with weights proportional to each block's stripped character
 * length, divided by the number of query terms, and squashed into [0, 1].
 *
 * Intended for ranking search results, where it is more meaningful than
 * `reductionPercent` because it reflects term overlap and importance rather
 * than how much content was filtered out.
 *
 * @param content - Markdown content to evaluate
 * @param query - Free-text query
 * @returns A value in [0, 1]; 0 when the content or query is empty, the
 *          content has no blocks, or the query yields no terms
 */
export function computeRelevanceScore(content, query) {
    if (!content || !query || !query.trim()) {
        return 0;
    }
    const blocks = splitIntoBlocks(content);
    if (blocks.length === 0) {
        return 0;
    }
    const queryTerms = tokenize(query);
    if (queryTerms.length === 0) {
        return 0;
    }
    const scores = scoreBM25(blocks, queryTerms);
    // Weight each block by its stripped text length so that short blocks
    // (e.g. headers) cannot dominate the average.
    const charCounts = blocks.map(b => stripMarkdown(b.raw).length);
    const totalChars = charCounts.reduce((sum, len) => sum + len, 0) || 1;
    let weightedSum = 0;
    scores.forEach((score, i) => {
        weightedSum += score * (charCounts[i] / totalChars);
    });
    // Divide by the query term count so queries of different lengths are
    // comparable, then squash to [0, 1]. Per the original tuning note, the
    // constant 8 is chosen so a well-matching document lands around 0.6-0.9
    // and a poor match around 0.0-0.2.
    const perTermScore = weightedSum / queryTerms.length;
    const squashed = 2 / (1 + Math.exp(-perTermScore * 8)) - 1;
    return Math.max(0, Math.min(1, squashed));
}
|
|
208
|
+
// ---------------------------------------------------------------------------
|
|
209
|
+
// Main filter function
|
|
210
|
+
// ---------------------------------------------------------------------------
|
|
211
|
+
/**
 * Filter markdown content by BM25 relevance to a query.
 * Splits content into blocks (paragraphs, headings+body, list items),
 * scores each by BM25, and returns only blocks above threshold.
 *
 * Kept blocks are emitted in document order, joined by blank lines. When no
 * block reaches the threshold, the three highest-scoring blocks are kept so
 * the result is never empty.
 *
 * @param content - Markdown content to filter
 * @param options - `query` (required), optional `threshold` (defaults to half
 *                  the mean block score) and `includeScores` (prefix each
 *                  kept block with a `<!-- BM25: … -->` comment)
 * @returns Filtered content, kept/total block counts and the percentage of
 *          characters removed (never negative). An empty query returns the
 *          content unchanged with `kept` and `total` both 0.
 */
export function filterByRelevance(content, options) {
    const { query, threshold, includeScores = false } = options;
    // Empty query → return full content
    if (!query || !query.trim()) {
        return {
            content,
            kept: 0,
            total: 0,
            reductionPercent: 0,
        };
    }
    const blocks = splitIntoBlocks(content);
    const total = blocks.length;
    if (total === 0) {
        return { content, kept: 0, total: 0, reductionPercent: 0 };
    }
    const queryTerms = tokenize(query);
    if (queryTerms.length === 0) {
        // Query contained nothing tokenizable: keep everything
        return { content, kept: total, total, reductionPercent: 0 };
    }
    const scores = scoreBM25(blocks, queryTerms);
    // Determine threshold: caller-supplied, or half of the mean score
    let effectiveThreshold;
    if (threshold !== undefined) {
        effectiveThreshold = threshold;
    }
    else {
        const meanScore = scores.reduce((s, v) => s + v, 0) / scores.length;
        effectiveThreshold = meanScore * 0.5;
    }
    // Select blocks at or above threshold (already in document order)
    let keptIndices = scores
        .map((score, i) => ({ score, i }))
        .filter(({ score }) => score >= effectiveThreshold)
        .map(({ i }) => i);
    // Fallback: never return empty — keep top 3
    if (keptIndices.length === 0) {
        keptIndices = scores
            .map((score, i) => ({ score, i }))
            .sort((a, b) => b.score - a.score)
            .slice(0, 3)
            .map(({ i }) => i)
            .sort((a, b) => a - b); // restore document order
    }
    const kept = keptIndices.length;
    // Build output content. Scores are looked up by array position, the
    // same key scoreBM25 used, rather than by the block's stored index.
    const outputParts = keptIndices.map(i => includeScores
        ? `<!-- BM25: ${scores[i].toFixed(4)} -->\n${blocks[i].raw}`
        : blocks[i].raw);
    const filteredContent = outputParts.join('\n\n');
    // Calculate reduction percent based on character count. Score
    // annotations can make the output longer than the input, so clamp at 0.
    const originalLen = content.length;
    const filteredLen = filteredContent.length;
    const reductionPercent = originalLen > 0
        ? Math.max(0, Math.round(((originalLen - filteredLen) / originalLen) * 100))
        : 0;
    return {
        content: filteredContent,
        kept,
        total,
        reductionPercent,
    };
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Branding and design system extraction from web pages
|
|
3
|
+
* Extracts colors, fonts, typography, spacing, components, and CSS variables
|
|
4
|
+
*/
|
|
5
|
+
import type { Page } from 'playwright';
|
|
6
|
+
/**
 * Branding / design-system data for a page; the value `extractBranding`
 * resolves to.
 */
export interface BrandingProfile {
    /** Colour scheme(s) of the page */
    colorScheme: 'light' | 'dark' | 'both';
    /** Logo reference — NOTE(review): presumably a URL; confirm against the implementation */
    logo?: string;
    /** Favicon reference — NOTE(review): presumably a URL; confirm against the implementation */
    favicon?: string;
    /** Named colours; well-known roles are listed explicitly and any other key is allowed */
    colors: {
        primary?: string;
        secondary?: string;
        accent?: string;
        background?: string;
        textPrimary?: string;
        textSecondary?: string;
        [key: string]: string | undefined;
    };
    /** Font families, optionally with their weights and source */
    fonts: Array<{
        family: string;
        weights?: number[];
        source?: string;
    }>;
    /** Typography values keyed by name — NOTE(review): key naming scheme not visible here */
    typography: {
        fontFamilies: Record<string, string>;
        fontSizes: Record<string, string>;
        fontWeights: Record<string, number>;
        lineHeights?: Record<string, string>;
    };
    /** Spacing / layout metrics */
    spacing: {
        baseUnit?: number;
        borderRadius?: string;
        containerMaxWidth?: string;
    };
    /** Per-component style properties: component name → property → value */
    components: Record<string, Record<string, string>>;
    /** CSS variables: name → value */
    cssVariables: Record<string, string>;
}
|
|
38
|
+
/**
 * Extract branding and design system from a webpage.
 * This must run inside a Playwright browser context to access computed styles.
 *
 * @param page - Playwright Page object
 * @returns Promise resolving to the complete branding profile
 *
 * @example
 * ```typescript
 * const browser = await chromium.launch();
 * const page = await browser.newPage();
 * await page.goto('https://example.com');
 * const branding = await extractBranding(page);
 * console.log(branding.colors.primary);
 * ```
 */
export declare function extractBranding(page: Page): Promise<BrandingProfile>;
|