@iflow-mcp/jakeliume-webpeel 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +313 -0
- package/dist/cache.d.ts +30 -0
- package/dist/cache.js +139 -0
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +411 -0
- package/dist/cli/commands/doctor.d.ts +37 -0
- package/dist/cli/commands/doctor.js +371 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1345 -0
- package/dist/cli/commands/guide.d.ts +2 -0
- package/dist/cli/commands/guide.js +183 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +840 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/monitor.d.ts +12 -0
- package/dist/cli/commands/monitor.js +197 -0
- package/dist/cli/commands/observe.d.ts +12 -0
- package/dist/cli/commands/observe.js +158 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +282 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +1021 -0
- package/dist/cli/commands/setup.d.ts +13 -0
- package/dist/cli/commands/setup.js +244 -0
- package/dist/cli/commands/skill.d.ts +15 -0
- package/dist/cli/commands/skill.js +195 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +806 -0
- package/dist/cli-auth.d.ts +75 -0
- package/dist/cli-auth.js +369 -0
- package/dist/cli.d.ts +17 -0
- package/dist/cli.js +99 -0
- package/dist/core/actions.d.ts +69 -0
- package/dist/core/actions.js +495 -0
- package/dist/core/agent.d.ts +98 -0
- package/dist/core/agent.js +558 -0
- package/dist/core/answer.d.ts +42 -0
- package/dist/core/answer.js +395 -0
- package/dist/core/application-tracker.d.ts +84 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/apply.d.ts +162 -0
- package/dist/core/apply.js +816 -0
- package/dist/core/auth-detection.d.ts +35 -0
- package/dist/core/auth-detection.js +358 -0
- package/dist/core/auto-extract.d.ts +82 -0
- package/dist/core/auto-extract.js +604 -0
- package/dist/core/auto-interact.d.ts +23 -0
- package/dist/core/auto-interact.js +246 -0
- package/dist/core/bm25-filter.d.ts +66 -0
- package/dist/core/bm25-filter.js +288 -0
- package/dist/core/branding.d.ts +54 -0
- package/dist/core/branding.js +234 -0
- package/dist/core/browser-fetch.d.ts +323 -0
- package/dist/core/browser-fetch.js +1600 -0
- package/dist/core/browser-pool.d.ts +91 -0
- package/dist/core/browser-pool.js +550 -0
- package/dist/core/budget.d.ts +42 -0
- package/dist/core/budget.js +324 -0
- package/dist/core/business-intel.d.ts +47 -0
- package/dist/core/business-intel.js +279 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +121 -0
- package/dist/core/cf-worker-proxy.d.ts +32 -0
- package/dist/core/cf-worker-proxy.js +87 -0
- package/dist/core/challenge-detection.d.ts +26 -0
- package/dist/core/challenge-detection.js +468 -0
- package/dist/core/change-tracking.d.ts +75 -0
- package/dist/core/change-tracking.js +276 -0
- package/dist/core/chunker.d.ts +46 -0
- package/dist/core/chunker.js +249 -0
- package/dist/core/chunking.d.ts +42 -0
- package/dist/core/chunking.js +181 -0
- package/dist/core/circuit-breaker.d.ts +44 -0
- package/dist/core/circuit-breaker.js +85 -0
- package/dist/core/content-pruner.d.ts +47 -0
- package/dist/core/content-pruner.js +425 -0
- package/dist/core/cookie-cache.d.ts +60 -0
- package/dist/core/cookie-cache.js +163 -0
- package/dist/core/crawl-checkpoint.d.ts +54 -0
- package/dist/core/crawl-checkpoint.js +104 -0
- package/dist/core/crawler.d.ts +84 -0
- package/dist/core/crawler.js +349 -0
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/deep-fetch.d.ts +74 -0
- package/dist/core/deep-fetch.js +405 -0
- package/dist/core/deep-research.d.ts +141 -0
- package/dist/core/deep-research.js +972 -0
- package/dist/core/design-analysis.d.ts +70 -0
- package/dist/core/design-analysis.js +490 -0
- package/dist/core/design-compare.d.ts +38 -0
- package/dist/core/design-compare.js +264 -0
- package/dist/core/diff.d.ts +61 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/dns-cache.d.ts +20 -0
- package/dist/core/dns-cache.js +198 -0
- package/dist/core/documents.d.ts +23 -0
- package/dist/core/documents.js +123 -0
- package/dist/core/domain-memory.d.ts +66 -0
- package/dist/core/domain-memory.js +163 -0
- package/dist/core/domain-verify.d.ts +40 -0
- package/dist/core/domain-verify.js +379 -0
- package/dist/core/engine-ranker.d.ts +112 -0
- package/dist/core/engine-ranker.js +395 -0
- package/dist/core/extract-inline.d.ts +38 -0
- package/dist/core/extract-inline.js +215 -0
- package/dist/core/extract-listings.d.ts +38 -0
- package/dist/core/extract-listings.js +461 -0
- package/dist/core/extract.d.ts +9 -0
- package/dist/core/extract.js +139 -0
- package/dist/core/fetch-cache.d.ts +57 -0
- package/dist/core/fetch-cache.js +95 -0
- package/dist/core/fetcher.d.ts +13 -0
- package/dist/core/fetcher.js +12 -0
- package/dist/core/google-cache.d.ts +29 -0
- package/dist/core/google-cache.js +180 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/hotel-search.d.ts +122 -0
- package/dist/core/hotel-search.js +382 -0
- package/dist/core/http-fetch.d.ts +72 -0
- package/dist/core/http-fetch.js +820 -0
- package/dist/core/human.d.ts +175 -0
- package/dist/core/human.js +680 -0
- package/dist/core/image-caption.d.ts +44 -0
- package/dist/core/image-caption.js +271 -0
- package/dist/core/jobs.d.ts +75 -0
- package/dist/core/jobs.js +634 -0
- package/dist/core/json-ld.d.ts +15 -0
- package/dist/core/json-ld.js +617 -0
- package/dist/core/language-detect.d.ts +18 -0
- package/dist/core/language-detect.js +135 -0
- package/dist/core/links.d.ts +10 -0
- package/dist/core/links.js +44 -0
- package/dist/core/llm-extract.d.ts +71 -0
- package/dist/core/llm-extract.js +507 -0
- package/dist/core/llm-provider.d.ts +100 -0
- package/dist/core/llm-provider.js +702 -0
- package/dist/core/local-search.d.ts +60 -0
- package/dist/core/local-search.js +308 -0
- package/dist/core/logger.d.ts +28 -0
- package/dist/core/logger.js +104 -0
- package/dist/core/map.d.ts +33 -0
- package/dist/core/map.js +127 -0
- package/dist/core/markdown.d.ts +92 -0
- package/dist/core/markdown.js +809 -0
- package/dist/core/metadata.d.ts +34 -0
- package/dist/core/metadata.js +422 -0
- package/dist/core/observe.d.ts +113 -0
- package/dist/core/observe.js +395 -0
- package/dist/core/ocr.d.ts +12 -0
- package/dist/core/ocr.js +33 -0
- package/dist/core/paginate.d.ts +31 -0
- package/dist/core/paginate.js +106 -0
- package/dist/core/pdf.d.ts +8 -0
- package/dist/core/pdf.js +25 -0
- package/dist/core/peel-tls.d.ts +25 -0
- package/dist/core/peel-tls.js +220 -0
- package/dist/core/pipeline.d.ts +132 -0
- package/dist/core/pipeline.js +1666 -0
- package/dist/core/profiles.d.ts +61 -0
- package/dist/core/profiles.js +350 -0
- package/dist/core/prompt-guard.d.ts +30 -0
- package/dist/core/prompt-guard.js +119 -0
- package/dist/core/proxy-config.d.ts +90 -0
- package/dist/core/proxy-config.js +172 -0
- package/dist/core/quick-answer.d.ts +53 -0
- package/dist/core/quick-answer.js +833 -0
- package/dist/core/rate-governor.d.ts +80 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/readability.d.ts +57 -0
- package/dist/core/readability.js +533 -0
- package/dist/core/research.d.ts +66 -0
- package/dist/core/research.js +270 -0
- package/dist/core/retry.d.ts +60 -0
- package/dist/core/retry.js +119 -0
- package/dist/core/safe-browsing.d.ts +30 -0
- package/dist/core/safe-browsing.js +206 -0
- package/dist/core/schema-extraction.d.ts +66 -0
- package/dist/core/schema-extraction.js +352 -0
- package/dist/core/schema-postprocess.d.ts +32 -0
- package/dist/core/schema-postprocess.js +469 -0
- package/dist/core/schema-templates.d.ts +19 -0
- package/dist/core/schema-templates.js +143 -0
- package/dist/core/screenshot.d.ts +224 -0
- package/dist/core/screenshot.js +207 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +243 -0
- package/dist/core/search-provider.js +1629 -0
- package/dist/core/searxng-provider.d.ts +35 -0
- package/dist/core/searxng-provider.js +105 -0
- package/dist/core/selective-evidence.d.ts +151 -0
- package/dist/core/selective-evidence.js +389 -0
- package/dist/core/site-search.d.ts +44 -0
- package/dist/core/site-search.js +252 -0
- package/dist/core/sitemap.d.ts +23 -0
- package/dist/core/sitemap.js +105 -0
- package/dist/core/source-credibility.d.ts +29 -0
- package/dist/core/source-credibility.js +584 -0
- package/dist/core/source-scoring.d.ts +166 -0
- package/dist/core/source-scoring.js +396 -0
- package/dist/core/stemmer.d.ts +38 -0
- package/dist/core/stemmer.js +509 -0
- package/dist/core/strategies.d.ts +104 -0
- package/dist/core/strategies.js +1044 -0
- package/dist/core/strategy-hooks.d.ts +145 -0
- package/dist/core/strategy-hooks.js +74 -0
- package/dist/core/structured-extract.d.ts +43 -0
- package/dist/core/structured-extract.js +550 -0
- package/dist/core/summarize.d.ts +17 -0
- package/dist/core/summarize.js +78 -0
- package/dist/core/synonyms.d.ts +42 -0
- package/dist/core/synonyms.js +184 -0
- package/dist/core/system-monitor.d.ts +61 -0
- package/dist/core/system-monitor.js +133 -0
- package/dist/core/table-format.d.ts +30 -0
- package/dist/core/table-format.js +146 -0
- package/dist/core/threat-feeds.d.ts +23 -0
- package/dist/core/threat-feeds.js +104 -0
- package/dist/core/timing.d.ts +21 -0
- package/dist/core/timing.js +33 -0
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/core/user-agents.d.ts +82 -0
- package/dist/core/user-agents.js +239 -0
- package/dist/core/vertical-search.d.ts +54 -0
- package/dist/core/vertical-search.js +158 -0
- package/dist/core/watch-manager.d.ts +175 -0
- package/dist/core/watch-manager.js +416 -0
- package/dist/core/watch.d.ts +101 -0
- package/dist/core/watch.js +389 -0
- package/dist/core/youtube.d.ts +130 -0
- package/dist/core/youtube.js +1175 -0
- package/dist/ee/challenge-re-export.d.ts +1 -0
- package/dist/ee/challenge-re-export.js +1 -0
- package/dist/ee/challenge-solver.d.ts +72 -0
- package/dist/ee/challenge-solver.js +720 -0
- package/dist/ee/domain-extractors.d.ts +8 -0
- package/dist/ee/domain-extractors.js +8 -0
- package/dist/ee/domain-intel.d.ts +16 -0
- package/dist/ee/domain-intel.js +133 -0
- package/dist/ee/extractors/allrecipes.d.ts +2 -0
- package/dist/ee/extractors/allrecipes.js +120 -0
- package/dist/ee/extractors/amazon.d.ts +2 -0
- package/dist/ee/extractors/amazon.js +78 -0
- package/dist/ee/extractors/arxiv.d.ts +2 -0
- package/dist/ee/extractors/arxiv.js +137 -0
- package/dist/ee/extractors/bestbuy.d.ts +2 -0
- package/dist/ee/extractors/bestbuy.js +78 -0
- package/dist/ee/extractors/carscom.d.ts +2 -0
- package/dist/ee/extractors/carscom.js +121 -0
- package/dist/ee/extractors/coingecko.d.ts +2 -0
- package/dist/ee/extractors/coingecko.js +134 -0
- package/dist/ee/extractors/craigslist.d.ts +2 -0
- package/dist/ee/extractors/craigslist.js +92 -0
- package/dist/ee/extractors/devto.d.ts +2 -0
- package/dist/ee/extractors/devto.js +135 -0
- package/dist/ee/extractors/ebay.d.ts +2 -0
- package/dist/ee/extractors/ebay.js +90 -0
- package/dist/ee/extractors/espn.d.ts +2 -0
- package/dist/ee/extractors/espn.js +260 -0
- package/dist/ee/extractors/etsy.d.ts +2 -0
- package/dist/ee/extractors/etsy.js +52 -0
- package/dist/ee/extractors/facebook.d.ts +2 -0
- package/dist/ee/extractors/facebook.js +46 -0
- package/dist/ee/extractors/github.d.ts +2 -0
- package/dist/ee/extractors/github.js +196 -0
- package/dist/ee/extractors/google-flights.d.ts +2 -0
- package/dist/ee/extractors/google-flights.js +176 -0
- package/dist/ee/extractors/hackernews.d.ts +2 -0
- package/dist/ee/extractors/hackernews.js +147 -0
- package/dist/ee/extractors/imdb.d.ts +2 -0
- package/dist/ee/extractors/imdb.js +172 -0
- package/dist/ee/extractors/index.d.ts +26 -0
- package/dist/ee/extractors/index.js +247 -0
- package/dist/ee/extractors/instagram.d.ts +2 -0
- package/dist/ee/extractors/instagram.js +102 -0
- package/dist/ee/extractors/kalshi.d.ts +2 -0
- package/dist/ee/extractors/kalshi.js +121 -0
- package/dist/ee/extractors/kayak-cars.d.ts +2 -0
- package/dist/ee/extractors/kayak-cars.js +270 -0
- package/dist/ee/extractors/linkedin.d.ts +2 -0
- package/dist/ee/extractors/linkedin.js +113 -0
- package/dist/ee/extractors/medium.d.ts +2 -0
- package/dist/ee/extractors/medium.js +130 -0
- package/dist/ee/extractors/news.d.ts +4 -0
- package/dist/ee/extractors/news.js +173 -0
- package/dist/ee/extractors/npm.d.ts +2 -0
- package/dist/ee/extractors/npm.js +86 -0
- package/dist/ee/extractors/pdf.d.ts +2 -0
- package/dist/ee/extractors/pdf.js +108 -0
- package/dist/ee/extractors/pinterest.d.ts +2 -0
- package/dist/ee/extractors/pinterest.js +34 -0
- package/dist/ee/extractors/polymarket.d.ts +2 -0
- package/dist/ee/extractors/polymarket.js +358 -0
- package/dist/ee/extractors/producthunt.d.ts +2 -0
- package/dist/ee/extractors/producthunt.js +88 -0
- package/dist/ee/extractors/pubmed.d.ts +2 -0
- package/dist/ee/extractors/pubmed.js +162 -0
- package/dist/ee/extractors/pypi.d.ts +2 -0
- package/dist/ee/extractors/pypi.js +80 -0
- package/dist/ee/extractors/reddit.d.ts +2 -0
- package/dist/ee/extractors/reddit.js +438 -0
- package/dist/ee/extractors/redfin.d.ts +2 -0
- package/dist/ee/extractors/redfin.js +156 -0
- package/dist/ee/extractors/semanticscholar.d.ts +2 -0
- package/dist/ee/extractors/semanticscholar.js +131 -0
- package/dist/ee/extractors/shared.d.ts +12 -0
- package/dist/ee/extractors/shared.js +76 -0
- package/dist/ee/extractors/soundcloud.d.ts +2 -0
- package/dist/ee/extractors/soundcloud.js +34 -0
- package/dist/ee/extractors/sportsbetting.d.ts +2 -0
- package/dist/ee/extractors/sportsbetting.js +37 -0
- package/dist/ee/extractors/spotify.d.ts +2 -0
- package/dist/ee/extractors/spotify.js +34 -0
- package/dist/ee/extractors/stackoverflow.d.ts +2 -0
- package/dist/ee/extractors/stackoverflow.js +61 -0
- package/dist/ee/extractors/substack.d.ts +2 -0
- package/dist/ee/extractors/substack.js +115 -0
- package/dist/ee/extractors/substackroot.d.ts +2 -0
- package/dist/ee/extractors/substackroot.js +46 -0
- package/dist/ee/extractors/tiktok.d.ts +2 -0
- package/dist/ee/extractors/tiktok.js +29 -0
- package/dist/ee/extractors/tradingview.d.ts +2 -0
- package/dist/ee/extractors/tradingview.js +182 -0
- package/dist/ee/extractors/twitch.d.ts +2 -0
- package/dist/ee/extractors/twitch.js +36 -0
- package/dist/ee/extractors/twitter.d.ts +2 -0
- package/dist/ee/extractors/twitter.js +327 -0
- package/dist/ee/extractors/types.d.ts +14 -0
- package/dist/ee/extractors/types.js +1 -0
- package/dist/ee/extractors/walmart.d.ts +2 -0
- package/dist/ee/extractors/walmart.js +50 -0
- package/dist/ee/extractors/weather.d.ts +2 -0
- package/dist/ee/extractors/weather.js +133 -0
- package/dist/ee/extractors/wikipedia.d.ts +4 -0
- package/dist/ee/extractors/wikipedia.js +235 -0
- package/dist/ee/extractors/yelp.d.ts +2 -0
- package/dist/ee/extractors/yelp.js +216 -0
- package/dist/ee/extractors/youtube.d.ts +2 -0
- package/dist/ee/extractors/youtube.js +189 -0
- package/dist/ee/extractors/zillow.d.ts +54 -0
- package/dist/ee/extractors/zillow.js +247 -0
- package/dist/ee/extractors-re-export.d.ts +1 -0
- package/dist/ee/extractors-re-export.js +1 -0
- package/dist/ee/premium-hooks.d.ts +20 -0
- package/dist/ee/premium-hooks.js +50 -0
- package/dist/ee/spa-detection.d.ts +2 -0
- package/dist/ee/spa-detection.js +2 -0
- package/dist/ee/stability.d.ts +4 -0
- package/dist/ee/stability.js +29 -0
- package/dist/ee/swr-cache.d.ts +14 -0
- package/dist/ee/swr-cache.js +34 -0
- package/dist/index.d.ts +143 -0
- package/dist/index.js +291 -0
- package/dist/integrations/index.d.ts +2 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/langchain.d.ts +64 -0
- package/dist/integrations/langchain.js +115 -0
- package/dist/integrations/llamaindex.d.ts +50 -0
- package/dist/integrations/llamaindex.js +91 -0
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +395 -0
- package/dist/mcp/handlers/extract.d.ts +7 -0
- package/dist/mcp/handlers/extract.js +135 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +63 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +40 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/observe.d.ts +8 -0
- package/dist/mcp/handlers/observe.js +37 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +78 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +7 -0
- package/dist/mcp/server.js +108 -0
- package/dist/mcp/smart-router.d.ts +23 -0
- package/dist/mcp/smart-router.js +178 -0
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +632 -0
- package/dist/server/auth-store.d.ts +28 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/bull-queues.d.ts +60 -0
- package/dist/server/bull-queues.js +90 -0
- package/dist/server/email-service.d.ts +55 -0
- package/dist/server/email-service.js +291 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/audit-log.d.ts +14 -0
- package/dist/server/middleware/audit-log.js +73 -0
- package/dist/server/middleware/auth.d.ts +35 -0
- package/dist/server/middleware/auth.js +225 -0
- package/dist/server/middleware/rate-limit.d.ts +50 -0
- package/dist/server/middleware/rate-limit.js +270 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +201 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +146 -0
- package/dist/server/pg-auth-store.js +576 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +79 -0
- package/dist/server/routes/admin-active.d.ts +7 -0
- package/dist/server/routes/admin-active.js +120 -0
- package/dist/server/routes/admin-stats.d.ts +7 -0
- package/dist/server/routes/admin-stats.js +176 -0
- package/dist/server/routes/agent.d.ts +24 -0
- package/dist/server/routes/agent.js +480 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +295 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cache-warm.d.ts +25 -0
- package/dist/server/routes/cache-warm.js +212 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/crawl.d.ts +13 -0
- package/dist/server/routes/crawl.js +287 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/deep-research.d.ts +11 -0
- package/dist/server/routes/deep-research.js +232 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +14 -0
- package/dist/server/routes/extract.js +325 -0
- package/dist/server/routes/feed.d.ts +15 -0
- package/dist/server/routes/feed.js +311 -0
- package/dist/server/routes/fetch-queue.d.ts +13 -0
- package/dist/server/routes/fetch-queue.js +357 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +1274 -0
- package/dist/server/routes/go.d.ts +14 -0
- package/dist/server/routes/go.js +81 -0
- package/dist/server/routes/health.d.ts +11 -0
- package/dist/server/routes/health.js +141 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +574 -0
- package/dist/server/routes/map.d.ts +11 -0
- package/dist/server/routes/map.js +116 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +197 -0
- package/dist/server/routes/metrics.d.ts +37 -0
- package/dist/server/routes/metrics.js +149 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/reader.d.ts +18 -0
- package/dist/server/routes/reader.js +192 -0
- package/dist/server/routes/research.d.ts +14 -0
- package/dist/server/routes/research.js +482 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +820 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +874 -0
- package/dist/server/routes/session.d.ts +17 -0
- package/dist/server/routes/session.js +548 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +102 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +72 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
- package/dist/server/routes/smart-search/handlers/general.js +717 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +1309 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +154 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +546 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +264 -0
- package/dist/server/routes/smart-search/llm.d.ts +16 -0
- package/dist/server/routes/smart-search/llm.js +70 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +81 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +20 -0
- package/dist/server/routes/smart-search/utils.js +146 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +296 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/usage.d.ts +9 -0
- package/dist/server/routes/usage.js +279 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1867 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +14 -0
- package/dist/server/sentry.js +104 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/dist/types.d.ts +552 -0
- package/dist/types.js +39 -0
- package/llms.txt +105 -0
- package/package.json +189 -0
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SearXNG Search Provider
|
|
3
|
+
*
|
|
4
|
+
* Connects to a self-hosted SearXNG instance (running on Mac Mini with residential IP,
|
|
5
|
+
* exposed via Cloudflare Tunnel). SearXNG aggregates Google, Bing, Brave, Startpage, etc.
|
|
6
|
+
* and is not rate-limited or blocked since it runs on a residential IP.
|
|
7
|
+
*
|
|
8
|
+
* Config (env vars):
|
|
9
|
+
* SEARXNG_URL — Base URL of SearXNG instance (e.g. https://search.webpeel.dev)
|
|
10
|
+
*
|
|
11
|
+
* Falls back gracefully if SEARXNG_URL is not set or instance is unreachable.
|
|
12
|
+
*/
|
|
13
|
+
/**
 * One search hit as returned by searchViaSearXNG().
 * Only `title` and `url` are guaranteed; the remaining fields are copied from
 * the SearXNG response when it provides them.
 */
export interface SearXNGSearchResult {
    /** Result title. */
    title: string;
    /** Result URL, exactly as reported by SearXNG. */
    url: string;
    /** Taken from the `content` field of the SearXNG result. */
    description?: string;
    /** Taken from the `publishedDate` field of the SearXNG result. */
    publishedDate?: string;
    /** Taken from the `score` field of the SearXNG result. */
    score?: number;
    /** Taken from `img_src`, falling back to `thumbnail`. */
    imageUrl?: string;
}
|
|
21
|
+
/**
 * Queries a SearXNG instance and resolves with the hits it returned.
 * The result shape is compatible with the WebSearchResult interface in search-provider.ts.
 */
export declare function searchViaSearXNG(query: string, options?: {
    /** Maximum number of results to return (defaults to 10). */
    count?: number;
    /** Caller-supplied signal; aborting it cancels the in-flight request. */
    signal?: AbortSignal;
    /** Request timeout in milliseconds (defaults to 15000). */
    timeoutMs?: number;
    /** Value for the SearXNG `engines` parameter; not sent when empty. */
    engines?: string;
    /** Value for the SearXNG `language` parameter (defaults to 'en'). */
    language?: string;
}): Promise<SearXNGSearchResult[]>;
|
|
32
|
+
/**
 * Lightweight health probe: resolves to true when a test search against
 * SearXNG returns at least one result, and to false otherwise.
 */
export declare function isSearXNGHealthy(): Promise<boolean>;
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SearXNG Search Provider
|
|
3
|
+
*
|
|
4
|
+
* Connects to a self-hosted SearXNG instance (running on Mac Mini with residential IP,
|
|
5
|
+
* exposed via Cloudflare Tunnel). SearXNG aggregates Google, Bing, Brave, Startpage, etc.
|
|
6
|
+
* and is not rate-limited or blocked since it runs on a residential IP.
|
|
7
|
+
*
|
|
8
|
+
* Config (env vars):
|
|
9
|
+
* SEARXNG_URL — Base URL of SearXNG instance (e.g. https://search.webpeel.dev)
|
|
10
|
+
*
|
|
11
|
+
* Falls back gracefully if SEARXNG_URL is not set or instance is unreachable.
|
|
12
|
+
*/
|
|
13
|
+
import { fetch as undiciFetch } from 'undici';
|
|
14
|
+
import { createLogger } from './logger.js';
|
|
15
|
+
// Logger shared by every function in this module, created via createLogger('searxng').
const log = createLogger('searxng');
|
|
16
|
+
/**
 * Fetches search results from a SearXNG instance.
 * Returns results compatible with WebSearchResult interface in search-provider.ts.
 *
 * Best-effort by design: when SEARXNG_URL is unset, the response is not OK,
 * the request times out or is aborted, or fetching/parsing fails, the function
 * resolves to an empty array and logs the reason at debug level.
 *
 * @param {string} query - Search terms, sent as the `q` parameter.
 * @param {object} [options]
 * @param {number} [options.count=10] - Maximum number of results to return.
 * @param {AbortSignal} [options.signal] - Caller signal; aborting it cancels the request.
 * @param {number} [options.timeoutMs=15000] - Abort the request after this many milliseconds.
 * @param {string} [options.engines=''] - Value for the `engines` parameter; omitted when empty.
 * @param {string} [options.language='en'] - Value for the `language` parameter.
 * @returns {Promise<Array<object>>} Results de-duplicated by URL (case-insensitive,
 *   ignoring one trailing slash), in the order SearXNG returned them.
 */
export async function searchViaSearXNG(query, options = {}) {
    const baseUrl = process.env.SEARXNG_URL;
    if (!baseUrl)
        return [];
    const { count = 10, signal, timeoutMs = 15000, engines = '', language = 'en', } = options;
    // An 'abort' listener never fires on a signal that is already aborted, so
    // honour that state up front instead of issuing a request nobody wants.
    if (signal?.aborted) {
        log.debug('aborted by caller');
        return [];
    }
    const controller = new AbortController();
    // Remember who triggered the abort so the log message is accurate.
    let timedOut = false;
    const timeoutId = setTimeout(() => {
        timedOut = true;
        controller.abort();
    }, timeoutMs);
    const onCallerAbort = () => controller.abort();
    signal?.addEventListener('abort', onCallerAbort, { once: true });
    try {
        const params = new URLSearchParams({
            q: query,
            format: 'json',
            language,
            safesearch: '0',
            categories: 'general',
        });
        if (engines)
            params.set('engines', engines);
        const url = `${baseUrl.replace(/\/$/, '')}/search?${params.toString()}`;
        const response = await undiciFetch(url, {
            signal: controller.signal,
            headers: {
                'Accept': 'application/json',
                'User-Agent': 'WebPeel/1.0 (internal search aggregator)',
            },
        });
        if (!response.ok) {
            log.debug(`HTTP ${response.status}`);
            return [];
        }
        const data = (await response.json());
        // Treat anything other than an array as "no results" rather than iterating it.
        const results = Array.isArray(data?.results) ? data.results : [];
        if (results.length === 0) {
            log.debug('0 results returned');
            return [];
        }
        const seen = new Set();
        const output = [];
        for (const r of results) {
            // Check the limit before pushing so that count <= 0 yields nothing.
            if (output.length >= count)
                break;
            // Skip malformed entries instead of letting one bad row discard the whole batch.
            if (typeof r?.url !== 'string' || typeof r.title !== 'string' || !r.url || !r.title)
                continue;
            const normalized = r.url.replace(/\/$/, '').toLowerCase();
            if (seen.has(normalized))
                continue;
            seen.add(normalized);
            output.push({
                title: r.title,
                url: r.url,
                description: r.content ?? undefined,
                publishedDate: r.publishedDate ?? undefined,
                score: r.score ?? undefined,
                imageUrl: r.img_src ?? r.thumbnail ?? undefined,
            });
        }
        log.debug(`${output.length} results for "${query.substring(0, 40)}"`);
        return output;
    }
    catch (e) {
        const msg = e instanceof Error ? e.message : String(e);
        if (timedOut) {
            log.debug(`timed out after ${timeoutMs}ms`);
        }
        else if (signal?.aborted) {
            log.debug('aborted by caller');
        }
        else {
            log.debug('fetch error:', msg);
        }
        return [];
    }
    finally {
        clearTimeout(timeoutId);
        // Detach from the caller's signal so long-lived signals do not accumulate listeners.
        signal?.removeEventListener('abort', onCallerAbort);
    }
}
|
|
94
|
+
/**
 * Health probe for the configured SearXNG instance.
 *
 * Runs a one-result search for "test" with a 10 second timeout and reports
 * whether anything came back.
 *
 * @returns {Promise<boolean>} true when the probe search yields at least one
 *   result; false when it yields none or throws.
 */
export async function isSearXNGHealthy() {
    const probeOptions = { count: 1, timeoutMs: 10000 };
    let healthy = false;
    try {
        const hits = await searchViaSearXNG('test', probeOptions);
        healthy = hits.length > 0;
    }
    catch {
        // Any failure of the probe is reported as "not healthy".
    }
    return healthy;
}
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Selective Evidence Aggregation
|
|
3
|
+
*
|
|
4
|
+
* AttnRes-inspired evidence selection: instead of naively concatenating all
|
|
5
|
+
* sources, score and select evidence blocks that maximise relevance,
|
|
6
|
+
* credibility, and source diversity for a given query.
|
|
7
|
+
*
|
|
8
|
+
* Design goals:
|
|
9
|
+
* 1. Query-aware block scoring — BM25 relevance per content block
|
|
10
|
+
* 2. Credibility/authority weighting — higher-authority sources get a boost
|
|
11
|
+
* 3. Structured-signal detection — detect structured data even when
|
|
12
|
+
* domainData.structured is absent (prices, dates, tables, lists, JSON-LD)
|
|
13
|
+
* 4. Per-domain diversity limits — configurable cap per registered domain
|
|
14
|
+
* 5. Query-type-aware policy — factual vs exploratory queries use
|
|
15
|
+
* different diversity/concentration knobs
|
|
16
|
+
* 6. Exact facts preserved — numbers, prices, dates are never mutated
|
|
17
|
+
*
|
|
18
|
+
* No external dependencies — pure TypeScript, reuses existing helpers.
|
|
19
|
+
*/
|
|
20
|
+
/** One candidate source of evidence: its content together with its metadata. */
export interface EvidenceSource {
    url: string;
    title: string;
    content: string;
    /** Search-result snippet, used as a fallback when `content` is empty. */
    snippet?: string;
    /** Structured data already computed by the domain extractors. */
    structured?: unknown;
    /** Page-level metadata, such as publish dates. */
    metadata?: Record<string, unknown>;
}
|
|
32
|
+
/** A scored and selected evidence block */
|
|
33
|
+
export interface SelectedBlock {
|
|
34
|
+
/** The text content of this block */
|
|
35
|
+
text: string;
|
|
36
|
+
/** Source URL this block came from */
|
|
37
|
+
sourceUrl: string;
|
|
38
|
+
/** Source title */
|
|
39
|
+
sourceTitle: string;
|
|
40
|
+
/** Composite score used for ranking (0-1) */
|
|
41
|
+
score: number;
|
|
42
|
+
/** Whether a structured signal was detected in this block */
|
|
43
|
+
hasStructuredSignal: boolean;
|
|
44
|
+
/** Whether the selector used full page content or a snippet fallback */
|
|
45
|
+
contentMode: 'content' | 'snippet';
|
|
46
|
+
}
|
|
47
|
+
/** Result of selectEvidence() */
|
|
48
|
+
export interface SelectionResult {
|
|
49
|
+
/** Selected evidence blocks, in score-descending order */
|
|
50
|
+
blocks: SelectedBlock[];
|
|
51
|
+
/** Number of total candidate blocks evaluated */
|
|
52
|
+
totalCandidates: number;
|
|
53
|
+
/** Number of sources that contributed at least one block */
|
|
54
|
+
sourcesUsed: number;
|
|
55
|
+
/** The query type policy that was applied */
|
|
56
|
+
policy: QueryPolicy;
|
|
57
|
+
}
|
|
58
|
+
export type QueryType = 'factual' | 'comparison' | 'exploratory';
|
|
59
|
+
export interface QueryPolicy {
|
|
60
|
+
/** The detected query type */
|
|
61
|
+
type: QueryType;
|
|
62
|
+
/** Max blocks from any single registered domain */
|
|
63
|
+
maxBlocksPerDomain: number;
|
|
64
|
+
/** Weight multiplier for authority score (0-1) */
|
|
65
|
+
authorityWeight: number;
|
|
66
|
+
/** Weight multiplier for BM25 relevance (0-1) */
|
|
67
|
+
relevanceWeight: number;
|
|
68
|
+
/** Weight multiplier for structured signal boost (0-1) */
|
|
69
|
+
structuredWeight: number;
|
|
70
|
+
/** Minimum number of unique domains to try to include */
|
|
71
|
+
minDomains: number;
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* Classify a query and return the appropriate diversity/weighting policy.
|
|
75
|
+
*
|
|
76
|
+
* - **factual**: pricing, version, limit, spec queries → tight authority
|
|
77
|
+
* concentration, fewer domains needed, structured signals weighted high
|
|
78
|
+
* - **comparison**: "X vs Y", "alternatives", "pros and cons" → moderate
|
|
79
|
+
* diversity, balanced weights
|
|
80
|
+
* - **exploratory**: "how does X work", "explain Y", research queries →
|
|
81
|
+
* maximum diversity, many domains encouraged
|
|
82
|
+
*/
|
|
83
|
+
export declare function classifyQuery(query: string): QueryPolicy;
|
|
84
|
+
/**
|
|
85
|
+
* Detect whether a text block contains structured information signals.
|
|
86
|
+
*
|
|
87
|
+
* This does NOT rely on domainData.structured being present — it looks at
|
|
88
|
+
* the actual content for patterns that indicate structured data:
|
|
89
|
+
* - Price/currency patterns ($X.XX, €, £, ¥)
|
|
90
|
+
* - Markdown tables (lines starting with |)
|
|
91
|
+
* - Key-value patterns ("Key: Value")
|
|
92
|
+
* - Numeric data density (percentages, measurements, dates)
|
|
93
|
+
* - JSON-LD or schema.org markers
|
|
94
|
+
* - Ordered/numbered lists with data
|
|
95
|
+
*
|
|
96
|
+
* Returns a score 0-1 representing structured signal strength.
|
|
97
|
+
*/
|
|
98
|
+
export declare function detectStructuredSignal(text: string): number;
|
|
99
|
+
/**
|
|
100
|
+
* Compute a structured signal score for a source, combining:
|
|
101
|
+
* 1. Pre-existing structured data (domainData.structured) if present
|
|
102
|
+
* 2. Content-derived structured signals from detectStructuredSignal()
|
|
103
|
+
*
|
|
104
|
+
* Returns 0-1.
|
|
105
|
+
*/
|
|
106
|
+
export declare function sourceStructuredScore(source: EvidenceSource): number;
|
|
107
|
+
/**
|
|
108
|
+
* Returns true when fetched content is a WebPeel placeholder / error shell rather
|
|
109
|
+
* than usable evidence for synthesis.
|
|
110
|
+
*/
|
|
111
|
+
export declare function isUnusableEvidenceContent(text: string | undefined | null): boolean;
|
|
112
|
+
/**
|
|
113
|
+
* Choose the best evidence text for a source.
|
|
114
|
+
* - Prefer full fetched content when it is usable
|
|
115
|
+
* - Fall back to the search snippet when the fetch content is blocked/error placeholder
|
|
116
|
+
*/
|
|
117
|
+
export declare function getBestEvidenceText(source: EvidenceSource): {
|
|
118
|
+
text: string;
|
|
119
|
+
mode: 'content' | 'snippet' | 'none';
|
|
120
|
+
};
|
|
121
|
+
export interface SelectEvidenceOptions {
|
|
122
|
+
/** The user query */
|
|
123
|
+
query: string;
|
|
124
|
+
/** All candidate sources */
|
|
125
|
+
sources: EvidenceSource[];
|
|
126
|
+
/** Maximum total blocks to return. Default: 12 */
|
|
127
|
+
maxBlocks?: number;
|
|
128
|
+
/** Maximum character budget for all selected blocks combined. Default: 6000 */
|
|
129
|
+
maxChars?: number;
|
|
130
|
+
/** Override the auto-detected policy */
|
|
131
|
+
policyOverride?: Partial<QueryPolicy>;
|
|
132
|
+
}
|
|
133
|
+
/**
|
|
134
|
+
* Select the best evidence blocks from multiple sources for a given query.
|
|
135
|
+
*
|
|
136
|
+
* Pipeline:
|
|
137
|
+
* 1. Classify query → policy (diversity caps, weight distribution)
|
|
138
|
+
* 2. For each source: split into blocks, score BM25 against query
|
|
139
|
+
* 3. Compute composite score per block: policy-weighted sum of relevance, authority, and structured signal
|
|
140
|
+
* 4. Apply per-domain diversity cap
|
|
141
|
+
* 5. Ensure minimum domain diversity (promote under-represented domains)
|
|
142
|
+
* 6. Return top blocks within budget
|
|
143
|
+
*/
|
|
144
|
+
export declare function selectEvidence(options: SelectEvidenceOptions): SelectionResult;
|
|
145
|
+
/**
|
|
146
|
+
* Format selected evidence blocks into a numbered, source-attributed string
|
|
147
|
+
* suitable for LLM context injection.
|
|
148
|
+
*
|
|
149
|
+
* Preserves exact facts/numbers — no summarization or transformation.
|
|
150
|
+
*/
|
|
151
|
+
export declare function formatEvidenceForLLM(result: SelectionResult): string;
|
|
@@ -0,0 +1,389 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Selective Evidence Aggregation
|
|
3
|
+
*
|
|
4
|
+
* AttnRes-inspired evidence selection: instead of naively concatenating all
|
|
5
|
+
* sources, score and select evidence blocks that maximise relevance,
|
|
6
|
+
* credibility, and source diversity for a given query.
|
|
7
|
+
*
|
|
8
|
+
* Design goals:
|
|
9
|
+
* 1. Query-aware block scoring — BM25 relevance per content block
|
|
10
|
+
* 2. Credibility/authority weighting — higher-authority sources get a boost
|
|
11
|
+
* 3. Structured-signal detection — detect structured data even when
|
|
12
|
+
* domainData.structured is absent (prices, dates, tables, lists, JSON-LD)
|
|
13
|
+
* 4. Per-domain diversity limits — configurable cap per registered domain
|
|
14
|
+
* 5. Query-type-aware policy — factual vs exploratory queries use
|
|
15
|
+
* different diversity/concentration knobs
|
|
16
|
+
* 6. Exact facts preserved — numbers, prices, dates are never mutated
|
|
17
|
+
*
|
|
18
|
+
* No external dependencies — pure TypeScript, reuses existing helpers.
|
|
19
|
+
*/
|
|
20
|
+
import { splitIntoBlocks, scoreBM25 } from './bm25-filter.js';
|
|
21
|
+
import { scoreDomainAuthority, extractRegisteredDomain, isFactualQuery, } from './source-scoring.js';
|
|
22
|
+
// Queries that ask to weigh options against each other.
const COMPARISON_PATTERN = /\b(compare|comparison|vs\.?|versus|difference|differences|between|pros?\s+and\s+cons?|alternatives?|better|which\s+is|review|benchmark|ranking)\b/i;
// Open-ended "teach me" / research style queries.
const EXPLORATORY_PATTERN = /\b(how\s+(?:does|do|to|can)|what\s+(?:is|are|does)|explain|overview|introduction|guide|tutorial|learn|understand|history|background|research|explore|survey)\b/i;
/**
 * Map a query onto the diversity/weighting policy used by the selector.
 *
 * Checks run from most to least specific and the first hit wins:
 * - **factual** (per `isFactualQuery`): up to 4 blocks per domain, heaviest
 *   authority and structured-signal weights, 2 domains wanted
 * - **comparison** (`COMPARISON_PATTERN`): 3 blocks per domain, 3 domains wanted
 * - **exploratory** (`EXPLORATORY_PATTERN`): 2 blocks per domain, heaviest
 *   relevance weight, 4 domains wanted
 * - anything else: labelled `exploratory` but with the balanced comparison-style
 *   numbers
 *
 * @param {string} query - The user query.
 * @returns {QueryPolicy} A freshly created policy object.
 */
export function classifyQuery(query) {
    // Small factory so every branch emits the same key order.
    const policyOf = (type, maxBlocksPerDomain, authorityWeight, relevanceWeight, structuredWeight, minDomains) => ({
        type,
        maxBlocksPerDomain,
        authorityWeight,
        relevanceWeight,
        structuredWeight,
        minDomains,
    });
    if (isFactualQuery(query)) {
        return policyOf('factual', 4, 0.35, 0.40, 0.25, 2);
    }
    if (COMPARISON_PATTERN.test(query)) {
        return policyOf('comparison', 3, 0.25, 0.45, 0.15, 3);
    }
    if (EXPLORATORY_PATTERN.test(query)) {
        return policyOf('exploratory', 2, 0.20, 0.50, 0.10, 4);
    }
    // Fallback: balanced weights under the exploratory label.
    return policyOf('exploratory', 3, 0.25, 0.45, 0.15, 3);
}
|
|
78
|
+
// ---------------------------------------------------------------------------
|
|
79
|
+
// Structured-signal detection (lightweight, no giant dependency)
|
|
80
|
+
// ---------------------------------------------------------------------------
|
|
81
|
+
/**
 * Estimate how strongly a text block looks like structured data.
 *
 * The score is built purely from the text itself (it does not look at any
 * pre-extracted structured payload) by adding up independent signals:
 *   - Price/currency amounts ($, €, £, ¥): 0.1 each, capped at 0.3
 *   - Markdown table rows (2 or more):     0.05 each, capped at 0.3
 *   - "Label: Value" lines (2 or more):    0.04 each, capped at 0.2
 *   - Number + unit / percentage (2+):     0.04 each, capped at 0.2
 *   - Dotted version number (v2.0, ...):   flat 0.1
 *   - JSON-LD / schema.org markers:        flat 0.15
 *
 * @param {string} text - Block text to inspect.
 * @returns {number} Signal strength in [0, 1]; 0 for empty input or input
 *   shorter than 10 characters.
 */
export function detectStructuredSignal(text) {
    if (!text || text.length < 10)
        return 0;
    let signal = 0;
    const lines = text.split('\n');
    // Price/currency patterns — strong signal.
    // `*` (not `+`) after the first digit so single-digit prices such as "$5"
    // are counted as well.
    const priceMatches = text.match(/[$€£¥]\s?\d[\d,.]*/g);
    if (priceMatches && priceMatches.length > 0) {
        signal += Math.min(0.3, priceMatches.length * 0.1);
    }
    // Markdown table rows (|col1|col2|)
    const tableRows = lines.filter(l => /^\s*\|.*\|/.test(l));
    if (tableRows.length >= 2) {
        signal += Math.min(0.3, tableRows.length * 0.05);
    }
    // Key-value patterns ("Label: Value" at start of line)
    const kvMatches = lines.filter(l => /^\s*[A-Z][A-Za-z\s]{1,25}:\s+\S/.test(l));
    if (kvMatches.length >= 2) {
        signal += Math.min(0.2, kvMatches.length * 0.04);
    }
    // Numeric data density — percentages and measurements.
    // The trailing \b applies to the alphabetic units only: "%" is a non-word
    // character, so a \b after it would only match when a letter or digit
    // follows, which rejects ordinary text like "40% of users".
    const numericPatterns = text.match(/\b\d{1,3}(?:[.,]\d{1,3})*\s*(?:%|(?:GB|MB|TB|kg|lb|mph|km|mi|ms|sec|min|hr|days?|months?|years?)\b)/gi);
    if (numericPatterns && numericPatterns.length >= 2) {
        signal += Math.min(0.2, numericPatterns.length * 0.04);
    }
    // Explicit dotted version/spec patterns (v2.0, version 4.1). A bare major
    // version such as "v3" is intentionally not matched by this expression.
    if (/\bv(?:ersion)?\s?\d+(?:\.\d+)+/i.test(text)) {
        signal += 0.1;
    }
    // JSON-LD / schema.org markers
    if (/@context|schema\.org|itemtype|itemprop/i.test(text)) {
        signal += 0.15;
    }
    return Math.min(1.0, signal);
}
|
|
130
|
+
/**
 * Compute a structured-signal score for a whole source by combining:
 *   1. The pre-extracted `source.structured` payload, worth a flat 0.5 when
 *      its serialised form is longer than 5 characters (i.e. more than `{}`)
 *   2. Half of the content-derived signal from detectStructuredSignal()
 *
 * @param {EvidenceSource} source - Source whose `structured` and `content`
 *   fields are inspected.
 * @returns {number} Score in [0, 1].
 */
export function sourceStructuredScore(source) {
    let score = 0;
    // If domain extractor provided structured data, strong signal
    if (source.structured != null) {
        let serialized = '';
        if (typeof source.structured === 'string') {
            serialized = source.structured;
        }
        else {
            try {
                // JSON.stringify yields undefined for functions/symbols and
                // throws on circular references or BigInt; neither case should
                // abort scoring, so such payloads simply earn no boost.
                serialized = JSON.stringify(source.structured) ?? '';
            }
            catch {
                serialized = '';
            }
        }
        // Non-trivial structured data (more than just {})
        if (serialized.length > 5) {
            score += 0.5;
        }
    }
    // Content-derived structured signal
    const contentSignal = detectStructuredSignal(source.content || '');
    score += contentSignal * 0.5;
    return Math.min(1.0, score);
}
|
|
154
|
+
// ---------------------------------------------------------------------------
|
|
155
|
+
// Evidence quality / fallback helpers
|
|
156
|
+
// ---------------------------------------------------------------------------
|
|
157
|
+
// Markers of placeholder / error-shell content that carries no real evidence.
const UNUSABLE_EVIDENCE_PATTERNS = [
    /^#\s*⚠️\s+.+?\s+—\s+Access Blocked/im,
    /This site uses advanced bot protection and blocked our request\./i,
    /^##\s*❌\s+Reddit Post Not Found/im,
    /The post at r\/.+ could not be found\./i,
    /Server returned an error page \(522\)/i,
    /fetch_failed/i,
];
/**
 * Decide whether fetched text is a placeholder or error shell instead of
 * evidence worth passing to synthesis.
 *
 * @param {string | undefined | null} text - Fetched page content.
 * @returns {boolean} `true` for missing, empty, or whitespace-only text and
 *   for text matching any entry of UNUSABLE_EVIDENCE_PATTERNS.
 */
export function isUnusableEvidenceContent(text) {
    const body = text ? text.trim() : '';
    if (body === '') {
        return true;
    }
    for (const marker of UNUSABLE_EVIDENCE_PATTERNS) {
        if (marker.test(body)) {
            return true;
        }
    }
    return false;
}
|
|
177
|
+
/**
 * Pick the text that should represent a source as evidence.
 *
 * @param {EvidenceSource} source - Candidate source.
 * @returns {{ text: string, mode: 'content' | 'snippet' | 'none' }}
 *   - `content`: the fetched content, when it is usable
 *   - `snippet`: the trimmed search snippet, when the content is unusable and
 *     the snippet is at least 20 characters long
 *   - `none` (empty text): when neither is available
 */
export function getBestEvidenceText(source) {
    if (isUnusableEvidenceContent(source.content)) {
        const fallback = source.snippet?.trim() ?? '';
        return fallback.length >= 20
            ? { text: fallback, mode: 'snippet' }
            : { text: '', mode: 'none' };
    }
    return { text: source.content, mode: 'content' };
}
|
|
192
|
+
// ---------------------------------------------------------------------------
|
|
193
|
+
// Main selector
|
|
194
|
+
// ---------------------------------------------------------------------------
|
|
195
|
+
/**
 * Select the best evidence blocks from multiple sources for a given query.
 *
 * Pipeline:
 *   1. Classify query → policy (diversity caps, weight distribution); defined
 *      fields of `policyOverride` replace the classified values
 *   2. For each source: pick usable text, split into blocks, score BM25
 *   3. Composite score per block: policy-weighted SUM of normalised relevance,
 *      domain authority, and structured signal
 *   4. Greedy pick in score order under the per-domain cap, `maxBlocks`, and
 *      `maxChars`
 *   5. If fewer than `policy.minDomains` domains are represented, swap the
 *      weakest block of the most-represented domain for the best block of a
 *      missing domain — only when that block scores at least 60% of the one it
 *      replaces and the swap still fits the character budget
 *   6. Return the blocks in score-descending order
 *
 * @param {SelectEvidenceOptions} options - Query, sources, and limits
 *   (`maxBlocks` defaults to 12, `maxChars` to 6000).
 * @returns {SelectionResult} Selected blocks plus candidate/source counts and
 *   the policy that was applied.
 */
export function selectEvidence(options) {
    const { query, sources, maxBlocks = 12, maxChars = 6000, policyOverride, } = options;
    // Step 1: Classify query and build policy.
    // Override keys explicitly set to undefined are dropped so they cannot
    // erase a base weight and turn every composite score into NaN.
    const basePolicy = classifyQuery(query);
    const definedOverrides = Object.fromEntries(Object.entries(policyOverride ?? {}).filter(([, value]) => value !== undefined));
    const policy = { ...basePolicy, ...definedOverrides };
    if (sources.length === 0) {
        return { blocks: [], totalCandidates: 0, sourcesUsed: 0, policy };
    }
    // Tokenize query for BM25
    const queryTerms = query
        .toLowerCase()
        .replace(/[^\w\s]/g, ' ')
        .split(/\s+/)
        .filter(t => t.length > 1);
    // Steps 2-3: build scored candidates
    const candidates = [];
    for (const source of sources) {
        const bestText = getBestEvidenceText(source);
        const text = bestText.text;
        if (!text || text.length < 20)
            continue;
        const blocks = splitIntoBlocks(text);
        if (blocks.length === 0)
            continue;
        const bm25Scores = queryTerms.length > 0
            ? scoreBM25(blocks, queryTerms)
            : blocks.map(() => 0.1); // small baseline when no query terms
        const authority = scoreDomainAuthority(source.url);
        // Score the text actually used (content or snippet fallback)
        const structuredSrc = sourceStructuredScore({ ...source, content: text });
        const domain = extractRegisteredDomain(source.url);
        for (let i = 0; i < blocks.length; i++) {
            const raw = blocks[i].raw;
            // Skip very short blocks (nav fragments, single words)
            if (raw.length < 30)
                continue;
            // Squash positive BM25 scores into (0, 1); non-positive scores become 0
            const rawBm25 = bm25Scores[i];
            const normBm25 = rawBm25 > 0
                ? 2 / (1 + Math.exp(-rawBm25 * 4)) - 1
                : 0;
            // Blend source-level (60%) and block-level (40%) structured signals
            const blockStructured = detectStructuredSignal(raw);
            const combinedStructured = Math.min(1.0, structuredSrc * 0.6 + blockStructured * 0.4);
            // Composite: weighted sum per policy
            const composite = normBm25 * policy.relevanceWeight +
                authority * policy.authorityWeight +
                combinedStructured * policy.structuredWeight;
            candidates.push({
                text: raw,
                sourceUrl: source.url,
                sourceTitle: source.title,
                domain,
                bm25Score: normBm25,
                authorityScore: authority,
                structuredScore: combinedStructured,
                compositeScore: composite,
                hasStructuredSignal: combinedStructured > 0.15,
                contentMode: bestText.mode === 'snippet' ? 'snippet' : 'content',
            });
        }
    }
    const totalCandidates = candidates.length;
    if (totalCandidates === 0) {
        return { blocks: [], totalCandidates: 0, sourcesUsed: 0, policy };
    }
    // Step 4: Sort by composite score, apply per-domain cap
    candidates.sort((a, b) => b.compositeScore - a.compositeScore);
    const domainBlockCounts = new Map();
    const selected = [];
    let charBudget = maxChars;
    for (const c of candidates) {
        if (selected.length >= maxBlocks)
            break;
        if (charBudget <= 0)
            break;
        const domainCount = domainBlockCounts.get(c.domain) ?? 0;
        if (domainCount >= policy.maxBlocksPerDomain)
            continue;
        // Don't exceed char budget
        if (c.text.length > charBudget) {
            // An oversized block is only accepted (truncated to the budget)
            // when nothing has been selected yet, so the result is never
            // empty merely because the top block is long.
            if (selected.length === 0) {
                selected.push({ ...c, text: c.text.substring(0, charBudget) });
                charBudget = 0;
                domainBlockCounts.set(c.domain, domainCount + 1);
            }
            continue;
        }
        selected.push(c);
        charBudget -= c.text.length;
        domainBlockCounts.set(c.domain, domainCount + 1);
    }
    // Step 5: Ensure minimum domain diversity
    // If we haven't hit minDomains, try to swap in blocks from under-represented domains
    const selectedDomains = new Set(selected.map(s => s.domain));
    if (selectedDomains.size < policy.minDomains && selected.length > 1) {
        // Find domains not yet represented
        const allDomains = new Set(candidates.map(c => c.domain));
        const missingDomains = [...allDomains].filter(d => !selectedDomains.has(d));
        let swapped = false;
        for (const missingDomain of missingDomains) {
            if (selectedDomains.size >= policy.minDomains)
                break;
            // Best block from this domain (candidates are already score-sorted)
            const domainBest = candidates.find(c => c.domain === missingDomain && !selected.includes(c));
            if (!domainBest || domainBest.compositeScore <= 0)
                continue;
            // Count blocks per domain in the current selection
            const domainCounts = new Map();
            for (const s of selected) {
                domainCounts.set(s.domain, (domainCounts.get(s.domain) ?? 0) + 1);
            }
            // Find the domain with the most blocks
            let maxDomain = '';
            let maxCount = 0;
            for (const [d, c] of domainCounts) {
                if (c > maxCount) {
                    maxCount = c;
                    maxDomain = d;
                }
            }
            // Only swap if the over-represented domain has 2+ blocks
            if (maxCount < 2)
                continue;
            // Find the worst block from that domain
            const worstIdx = selected.reduce((worst, s, i) => {
                if (s.domain !== maxDomain)
                    return worst;
                if (worst === -1)
                    return i;
                return s.compositeScore < selected[worst].compositeScore ? i : worst;
            }, -1);
            if (worstIdx === -1)
                continue;
            const worstBlock = selected[worstIdx];
            // The swap frees the evicted block's characters and spends the
            // replacement's; skip it if that would overrun maxChars.
            const budgetAfterSwap = charBudget + worstBlock.text.length - domainBest.text.length;
            if (budgetAfterSwap < 0)
                continue;
            // Only swap if the replacement isn't more than 40% worse
            if (domainBest.compositeScore >= worstBlock.compositeScore * 0.6) {
                selected[worstIdx] = domainBest;
                charBudget = budgetAfterSwap;
                selectedDomains.add(missingDomain);
                swapped = true;
            }
        }
        // A swap drops a lower-scored block into an arbitrary slot; restore the
        // score-descending order promised for the returned blocks.
        if (swapped) {
            selected.sort((a, b) => b.compositeScore - a.compositeScore);
        }
    }
    // Step 6: Build result (internal scoring fields are not exposed)
    const sourcesUsed = new Set(selected.map(s => s.sourceUrl)).size;
    const blocks = selected.map(c => ({
        text: c.text,
        sourceUrl: c.sourceUrl,
        sourceTitle: c.sourceTitle,
        score: c.compositeScore,
        hasStructuredSignal: c.hasStructuredSignal,
        contentMode: c.contentMode,
    }));
    return { blocks, totalCandidates, sourcesUsed, policy };
}
|
|
359
|
+
// ---------------------------------------------------------------------------
|
|
360
|
+
// Convenience: format selected evidence for LLM context
|
|
361
|
+
// ---------------------------------------------------------------------------
|
|
362
|
+
/**
 * Render a selection result as source-attributed text for an LLM prompt.
 *
 * Blocks are grouped by source URL in order of first appearance. Each group
 * becomes `[n] <title>[ [structured]][ [snippet]]`, a `URL:` line, and the
 * group's block texts separated by blank lines; groups are separated by a
 * `---` rule. Block text is emitted verbatim.
 *
 * @param {SelectionResult} result - Output of selectEvidence().
 * @returns {string} Formatted context, or '' when there are no blocks.
 */
export function formatEvidenceForLLM(result) {
    const { blocks: selectedBlocks } = result;
    if (selectedBlocks.length === 0) {
        return '';
    }
    // Bucket blocks per source; Map keeps first-seen insertion order.
    const bySource = new Map();
    selectedBlocks.forEach((entry) => {
        const bucket = bySource.get(entry.sourceUrl);
        if (bucket) {
            bucket.push(entry);
        }
        else {
            bySource.set(entry.sourceUrl, [entry]);
        }
    });
    const sections = [...bySource].map(([url, group], position) => {
        const heading = `[${position + 1}] ${group[0].sourceTitle}`;
        // Tagged as structured if any block qualifies; as snippet only if all do.
        const structuredTag = group.some(b => b.hasStructuredSignal) ? ' [structured]' : '';
        const snippetTag = group.every(b => b.contentMode === 'snippet') ? ' [snippet]' : '';
        const body = group.map(b => b.text).join('\n\n');
        return `${heading}${structuredTag}${snippetTag}\nURL: ${url}\n\n${body}`;
    });
    return sections.join('\n\n---\n\n');
}
|