@iflow-mcp/jakeliume-webpeel 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +313 -0
- package/dist/cache.d.ts +30 -0
- package/dist/cache.js +139 -0
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +411 -0
- package/dist/cli/commands/doctor.d.ts +37 -0
- package/dist/cli/commands/doctor.js +371 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1345 -0
- package/dist/cli/commands/guide.d.ts +2 -0
- package/dist/cli/commands/guide.js +183 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +840 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/monitor.d.ts +12 -0
- package/dist/cli/commands/monitor.js +197 -0
- package/dist/cli/commands/observe.d.ts +12 -0
- package/dist/cli/commands/observe.js +158 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +282 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +1021 -0
- package/dist/cli/commands/setup.d.ts +13 -0
- package/dist/cli/commands/setup.js +244 -0
- package/dist/cli/commands/skill.d.ts +15 -0
- package/dist/cli/commands/skill.js +195 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +806 -0
- package/dist/cli-auth.d.ts +75 -0
- package/dist/cli-auth.js +369 -0
- package/dist/cli.d.ts +17 -0
- package/dist/cli.js +99 -0
- package/dist/core/actions.d.ts +69 -0
- package/dist/core/actions.js +495 -0
- package/dist/core/agent.d.ts +98 -0
- package/dist/core/agent.js +558 -0
- package/dist/core/answer.d.ts +42 -0
- package/dist/core/answer.js +395 -0
- package/dist/core/application-tracker.d.ts +84 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/apply.d.ts +162 -0
- package/dist/core/apply.js +816 -0
- package/dist/core/auth-detection.d.ts +35 -0
- package/dist/core/auth-detection.js +358 -0
- package/dist/core/auto-extract.d.ts +82 -0
- package/dist/core/auto-extract.js +604 -0
- package/dist/core/auto-interact.d.ts +23 -0
- package/dist/core/auto-interact.js +246 -0
- package/dist/core/bm25-filter.d.ts +66 -0
- package/dist/core/bm25-filter.js +288 -0
- package/dist/core/branding.d.ts +54 -0
- package/dist/core/branding.js +234 -0
- package/dist/core/browser-fetch.d.ts +323 -0
- package/dist/core/browser-fetch.js +1600 -0
- package/dist/core/browser-pool.d.ts +91 -0
- package/dist/core/browser-pool.js +550 -0
- package/dist/core/budget.d.ts +42 -0
- package/dist/core/budget.js +324 -0
- package/dist/core/business-intel.d.ts +47 -0
- package/dist/core/business-intel.js +279 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +121 -0
- package/dist/core/cf-worker-proxy.d.ts +32 -0
- package/dist/core/cf-worker-proxy.js +87 -0
- package/dist/core/challenge-detection.d.ts +26 -0
- package/dist/core/challenge-detection.js +468 -0
- package/dist/core/change-tracking.d.ts +75 -0
- package/dist/core/change-tracking.js +276 -0
- package/dist/core/chunker.d.ts +46 -0
- package/dist/core/chunker.js +249 -0
- package/dist/core/chunking.d.ts +42 -0
- package/dist/core/chunking.js +181 -0
- package/dist/core/circuit-breaker.d.ts +44 -0
- package/dist/core/circuit-breaker.js +85 -0
- package/dist/core/content-pruner.d.ts +47 -0
- package/dist/core/content-pruner.js +425 -0
- package/dist/core/cookie-cache.d.ts +60 -0
- package/dist/core/cookie-cache.js +163 -0
- package/dist/core/crawl-checkpoint.d.ts +54 -0
- package/dist/core/crawl-checkpoint.js +104 -0
- package/dist/core/crawler.d.ts +84 -0
- package/dist/core/crawler.js +349 -0
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/deep-fetch.d.ts +74 -0
- package/dist/core/deep-fetch.js +405 -0
- package/dist/core/deep-research.d.ts +141 -0
- package/dist/core/deep-research.js +972 -0
- package/dist/core/design-analysis.d.ts +70 -0
- package/dist/core/design-analysis.js +490 -0
- package/dist/core/design-compare.d.ts +38 -0
- package/dist/core/design-compare.js +264 -0
- package/dist/core/diff.d.ts +61 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/dns-cache.d.ts +20 -0
- package/dist/core/dns-cache.js +198 -0
- package/dist/core/documents.d.ts +23 -0
- package/dist/core/documents.js +123 -0
- package/dist/core/domain-memory.d.ts +66 -0
- package/dist/core/domain-memory.js +163 -0
- package/dist/core/domain-verify.d.ts +40 -0
- package/dist/core/domain-verify.js +379 -0
- package/dist/core/engine-ranker.d.ts +112 -0
- package/dist/core/engine-ranker.js +395 -0
- package/dist/core/extract-inline.d.ts +38 -0
- package/dist/core/extract-inline.js +215 -0
- package/dist/core/extract-listings.d.ts +38 -0
- package/dist/core/extract-listings.js +461 -0
- package/dist/core/extract.d.ts +9 -0
- package/dist/core/extract.js +139 -0
- package/dist/core/fetch-cache.d.ts +57 -0
- package/dist/core/fetch-cache.js +95 -0
- package/dist/core/fetcher.d.ts +13 -0
- package/dist/core/fetcher.js +12 -0
- package/dist/core/google-cache.d.ts +29 -0
- package/dist/core/google-cache.js +180 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/hotel-search.d.ts +122 -0
- package/dist/core/hotel-search.js +382 -0
- package/dist/core/http-fetch.d.ts +72 -0
- package/dist/core/http-fetch.js +820 -0
- package/dist/core/human.d.ts +175 -0
- package/dist/core/human.js +680 -0
- package/dist/core/image-caption.d.ts +44 -0
- package/dist/core/image-caption.js +271 -0
- package/dist/core/jobs.d.ts +75 -0
- package/dist/core/jobs.js +634 -0
- package/dist/core/json-ld.d.ts +15 -0
- package/dist/core/json-ld.js +617 -0
- package/dist/core/language-detect.d.ts +18 -0
- package/dist/core/language-detect.js +135 -0
- package/dist/core/links.d.ts +10 -0
- package/dist/core/links.js +44 -0
- package/dist/core/llm-extract.d.ts +71 -0
- package/dist/core/llm-extract.js +507 -0
- package/dist/core/llm-provider.d.ts +100 -0
- package/dist/core/llm-provider.js +702 -0
- package/dist/core/local-search.d.ts +60 -0
- package/dist/core/local-search.js +308 -0
- package/dist/core/logger.d.ts +28 -0
- package/dist/core/logger.js +104 -0
- package/dist/core/map.d.ts +33 -0
- package/dist/core/map.js +127 -0
- package/dist/core/markdown.d.ts +92 -0
- package/dist/core/markdown.js +809 -0
- package/dist/core/metadata.d.ts +34 -0
- package/dist/core/metadata.js +422 -0
- package/dist/core/observe.d.ts +113 -0
- package/dist/core/observe.js +395 -0
- package/dist/core/ocr.d.ts +12 -0
- package/dist/core/ocr.js +33 -0
- package/dist/core/paginate.d.ts +31 -0
- package/dist/core/paginate.js +106 -0
- package/dist/core/pdf.d.ts +8 -0
- package/dist/core/pdf.js +25 -0
- package/dist/core/peel-tls.d.ts +25 -0
- package/dist/core/peel-tls.js +220 -0
- package/dist/core/pipeline.d.ts +132 -0
- package/dist/core/pipeline.js +1666 -0
- package/dist/core/profiles.d.ts +61 -0
- package/dist/core/profiles.js +350 -0
- package/dist/core/prompt-guard.d.ts +30 -0
- package/dist/core/prompt-guard.js +119 -0
- package/dist/core/proxy-config.d.ts +90 -0
- package/dist/core/proxy-config.js +172 -0
- package/dist/core/quick-answer.d.ts +53 -0
- package/dist/core/quick-answer.js +833 -0
- package/dist/core/rate-governor.d.ts +80 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/readability.d.ts +57 -0
- package/dist/core/readability.js +533 -0
- package/dist/core/research.d.ts +66 -0
- package/dist/core/research.js +270 -0
- package/dist/core/retry.d.ts +60 -0
- package/dist/core/retry.js +119 -0
- package/dist/core/safe-browsing.d.ts +30 -0
- package/dist/core/safe-browsing.js +206 -0
- package/dist/core/schema-extraction.d.ts +66 -0
- package/dist/core/schema-extraction.js +352 -0
- package/dist/core/schema-postprocess.d.ts +32 -0
- package/dist/core/schema-postprocess.js +469 -0
- package/dist/core/schema-templates.d.ts +19 -0
- package/dist/core/schema-templates.js +143 -0
- package/dist/core/screenshot.d.ts +224 -0
- package/dist/core/screenshot.js +207 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +243 -0
- package/dist/core/search-provider.js +1629 -0
- package/dist/core/searxng-provider.d.ts +35 -0
- package/dist/core/searxng-provider.js +105 -0
- package/dist/core/selective-evidence.d.ts +151 -0
- package/dist/core/selective-evidence.js +389 -0
- package/dist/core/site-search.d.ts +44 -0
- package/dist/core/site-search.js +252 -0
- package/dist/core/sitemap.d.ts +23 -0
- package/dist/core/sitemap.js +105 -0
- package/dist/core/source-credibility.d.ts +29 -0
- package/dist/core/source-credibility.js +584 -0
- package/dist/core/source-scoring.d.ts +166 -0
- package/dist/core/source-scoring.js +396 -0
- package/dist/core/stemmer.d.ts +38 -0
- package/dist/core/stemmer.js +509 -0
- package/dist/core/strategies.d.ts +104 -0
- package/dist/core/strategies.js +1044 -0
- package/dist/core/strategy-hooks.d.ts +145 -0
- package/dist/core/strategy-hooks.js +74 -0
- package/dist/core/structured-extract.d.ts +43 -0
- package/dist/core/structured-extract.js +550 -0
- package/dist/core/summarize.d.ts +17 -0
- package/dist/core/summarize.js +78 -0
- package/dist/core/synonyms.d.ts +42 -0
- package/dist/core/synonyms.js +184 -0
- package/dist/core/system-monitor.d.ts +61 -0
- package/dist/core/system-monitor.js +133 -0
- package/dist/core/table-format.d.ts +30 -0
- package/dist/core/table-format.js +146 -0
- package/dist/core/threat-feeds.d.ts +23 -0
- package/dist/core/threat-feeds.js +104 -0
- package/dist/core/timing.d.ts +21 -0
- package/dist/core/timing.js +33 -0
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/core/user-agents.d.ts +82 -0
- package/dist/core/user-agents.js +239 -0
- package/dist/core/vertical-search.d.ts +54 -0
- package/dist/core/vertical-search.js +158 -0
- package/dist/core/watch-manager.d.ts +175 -0
- package/dist/core/watch-manager.js +416 -0
- package/dist/core/watch.d.ts +101 -0
- package/dist/core/watch.js +389 -0
- package/dist/core/youtube.d.ts +130 -0
- package/dist/core/youtube.js +1175 -0
- package/dist/ee/challenge-re-export.d.ts +1 -0
- package/dist/ee/challenge-re-export.js +1 -0
- package/dist/ee/challenge-solver.d.ts +72 -0
- package/dist/ee/challenge-solver.js +720 -0
- package/dist/ee/domain-extractors.d.ts +8 -0
- package/dist/ee/domain-extractors.js +8 -0
- package/dist/ee/domain-intel.d.ts +16 -0
- package/dist/ee/domain-intel.js +133 -0
- package/dist/ee/extractors/allrecipes.d.ts +2 -0
- package/dist/ee/extractors/allrecipes.js +120 -0
- package/dist/ee/extractors/amazon.d.ts +2 -0
- package/dist/ee/extractors/amazon.js +78 -0
- package/dist/ee/extractors/arxiv.d.ts +2 -0
- package/dist/ee/extractors/arxiv.js +137 -0
- package/dist/ee/extractors/bestbuy.d.ts +2 -0
- package/dist/ee/extractors/bestbuy.js +78 -0
- package/dist/ee/extractors/carscom.d.ts +2 -0
- package/dist/ee/extractors/carscom.js +121 -0
- package/dist/ee/extractors/coingecko.d.ts +2 -0
- package/dist/ee/extractors/coingecko.js +134 -0
- package/dist/ee/extractors/craigslist.d.ts +2 -0
- package/dist/ee/extractors/craigslist.js +92 -0
- package/dist/ee/extractors/devto.d.ts +2 -0
- package/dist/ee/extractors/devto.js +135 -0
- package/dist/ee/extractors/ebay.d.ts +2 -0
- package/dist/ee/extractors/ebay.js +90 -0
- package/dist/ee/extractors/espn.d.ts +2 -0
- package/dist/ee/extractors/espn.js +260 -0
- package/dist/ee/extractors/etsy.d.ts +2 -0
- package/dist/ee/extractors/etsy.js +52 -0
- package/dist/ee/extractors/facebook.d.ts +2 -0
- package/dist/ee/extractors/facebook.js +46 -0
- package/dist/ee/extractors/github.d.ts +2 -0
- package/dist/ee/extractors/github.js +196 -0
- package/dist/ee/extractors/google-flights.d.ts +2 -0
- package/dist/ee/extractors/google-flights.js +176 -0
- package/dist/ee/extractors/hackernews.d.ts +2 -0
- package/dist/ee/extractors/hackernews.js +147 -0
- package/dist/ee/extractors/imdb.d.ts +2 -0
- package/dist/ee/extractors/imdb.js +172 -0
- package/dist/ee/extractors/index.d.ts +26 -0
- package/dist/ee/extractors/index.js +247 -0
- package/dist/ee/extractors/instagram.d.ts +2 -0
- package/dist/ee/extractors/instagram.js +102 -0
- package/dist/ee/extractors/kalshi.d.ts +2 -0
- package/dist/ee/extractors/kalshi.js +121 -0
- package/dist/ee/extractors/kayak-cars.d.ts +2 -0
- package/dist/ee/extractors/kayak-cars.js +270 -0
- package/dist/ee/extractors/linkedin.d.ts +2 -0
- package/dist/ee/extractors/linkedin.js +113 -0
- package/dist/ee/extractors/medium.d.ts +2 -0
- package/dist/ee/extractors/medium.js +130 -0
- package/dist/ee/extractors/news.d.ts +4 -0
- package/dist/ee/extractors/news.js +173 -0
- package/dist/ee/extractors/npm.d.ts +2 -0
- package/dist/ee/extractors/npm.js +86 -0
- package/dist/ee/extractors/pdf.d.ts +2 -0
- package/dist/ee/extractors/pdf.js +108 -0
- package/dist/ee/extractors/pinterest.d.ts +2 -0
- package/dist/ee/extractors/pinterest.js +34 -0
- package/dist/ee/extractors/polymarket.d.ts +2 -0
- package/dist/ee/extractors/polymarket.js +358 -0
- package/dist/ee/extractors/producthunt.d.ts +2 -0
- package/dist/ee/extractors/producthunt.js +88 -0
- package/dist/ee/extractors/pubmed.d.ts +2 -0
- package/dist/ee/extractors/pubmed.js +162 -0
- package/dist/ee/extractors/pypi.d.ts +2 -0
- package/dist/ee/extractors/pypi.js +80 -0
- package/dist/ee/extractors/reddit.d.ts +2 -0
- package/dist/ee/extractors/reddit.js +438 -0
- package/dist/ee/extractors/redfin.d.ts +2 -0
- package/dist/ee/extractors/redfin.js +156 -0
- package/dist/ee/extractors/semanticscholar.d.ts +2 -0
- package/dist/ee/extractors/semanticscholar.js +131 -0
- package/dist/ee/extractors/shared.d.ts +12 -0
- package/dist/ee/extractors/shared.js +76 -0
- package/dist/ee/extractors/soundcloud.d.ts +2 -0
- package/dist/ee/extractors/soundcloud.js +34 -0
- package/dist/ee/extractors/sportsbetting.d.ts +2 -0
- package/dist/ee/extractors/sportsbetting.js +37 -0
- package/dist/ee/extractors/spotify.d.ts +2 -0
- package/dist/ee/extractors/spotify.js +34 -0
- package/dist/ee/extractors/stackoverflow.d.ts +2 -0
- package/dist/ee/extractors/stackoverflow.js +61 -0
- package/dist/ee/extractors/substack.d.ts +2 -0
- package/dist/ee/extractors/substack.js +115 -0
- package/dist/ee/extractors/substackroot.d.ts +2 -0
- package/dist/ee/extractors/substackroot.js +46 -0
- package/dist/ee/extractors/tiktok.d.ts +2 -0
- package/dist/ee/extractors/tiktok.js +29 -0
- package/dist/ee/extractors/tradingview.d.ts +2 -0
- package/dist/ee/extractors/tradingview.js +182 -0
- package/dist/ee/extractors/twitch.d.ts +2 -0
- package/dist/ee/extractors/twitch.js +36 -0
- package/dist/ee/extractors/twitter.d.ts +2 -0
- package/dist/ee/extractors/twitter.js +327 -0
- package/dist/ee/extractors/types.d.ts +14 -0
- package/dist/ee/extractors/types.js +1 -0
- package/dist/ee/extractors/walmart.d.ts +2 -0
- package/dist/ee/extractors/walmart.js +50 -0
- package/dist/ee/extractors/weather.d.ts +2 -0
- package/dist/ee/extractors/weather.js +133 -0
- package/dist/ee/extractors/wikipedia.d.ts +4 -0
- package/dist/ee/extractors/wikipedia.js +235 -0
- package/dist/ee/extractors/yelp.d.ts +2 -0
- package/dist/ee/extractors/yelp.js +216 -0
- package/dist/ee/extractors/youtube.d.ts +2 -0
- package/dist/ee/extractors/youtube.js +189 -0
- package/dist/ee/extractors/zillow.d.ts +54 -0
- package/dist/ee/extractors/zillow.js +247 -0
- package/dist/ee/extractors-re-export.d.ts +1 -0
- package/dist/ee/extractors-re-export.js +1 -0
- package/dist/ee/premium-hooks.d.ts +20 -0
- package/dist/ee/premium-hooks.js +50 -0
- package/dist/ee/spa-detection.d.ts +2 -0
- package/dist/ee/spa-detection.js +2 -0
- package/dist/ee/stability.d.ts +4 -0
- package/dist/ee/stability.js +29 -0
- package/dist/ee/swr-cache.d.ts +14 -0
- package/dist/ee/swr-cache.js +34 -0
- package/dist/index.d.ts +143 -0
- package/dist/index.js +291 -0
- package/dist/integrations/index.d.ts +2 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/langchain.d.ts +64 -0
- package/dist/integrations/langchain.js +115 -0
- package/dist/integrations/llamaindex.d.ts +50 -0
- package/dist/integrations/llamaindex.js +91 -0
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +395 -0
- package/dist/mcp/handlers/extract.d.ts +7 -0
- package/dist/mcp/handlers/extract.js +135 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +63 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +40 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/observe.d.ts +8 -0
- package/dist/mcp/handlers/observe.js +37 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +78 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +7 -0
- package/dist/mcp/server.js +108 -0
- package/dist/mcp/smart-router.d.ts +23 -0
- package/dist/mcp/smart-router.js +178 -0
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +632 -0
- package/dist/server/auth-store.d.ts +28 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/bull-queues.d.ts +60 -0
- package/dist/server/bull-queues.js +90 -0
- package/dist/server/email-service.d.ts +55 -0
- package/dist/server/email-service.js +291 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/audit-log.d.ts +14 -0
- package/dist/server/middleware/audit-log.js +73 -0
- package/dist/server/middleware/auth.d.ts +35 -0
- package/dist/server/middleware/auth.js +225 -0
- package/dist/server/middleware/rate-limit.d.ts +50 -0
- package/dist/server/middleware/rate-limit.js +270 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +201 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +146 -0
- package/dist/server/pg-auth-store.js +576 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +79 -0
- package/dist/server/routes/admin-active.d.ts +7 -0
- package/dist/server/routes/admin-active.js +120 -0
- package/dist/server/routes/admin-stats.d.ts +7 -0
- package/dist/server/routes/admin-stats.js +176 -0
- package/dist/server/routes/agent.d.ts +24 -0
- package/dist/server/routes/agent.js +480 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +295 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cache-warm.d.ts +25 -0
- package/dist/server/routes/cache-warm.js +212 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/crawl.d.ts +13 -0
- package/dist/server/routes/crawl.js +287 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/deep-research.d.ts +11 -0
- package/dist/server/routes/deep-research.js +232 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +14 -0
- package/dist/server/routes/extract.js +325 -0
- package/dist/server/routes/feed.d.ts +15 -0
- package/dist/server/routes/feed.js +311 -0
- package/dist/server/routes/fetch-queue.d.ts +13 -0
- package/dist/server/routes/fetch-queue.js +357 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +1274 -0
- package/dist/server/routes/go.d.ts +14 -0
- package/dist/server/routes/go.js +81 -0
- package/dist/server/routes/health.d.ts +11 -0
- package/dist/server/routes/health.js +141 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +574 -0
- package/dist/server/routes/map.d.ts +11 -0
- package/dist/server/routes/map.js +116 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +197 -0
- package/dist/server/routes/metrics.d.ts +37 -0
- package/dist/server/routes/metrics.js +149 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/reader.d.ts +18 -0
- package/dist/server/routes/reader.js +192 -0
- package/dist/server/routes/research.d.ts +14 -0
- package/dist/server/routes/research.js +482 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +820 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +874 -0
- package/dist/server/routes/session.d.ts +17 -0
- package/dist/server/routes/session.js +548 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +102 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +72 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
- package/dist/server/routes/smart-search/handlers/general.js +717 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +1309 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +154 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +546 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +264 -0
- package/dist/server/routes/smart-search/llm.d.ts +16 -0
- package/dist/server/routes/smart-search/llm.js +70 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +81 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +20 -0
- package/dist/server/routes/smart-search/utils.js +146 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +296 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/usage.d.ts +9 -0
- package/dist/server/routes/usage.js +279 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1867 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +14 -0
- package/dist/server/sentry.js +104 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/dist/types.d.ts +552 -0
- package/dist/types.js +39 -0
- package/llms.txt +105 -0
- package/package.json +189 -0
|
@@ -0,0 +1,972 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WebPeel Deep Research
|
|
3
|
+
*
|
|
4
|
+
* Multi-step search agent that turns one question into a comprehensive,
|
|
5
|
+
* cited research report. Orchestrates:
|
|
6
|
+
*
|
|
7
|
+
* 1. Query Decomposition — LLM breaks question into 3-5 sub-queries
|
|
8
|
+
* 2. Parallel Multi-Search — All sub-queries across DDG + Stealth
|
|
9
|
+
* 3. Source Fetching — peel() on top results per sub-query
|
|
10
|
+
* 4. Relevance Scoring — BM25 against the original question
|
|
11
|
+
* 5. Gap Detection — LLM: "Is there enough info? What's missing?"
|
|
12
|
+
* 6. Re-Search Loop — Generate new queries if gaps found (max N rounds)
|
|
13
|
+
* 7. Synthesis — LLM generates final cited report
|
|
14
|
+
*/
|
|
15
|
+
import { peel } from '../index.js';
|
|
16
|
+
import { getSearchProvider } from './search-provider.js';
|
|
17
|
+
import { scoreBM25, splitIntoBlocks } from './bm25-filter.js';
|
|
18
|
+
import { selectEvidence, } from './selective-evidence.js';
|
|
19
|
+
import { callLLM, getDefaultLLMConfig, isFreeTierLimitError, } from './llm-provider.js';
|
|
20
|
+
import { sanitizeForLLM } from './prompt-guard.js';
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
// Helpers
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
/**
 * Constrain a number to the inclusive range [min, max].
 *
 * @param {number} n - Value to constrain.
 * @param {number} min - Lower bound.
 * @param {number} max - Upper bound (wins if min > max, because it is applied last).
 * @returns {number} `n` limited to the range.
 */
function clamp(n, min, max) {
    const raisedToMin = Math.max(n, min);
    return Math.min(raisedToMin, max);
}
|
|
27
|
+
/**
 * Shorten text to at most `maxChars` UTF-16 code units and append a
 * "[Truncated]" marker when anything was cut.
 *
 * The cut never separates a surrogate pair: if the last kept code unit is a
 * high surrogate, it is dropped as well so the result contains no lone
 * surrogate at the cut point.
 *
 * @param {string} text - Text to shorten.
 * @param {number} maxChars - Maximum number of code units to keep.
 * @returns {string} The original text when it already fits, otherwise the
 *   shortened text followed by "\n\n[Truncated]".
 */
function truncate(text, maxChars) {
    if (text.length <= maxChars) {
        return text;
    }
    let end = maxChars;
    if (end > 0) {
        const lastKept = text.charCodeAt(end - 1);
        // 0xD800-0xDBFF is the high-surrogate range; keeping one without its
        // low half would leave an unpaired surrogate in the output.
        if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
            end -= 1;
        }
    }
    return `${text.slice(0, end)}\n\n[Truncated]`;
}
|
|
32
|
+
/**
 * Reduce a URL to a comparison key of the form `host/path`.
 *
 * The key is the lowercased hostname without a leading "www." followed by the
 * pathname without trailing slashes. Scheme, port, query string and fragment
 * are not part of the key.
 *
 * Scheme-less input such as "www.example.com/page/" is retried with an
 * assumed "https://" prefix so that it yields the same key as its absolute
 * form. Input that still cannot be parsed falls back to plain string cleanup.
 *
 * @param {string} url - Absolute or scheme-less URL.
 * @returns {string} Normalised `host/path` key.
 */
function normalizeUrl(url) {
    const toKey = (parsed) => {
        const host = parsed.hostname.toLowerCase().replace(/^www\./, '');
        const path = parsed.pathname.replace(/\/+$/, '');
        return `${host}${path}`;
    };
    try {
        return toKey(new URL(url));
    }
    catch {
        // Not parseable as given; try the recovery paths below.
    }
    // Only assume a scheme when the input does not already carry one.
    const hasScheme = typeof url === 'string' && /^[a-z][a-z0-9+.-]*:\/\//i.test(url);
    if (typeof url === 'string' && !hasScheme) {
        try {
            return toKey(new URL(`https://${url}`));
        }
        catch {
            // Still unparseable; fall through to string cleanup.
        }
    }
    return url.toLowerCase().replace(/^https?:\/\/(www\.)?/, '').replace(/\/+$/, '');
}
|
|
43
|
+
/**
 * Extract the bare hostname (lowercased, no leading "www.") from a URL.
 *
 * When the input cannot be parsed by `URL` (for example because it has no
 * scheme), a best-effort string fallback is used: the scheme is removed, the
 * text is cut at the first "/", "?" or "#", and any userinfo, port and leading
 * "www." are stripped. The result is an empty string only when nothing is
 * left after that cleanup.
 *
 * @param {string} url - Absolute or scheme-less URL.
 * @returns {string} Hostname without "www.", or a best-effort guess.
 */
function extractDomain(url) {
    try {
        return new URL(url).hostname.toLowerCase().replace(/^www\./, '');
    }
    catch {
        const withoutScheme = url.toLowerCase().replace(/^https?:\/\//, '');
        // Everything up to the first path, query or fragment delimiter.
        const [authority] = withoutScheme.split(/[/?#]/, 1);
        return authority
            .replace(/^[^@]*@/, '')
            .replace(/:\d*$/, '')
            .replace(/^www\./, '');
    }
}
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
// Source Credibility
|
|
54
|
+
// ---------------------------------------------------------------------------
|
|
55
|
+
/** Hostname suffixes (TLDs) treated as high-authority sources. */
const OFFICIAL_TLDS = new Set([
    '.gov',
    '.edu',
    '.mil',
]);
/** Hostnames (without "www.") treated as high-authority sources. */
const OFFICIAL_HOSTNAMES = new Set([
    // Academic / research
    'arxiv.org',
    'scholar.google.com',
    'pubmed.ncbi.nlm.nih.gov',
    'ncbi.nlm.nih.gov',
    'jstor.org',
    'nature.com',
    'science.org',
    'cell.com',
    'nejm.org',
    'bmj.com',
    'thelancet.com',
    'plos.org',
    'springer.com',
    'elsevier.com',
    // International organisations
    'who.int',
    'un.org',
    'worldbank.org',
    'imf.org',
    'oecd.org',
    'europa.eu',
    // Official tech documentation
    'docs.python.org',
    'developer.mozilla.org',
    'nodejs.org',
    'rust-lang.org',
    'docs.microsoft.com',
    'learn.microsoft.com',
    'developer.apple.com',
    'developer.android.com',
    'php.net',
    'ruby-lang.org',
    'golang.org',
    'go.dev',
]);
|
|
69
|
+
/** Hostnames (without "www.") treated as verified, second-tier sources. */
const VERIFIED_HOSTNAMES = new Set([
    // Encyclopaedia / reference
    'wikipedia.org',
    'en.wikipedia.org',
    // Reputable news agencies
    'reuters.com',
    'apnews.com',
    'bbc.com',
    'bbc.co.uk',
    'nytimes.com',
    'washingtonpost.com',
    'theguardian.com',
    'economist.com',
    'ft.com',
    // Developer resources
    'github.com',
    'stackoverflow.com',
    'npmjs.com',
    'pypi.org',
    'crates.io',
    'docs.rs',
    'packagist.org',
    // Official cloud / vendor docs
    'docs.aws.amazon.com',
    'cloud.google.com',
    'docs.github.com',
    'azure.microsoft.com',
    'registry.terraform.io',
]);
|
|
82
|
+
/**
 * Assess the credibility of a source URL.
 *
 * The hostname is lowercased and stripped of a leading "www.", then checked
 * in this order:
 *   1. it ends with one of OFFICIAL_TLDS -> official;
 *   2. it, or one of its parent domains, is listed in OFFICIAL_HOSTNAMES or
 *      VERIFIED_HOSTNAMES. Candidates are tried from most to least specific,
 *      so "ec.europa.eu" matches "europa.eu" and "fr.wikipedia.org" matches
 *      "wikipedia.org", while an exact entry still wins over a parent entry;
 *   3. otherwise -> general.
 *
 * A URL that cannot be parsed is rated general.
 *
 * @param {string} url - Source URL to rate.
 * @returns {{tier: 'official' | 'verified' | 'general', stars: 3 | 2 | 1, label: string}}
 *   A fresh object on every call; `label` is the human-readable string for the
 *   synthesis prompt.
 */
export function getSourceCredibility(url) {
    const official = () => ({ tier: 'official', stars: 3, label: 'OFFICIAL SOURCE' });
    const verified = () => ({ tier: 'verified', stars: 2, label: 'VERIFIED' });
    const general = () => ({ tier: 'general', stars: 1, label: 'UNVERIFIED' });
    let hostname;
    try {
        hostname = new URL(url).hostname.toLowerCase().replace(/^www\./, '');
    }
    catch {
        return general();
    }
    // Check official TLDs
    for (const tld of OFFICIAL_TLDS) {
        if (hostname.endsWith(tld)) {
            return official();
        }
    }
    // Check the hostname itself, then each parent domain down to two labels,
    // so subdomains of a listed site inherit its rating.
    const labels = hostname.split('.');
    const lastStart = Math.max(labels.length - 2, 0);
    for (let start = 0; start <= lastStart; start += 1) {
        const candidate = labels.slice(start).join('.');
        if (OFFICIAL_HOSTNAMES.has(candidate)) {
            return official();
        }
        if (VERIFIED_HOSTNAMES.has(candidate)) {
            return verified();
        }
    }
    // Everything else
    return general();
}
|
|
114
|
+
/**
 * Render a three-character star rating for a credibility score.
 *
 * @param {number} stars - Credibility stars.
 * @returns {string} '★★★' for 3 or more, '★★☆' for 2 up to (not including) 3,
 *   and '★☆☆' for anything else.
 */
export function starsString(stars) {
    if (stars >= 3) {
        return '★★★';
    }
    return stars >= 2 ? '★★☆' : '★☆☆';
}
|
|
122
|
+
// ---------------------------------------------------------------------------
|
|
123
|
+
// LLM call with merged token tracking
|
|
124
|
+
// ---------------------------------------------------------------------------
|
|
125
|
+
/**
 * Call the LLM through `callLLM` and add the reported token usage to a shared
 * accumulator.
 *
 * The request always uses temperature 0.3, and `maxTokens` defaults to 4096.
 * A response with missing or non-numeric `usage` counts as 0 tokens, so one
 * incomplete response cannot throw here or turn the running totals into NaN.
 *
 * @param {object} config - LLM configuration, passed through to `callLLM`.
 * @param {Array<object>} messages - Chat messages to send.
 * @param {{input: number, output: number}} tokenAccumulator - Running totals;
 *   mutated in place.
 * @param {object} [opts] - Optional `stream`, `onChunk`, `signal` and
 *   `maxTokens`, forwarded to `callLLM`.
 * @returns {Promise<string>} The `text` field of the LLM result.
 */
async function callWithTracking(config, messages, tokenAccumulator, opts = {}) {
    const result = await callLLM(config, {
        messages,
        stream: opts.stream,
        onChunk: opts.onChunk,
        signal: opts.signal,
        maxTokens: opts.maxTokens ?? 4096,
        temperature: 0.3,
    });
    const inputTokens = Number(result.usage?.input);
    const outputTokens = Number(result.usage?.output);
    tokenAccumulator.input += Number.isFinite(inputTokens) ? inputTokens : 0;
    tokenAccumulator.output += Number.isFinite(outputTokens) ? outputTokens : 0;
    return result.text;
}
|
|
138
|
+
// ---------------------------------------------------------------------------
|
|
139
|
+
// Step 1: Query Decomposition
|
|
140
|
+
// ---------------------------------------------------------------------------
|
|
141
|
+
/**
 * Ask the LLM to break a research question into focused search sub-queries.
 *
 * @param {string} question - The research question.
 * @param {object} config - LLM configuration.
 * @param {{input: number, output: number}} tokens - Token accumulator, updated by the LLM call.
 * @param {AbortSignal} [signal] - Optional cancellation signal.
 * @returns {Promise<string[]>} At most 6 queries: the original question first,
 *   followed by the generated sub-queries, de-duplicated case-insensitively.
 */
async function decomposeQuery(question, config, tokens, signal) {
    const systemPrompt = [
        'You are a research assistant that helps decompose complex questions.',
        'Given a research question, generate 3-5 specific search sub-queries that together would provide comprehensive coverage of the topic.',
        'Each sub-query should target a different aspect of the question.',
        'Output ONLY the sub-queries, one per line, no numbering, no explanation.',
    ].join('\n');
    const userPrompt = `Research question: "${question}"\n\nGenerate 3-5 focused search sub-queries:`;
    const reply = await callWithTracking(config, [
        { role: 'system', content: systemPrompt },
        { role: 'user', content: userPrompt },
    ], tokens, { signal, maxTokens: 500 });
    // Models sometimes number or bullet their lines despite the instructions.
    const stripListMarker = (line) => line
        .trim()
        .replace(/^\d+[.)]\s*/, '')
        .replace(/^[-*•]\s*/, '')
        .trim();
    // The original question always leads the list.
    const candidates = [question];
    for (const rawLine of reply.split('\n')) {
        const cleaned = stripListMarker(rawLine);
        if (cleaned.length > 5 && cleaned.length < 300) {
            candidates.push(cleaned);
        }
    }
    // Case-insensitive de-duplication, keeping the first occurrence.
    const seenKeys = new Set();
    const unique = candidates.filter((candidate) => {
        const key = candidate.toLowerCase();
        if (seenKeys.has(key)) {
            return false;
        }
        seenKeys.add(key);
        return true;
    });
    // 1 original + up to 5 generated
    return unique.slice(0, 6);
}
|
|
185
|
+
// ---------------------------------------------------------------------------
|
|
186
|
+
// Step 2: Parallel Multi-Search
|
|
187
|
+
// ---------------------------------------------------------------------------
|
|
188
|
+
/**
 * Run every query against the DuckDuckGo search provider in parallel.
 *
 * @param {string[]} queries - Search queries to run.
 * @param {AbortSignal} [signal] - Optional cancellation signal forwarded to the provider.
 * @returns {Promise<Map<string, Array>>} Map from query to its results. A query
 *   whose search fails maps to an empty array.
 */
async function searchAll(queries, signal) {
    // Best effort per query: a provider failure yields no results, not an error.
    const runQuery = async (query) => {
        let results;
        try {
            results = await getSearchProvider('duckduckgo').searchWeb(query, {
                count: 5,
                signal,
            });
        }
        catch {
            results = [];
        }
        return { query, results };
    };
    const outcomes = await Promise.allSettled(queries.map((query) => runQuery(query)));
    const resultsMap = new Map();
    for (const { status, value } of outcomes) {
        if (status === 'fulfilled') {
            resultsMap.set(value.query, value.results);
        }
    }
    return resultsMap;
}
|
|
214
|
+
// ---------------------------------------------------------------------------
|
|
215
|
+
// Step 3: Source Fetching
|
|
216
|
+
// ---------------------------------------------------------------------------
|
|
217
|
+
/**
 * Pick the top search results per sub-query and fetch their page content.
 *
 * Up to 3 not-yet-seen URLs are taken per sub-query (compared via normalizeUrl)
 * until `maxSources` candidates are queued. Pages are then fetched in parallel
 * batches of 5. When a fetch fails, the search snippet stands in for the content.
 *
 * @param {Map<string, Array>} searchResults - Results keyed by sub-query.
 * @param {number} maxSources - Cap on the number of sources queued for fetching.
 * @param {AbortSignal} [signal] - Checked before each batch; remaining batches are skipped once aborted.
 * @returns {Promise<Array>} Fetched sources carrying `result`, `content`, `subQuery`,
 *   a placeholder `relevanceScore` of 0 and a `credibility` rating.
 */
async function fetchSources(searchResults, maxSources, signal) {
    const PER_QUERY_LIMIT = 3;
    const BATCH_SIZE = 5;
    // ── Select candidates ────────────────────────────────────────────────────
    const claimedUrls = new Set();
    const queue = [];
    for (const [subQuery, results] of searchResults) {
        let taken = 0;
        for (const result of results) {
            if (taken >= PER_QUERY_LIMIT) {
                break;
            }
            const urlKey = normalizeUrl(result.url);
            if (claimedUrls.has(urlKey)) {
                continue;
            }
            claimedUrls.add(urlKey);
            queue.push({ result, subQuery });
            taken += 1;
            if (queue.length >= maxSources) {
                break;
            }
        }
        if (queue.length >= maxSources) {
            break;
        }
    }
    // ── Fetch in batches ─────────────────────────────────────────────────────
    const loadOne = async ({ result, subQuery }) => {
        let content;
        try {
            const page = await peel(result.url, {
                format: 'markdown',
                maxTokens: 2000,
                timeout: 25_000,
                render: false,
            });
            content = page.content || '';
        }
        catch {
            // Fetch failed: fall back to the search snippet.
            content = result.snippet || '';
        }
        return { result, content, subQuery };
    };
    const fetched = [];
    for (let start = 0; start < queue.length && !signal?.aborted; start += BATCH_SIZE) {
        const batch = queue.slice(start, start + BATCH_SIZE);
        const outcomes = await Promise.allSettled(batch.map((entry) => loadOne(entry)));
        for (const outcome of outcomes) {
            if (outcome.status !== 'fulfilled') {
                continue;
            }
            const loaded = outcome.value;
            fetched.push({
                ...loaded,
                relevanceScore: 0, // filled in by the scoring step
                credibility: getSourceCredibility(loaded.result.url),
            });
        }
    }
    return fetched;
}
|
|
276
|
+
// ---------------------------------------------------------------------------
|
|
277
|
+
// Step 4: Relevance Scoring
|
|
278
|
+
// ---------------------------------------------------------------------------
|
|
279
|
+
/**
 * Attach a 0-1 relevance score to each source.
 *
 * The question is lower-cased, stripped of punctuation and reduced to terms
 * longer than two characters. Each source's content is split into blocks, the
 * blocks are scored with BM25, and the block scores are averaged weighted by
 * block length. The per-term average is then squashed into [0, 1].
 *
 * @param {Array} sources - Fetched sources; not mutated.
 * @param {string} question - The research question.
 * @returns {Array} Copies of the sources with `relevanceScore` set. Sources with
 *   no content or no blocks (or any source when the question yields no terms) score 0.
 */
function scoreSources(sources, question) {
    const queryTerms = question
        .toLowerCase()
        .replace(/[^\w\s]/g, ' ')
        .split(/\s+/)
        .filter((term) => term.length > 2);
    const withScore = (source, relevanceScore) => ({ ...source, relevanceScore });
    return sources.map((source) => {
        const { content } = source;
        if (!content || queryTerms.length === 0) {
            return withScore(source, 0);
        }
        const blocks = splitIntoBlocks(content);
        if (blocks.length === 0) {
            return withScore(source, 0);
        }
        const scores = scoreBM25(blocks, queryTerms);
        // Longer blocks contribute proportionally more to the source score.
        const blockLens = blocks.map((block) => block.raw.length);
        const totalLen = blockLens.reduce((sum, len) => sum + len, 0) || 1;
        const weightedSum = Array.from(scores, (score, i) => score * (blockLens[i] / totalLen))
            .reduce((sum, part) => sum + part, 0);
        // Sigmoid shifted so that 0 maps to 0 and large values approach 1.
        const perTerm = weightedSum / (queryTerms.length || 1);
        const normalized = Math.max(0, Math.min(1, 2 / (1 + Math.exp(-perTerm * 8)) - 1));
        return withScore(source, normalized);
    });
}
|
|
308
|
+
/**
 * Decide whether the collected sources are sufficient to answer the question,
 * and surface gaps and factual conflicts.
 *
 * Two cheap heuristics run first when at least 3 sources are present:
 *   1. every source resolves to the same domain → more diverse sources are requested;
 *   2. the question asks for official/authoritative material but no source is
 *      rated 'official' → official sources are requested.
 * Otherwise the 8 most relevant sources are summarised and an LLM is asked for
 * a JSON verdict.
 *
 * The input `sources` array is never reordered or otherwise mutated.
 *
 * @param {string} question - The research question.
 * @param {Array} sources - Scored sources collected so far.
 * @param {object} config - LLM configuration.
 * @param {{input: number, output: number}} tokens - Token accumulator, updated by the LLM call.
 * @param {AbortSignal} [signal] - Optional cancellation signal.
 * @returns {Promise<{hasEnoughInfo: boolean, gaps: string[], additionalQueries: string[],
 *   conflicts: string[], confidence: 'high' | 'medium' | 'low'}>}
 *   When the LLM call fails or its reply cannot be parsed, the result assumes
 *   there is enough information (medium confidence).
 * @throws Re-throws errors recognised by isFreeTierLimitError.
 */
async function detectGaps(question, sources, config, tokens, signal) {
    // Verdict used whenever the LLM cannot be consulted or understood.
    const assumeSufficient = () => ({
        hasEnoughInfo: true,
        gaps: [],
        additionalQueries: [],
        conflicts: [],
        confidence: 'medium',
    });
    // ── Heuristic pre-checks (no LLM call needed) ──────────────────────────
    if (sources.length >= 3) {
        // Heuristic 1: All sources from the same domain → need diversity
        const domains = sources.map((s) => extractDomain(s.result.url));
        const uniqueDomains = new Set(domains.filter((d) => d.length > 0));
        if (uniqueDomains.size === 1) {
            const [soloDomain] = uniqueDomains;
            return {
                hasEnoughInfo: false,
                gaps: [
                    `All ${sources.length} sources are from the same domain (${soloDomain}). Diverse sources needed for reliable research.`,
                ],
                additionalQueries: [
                    `${question} alternative perspectives`,
                    `${question} overview explanation`,
                ],
                conflicts: [],
                confidence: 'low',
            };
        }
        // Heuristic 2: Question implies need for official docs but no official sources found
        const hasOfficialSource = sources.some((s) => (s.credibility || getSourceCredibility(s.result.url)).tier === 'official');
        const questionWantsOfficial = /\b(official|documentation|docs|policy|government|authority|academic|standards?|specification|rfc)\b/i.test(question);
        if (!hasOfficialSource && questionWantsOfficial) {
            return {
                hasEnoughInfo: false,
                gaps: ['No official or academic sources found. The question requires authoritative documentation.'],
                additionalQueries: [
                    `${question} site:.gov OR site:.edu`,
                    `${question} official documentation`,
                ],
                conflicts: [],
                confidence: 'low',
            };
        }
    }
    // ── LLM-based gap + conflict detection ─────────────────────────────────
    // Sort a copy: Array.prototype.sort works in place and `sources` belongs to the caller.
    const topSources = [...sources]
        .sort((a, b) => b.relevanceScore - a.relevanceScore)
        .slice(0, 8);
    const contextSummary = topSources
        .map((s, i) => {
            const snippet = truncate(s.content || s.result.snippet || '', 800);
            return `[${i + 1}] ${s.result.title}\nURL: ${s.result.url}\n${snippet}`;
        })
        .join('\n\n---\n\n');
    const messages = [
        {
            role: 'system',
            content: [
                'You are a research quality assessor. Given a question and the sources collected so far,',
                'determine if there is sufficient information to write a comprehensive answer.',
                'Also detect any factual conflicts between sources.',
                '',
                'Respond in this EXACT JSON format (no markdown, no code blocks):',
                '{',
                ' "hasEnoughInfo": boolean,',
                ' "gaps": ["gap1", "gap2"],',
                ' "additionalQueries": ["query1", "query2"],',
                ' "conflicts": ["Source A says X while Source B says Y"],',
                ' "confidence": "high" | "medium" | "low"',
                '}',
                '',
                '"gaps" should be 0-3 specific aspects not covered by the sources.',
                '"additionalQueries" should be 0-3 new search queries to fill those gaps.',
                '"conflicts" should be 0-3 factual disagreements found between sources.',
                '"confidence": high = consistent official sources, medium = mixed, low = conflicting or poor sources.',
                'If hasEnoughInfo is true, set gaps and additionalQueries to empty arrays.',
            ].join('\n'),
        },
        {
            role: 'user',
            content: `Question: "${question}"\n\nSources collected:\n\n${contextSummary}\n\nAnalyze coverage, gaps, and conflicts:`,
        },
    ];
    let text;
    try {
        text = await callWithTracking(config, messages, tokens, {
            signal,
            maxTokens: 700,
        });
    }
    catch (err) {
        if (isFreeTierLimitError(err))
            throw err;
        // On LLM failure, assume we have enough info
        return assumeSufficient();
    }
    // Parse JSON response
    try {
        const cleaned = text
            .replace(/```json\s*/gi, '')
            .replace(/```\s*/g, '')
            .trim();
        // Models often wrap the object in prose; keep only the outermost {...} span.
        const open = cleaned.indexOf('{');
        const close = cleaned.lastIndexOf('}');
        const payload = open !== -1 && close > open ? cleaned.slice(open, close + 1) : cleaned;
        const json = JSON.parse(payload);
        // Callers treat every entry as a string (e.g. lower-casing queries), so
        // drop anything else the model may have produced before capping at 3.
        const toStringList = (value) => (Array.isArray(value)
            ? value.filter((item) => typeof item === 'string' && item.trim().length > 0).slice(0, 3)
            : []);
        const confidence = String(json.confidence);
        return {
            hasEnoughInfo: Boolean(json.hasEnoughInfo),
            gaps: toStringList(json.gaps),
            additionalQueries: toStringList(json.additionalQueries),
            conflicts: toStringList(json.conflicts),
            confidence: ['high', 'medium', 'low'].includes(confidence) ? confidence : 'medium',
        };
    }
    catch {
        // Unusable reply: treat like an LLM failure rather than aborting the research run.
        return assumeSufficient();
    }
}
|
|
420
|
+
/**
 * Summarise source quality for the 'verification' progress event emitted
 * before synthesis.
 *
 * @param {Array} sources - Fetched sources; a missing `credibility` is derived from the URL.
 * @param {{confidence?: string, conflicts?: string[]}} [gapResult] - Optional gap
 *   detection result. Its confidence, when present, takes precedence over the
 *   value derived from the credibility mix.
 * @returns {{conflicts: string[], confidence: string, sourceDiversity: boolean,
 *   officialCount: number, verifiedCount: number, generalCount: number}}
 */
export function computeVerificationSummary(sources, gapResult) {
    const ratings = sources.map((s) => s.credibility || getSourceCredibility(s.result.url));
    const countTier = (tier) => ratings.filter((rating) => rating.tier === tier).length;
    const officialCount = countTier('official');
    const verifiedCount = countTier('verified');
    const generalCount = countTier('general');
    const total = sources.length || 1;
    // Diverse when there are at least 3 distinct domains, or one per source
    // when fewer than 3 sources exist.
    const distinctDomains = new Set(sources
        .map((s) => extractDomain(s.result.url))
        .filter((domain) => domain.length > 0));
    const sourceDiversity = distinctDomains.size >= Math.min(3, total);
    // Fallback confidence, derived from the share of official/verified sources.
    const confidenceFromQuality = () => {
        const highQualityRatio = (officialCount + verifiedCount) / total;
        if (officialCount >= 2 || highQualityRatio >= 0.5) {
            return 'high';
        }
        if (verifiedCount >= 1 || highQualityRatio >= 0.25) {
            return 'medium';
        }
        return 'low';
    };
    const confidence = gapResult?.confidence ? gapResult.confidence : confidenceFromQuality();
    const conflicts = gapResult?.conflicts ?? [];
    return { conflicts, confidence, sourceDiversity, officialCount, verifiedCount, generalCount };
}
|
|
453
|
+
/**
 * Compute a 0-100 quality score for the current research state.
 *
 * Dimensions:
 *   - Source diversity    (0-20): number of unique domains
 *   - Credibility mix     (0-25): weighted score from official/verified/general
 *   - Coverage breadth    (0-25): sub-queries with ≥2 relevant sources
 *   - Conflict resolution (0-15): whether detected conflicts were addressed
 *   - Recency             (0-15): share of sources mentioning the current or previous year
 *
 * @param {Array} sources - Scored sources; each needs `result.url`, `subQuery`
 *   and `relevanceScore`. A missing `credibility` is derived from the URL.
 * @param {string} _question - Unused; kept for interface compatibility.
 * @param {{conflicts?: string[], conflictsResolved?: string[]}} [gapResult] -
 *   Optional gap detection result. When omitted, no conflicts are assumed.
 * @returns {{score: number, breakdown: {sourceDiversity: number, credibilityMix: number,
 *   coverageBreadth: number, conflictResolution: number, recency: number},
 *   suggestions: string[]}}
 */
export function scoreResearchQuality(sources, _question, gapResult) {
    const suggestions = [];
    if (sources.length === 0) {
        return {
            score: 0,
            breakdown: {
                sourceDiversity: 0,
                credibilityMix: 0,
                coverageBreadth: 0,
                conflictResolution: 0,
                recency: 0,
            },
            suggestions: ['No sources found — try broader search queries.'],
        };
    }
    // ── Source diversity (0-20) ─────────────────────────────────────────────
    const domains = new Set(sources.map((s) => extractDomain(s.result.url)).filter((d) => d.length > 0));
    const uniqueDomainCount = domains.size;
    let sourceDiversity;
    if (uniqueDomainCount >= 5) {
        sourceDiversity = 20;
    }
    else if (uniqueDomainCount >= 4) {
        sourceDiversity = 16;
    }
    else if (uniqueDomainCount >= 3) {
        sourceDiversity = 12;
    }
    else if (uniqueDomainCount >= 2) {
        sourceDiversity = 8;
    }
    else {
        sourceDiversity = 5;
    }
    if (uniqueDomainCount < 3) {
        suggestions.push(`Low source diversity (${uniqueDomainCount} unique domains) — search for alternative perspectives.`);
    }
    // ── Credibility mix (0-25) ──────────────────────────────────────────────
    const credibilities = sources.map((s) => s.credibility || getSourceCredibility(s.result.url));
    const officialCount = credibilities.filter((c) => c.tier === 'official').length;
    const verifiedCount = credibilities.filter((c) => c.tier === 'verified').length;
    const generalCount = credibilities.filter((c) => c.tier === 'general').length;
    // Weighted score: official=25, verified=15, general=5, normalize to 0-25
    const rawCredScore = officialCount * 25 + verifiedCount * 15 + generalCount * 5;
    const maxPossibleCred = sources.length * 25;
    const credibilityMix = maxPossibleCred > 0
        ? Math.round((rawCredScore / maxPossibleCred) * 25)
        : 0;
    if (officialCount === 0) {
        suggestions.push('No official sources found — search for .gov, .edu, or official documentation.');
    }
    // ── Coverage breadth (0-25) ─────────────────────────────────────────────
    // Group sources by sub-query, count sub-queries with ≥2 relevant sources
    const subQueryMap = new Map();
    for (const s of sources) {
        if (s.relevanceScore > 0.3) {
            const key = s.subQuery.toLowerCase();
            subQueryMap.set(key, (subQueryMap.get(key) || 0) + 1);
        }
    }
    const allSubQueries = new Set(sources.map((s) => s.subQuery.toLowerCase()));
    const totalSubQueries = allSubQueries.size || 1;
    const coveredSubQueries = [...subQueryMap.values()].filter((count) => count >= 2).length;
    const coverageBreadth = Math.round((coveredSubQueries / totalSubQueries) * 25);
    const uncoveredCount = totalSubQueries - coveredSubQueries;
    if (uncoveredCount > 0) {
        suggestions.push(`${uncoveredCount} sub-queries lack sufficient relevant sources — consider targeted searches.`);
    }
    // ── Conflict resolution (0-15) ──────────────────────────────────────────
    // gapResult is optional: scoring before any gap detection has run must not throw.
    const conflicts = gapResult?.conflicts ?? [];
    const resolvedConflicts = gapResult?.conflictsResolved ?? [];
    let conflictResolution;
    if (conflicts.length === 0) {
        // No conflicts detected — neutral score
        conflictResolution = 10;
    }
    else if (resolvedConflicts.length >= conflicts.length) {
        // All conflicts addressed
        conflictResolution = 15;
    }
    else if (resolvedConflicts.length > 0) {
        // Some conflicts addressed
        conflictResolution = 10;
    }
    else {
        // Conflicts detected but none addressed
        conflictResolution = 5;
        suggestions.push(`${conflicts.length} source conflict(s) remain unresolved — search for fact-checking sources.`);
    }
    // ── Recency (0-15) ─────────────────────────────────────────────────────
    const currentYear = new Date().getFullYear();
    const recentYears = [currentYear, currentYear - 1].map(String);
    // A source counts as recent when its content, title or snippet mentions either year.
    const recentCount = sources.filter((s) => {
        const text = `${s.content || ''} ${s.result.title || ''} ${s.result.snippet || ''}`;
        return recentYears.some((year) => text.includes(year));
    }).length;
    const recentRatio = recentCount / sources.length;
    const recency = Math.round(recentRatio * 15);
    if (recentRatio < 0.3) {
        suggestions.push('Few recent sources found — consider adding date-specific search queries.');
    }
    const score = clamp(sourceDiversity + credibilityMix + coverageBreadth + conflictResolution + recency, 0, 100);
    return {
        score,
        breakdown: {
            sourceDiversity,
            credibilityMix,
            coverageBreadth,
            conflictResolution,
            recency,
        },
        suggestions,
    };
}
|
|
584
|
+
// ---------------------------------------------------------------------------
|
|
585
|
+
// Step 7: Synthesis
|
|
586
|
+
// ---------------------------------------------------------------------------
|
|
587
|
+
/**
 * Write the final research report from the collected sources.
 *
 * Sources are ranked by credibility tier (official, verified, general) and then
 * by relevance, and the top 15 are handed to selectEvidence. Only sources that
 * contributed at least one selected block appear in the prompt and in the
 * returned citations; they are numbered from 1 in ranked order.
 *
 * @param {string} question - The research question.
 * @param {Array} sources - Scored sources; a missing `credibility` is derived from the URL.
 * @param {object} config - LLM configuration.
 * @param {{input: number, output: number}} tokens - Token accumulator, updated by the LLM call.
 * @param {{stream?: boolean, onChunk?: Function, signal?: AbortSignal}} opts - Streaming and cancellation options.
 * @returns {Promise<{report: string, citations: Array<{index: number, title: string,
 *   url: string, snippet: string, relevanceScore: number}>}>}
 */
async function synthesizeReport(question, sources, config, tokens, opts) {
    const TIER_RANK = { official: 0, verified: 1, general: 2 };
    const rankOf = (source) => TIER_RANK[source.credibility.tier] ?? 2;
    const ranked = sources
        .map((s) => ({ ...s, credibility: s.credibility || getSourceCredibility(s.result.url) }))
        .sort((a, b) => {
            const tierDiff = rankOf(a) - rankOf(b);
            return tierDiff !== 0 ? tierDiff : b.relevanceScore - a.relevanceScore;
        })
        .slice(0, 15);
    // Pick the best evidence blocks across all ranked sources.
    const evidenceResult = selectEvidence({
        query: question,
        sources: ranked.map(({ result, content }) => ({
            url: result.url,
            title: result.title,
            content: content || result.snippet || '',
            snippet: result.snippet,
        })),
        maxBlocks: 20,
        maxChars: 12000,
    });
    // Bucket the selected block texts by the URL of the source they came from.
    const textsByUrl = new Map();
    for (const { sourceUrl, text } of evidenceResult.blocks) {
        const bucket = textsByUrl.get(sourceUrl);
        if (bucket) {
            bucket.push(text);
        }
        else {
            textsByUrl.set(sourceUrl, [text]);
        }
    }
    // Emit one prompt section and one citation per contributing source, in ranked order.
    const contextParts = [];
    const citations = [];
    for (const source of ranked) {
        const texts = textsByUrl.get(source.result.url);
        if (!texts?.length) {
            continue;
        }
        const index = citations.length + 1;
        const { result, credibility } = source;
        const stars = starsString(credibility.stars);
        const sanitized = sanitizeForLLM(texts.join('\n\n'));
        contextParts.push([
            `SOURCE [${index}] ${stars}`,
            `Title: ${result.title}`,
            `URL: ${result.url}`,
            `Credibility: ${credibility.label}`,
            '',
            sanitized.content,
        ].join('\n'));
        citations.push({
            index,
            title: result.title,
            url: result.url,
            snippet: result.snippet || '',
            relevanceScore: source.relevanceScore,
        });
    }
    const context = contextParts.join('\n\n---\n\n');
    const systemPrompt = [
        'You are a research analyst that writes comprehensive, well-cited reports.',
        'Each source is rated by credibility:',
        ' ★★★ = OFFICIAL SOURCE (government, academic, official docs) — highest authority',
        ' ★★☆ = VERIFIED (reputable news, Wikipedia, major developer platforms)',
        ' ★☆☆ = UNVERIFIED (blogs, forums, unknown sites) — use with caution',
        '',
        'Rules:',
        ' - Prioritize official sources [★★★] over unverified ones [★☆☆]',
        ' - If sources disagree, note the conflict and trust the higher-credibility source',
        ' - Cite every factual claim with [1], [2], etc.',
        ' - Use ONLY the provided sources — do not fabricate information or citations',
        ' - Structure your report with:',
        ' • Executive Summary',
        ' • Key Findings (with citations)',
        ' • Detailed Analysis',
        ' • Conclusion',
        ' - End with: **Confidence: HIGH/MEDIUM/LOW** based on source quality and agreement',
    ].join('\n');
    const userPrompt = `Research question: "${question}"\n\nSources (ranked by credibility):\n\n${context}\n\nWrite a comprehensive research report with citations:`;
    const report = await callWithTracking(config, [
        { role: 'system', content: systemPrompt },
        { role: 'user', content: userPrompt },
    ], tokens, {
        stream: opts.stream,
        onChunk: opts.onChunk,
        signal: opts.signal,
        maxTokens: 4096,
    });
    return { report, citations };
}
|
|
686
|
+
// ---------------------------------------------------------------------------
|
|
687
|
+
// Main: runDeepResearch
|
|
688
|
+
// ---------------------------------------------------------------------------
|
|
689
|
+
/**
|
|
690
|
+
* Run a deep research session.
|
|
691
|
+
*
|
|
692
|
+
* Orchestrates query decomposition → multi-search → source fetching →
|
|
693
|
+
* relevance scoring → gap detection → re-search loop → synthesis.
|
|
694
|
+
*/
|
|
695
|
+
export async function runDeepResearch(req) {
|
|
696
|
+
const startTime = Date.now();
|
|
697
|
+
const question = (req.question || '').trim();
|
|
698
|
+
if (!question)
|
|
699
|
+
throw new Error('Missing or invalid "question"');
|
|
700
|
+
if (question.length > 5000)
|
|
701
|
+
throw new Error('Question too long (max 5000 characters)');
|
|
702
|
+
const maxRounds = clamp(req.maxRounds ?? 3, 1, 5);
|
|
703
|
+
const maxSources = clamp(req.maxSources ?? 20, 5, 30);
|
|
704
|
+
const config = req.llm ?? getDefaultLLMConfig();
|
|
705
|
+
const tokens = { input: 0, output: 0 };
|
|
706
|
+
let totalSearchQueries = 0;
|
|
707
|
+
let roundsCompleted = 0;
|
|
708
|
+
const progress = (event) => {
|
|
709
|
+
req.onProgress?.(event);
|
|
710
|
+
};
|
|
711
|
+
// ── Round tracking ────────────────────────────────────────────────────────
|
|
712
|
+
// All fetched sources across all rounds, deduplicated by URL
|
|
713
|
+
const allSources = [];
|
|
714
|
+
const seenUrls = new Set();
|
|
715
|
+
const usedQueries = new Set();
|
|
716
|
+
let lastGapResult;
|
|
717
|
+
let lastQualityScore;
|
|
718
|
+
// Track all conflicts found and resolved across rounds
|
|
719
|
+
const allConflictsFound = [];
|
|
720
|
+
const allConflictsResolved = [];
|
|
721
|
+
// ── Round 0..maxRounds ────────────────────────────────────────────────────
|
|
722
|
+
let currentQueries = [];
|
|
723
|
+
for (let round = 0; round < maxRounds; round++) {
|
|
724
|
+
if (req.signal?.aborted)
|
|
725
|
+
break;
|
|
726
|
+
if (round === 0) {
|
|
727
|
+
// Step 1: Query Decomposition
|
|
728
|
+
progress({ type: 'decomposing', message: 'Decomposing question into sub-queries…', round });
|
|
729
|
+
try {
|
|
730
|
+
currentQueries = await decomposeQuery(question, config, tokens, req.signal);
|
|
731
|
+
}
|
|
732
|
+
catch (err) {
|
|
733
|
+
if (isFreeTierLimitError(err))
|
|
734
|
+
throw err;
|
|
735
|
+
// Fallback: just use the original question
|
|
736
|
+
currentQueries = [question];
|
|
737
|
+
}
|
|
738
|
+
}
|
|
739
|
+
// Filter out already-used queries
|
|
740
|
+
const newQueries = currentQueries.filter((q) => !usedQueries.has(q.toLowerCase()));
|
|
741
|
+
if (newQueries.length === 0)
|
|
742
|
+
break;
|
|
743
|
+
for (const q of newQueries) {
|
|
744
|
+
usedQueries.add(q.toLowerCase());
|
|
745
|
+
}
|
|
746
|
+
totalSearchQueries += newQueries.length;
|
|
747
|
+
// Step 2: Multi-Search
|
|
748
|
+
progress({
|
|
749
|
+
type: 'searching',
|
|
750
|
+
message: `Searching ${newQueries.length} queries (round ${round + 1})…`,
|
|
751
|
+
round,
|
|
752
|
+
data: { queries: newQueries },
|
|
753
|
+
});
|
|
754
|
+
const searchResults = await searchAll(newQueries, req.signal);
|
|
755
|
+
// Step 3: Source Fetching
|
|
756
|
+
const newResultCount = [...searchResults.values()].reduce((s, r) => s + r.length, 0);
|
|
757
|
+
progress({
|
|
758
|
+
type: 'fetching',
|
|
759
|
+
message: `Fetching content from up to ${Math.min(newResultCount, maxSources)} sources…`,
|
|
760
|
+
round,
|
|
761
|
+
});
|
|
762
|
+
const roundSources = await fetchSources(searchResults, maxSources, req.signal);
|
|
763
|
+
// Deduplicate against already-fetched sources
|
|
764
|
+
const newSources = roundSources.filter((s) => {
|
|
765
|
+
const key = normalizeUrl(s.result.url);
|
|
766
|
+
if (seenUrls.has(key))
|
|
767
|
+
return false;
|
|
768
|
+
seenUrls.add(key);
|
|
769
|
+
return true;
|
|
770
|
+
});
|
|
771
|
+
// Step 4: Relevance Scoring
|
|
772
|
+
progress({ type: 'scoring', message: 'Scoring source relevance…', round });
|
|
773
|
+
const scored = scoreSources(newSources, question);
|
|
774
|
+
// ── Quality-aware keep/discard logic ──────────────────────────────────
|
|
775
|
+
// Measure quality BEFORE adding new sources (baseline)
|
|
776
|
+
const preScore = allSources.length > 0 && lastGapResult
|
|
777
|
+
? scoreResearchQuality(allSources, question, lastGapResult).score
|
|
778
|
+
: 0;
|
|
779
|
+
// Tentatively add new sources
|
|
780
|
+
allSources.push(...scored);
|
|
781
|
+
roundsCompleted = round + 1;
|
|
782
|
+
// Don't do gap detection after the last round
|
|
783
|
+
if (round >= maxRounds - 1)
|
|
784
|
+
break;
|
|
785
|
+
// Step 5: Gap Detection
|
|
786
|
+
progress({
|
|
787
|
+
type: 'gap_check',
|
|
788
|
+
message: 'Checking research coverage for gaps…',
|
|
789
|
+
round,
|
|
790
|
+
});
|
|
791
|
+
let gapResult;
|
|
792
|
+
try {
|
|
793
|
+
gapResult = await detectGaps(question, allSources, config, tokens, req.signal);
|
|
794
|
+
}
|
|
795
|
+
catch (err) {
|
|
796
|
+
if (isFreeTierLimitError(err))
|
|
797
|
+
throw err;
|
|
798
|
+
break;
|
|
799
|
+
}
|
|
800
|
+
// Track conflicts across rounds
|
|
801
|
+
if (gapResult.conflicts && gapResult.conflicts.length > 0) {
|
|
802
|
+
for (const conflict of gapResult.conflicts) {
|
|
803
|
+
if (!allConflictsFound.includes(conflict)) {
|
|
804
|
+
allConflictsFound.push(conflict);
|
|
805
|
+
}
|
|
806
|
+
}
|
|
807
|
+
}
|
|
808
|
+
// Check if previous conflicts are now resolved (new sources address them)
|
|
809
|
+
if (round > 0 && lastGapResult?.conflicts) {
|
|
810
|
+
const previousConflicts = lastGapResult.conflicts;
|
|
811
|
+
const currentConflicts = gapResult.conflicts ?? [];
|
|
812
|
+
for (const prev of previousConflicts) {
|
|
813
|
+
// A conflict is "resolved" if it no longer appears in the current round's conflicts
|
|
814
|
+
if (!currentConflicts.includes(prev) && !allConflictsResolved.includes(prev)) {
|
|
815
|
+
allConflictsResolved.push(prev);
|
|
816
|
+
}
|
|
817
|
+
}
|
|
818
|
+
}
|
|
819
|
+
// Propagate resolved conflicts into gap result for quality scoring
|
|
820
|
+
gapResult.conflictsResolved = [...allConflictsResolved];
|
|
821
|
+
lastGapResult = gapResult;
|
|
822
|
+
// ── Quality scoring after gap detection ───────────────────────────────
|
|
823
|
+
const qualityResult = scoreResearchQuality(allSources, question, gapResult);
|
|
824
|
+
lastQualityScore = qualityResult;
|
|
825
|
+
// Emit quality_check progress event
|
|
826
|
+
progress({
|
|
827
|
+
type: 'quality_check',
|
|
828
|
+
message: `Round ${round + 1} quality: ${qualityResult.score}/100`,
|
|
829
|
+
round,
|
|
830
|
+
data: {
|
|
831
|
+
score: qualityResult.score,
|
|
832
|
+
breakdown: qualityResult.breakdown,
|
|
833
|
+
suggestions: qualityResult.suggestions,
|
|
834
|
+
},
|
|
835
|
+
});
|
|
836
|
+
// Keep/discard: if new sources DECREASED the quality score, discard them
|
|
837
|
+
if (round > 0 && preScore > 0 && qualityResult.score < preScore && scored.length > 0) {
|
|
838
|
+
// Remove the newly added sources
|
|
839
|
+
for (const s of scored) {
|
|
840
|
+
const idx = allSources.indexOf(s);
|
|
841
|
+
if (idx !== -1) {
|
|
842
|
+
allSources.splice(idx, 1);
|
|
843
|
+
// Also remove from seenUrls so they could be re-fetched later if needed
|
|
844
|
+
seenUrls.delete(normalizeUrl(s.result.url));
|
|
845
|
+
}
|
|
846
|
+
}
|
|
847
|
+
// Re-score without the discarded sources and use as the authoritative score
|
|
848
|
+
const reScored = scoreResearchQuality(allSources, question, gapResult);
|
|
849
|
+
lastQualityScore = reScored;
|
|
850
|
+
qualityResult.score = reScored.score;
|
|
851
|
+
qualityResult.breakdown = reScored.breakdown;
|
|
852
|
+
qualityResult.suggestions = reScored.suggestions;
|
|
853
|
+
}
|
|
854
|
+
// Early termination: score >= 85 AND hasEnoughInfo → stop
|
|
855
|
+
if (qualityResult.score >= 85 && gapResult.hasEnoughInfo) {
|
|
856
|
+
break;
|
|
857
|
+
}
|
|
858
|
+
if (gapResult.hasEnoughInfo || gapResult.additionalQueries.length === 0) {
|
|
859
|
+
break;
|
|
860
|
+
}
|
|
861
|
+
// Step 6: Re-Search Loop — combine gap queries with quality suggestions
|
|
862
|
+
// Generate conflict-specific fact-check queries
|
|
863
|
+
const conflictQueries = [];
|
|
864
|
+
if (gapResult.conflicts && gapResult.conflicts.length > 0) {
|
|
865
|
+
for (const conflict of gapResult.conflicts) {
|
|
866
|
+
// Extract the topic from the conflict description for a fact-check query
|
|
867
|
+
const shortConflict = conflict.length > 80 ? conflict.slice(0, 80) : conflict;
|
|
868
|
+
conflictQueries.push(`${question} fact check ${shortConflict}`);
|
|
869
|
+
}
|
|
870
|
+
}
|
|
871
|
+
// Merge: gap detection queries + quality suggestions + conflict queries (deduplicated)
|
|
872
|
+
const suggestionQueries = qualityResult.suggestions
|
|
873
|
+
.filter((s) => s.includes('\u2014'))
|
|
874
|
+
.map((s) => {
|
|
875
|
+
// Convert suggestion like "No official sources found — search for .gov..." into a search query
|
|
876
|
+
const afterDash = s.split('\u2014')[1]?.trim();
|
|
877
|
+
if (afterDash && afterDash.length > 10 && afterDash.length < 200) {
|
|
878
|
+
return `${question} ${afterDash.replace(/^search for\s*/i, '')}`;
|
|
879
|
+
}
|
|
880
|
+
return '';
|
|
881
|
+
})
|
|
882
|
+
.filter((q) => q.length > 0);
|
|
883
|
+
const allFollowUpQueries = [
|
|
884
|
+
...gapResult.additionalQueries,
|
|
885
|
+
...conflictQueries.slice(0, 2),
|
|
886
|
+
...suggestionQueries.slice(0, 2),
|
|
887
|
+
];
|
|
888
|
+
// Deduplicate
|
|
889
|
+
const seenQ = new Set();
|
|
890
|
+
const dedupedFollowUp = [];
|
|
891
|
+
for (const q of allFollowUpQueries) {
|
|
892
|
+
const key = q.toLowerCase();
|
|
893
|
+
if (!seenQ.has(key) && !usedQueries.has(key)) {
|
|
894
|
+
seenQ.add(key);
|
|
895
|
+
dedupedFollowUp.push(q);
|
|
896
|
+
}
|
|
897
|
+
}
|
|
898
|
+
if (dedupedFollowUp.length === 0)
|
|
899
|
+
break;
|
|
900
|
+
progress({
|
|
901
|
+
type: 'researching',
|
|
902
|
+
message: `Found ${dedupedFollowUp.length} gaps — searching more…`,
|
|
903
|
+
round,
|
|
904
|
+
data: { additionalQueries: dedupedFollowUp },
|
|
905
|
+
});
|
|
906
|
+
currentQueries = dedupedFollowUp;
|
|
907
|
+
}
|
|
908
|
+
// ── Final quality score (compute if not yet available) ────────────────────
|
|
909
|
+
const finalGap = lastGapResult ?? {
|
|
910
|
+
hasEnoughInfo: true,
|
|
911
|
+
gaps: [],
|
|
912
|
+
additionalQueries: [],
|
|
913
|
+
conflicts: [],
|
|
914
|
+
conflictsResolved: [...allConflictsResolved],
|
|
915
|
+
};
|
|
916
|
+
const finalQuality = lastQualityScore ?? scoreResearchQuality(allSources, question, finalGap);
|
|
917
|
+
// Verification summary (emitted before synthesis so streaming clients can show status)
|
|
918
|
+
const verifySummary = computeVerificationSummary(allSources, lastGapResult);
|
|
919
|
+
progress({
|
|
920
|
+
type: 'verification',
|
|
921
|
+
message: `Verification complete — confidence: ${verifySummary.confidence.toUpperCase()}`,
|
|
922
|
+
data: {
|
|
923
|
+
conflicts: verifySummary.conflicts,
|
|
924
|
+
confidence: verifySummary.confidence,
|
|
925
|
+
sourceDiversity: verifySummary.sourceDiversity,
|
|
926
|
+
officialCount: verifySummary.officialCount,
|
|
927
|
+
verifiedCount: verifySummary.verifiedCount,
|
|
928
|
+
generalCount: verifySummary.generalCount,
|
|
929
|
+
},
|
|
930
|
+
});
|
|
931
|
+
// Step 7: Synthesis
|
|
932
|
+
progress({ type: 'synthesizing', message: 'Synthesizing research report…' });
|
|
933
|
+
// Sort all sources by relevance for synthesis
|
|
934
|
+
const sortedSources = allSources.sort((a, b) => b.relevanceScore - a.relevanceScore);
|
|
935
|
+
const { report, citations } = await synthesizeReport(question, sortedSources, config, tokens, {
|
|
936
|
+
stream: req.stream,
|
|
937
|
+
onChunk: req.onChunk,
|
|
938
|
+
signal: req.signal,
|
|
939
|
+
});
|
|
940
|
+
// Quality floor warning: if score < 40, prepend a warning to the report
|
|
941
|
+
let finalReport = report;
|
|
942
|
+
if (finalQuality.score < 40) {
|
|
943
|
+
const warning = [
|
|
944
|
+
'> \u26A0\uFE0F **Low Research Quality Warning** (Score: ' + finalQuality.score + '/100)',
|
|
945
|
+
'> The sources gathered for this report may be insufficient, lack credibility,',
|
|
946
|
+
'> or have unresolved conflicts. Please verify key claims independently.',
|
|
947
|
+
'',
|
|
948
|
+
'',
|
|
949
|
+
].join('\n');
|
|
950
|
+
finalReport = warning + report;
|
|
951
|
+
}
|
|
952
|
+
const elapsed = Date.now() - startTime;
|
|
953
|
+
progress({
|
|
954
|
+
type: 'done',
|
|
955
|
+
message: `Research complete in ${(elapsed / 1000).toFixed(1)}s`,
|
|
956
|
+
data: { sourcesUsed: citations.length, roundsCompleted, totalSearchQueries },
|
|
957
|
+
});
|
|
958
|
+
return {
|
|
959
|
+
report: finalReport,
|
|
960
|
+
citations,
|
|
961
|
+
sourcesUsed: citations.length,
|
|
962
|
+
roundsCompleted,
|
|
963
|
+
totalSearchQueries,
|
|
964
|
+
llmProvider: config.provider,
|
|
965
|
+
tokensUsed: tokens,
|
|
966
|
+
elapsed,
|
|
967
|
+
qualityScore: finalQuality.score,
|
|
968
|
+
qualityBreakdown: finalQuality.breakdown,
|
|
969
|
+
conflictsFound: allConflictsFound.length > 0 ? allConflictsFound : undefined,
|
|
970
|
+
conflictsResolved: allConflictsResolved.length > 0 ? allConflictsResolved : undefined,
|
|
971
|
+
};
|
|
972
|
+
}
|