@iflow-mcp/jakeliume-webpeel 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +313 -0
- package/dist/cache.d.ts +30 -0
- package/dist/cache.js +139 -0
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +411 -0
- package/dist/cli/commands/doctor.d.ts +37 -0
- package/dist/cli/commands/doctor.js +371 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1345 -0
- package/dist/cli/commands/guide.d.ts +2 -0
- package/dist/cli/commands/guide.js +183 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +840 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/monitor.d.ts +12 -0
- package/dist/cli/commands/monitor.js +197 -0
- package/dist/cli/commands/observe.d.ts +12 -0
- package/dist/cli/commands/observe.js +158 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +282 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +1021 -0
- package/dist/cli/commands/setup.d.ts +13 -0
- package/dist/cli/commands/setup.js +244 -0
- package/dist/cli/commands/skill.d.ts +15 -0
- package/dist/cli/commands/skill.js +195 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +806 -0
- package/dist/cli-auth.d.ts +75 -0
- package/dist/cli-auth.js +369 -0
- package/dist/cli.d.ts +17 -0
- package/dist/cli.js +99 -0
- package/dist/core/actions.d.ts +69 -0
- package/dist/core/actions.js +495 -0
- package/dist/core/agent.d.ts +98 -0
- package/dist/core/agent.js +558 -0
- package/dist/core/answer.d.ts +42 -0
- package/dist/core/answer.js +395 -0
- package/dist/core/application-tracker.d.ts +84 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/apply.d.ts +162 -0
- package/dist/core/apply.js +816 -0
- package/dist/core/auth-detection.d.ts +35 -0
- package/dist/core/auth-detection.js +358 -0
- package/dist/core/auto-extract.d.ts +82 -0
- package/dist/core/auto-extract.js +604 -0
- package/dist/core/auto-interact.d.ts +23 -0
- package/dist/core/auto-interact.js +246 -0
- package/dist/core/bm25-filter.d.ts +66 -0
- package/dist/core/bm25-filter.js +288 -0
- package/dist/core/branding.d.ts +54 -0
- package/dist/core/branding.js +234 -0
- package/dist/core/browser-fetch.d.ts +323 -0
- package/dist/core/browser-fetch.js +1600 -0
- package/dist/core/browser-pool.d.ts +91 -0
- package/dist/core/browser-pool.js +550 -0
- package/dist/core/budget.d.ts +42 -0
- package/dist/core/budget.js +324 -0
- package/dist/core/business-intel.d.ts +47 -0
- package/dist/core/business-intel.js +279 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +121 -0
- package/dist/core/cf-worker-proxy.d.ts +32 -0
- package/dist/core/cf-worker-proxy.js +87 -0
- package/dist/core/challenge-detection.d.ts +26 -0
- package/dist/core/challenge-detection.js +468 -0
- package/dist/core/change-tracking.d.ts +75 -0
- package/dist/core/change-tracking.js +276 -0
- package/dist/core/chunker.d.ts +46 -0
- package/dist/core/chunker.js +249 -0
- package/dist/core/chunking.d.ts +42 -0
- package/dist/core/chunking.js +181 -0
- package/dist/core/circuit-breaker.d.ts +44 -0
- package/dist/core/circuit-breaker.js +85 -0
- package/dist/core/content-pruner.d.ts +47 -0
- package/dist/core/content-pruner.js +425 -0
- package/dist/core/cookie-cache.d.ts +60 -0
- package/dist/core/cookie-cache.js +163 -0
- package/dist/core/crawl-checkpoint.d.ts +54 -0
- package/dist/core/crawl-checkpoint.js +104 -0
- package/dist/core/crawler.d.ts +84 -0
- package/dist/core/crawler.js +349 -0
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/deep-fetch.d.ts +74 -0
- package/dist/core/deep-fetch.js +405 -0
- package/dist/core/deep-research.d.ts +141 -0
- package/dist/core/deep-research.js +972 -0
- package/dist/core/design-analysis.d.ts +70 -0
- package/dist/core/design-analysis.js +490 -0
- package/dist/core/design-compare.d.ts +38 -0
- package/dist/core/design-compare.js +264 -0
- package/dist/core/diff.d.ts +61 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/dns-cache.d.ts +20 -0
- package/dist/core/dns-cache.js +198 -0
- package/dist/core/documents.d.ts +23 -0
- package/dist/core/documents.js +123 -0
- package/dist/core/domain-memory.d.ts +66 -0
- package/dist/core/domain-memory.js +163 -0
- package/dist/core/domain-verify.d.ts +40 -0
- package/dist/core/domain-verify.js +379 -0
- package/dist/core/engine-ranker.d.ts +112 -0
- package/dist/core/engine-ranker.js +395 -0
- package/dist/core/extract-inline.d.ts +38 -0
- package/dist/core/extract-inline.js +215 -0
- package/dist/core/extract-listings.d.ts +38 -0
- package/dist/core/extract-listings.js +461 -0
- package/dist/core/extract.d.ts +9 -0
- package/dist/core/extract.js +139 -0
- package/dist/core/fetch-cache.d.ts +57 -0
- package/dist/core/fetch-cache.js +95 -0
- package/dist/core/fetcher.d.ts +13 -0
- package/dist/core/fetcher.js +12 -0
- package/dist/core/google-cache.d.ts +29 -0
- package/dist/core/google-cache.js +180 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/hotel-search.d.ts +122 -0
- package/dist/core/hotel-search.js +382 -0
- package/dist/core/http-fetch.d.ts +72 -0
- package/dist/core/http-fetch.js +820 -0
- package/dist/core/human.d.ts +175 -0
- package/dist/core/human.js +680 -0
- package/dist/core/image-caption.d.ts +44 -0
- package/dist/core/image-caption.js +271 -0
- package/dist/core/jobs.d.ts +75 -0
- package/dist/core/jobs.js +634 -0
- package/dist/core/json-ld.d.ts +15 -0
- package/dist/core/json-ld.js +617 -0
- package/dist/core/language-detect.d.ts +18 -0
- package/dist/core/language-detect.js +135 -0
- package/dist/core/links.d.ts +10 -0
- package/dist/core/links.js +44 -0
- package/dist/core/llm-extract.d.ts +71 -0
- package/dist/core/llm-extract.js +507 -0
- package/dist/core/llm-provider.d.ts +100 -0
- package/dist/core/llm-provider.js +702 -0
- package/dist/core/local-search.d.ts +60 -0
- package/dist/core/local-search.js +308 -0
- package/dist/core/logger.d.ts +28 -0
- package/dist/core/logger.js +104 -0
- package/dist/core/map.d.ts +33 -0
- package/dist/core/map.js +127 -0
- package/dist/core/markdown.d.ts +92 -0
- package/dist/core/markdown.js +809 -0
- package/dist/core/metadata.d.ts +34 -0
- package/dist/core/metadata.js +422 -0
- package/dist/core/observe.d.ts +113 -0
- package/dist/core/observe.js +395 -0
- package/dist/core/ocr.d.ts +12 -0
- package/dist/core/ocr.js +33 -0
- package/dist/core/paginate.d.ts +31 -0
- package/dist/core/paginate.js +106 -0
- package/dist/core/pdf.d.ts +8 -0
- package/dist/core/pdf.js +25 -0
- package/dist/core/peel-tls.d.ts +25 -0
- package/dist/core/peel-tls.js +220 -0
- package/dist/core/pipeline.d.ts +132 -0
- package/dist/core/pipeline.js +1666 -0
- package/dist/core/profiles.d.ts +61 -0
- package/dist/core/profiles.js +350 -0
- package/dist/core/prompt-guard.d.ts +30 -0
- package/dist/core/prompt-guard.js +119 -0
- package/dist/core/proxy-config.d.ts +90 -0
- package/dist/core/proxy-config.js +172 -0
- package/dist/core/quick-answer.d.ts +53 -0
- package/dist/core/quick-answer.js +833 -0
- package/dist/core/rate-governor.d.ts +80 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/readability.d.ts +57 -0
- package/dist/core/readability.js +533 -0
- package/dist/core/research.d.ts +66 -0
- package/dist/core/research.js +270 -0
- package/dist/core/retry.d.ts +60 -0
- package/dist/core/retry.js +119 -0
- package/dist/core/safe-browsing.d.ts +30 -0
- package/dist/core/safe-browsing.js +206 -0
- package/dist/core/schema-extraction.d.ts +66 -0
- package/dist/core/schema-extraction.js +352 -0
- package/dist/core/schema-postprocess.d.ts +32 -0
- package/dist/core/schema-postprocess.js +469 -0
- package/dist/core/schema-templates.d.ts +19 -0
- package/dist/core/schema-templates.js +143 -0
- package/dist/core/screenshot.d.ts +224 -0
- package/dist/core/screenshot.js +207 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +243 -0
- package/dist/core/search-provider.js +1629 -0
- package/dist/core/searxng-provider.d.ts +35 -0
- package/dist/core/searxng-provider.js +105 -0
- package/dist/core/selective-evidence.d.ts +151 -0
- package/dist/core/selective-evidence.js +389 -0
- package/dist/core/site-search.d.ts +44 -0
- package/dist/core/site-search.js +252 -0
- package/dist/core/sitemap.d.ts +23 -0
- package/dist/core/sitemap.js +105 -0
- package/dist/core/source-credibility.d.ts +29 -0
- package/dist/core/source-credibility.js +584 -0
- package/dist/core/source-scoring.d.ts +166 -0
- package/dist/core/source-scoring.js +396 -0
- package/dist/core/stemmer.d.ts +38 -0
- package/dist/core/stemmer.js +509 -0
- package/dist/core/strategies.d.ts +104 -0
- package/dist/core/strategies.js +1044 -0
- package/dist/core/strategy-hooks.d.ts +145 -0
- package/dist/core/strategy-hooks.js +74 -0
- package/dist/core/structured-extract.d.ts +43 -0
- package/dist/core/structured-extract.js +550 -0
- package/dist/core/summarize.d.ts +17 -0
- package/dist/core/summarize.js +78 -0
- package/dist/core/synonyms.d.ts +42 -0
- package/dist/core/synonyms.js +184 -0
- package/dist/core/system-monitor.d.ts +61 -0
- package/dist/core/system-monitor.js +133 -0
- package/dist/core/table-format.d.ts +30 -0
- package/dist/core/table-format.js +146 -0
- package/dist/core/threat-feeds.d.ts +23 -0
- package/dist/core/threat-feeds.js +104 -0
- package/dist/core/timing.d.ts +21 -0
- package/dist/core/timing.js +33 -0
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/core/user-agents.d.ts +82 -0
- package/dist/core/user-agents.js +239 -0
- package/dist/core/vertical-search.d.ts +54 -0
- package/dist/core/vertical-search.js +158 -0
- package/dist/core/watch-manager.d.ts +175 -0
- package/dist/core/watch-manager.js +416 -0
- package/dist/core/watch.d.ts +101 -0
- package/dist/core/watch.js +389 -0
- package/dist/core/youtube.d.ts +130 -0
- package/dist/core/youtube.js +1175 -0
- package/dist/ee/challenge-re-export.d.ts +1 -0
- package/dist/ee/challenge-re-export.js +1 -0
- package/dist/ee/challenge-solver.d.ts +72 -0
- package/dist/ee/challenge-solver.js +720 -0
- package/dist/ee/domain-extractors.d.ts +8 -0
- package/dist/ee/domain-extractors.js +8 -0
- package/dist/ee/domain-intel.d.ts +16 -0
- package/dist/ee/domain-intel.js +133 -0
- package/dist/ee/extractors/allrecipes.d.ts +2 -0
- package/dist/ee/extractors/allrecipes.js +120 -0
- package/dist/ee/extractors/amazon.d.ts +2 -0
- package/dist/ee/extractors/amazon.js +78 -0
- package/dist/ee/extractors/arxiv.d.ts +2 -0
- package/dist/ee/extractors/arxiv.js +137 -0
- package/dist/ee/extractors/bestbuy.d.ts +2 -0
- package/dist/ee/extractors/bestbuy.js +78 -0
- package/dist/ee/extractors/carscom.d.ts +2 -0
- package/dist/ee/extractors/carscom.js +121 -0
- package/dist/ee/extractors/coingecko.d.ts +2 -0
- package/dist/ee/extractors/coingecko.js +134 -0
- package/dist/ee/extractors/craigslist.d.ts +2 -0
- package/dist/ee/extractors/craigslist.js +92 -0
- package/dist/ee/extractors/devto.d.ts +2 -0
- package/dist/ee/extractors/devto.js +135 -0
- package/dist/ee/extractors/ebay.d.ts +2 -0
- package/dist/ee/extractors/ebay.js +90 -0
- package/dist/ee/extractors/espn.d.ts +2 -0
- package/dist/ee/extractors/espn.js +260 -0
- package/dist/ee/extractors/etsy.d.ts +2 -0
- package/dist/ee/extractors/etsy.js +52 -0
- package/dist/ee/extractors/facebook.d.ts +2 -0
- package/dist/ee/extractors/facebook.js +46 -0
- package/dist/ee/extractors/github.d.ts +2 -0
- package/dist/ee/extractors/github.js +196 -0
- package/dist/ee/extractors/google-flights.d.ts +2 -0
- package/dist/ee/extractors/google-flights.js +176 -0
- package/dist/ee/extractors/hackernews.d.ts +2 -0
- package/dist/ee/extractors/hackernews.js +147 -0
- package/dist/ee/extractors/imdb.d.ts +2 -0
- package/dist/ee/extractors/imdb.js +172 -0
- package/dist/ee/extractors/index.d.ts +26 -0
- package/dist/ee/extractors/index.js +247 -0
- package/dist/ee/extractors/instagram.d.ts +2 -0
- package/dist/ee/extractors/instagram.js +102 -0
- package/dist/ee/extractors/kalshi.d.ts +2 -0
- package/dist/ee/extractors/kalshi.js +121 -0
- package/dist/ee/extractors/kayak-cars.d.ts +2 -0
- package/dist/ee/extractors/kayak-cars.js +270 -0
- package/dist/ee/extractors/linkedin.d.ts +2 -0
- package/dist/ee/extractors/linkedin.js +113 -0
- package/dist/ee/extractors/medium.d.ts +2 -0
- package/dist/ee/extractors/medium.js +130 -0
- package/dist/ee/extractors/news.d.ts +4 -0
- package/dist/ee/extractors/news.js +173 -0
- package/dist/ee/extractors/npm.d.ts +2 -0
- package/dist/ee/extractors/npm.js +86 -0
- package/dist/ee/extractors/pdf.d.ts +2 -0
- package/dist/ee/extractors/pdf.js +108 -0
- package/dist/ee/extractors/pinterest.d.ts +2 -0
- package/dist/ee/extractors/pinterest.js +34 -0
- package/dist/ee/extractors/polymarket.d.ts +2 -0
- package/dist/ee/extractors/polymarket.js +358 -0
- package/dist/ee/extractors/producthunt.d.ts +2 -0
- package/dist/ee/extractors/producthunt.js +88 -0
- package/dist/ee/extractors/pubmed.d.ts +2 -0
- package/dist/ee/extractors/pubmed.js +162 -0
- package/dist/ee/extractors/pypi.d.ts +2 -0
- package/dist/ee/extractors/pypi.js +80 -0
- package/dist/ee/extractors/reddit.d.ts +2 -0
- package/dist/ee/extractors/reddit.js +438 -0
- package/dist/ee/extractors/redfin.d.ts +2 -0
- package/dist/ee/extractors/redfin.js +156 -0
- package/dist/ee/extractors/semanticscholar.d.ts +2 -0
- package/dist/ee/extractors/semanticscholar.js +131 -0
- package/dist/ee/extractors/shared.d.ts +12 -0
- package/dist/ee/extractors/shared.js +76 -0
- package/dist/ee/extractors/soundcloud.d.ts +2 -0
- package/dist/ee/extractors/soundcloud.js +34 -0
- package/dist/ee/extractors/sportsbetting.d.ts +2 -0
- package/dist/ee/extractors/sportsbetting.js +37 -0
- package/dist/ee/extractors/spotify.d.ts +2 -0
- package/dist/ee/extractors/spotify.js +34 -0
- package/dist/ee/extractors/stackoverflow.d.ts +2 -0
- package/dist/ee/extractors/stackoverflow.js +61 -0
- package/dist/ee/extractors/substack.d.ts +2 -0
- package/dist/ee/extractors/substack.js +115 -0
- package/dist/ee/extractors/substackroot.d.ts +2 -0
- package/dist/ee/extractors/substackroot.js +46 -0
- package/dist/ee/extractors/tiktok.d.ts +2 -0
- package/dist/ee/extractors/tiktok.js +29 -0
- package/dist/ee/extractors/tradingview.d.ts +2 -0
- package/dist/ee/extractors/tradingview.js +182 -0
- package/dist/ee/extractors/twitch.d.ts +2 -0
- package/dist/ee/extractors/twitch.js +36 -0
- package/dist/ee/extractors/twitter.d.ts +2 -0
- package/dist/ee/extractors/twitter.js +327 -0
- package/dist/ee/extractors/types.d.ts +14 -0
- package/dist/ee/extractors/types.js +1 -0
- package/dist/ee/extractors/walmart.d.ts +2 -0
- package/dist/ee/extractors/walmart.js +50 -0
- package/dist/ee/extractors/weather.d.ts +2 -0
- package/dist/ee/extractors/weather.js +133 -0
- package/dist/ee/extractors/wikipedia.d.ts +4 -0
- package/dist/ee/extractors/wikipedia.js +235 -0
- package/dist/ee/extractors/yelp.d.ts +2 -0
- package/dist/ee/extractors/yelp.js +216 -0
- package/dist/ee/extractors/youtube.d.ts +2 -0
- package/dist/ee/extractors/youtube.js +189 -0
- package/dist/ee/extractors/zillow.d.ts +54 -0
- package/dist/ee/extractors/zillow.js +247 -0
- package/dist/ee/extractors-re-export.d.ts +1 -0
- package/dist/ee/extractors-re-export.js +1 -0
- package/dist/ee/premium-hooks.d.ts +20 -0
- package/dist/ee/premium-hooks.js +50 -0
- package/dist/ee/spa-detection.d.ts +2 -0
- package/dist/ee/spa-detection.js +2 -0
- package/dist/ee/stability.d.ts +4 -0
- package/dist/ee/stability.js +29 -0
- package/dist/ee/swr-cache.d.ts +14 -0
- package/dist/ee/swr-cache.js +34 -0
- package/dist/index.d.ts +143 -0
- package/dist/index.js +291 -0
- package/dist/integrations/index.d.ts +2 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/langchain.d.ts +64 -0
- package/dist/integrations/langchain.js +115 -0
- package/dist/integrations/llamaindex.d.ts +50 -0
- package/dist/integrations/llamaindex.js +91 -0
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +395 -0
- package/dist/mcp/handlers/extract.d.ts +7 -0
- package/dist/mcp/handlers/extract.js +135 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +63 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +40 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/observe.d.ts +8 -0
- package/dist/mcp/handlers/observe.js +37 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +78 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +7 -0
- package/dist/mcp/server.js +108 -0
- package/dist/mcp/smart-router.d.ts +23 -0
- package/dist/mcp/smart-router.js +178 -0
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +632 -0
- package/dist/server/auth-store.d.ts +28 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/bull-queues.d.ts +60 -0
- package/dist/server/bull-queues.js +90 -0
- package/dist/server/email-service.d.ts +55 -0
- package/dist/server/email-service.js +291 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/audit-log.d.ts +14 -0
- package/dist/server/middleware/audit-log.js +73 -0
- package/dist/server/middleware/auth.d.ts +35 -0
- package/dist/server/middleware/auth.js +225 -0
- package/dist/server/middleware/rate-limit.d.ts +50 -0
- package/dist/server/middleware/rate-limit.js +270 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +201 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +146 -0
- package/dist/server/pg-auth-store.js +576 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +79 -0
- package/dist/server/routes/admin-active.d.ts +7 -0
- package/dist/server/routes/admin-active.js +120 -0
- package/dist/server/routes/admin-stats.d.ts +7 -0
- package/dist/server/routes/admin-stats.js +176 -0
- package/dist/server/routes/agent.d.ts +24 -0
- package/dist/server/routes/agent.js +480 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +295 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cache-warm.d.ts +25 -0
- package/dist/server/routes/cache-warm.js +212 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/crawl.d.ts +13 -0
- package/dist/server/routes/crawl.js +287 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/deep-research.d.ts +11 -0
- package/dist/server/routes/deep-research.js +232 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +14 -0
- package/dist/server/routes/extract.js +325 -0
- package/dist/server/routes/feed.d.ts +15 -0
- package/dist/server/routes/feed.js +311 -0
- package/dist/server/routes/fetch-queue.d.ts +13 -0
- package/dist/server/routes/fetch-queue.js +357 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +1274 -0
- package/dist/server/routes/go.d.ts +14 -0
- package/dist/server/routes/go.js +81 -0
- package/dist/server/routes/health.d.ts +11 -0
- package/dist/server/routes/health.js +141 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +574 -0
- package/dist/server/routes/map.d.ts +11 -0
- package/dist/server/routes/map.js +116 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +197 -0
- package/dist/server/routes/metrics.d.ts +37 -0
- package/dist/server/routes/metrics.js +149 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/reader.d.ts +18 -0
- package/dist/server/routes/reader.js +192 -0
- package/dist/server/routes/research.d.ts +14 -0
- package/dist/server/routes/research.js +482 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +820 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +874 -0
- package/dist/server/routes/session.d.ts +17 -0
- package/dist/server/routes/session.js +548 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +102 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +72 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
- package/dist/server/routes/smart-search/handlers/general.js +717 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +1309 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +154 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +546 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +264 -0
- package/dist/server/routes/smart-search/llm.d.ts +16 -0
- package/dist/server/routes/smart-search/llm.js +70 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +81 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +20 -0
- package/dist/server/routes/smart-search/utils.js +146 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +296 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/usage.d.ts +9 -0
- package/dist/server/routes/usage.js +279 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1867 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +14 -0
- package/dist/server/sentry.js +104 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/dist/types.d.ts +552 -0
- package/dist/types.js +39 -0
- package/llms.txt +105 -0
- package/package.json +189 -0
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cross-source verification — search multiple engines, compare results,
|
|
3
|
+
* compute consensus/confidence scores.
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Run the same query against several search engines in parallel and measure
 * how strongly their result lists agree.
 *
 * @param {string} query - Search text passed to every engine's `searchWeb`.
 * @param {{ engines?: string[], count?: number }} [options]
 *   `engines`: engine ids to query (default `['duckduckgo', 'google', 'baidu']`).
 *   `count`: results requested from, and kept for, each engine (default 10).
 * @returns {Promise<object>} An object with:
 *   - `query`: the query as given;
 *   - `sources`: one `{ engine, resultCount, topResults }` entry per engine
 *     (an engine that fails or times out reports zero results);
 *   - `consensus`: one entry per distinct normalized URL with `url` (the
 *     normalized key, not the original link), `title`, `appearsIn`,
 *     `agreementScore` (share of responding engines that listed it) and
 *     `averagePosition`, sorted by agreement, then by average position;
 *   - `confidence`: mean `agreementScore` of the top 5 consensus entries,
 *     rounded to two decimals;
 *   - `totalSources`: number of engines that returned at least one result;
 *   - `elapsed`: wall-clock milliseconds spent.
 */
export async function crossVerifySearch(query, options) {
    // A repeated engine id must not be counted as two independent sources.
    const engines = [...new Set(options?.engines || ['duckduckgo', 'google', 'baidu'])];
    const count = options?.count || 10;
    const t0 = Date.now();
    // Import providers dynamically to avoid circular deps
    const { getSearchProvider } = await import('./search-provider.js');
    const { BaiduSearchProvider, YandexSearchProvider, NaverSearchProvider, YahooJapanSearchProvider } = await import('./search-engines.js');
    // Engines with a dedicated provider class; every other id goes through getSearchProvider().
    const dedicatedProviders = new Map([
        ['baidu', BaiduSearchProvider],
        ['yandex', YandexSearchProvider],
        ['naver', NaverSearchProvider],
        ['yahoo_japan', YahooJapanSearchProvider],
    ]);
    // Query one engine; never rejects so a single bad engine cannot sink the whole run.
    const searchEngine = async (engineId) => {
        let timer;
        try {
            const Dedicated = dedicatedProviders.get(engineId);
            const provider = Dedicated ? new Dedicated() : getSearchProvider(engineId);
            const timeout = new Promise((_, reject) => {
                timer = setTimeout(() => reject(new Error('timeout')), 10000);
            });
            const results = await Promise.race([provider.searchWeb(query, { count }), timeout]);
            const list = Array.isArray(results) ? results : [];
            return { engine: engineId, resultCount: list.length, topResults: list.slice(0, count) };
        }
        catch {
            // Best effort: an unknown, failing or slow engine simply contributes nothing.
            return { engine: engineId, resultCount: 0, topResults: [] };
        }
        finally {
            // Without this the pending timer keeps the event loop alive for up to 10 s.
            clearTimeout(timer);
        }
    };
    // Search all engines in parallel
    const sources = await Promise.all(engines.map(searchEngine));
    // Build consensus — find URLs that appear across multiple engines
    const urlMap = new Map();
    for (const source of sources) {
        source.topResults.forEach((result, index) => {
            if (!result?.url) {
                return; // nothing to compare on
            }
            // Normalize URL for comparison (strip www, trailing slash, query params)
            const key = normalizeUrl(result.url);
            const entry = urlMap.get(key);
            if (!entry) {
                urlMap.set(key, {
                    title: result.title,
                    engines: [source.engine],
                    positions: [index + 1],
                });
                return;
            }
            if (entry.engines.includes(source.engine)) {
                // Same engine listed this page again (e.g. links differing only by
                // query string). Count the engine once, at its best rank, so
                // agreementScore can never exceed 1.
                return;
            }
            entry.engines.push(source.engine);
            entry.positions.push(index + 1);
        });
    }
    const activeSources = sources.filter((s) => s.resultCount > 0);
    // Sort by agreement (most engines first), then by average position
    const consensus = [...urlMap.entries()]
        .map(([url, data]) => ({
            url,
            title: data.title,
            appearsIn: data.engines,
            agreementScore: activeSources.length > 0
                ? data.engines.length / activeSources.length
                : 0,
            averagePosition: data.positions.reduce((a, b) => a + b, 0) / data.positions.length,
        }))
        .sort((a, b) => b.agreementScore - a.agreementScore || a.averagePosition - b.averagePosition);
    // Overall confidence = average agreement of top 5 results
    const top5Agreement = consensus.slice(0, 5);
    const confidence = top5Agreement.length > 0
        ? top5Agreement.reduce((sum, r) => sum + r.agreementScore, 0) / top5Agreement.length
        : 0;
    return {
        query,
        sources,
        consensus,
        confidence: Math.round(confidence * 100) / 100,
        totalSources: activeSources.length,
        elapsed: Date.now() - t0,
    };
}
|
|
85
|
+
/**
 * Reduce a result URL to a comparison key so the same page reported by
 * different engines collapses to a single entry.
 *
 * The key is the hostname without a leading "www." followed by the path
 * without trailing slashes. Scheme, port, query string and fragment are dropped.
 * Links that start with a dotted hostname but carry no scheme
 * ("www.example.com/a", "//example.com/a", "example.com:8080/a") are parsed
 * as if they were https so they match their fully qualified form.
 *
 * @param {string} url - URL as returned by a search engine.
 * @returns {string} The normalized key, or the input unchanged when it cannot
 *   be parsed as a URL.
 */
function normalizeUrl(url) {
    const trimmed = typeof url === 'string' ? url.trim() : url;
    // Optional "//", then a dotted hostname, then a URL delimiter or end of input.
    const looksSchemeless = typeof trimmed === 'string'
        && /^(?:\/\/)?[a-z\d-]+(?:\.[a-z\d-]+)+(?=[/:?#]|$)/i.test(trimmed);
    const candidate = looksSchemeless
        ? `https://${trimmed.replace(/^\/\//, '')}`
        : url;
    try {
        const parsed = new URL(candidate);
        return parsed.hostname.replace(/^www\./, '') + parsed.pathname.replace(/\/+$/, '');
    }
    catch {
        // Not a URL at all: fall back to the raw value as the key.
        return url;
    }
}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deep Fetch — Web Intelligence Module
|
|
3
|
+
*
|
|
4
|
+
* Transforms "search + fetch" into "search + fetch + synthesize + structure".
|
|
5
|
+
* No LLM required — pure heuristic signal extraction, BM25 relevance scoring,
|
|
6
|
+
* deduplication, entity/number extraction, and comparison detection.
|
|
7
|
+
*/
|
|
8
|
+
/** Input accepted by `deepFetch`. */
export interface DeepFetchOptions {
    /** Query text for the deep fetch. */
    query: string;
    /** How many sources to fetch. Default 5, capped at 10. */
    count?: number;
    /** Shape of the output. Defaults to 'merged'. */
    format?: 'merged' | 'structured' | 'comparison';
    /** Approximate character budget for the merged output. Default 32000 (about 8k tokens). */
    maxChars?: number;
    /** Lowest BM25 relevance score (0-1) a source needs to be included. Default 0.05. */
    relevanceThreshold?: number;
}
|
|
19
|
+
/** Per-source entry of `DeepFetchResult.sources`. */
export interface SourceResult {
    /** Address of the source. */
    url: string;
    /** Title reported for the source. */
    title: string;
    /** Relevance score of the source — presumably the BM25 score (0-1) that `relevanceThreshold` is compared against; confirm in deep-fetch.js. */
    relevanceScore: number;
    /** Key points taken from the source — presumably produced by `extractKeyPoints`; confirm in deep-fetch.js. */
    keyPoints: Array<string>;
    /** When the source was fetched, as a string — exact format not visible here (TODO confirm, likely ISO 8601). */
    fetchedAt: string;
}
|
|
26
|
+
/** Structured signals carried in `DeepFetchResult.structured`. */
export interface StructuredData {
    /** Fact statements. */
    facts: Array<string>;
    /** Entity names — same shape as the `extractEntities` return value. */
    entities: Array<string>;
    /** Date strings — same shape as the `extractDates` return value. */
    dates: Array<string>;
    /** String-keyed numeric findings — same shape as the `extractNumbers` return value. */
    numbers: {
        [key: string]: string;
    };
}
|
|
32
|
+
/** Table-shaped comparison, as returned by `buildComparisonTable`. */
export interface ComparisonData {
    /** Column labels of the table. */
    columns: Array<string>;
    /** Two-level string map holding the cells — NOTE(review): which level is the compared entity and which the column is not visible here; confirm in deep-fetch.js. */
    rows: {
        [key: string]: {
            [key: string]: string;
        };
    };
}
|
|
36
|
+
/** Value resolved by `deepFetch`. */
export interface DeepFetchResult {
    /** The query this result belongs to. */
    query: string;
    /** Output format of this result — presumably one of the `DeepFetchOptions.format` values; typed as a plain string. */
    format: string;
    /** One entry per source included in the result. */
    sources: Array<SourceResult>;
    /** Merged text output. */
    merged: string;
    /** Structured signals; optional. */
    structured?: StructuredData;
    /** Comparison table; optional. */
    comparison?: ComparisonData;
    /** Elapsed time — presumably milliseconds; confirm in deep-fetch.js. */
    elapsed: number;
}
|
|
45
|
+
/**
 * Pick at most `maxPoints` key points out of `content`, ordered by signal value.
 */
export declare function extractKeyPoints(content: string, query: string, maxPoints?: number): Array<string>;
/**
 * Remove near-duplicate sentences from a list.
 * Of two sentences that are more than 60% similar, the longer (more detailed) one is kept.
 */
export declare function deduplicateSentences(sentences: Array<string>, threshold?: number): Array<string>;
/** Pull numbers, prices and percentages out of text. */
export declare function extractNumbers(text: string): {
    [key: string]: string;
};
/** Pull dates out of text. */
export declare function extractDates(text: string): Array<string>;
/**
 * Collect named entities (proper nouns) that occur in at least 2 sources.
 * Heuristic: capitalized words/phrases that are not at the start of a sentence.
 */
export declare function extractEntities(texts: Array<string>): Array<string>;
/** Tell whether the query asks for a comparison. */
export declare function isComparisonQuery(query: string): boolean;
/**
 * Pull the things being compared out of a query.
 * Covers patterns such as "A vs B", "compare A and B", "A or B".
 */
export declare function extractComparedEntities(query: string): Array<string>;
/**
 * Assemble a comparison table from merged content and entity names.
 * May return `undefined`.
 */
export declare function buildComparisonTable(content: string, entities: Array<string>): ComparisonData | undefined;
/**
 * Entry point of the module: takes `DeepFetchOptions` and resolves to a `DeepFetchResult`.
 */
export declare function deepFetch(options: DeepFetchOptions): Promise<DeepFetchResult>;
|
|
@@ -0,0 +1,405 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deep Fetch — Web Intelligence Module
|
|
3
|
+
*
|
|
4
|
+
* Transforms "search + fetch" into "search + fetch + synthesize + structure".
|
|
5
|
+
* No LLM required — pure heuristic signal extraction, BM25 relevance scoring,
|
|
6
|
+
* deduplication, entity/number extraction, and comparison detection.
|
|
7
|
+
*/
|
|
8
|
+
import { peelBatch } from '../index.js';
|
|
9
|
+
import { computeRelevanceScore } from './bm25-filter.js';
|
|
10
|
+
import { getBestSearchProvider } from './search-provider.js';
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
// Key-point extraction
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
/**
 * Verbs that tend to mark a factual statement. `sentenceScore` awards a
 * bonus to a sentence that contains any of them.
 */
const SIGNAL_WORDS = new Set(`
    announced launched released costs requires supports
    offers provides includes features enables allows
    introduces reveals claims states reports shows
    found discovered improved updated deprecated removed
    increased decreased grew declined reached exceeded
`.trim().split(/\s+/));
|
|
21
|
+
/**
 * Split markdown-ish text into plain-text sentences (rough but fast — no NLP
 * library needed).
 *
 * Markdown is flattened first: fenced and inline code and images are dropped,
 * links are reduced to their label, heading markers and emphasis characters
 * are removed, and whitespace is collapsed. The text is then cut after
 * `.`, `!` or `?` when the next sentence starts with an uppercase letter
 * (any script, not only ASCII) or a quote.
 *
 * @param {string} text Raw page content.
 * @returns {string[]} Sentences longer than 20 and shorter than 500
 *   characters; an empty array for empty or non-string input.
 */
function splitSentences(text) {
    if (typeof text !== 'string' || text.length === 0)
        return [];
    // Strip markdown formatting
    const plain = text
        .replace(/```[\s\S]*?```/g, ' ')
        .replace(/`[^`]+`/g, ' ')
        .replace(/!\[.*?\]\(.*?\)/g, ' ')
        .replace(/\[([^\]]*)\]\([^)]*\)/g, '$1')
        .replace(/^#{1,6}\s+/gm, '')
        .replace(/[*_~`>|\\]/g, ' ')
        .replace(/\s+/g, ' ')
        .trim();
    // \p{Lu} instead of [A-Z] so sentences starting with e.g. "Ü" or "É" are
    // recognised as sentence starts too.
    return plain
        .split(/(?<=[.!?])\s+(?=[\p{Lu}"'])/u)
        .map(s => s.trim())
        .filter(s => s.length > 20 && s.length < 500);
}
|
|
39
|
+
/**
 * Collect the topic sentence of every paragraph.
 *
 * Paragraphs are separated by one or more blank lines; a paragraph that
 * yields no usable sentence contributes nothing.
 *
 * @param {string} content Raw page content.
 * @returns {string[]} The first sentence of each paragraph, in document order.
 */
function topicSentences(content) {
    return content
        .split(/\n{2,}/)
        .map(block => splitSentences(block)[0])
        .filter(lead => lead !== undefined);
}
|
|
51
|
+
/**
 * Score a sentence for "key point" worthiness. Higher = more useful.
 *
 * The score is the sum of:
 *  - up to 3 points for the share of query terms found in the sentence,
 *  - 0.5 per number / price / percentage, capped at 2,
 *  - 1 point if any signal verb (see SIGNAL_WORDS) is present,
 *  - 0.5 for a medium length (61-299 characters).
 *
 * @param {string} sentence Sentence to score.
 * @param {Set<string>} queryTerms Lower-cased query terms.
 * @returns {number} Non-negative score.
 */
function sentenceScore(sentence, queryTerms) {
    const lower = sentence.toLowerCase();
    let score = 0;
    // Query term overlap (substring match against the lower-cased sentence)
    let queryHits = 0;
    for (const term of queryTerms) {
        if (lower.includes(term))
            queryHits++;
    }
    score += (queryHits / Math.max(queryTerms.size, 1)) * 3;
    // Numbers / statistics
    const numberMatches = sentence.match(/\b\d[\d,.]*\b|\$[\d,.]+|[\d.]+%/g);
    if (numberMatches)
        score += Math.min(numberMatches.length * 0.5, 2);
    // Signal words: strip punctuation stuck to the token first, otherwise
    // "announced," or "launched." would never be found in the set.
    const hasSignalWord = lower
        .split(/\s+/)
        .some(word => SIGNAL_WORDS.has(word.replace(/^[^a-z]+|[^a-z]+$/g, '')));
    if (hasSignalWord)
        score += 1;
    // Prefer medium-length sentences
    if (sentence.length > 60 && sentence.length < 300)
        score += 0.5;
    return score;
}
|
|
79
|
+
/**
 * Extract up to `maxPoints` key points from content, ranked by signal value.
 *
 * Candidates are all sentences of the content plus the first sentence of
 * each paragraph (exact duplicates removed). Each candidate is scored with
 * `sentenceScore` and the best ones are returned.
 *
 * @param {string} content Raw page content (markdown or plain text).
 * @param {string} query Search query; terms of 3+ characters are matched.
 * @param {number} [maxPoints=5] Maximum number of sentences to return.
 * @returns {string[]} Sentences ordered from highest to lowest score.
 */
export function extractKeyPoints(content, query, maxPoints = 5) {
    // Trim punctuation off both ends of every term so that "deno?" or
    // "(react)" still match the sentence text; inner characters such as the
    // dot in "node.js" are kept.
    const queryTerms = new Set(query
        .toLowerCase()
        .split(/\s+/)
        .map(t => t.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, ''))
        .filter(t => t.length > 2));
    const allSentences = splitSentences(content);
    const topics = topicSentences(content);
    // Combine and deduplicate
    const candidates = [...new Set([...allSentences, ...topics])];
    // Score and sort (Array.prototype.sort is stable, so ties keep document order)
    const scored = candidates.map(s => ({ s, score: sentenceScore(s, queryTerms) }));
    scored.sort((a, b) => b.score - a.score);
    // A negative limit would make slice() count from the end; clamp it to 0.
    return scored.slice(0, Math.max(0, maxPoints)).map(x => x.s);
}
|
|
93
|
+
// ---------------------------------------------------------------------------
|
|
94
|
+
// Deduplication
|
|
95
|
+
// ---------------------------------------------------------------------------
|
|
96
|
+
/** Normalize a sentence for comparison: lower-case, no punctuation, single spaces. */
function normalize(s) {
    return s.toLowerCase().replace(/[^\w\s]/g, '').replace(/\s+/g, ' ').trim();
}
/**
 * Jaccard similarity on word sets (fast approximation).
 *
 * Only words longer than two characters take part. When neither sentence
 * has such a word there is nothing to compare, so the sentences are treated
 * as similar only if their normalized text is identical — otherwise two
 * unrelated short sentences would be reported as duplicates.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} Similarity in the range 0..1.
 */
function similarity(a, b) {
    const normA = normalize(a);
    const normB = normalize(b);
    const wa = new Set(normA.split(' ').filter(w => w.length > 2));
    const wb = new Set(normB.split(' ').filter(w => w.length > 2));
    if (wa.size === 0 && wb.size === 0)
        return normA === normB ? 1 : 0;
    let intersection = 0;
    for (const w of wa) {
        if (wb.has(w))
            intersection++;
    }
    const union = wa.size + wb.size - intersection;
    return union === 0 ? 0 : intersection / union;
}
/**
 * Deduplicate a list of sentences.
 * When two sentences reach the similarity threshold, keep the longer (more
 * detailed) one, in the position of the first of the two.
 *
 * @param {string[]} sentences Candidate sentences.
 * @param {number} [threshold=0.6] Similarity at or above which two sentences
 *   are considered duplicates.
 * @returns {string[]} The sentences that were kept.
 */
export function deduplicateSentences(sentences, threshold = 0.6) {
    const kept = [];
    for (const candidate of sentences) {
        const match = kept.findIndex(existing => similarity(candidate, existing) >= threshold);
        if (match === -1) {
            kept.push(candidate);
        }
        else if (candidate.length > kept[match].length) {
            // Keep the longer one
            kept[match] = candidate;
        }
    }
    return kept;
}
|
|
139
|
+
// ---------------------------------------------------------------------------
|
|
140
|
+
// Number / entity extraction
|
|
141
|
+
// ---------------------------------------------------------------------------
|
|
142
|
+
// Currency symbol, amount with optional thousands groups and decimals, and an
// optional billing suffix. Commas are only accepted between digit groups so a
// sentence comma ("$10, or ...") is not captured as part of the price.
const PRICE_RE = /(?:\$|€|£|¥)\s?\d+(?:,\d+)*(?:\.\d+)?(?:\s?(?:\/mo(?:nth)?|\/yr|\/year|\/user))?/gi;
const PERCENT_RE = /\d+(?:\.\d+)?\s?%/g;
// Magnitude word or suffix followed by an optional "+" and noun. The \b after
// the unit stops the one-letter suffixes from matching the start of a longer
// word ("5 meters", "2 km", "3 books").
const COUNT_RE = /\d+(?:\.\d+)?\s?(?:million|billion|thousand|M|B|K)\b\s?\+?(?:\s?(?:users?|customers?|downloads?|installs?))?/gi;
const DATE_RE = /(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4}|\d{4}-\d{2}-\d{2}|Q[1-4]\s+\d{4}/g;
/**
 * Extract numbers, prices, percentages from text.
 *
 * @param {string} text Text to scan.
 * @returns {Record<string, string>} Up to five matches per category, keyed
 *   `price_N`, `percent_N` and `count_N` (N starting at 1). Empty object
 *   when nothing matches or `text` is not a string.
 */
export function extractNumbers(text) {
    const result = {};
    if (typeof text !== 'string')
        return result;
    const categories = [
        ['price', PRICE_RE],
        ['percent', PERCENT_RE],
        ['count', COUNT_RE],
    ];
    for (const [label, pattern] of categories) {
        // String.prototype.match with a /g regex always scans from index 0,
        // so sharing the module-level patterns is safe.
        const found = text.match(pattern) ?? [];
        found.slice(0, 5).forEach((value, i) => {
            result[`${label}_${i + 1}`] = value.trim();
        });
    }
    return result;
}
|
|
169
|
+
/**
 * Extract dates from text.
 *
 * @param {string} text Text to scan with DATE_RE.
 * @returns {string[]} At most ten distinct matches, in order of first appearance.
 */
export function extractDates(text) {
    const found = text.match(DATE_RE) ?? [];
    const distinct = new Set(found);
    return Array.from(distinct).slice(0, 10);
}
|
|
174
|
+
/**
 * Extract named entities (proper nouns) that appear in at least 2 sources.
 *
 * Simple heuristic: runs of capitalized words — standard proper nouns
 * (New York), CamelCase brands (PayPal, GitHub) and ALL-CAPS acronyms — are
 * collected per source. Leading stop words such as "The" are removed from a
 * run so that "The GitHub" at a sentence start counts as "GitHub"; terms
 * shorter than three characters (after that) and bare stop words are
 * ignored.
 *
 * @param {string[]} texts One text per source.
 * @returns {string[]} Up to 20 terms found in two or more sources, most
 *   widespread first.
 */
export function extractEntities(texts) {
    const ENTITY_RE = /\b([A-Z][A-Za-z0-9]*(?:[A-Z][a-z0-9]+)*(?:\s+[A-Z][A-Za-z0-9]*(?:[A-Z][a-z0-9]+)*)*)\b/g;
    const STOPWORDS = 'The|A|An|In|On|At|To|For|Of|And|Or|But|This|That|These|Those|It|He|She|They|We|You|I';
    const STOPWORD_RE = new RegExp(`^(?:${STOPWORDS})$`);
    const LEADING_STOPWORDS_RE = new RegExp(`^(?:(?:${STOPWORDS})\\s+)+`);
    const sourceCounts = new Map();
    for (const text of texts) {
        // Each term is counted once per source, however often it occurs there.
        const seen = new Set();
        for (const match of text.matchAll(ENTITY_RE)) {
            const term = match[1]
                .replace(LEADING_STOPWORDS_RE, '')
                // A phrase may be wrapped across lines; compare it in one canonical form.
                .replace(/\s+/g, ' ');
            if (term.length < 3 || STOPWORD_RE.test(term))
                continue;
            if (seen.has(term))
                continue;
            seen.add(term);
            sourceCounts.set(term, (sourceCounts.get(term) ?? 0) + 1);
        }
    }
    // Only return entities that appear in 2+ sources
    return [...sourceCounts.entries()]
        .filter(([, freq]) => freq >= 2)
        .sort((a, b) => b[1] - a[1])
        .slice(0, 20)
        .map(([term]) => term);
}
|
|
208
|
+
// ---------------------------------------------------------------------------
|
|
209
|
+
// Comparison mode
|
|
210
|
+
// ---------------------------------------------------------------------------
|
|
211
|
+
const COMPARISON_TRIGGERS = ['vs', 'versus', 'compare', 'comparison', 'difference', 'differences', 'alternative', 'alternatives'];
/**
 * Detect if the query is a comparison query.
 *
 * The query is matched case-insensitively against COMPARISON_TRIGGERS. The
 * two-letter trigger "vs" must stand alone as a word (so "devs" or "tvs" do
 * not count); the longer triggers match as substrings, which also accepts
 * inflections such as "compared".
 *
 * @param {string} query Search query.
 * @returns {boolean} True when a trigger is present.
 */
export function isComparisonQuery(query) {
    const lower = query.toLowerCase();
    return COMPARISON_TRIGGERS.some(trigger => trigger === 'vs'
        ? /\bvs\b/.test(lower)
        : lower.includes(trigger));
}
|
|
217
|
+
/**
 * Extract the entities being compared from a query.
 *
 * Handles, in this order: "A vs B" / "A versus B", "compare A and|to|with B"
 * and "difference(s) between A and B". Trailing punctuation ("Vue?") is
 * removed from both names, and for the "vs" form any leading filler words
 * ("compare", "difference(s)", "between") are removed as well.
 *
 * @param {string} query Search query.
 * @returns {string[]} The two entity names, or an empty array when no
 *   pattern matches or a name is empty after clean-up.
 */
export function extractComparedEntities(query) {
    const stripTrailingPunctuation = (raw) => raw.trim().replace(/[\s?!.,;:]+$/, '');
    // All filler words are removed, not just the first, so that
    // "difference between A" becomes "A" rather than "between A".
    const stripLeadingFiller = (raw) => raw.replace(/^(?:(?:compare|differences?|between)\s+)+/i, '').trim();
    const finish = (names) => (names.every(name => name.length > 0) ? names : []);
    const vsMatch = query.match(/(.+?)\s+(?:vs\.?|versus)\s+(.+)/i);
    if (vsMatch) {
        return finish([vsMatch[1], vsMatch[2]].map(e => stripLeadingFiller(stripTrailingPunctuation(e))));
    }
    const compareMatch = query.match(/compare\s+(.+?)\s+(?:and|to|with|vs)\s+(.+)/i);
    if (compareMatch) {
        return finish([compareMatch[1], compareMatch[2]].map(stripTrailingPunctuation));
    }
    const diffMatch = query.match(/difference(?:s)?\s+between\s+(.+?)\s+and\s+(.+)/i);
    if (diffMatch) {
        return finish([diffMatch[1], diffMatch[2]].map(stripTrailingPunctuation));
    }
    return [];
}
|
|
236
|
+
// Attribute columns of the comparison table. For each attribute the patterns
// are tried in order and capture group 1 of the first match is the value.
// Keyword patterns start with \b so that a keyword is not found inside a
// longer word ("icons" -> cons, "coffee" -> fee, "underscore" -> score).
const COMPARISON_ATTRIBUTES = [
    { name: 'price', patterns: [/\b(?:price|cost|pricing|fee|subscription)[:\s]+([^.\n]+)/i, /(\$[\d,.]+(?:\/\w+)?)/] },
    { name: 'features', patterns: [/\b(?:features?|capabilities|supports?|includes?)[:\s]+([^.\n]+)/i] },
    { name: 'pros', patterns: [/\b(?:pros?|advantages?|benefits?|strengths?)[:\s]+([^.\n]+)/i] },
    { name: 'cons', patterns: [/\b(?:cons?|disadvantages?|drawbacks?|weaknesses?|limitations?)[:\s]+([^.\n]+)/i] },
    { name: 'platform', patterns: [/\b(?:platform|works?\s+(?:on|with)|available\s+(?:on|for))[:\s]+([^.\n]+)/i] },
    { name: 'rating', patterns: [/\b(?:rating|score|stars?)[:\s]+([^.\n]+)/i, /(\d+(?:\.\d+)?\s*\/\s*\d+\s*(?:stars?))/i] },
];
/**
 * Build a comparison table from merged content and entity names.
 *
 * For every entity, the lines of `content` that mention it
 * (case-insensitively) are joined and searched for each attribute in
 * COMPARISON_ATTRIBUTES.
 *
 * @param {string} content Merged text of all sources.
 * @param {string[]} entities Names of the things being compared.
 * @returns {{columns: string[], rows: Record<string, Record<string, string>>} | undefined}
 *   `undefined` when fewer than two entities are given. Otherwise one row
 *   per entity with a value (trimmed, at most 120 characters) or 'N/A' for
 *   every column.
 */
export function buildComparisonTable(content, entities) {
    if (entities.length < 2)
        return undefined;
    const columns = COMPARISON_ATTRIBUTES.map(a => a.name);
    // Split and lower-case the content once instead of once per entity.
    const lines = content.split(/\n+/).map(text => ({ text, lower: text.toLowerCase() }));
    const rows = {};
    for (const entity of entities) {
        const needle = entity.toLowerCase();
        // Find lines mentioning this entity
        const entityText = lines
            .filter(line => line.lower.includes(needle))
            .map(line => line.text)
            .join(' ');
        const row = {};
        for (const { name, patterns } of COMPARISON_ATTRIBUTES) {
            let value = 'N/A';
            for (const pattern of patterns) {
                const captured = entityText.match(pattern)?.[1];
                if (captured) {
                    // First pattern with a capture decides, even if it trims to nothing.
                    value = captured.trim().slice(0, 120) || 'N/A';
                    break;
                }
            }
            row[name] = value;
        }
        rows[entity] = row;
    }
    return { columns, rows };
}
|
|
273
|
+
// ---------------------------------------------------------------------------
|
|
274
|
+
// Smart content merging
|
|
275
|
+
// ---------------------------------------------------------------------------
|
|
276
|
+
/**
 * Merge content from multiple sources intelligently:
 * - Sort by relevance (most relevant first)
 * - Add source attribution (a markdown heading linking to the page)
 * - Truncate to maxChars
 *
 * Pages without content are skipped. The first page that does not fit is
 * added in truncated form if more than 200 characters of its body can be
 * kept, and merging stops there.
 *
 * @param {Array<{title: string, url: string, content: string, relevanceScore: number}>} pages
 *   Scored pages; the array itself is not modified.
 * @param {number} maxChars Approximate upper bound for the merged length.
 * @returns {string} Concatenated markdown sections separated by `---`.
 */
function mergeContent(pages, maxChars) {
    // Sort most-relevant first (on a copy, so the caller's order is preserved)
    const sorted = [...pages].sort((a, b) => b.relevanceScore - a.relevanceScore);
    const parts = [];
    let totalChars = 0;
    for (const page of sorted) {
        if (!page.content)
            continue;
        const header = `## [${page.title}](${page.url})\n\n`;
        const section = `${header}${page.content}\n\n---\n\n`;
        if (totalChars + section.length <= maxChars) {
            parts.push(section);
            totalChars += section.length;
            continue;
        }
        // Budget exhausted: add a truncated version if enough room is left.
        // The extra 20 characters leave space for the "..." and separator.
        const remaining = maxChars - totalChars - header.length - 20;
        if (remaining > 200) {
            let excerpt = page.content.slice(0, remaining);
            // slice() counts UTF-16 code units; do not leave half of a
            // surrogate pair (e.g. a cut-through emoji) at the end.
            const lastUnit = excerpt.charCodeAt(excerpt.length - 1);
            if (lastUnit >= 0xd800 && lastUnit <= 0xdbff)
                excerpt = excerpt.slice(0, -1);
            parts.push(`${header}${excerpt}...\n\n---\n\n`);
        }
        break;
    }
    return parts.join('');
}
|
|
306
|
+
// ---------------------------------------------------------------------------
|
|
307
|
+
// Main deepFetch function
|
|
308
|
+
// ---------------------------------------------------------------------------
|
|
309
|
+
/**
 * Resolve with `promise`, or reject with "<label> timed out after <ms>ms" if
 * it has not settled within `ms` milliseconds.
 *
 * The timer is cleared as soon as the race is decided, so a completed call
 * does not keep the event loop (and thus the process) alive until the
 * deadline.
 */
function withTimeout(promise, ms, label) {
    let timer;
    const deadline = new Promise((_, reject) => {
        timer = setTimeout(() => reject(new Error(`${label} timed out after ${ms}ms`)), ms);
    });
    return Promise.race([promise, deadline]).finally(() => clearTimeout(timer));
}
/**
 * Search the web for `options.query`, fetch the top results and merge them
 * into one document, optionally with structured facts and a comparison
 * table.
 *
 * @param {object} options
 * @param {string} options.query Search query.
 * @param {number} [options.count=5] Sources to keep (clamped to 1..10).
 * @param {'merged'|'structured'|'comparison'} [options.format='merged']
 * @param {number} [options.maxChars=32000] Approximate limit for `merged`.
 * @param {number} [options.relevanceThreshold=0.05] Pages scoring below this
 *   are skipped once two pages have been kept.
 * @returns {Promise<object>} `{ query, format, sources, merged, elapsed }`
 *   plus `structured` / `comparison` when applicable.
 * @throws {Error} When the search takes longer than 30 s or the batch fetch
 *   longer than 120 s, or when the provider / fetcher rejects.
 */
export async function deepFetch(options) {
    const { query, count = 5, format = 'merged', maxChars = 32000, relevanceThreshold = 0.05, } = options;
    const startTime = Date.now();
    const safeCount = Math.min(Math.max(count, 1), 10);
    // ── Step 1: Search ────────────────────────────────────────────────────────
    const { provider, apiKey } = getBestSearchProvider();
    const searchResults = await withTimeout(
        provider.searchWeb(query, { count: safeCount + 2, apiKey }), // fetch extras, filter low-relevance
        30000, 'Search');
    const topResults = (Array.isArray(searchResults) ? searchResults : searchResults?.results ?? [])
        .slice(0, safeCount + 2);
    if (topResults.length === 0) {
        return {
            query,
            format,
            sources: [],
            merged: '',
            elapsed: Date.now() - startTime,
        };
    }
    // ── Step 2: Fetch all URLs in parallel ────────────────────────────────────
    // Drop results without a URL up front so that `fetchable`, `urls` and
    // `pages` share the same indices (the title fallback below relies on it).
    const fetchable = topResults.filter((r) => r.url);
    const urls = fetchable.map((r) => r.url);
    // NOTE(review): assumes peelBatch resolves to an array with one entry per
    // URL, in the same order, each exposing `content`, `title` and `error`
    // — confirm against peelBatch.
    const pages = await withTimeout(peelBatch(urls, { concurrency: 5, format: 'markdown' }), 120000, 'Batch fetch');
    // ── Step 3: Score pages and extract key points ────────────────────────────
    const scoredPages = [];
    for (let i = 0; i < pages.length; i++) {
        const page = pages[i];
        const url = urls[i];
        if (!page || page.error)
            continue;
        const content = page.content || '';
        const title = page.title || fetchable[i]?.title || url;
        const relevanceScore = computeRelevanceScore(content, query);
        if (relevanceScore < relevanceThreshold && scoredPages.length >= 2) {
            // Skip low-relevance if we already have enough
            continue;
        }
        scoredPages.push({
            url,
            title,
            content,
            relevanceScore,
            keyPoints: extractKeyPoints(content, query, 5),
            fetchedAt: new Date().toISOString(),
        });
        if (scoredPages.length >= safeCount)
            break;
    }
    // ── Step 4: Merge content ─────────────────────────────────────────────────
    const mergedContent = mergeContent(scoredPages, maxChars);
    // ── Step 5: Build sources list ────────────────────────────────────────────
    const sources = scoredPages.map(p => ({
        url: p.url,
        title: p.title,
        relevanceScore: Math.round(p.relevanceScore * 1000) / 1000,
        keyPoints: p.keyPoints,
        fetchedAt: p.fetchedAt,
    }));
    const result = {
        query,
        format,
        sources,
        merged: mergedContent,
        elapsed: 0, // filled in just before returning
    };
    // ── Step 6: Structured extraction (optional) ──────────────────────────────
    if (format === 'structured' || format === 'comparison') {
        result.structured = {
            facts: deduplicateSentences(scoredPages.flatMap(p => p.keyPoints)),
            entities: extractEntities(scoredPages.map(p => p.content)),
            dates: extractDates(mergedContent),
            numbers: extractNumbers(mergedContent),
        };
    }
    // ── Step 7: Comparison table (optional) ───────────────────────────────────
    if (format === 'comparison' || isComparisonQuery(query)) {
        const comparedEntities = extractComparedEntities(query);
        if (comparedEntities.length >= 2) {
            result.comparison = buildComparisonTable(mergedContent, comparedEntities);
        }
    }
    // Measured last so the extraction steps above are included.
    result.elapsed = Date.now() - startTime;
    return result;
}
|