@iflow-mcp/jakeliume-webpeel 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +313 -0
- package/dist/cache.d.ts +30 -0
- package/dist/cache.js +139 -0
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +411 -0
- package/dist/cli/commands/doctor.d.ts +37 -0
- package/dist/cli/commands/doctor.js +371 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1345 -0
- package/dist/cli/commands/guide.d.ts +2 -0
- package/dist/cli/commands/guide.js +183 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +840 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/monitor.d.ts +12 -0
- package/dist/cli/commands/monitor.js +197 -0
- package/dist/cli/commands/observe.d.ts +12 -0
- package/dist/cli/commands/observe.js +158 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +282 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +1021 -0
- package/dist/cli/commands/setup.d.ts +13 -0
- package/dist/cli/commands/setup.js +244 -0
- package/dist/cli/commands/skill.d.ts +15 -0
- package/dist/cli/commands/skill.js +195 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +806 -0
- package/dist/cli-auth.d.ts +75 -0
- package/dist/cli-auth.js +369 -0
- package/dist/cli.d.ts +17 -0
- package/dist/cli.js +99 -0
- package/dist/core/actions.d.ts +69 -0
- package/dist/core/actions.js +495 -0
- package/dist/core/agent.d.ts +98 -0
- package/dist/core/agent.js +558 -0
- package/dist/core/answer.d.ts +42 -0
- package/dist/core/answer.js +395 -0
- package/dist/core/application-tracker.d.ts +84 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/apply.d.ts +162 -0
- package/dist/core/apply.js +816 -0
- package/dist/core/auth-detection.d.ts +35 -0
- package/dist/core/auth-detection.js +358 -0
- package/dist/core/auto-extract.d.ts +82 -0
- package/dist/core/auto-extract.js +604 -0
- package/dist/core/auto-interact.d.ts +23 -0
- package/dist/core/auto-interact.js +246 -0
- package/dist/core/bm25-filter.d.ts +66 -0
- package/dist/core/bm25-filter.js +288 -0
- package/dist/core/branding.d.ts +54 -0
- package/dist/core/branding.js +234 -0
- package/dist/core/browser-fetch.d.ts +323 -0
- package/dist/core/browser-fetch.js +1600 -0
- package/dist/core/browser-pool.d.ts +91 -0
- package/dist/core/browser-pool.js +550 -0
- package/dist/core/budget.d.ts +42 -0
- package/dist/core/budget.js +324 -0
- package/dist/core/business-intel.d.ts +47 -0
- package/dist/core/business-intel.js +279 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +121 -0
- package/dist/core/cf-worker-proxy.d.ts +32 -0
- package/dist/core/cf-worker-proxy.js +87 -0
- package/dist/core/challenge-detection.d.ts +26 -0
- package/dist/core/challenge-detection.js +468 -0
- package/dist/core/change-tracking.d.ts +75 -0
- package/dist/core/change-tracking.js +276 -0
- package/dist/core/chunker.d.ts +46 -0
- package/dist/core/chunker.js +249 -0
- package/dist/core/chunking.d.ts +42 -0
- package/dist/core/chunking.js +181 -0
- package/dist/core/circuit-breaker.d.ts +44 -0
- package/dist/core/circuit-breaker.js +85 -0
- package/dist/core/content-pruner.d.ts +47 -0
- package/dist/core/content-pruner.js +425 -0
- package/dist/core/cookie-cache.d.ts +60 -0
- package/dist/core/cookie-cache.js +163 -0
- package/dist/core/crawl-checkpoint.d.ts +54 -0
- package/dist/core/crawl-checkpoint.js +104 -0
- package/dist/core/crawler.d.ts +84 -0
- package/dist/core/crawler.js +349 -0
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/deep-fetch.d.ts +74 -0
- package/dist/core/deep-fetch.js +405 -0
- package/dist/core/deep-research.d.ts +141 -0
- package/dist/core/deep-research.js +972 -0
- package/dist/core/design-analysis.d.ts +70 -0
- package/dist/core/design-analysis.js +490 -0
- package/dist/core/design-compare.d.ts +38 -0
- package/dist/core/design-compare.js +264 -0
- package/dist/core/diff.d.ts +61 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/dns-cache.d.ts +20 -0
- package/dist/core/dns-cache.js +198 -0
- package/dist/core/documents.d.ts +23 -0
- package/dist/core/documents.js +123 -0
- package/dist/core/domain-memory.d.ts +66 -0
- package/dist/core/domain-memory.js +163 -0
- package/dist/core/domain-verify.d.ts +40 -0
- package/dist/core/domain-verify.js +379 -0
- package/dist/core/engine-ranker.d.ts +112 -0
- package/dist/core/engine-ranker.js +395 -0
- package/dist/core/extract-inline.d.ts +38 -0
- package/dist/core/extract-inline.js +215 -0
- package/dist/core/extract-listings.d.ts +38 -0
- package/dist/core/extract-listings.js +461 -0
- package/dist/core/extract.d.ts +9 -0
- package/dist/core/extract.js +139 -0
- package/dist/core/fetch-cache.d.ts +57 -0
- package/dist/core/fetch-cache.js +95 -0
- package/dist/core/fetcher.d.ts +13 -0
- package/dist/core/fetcher.js +12 -0
- package/dist/core/google-cache.d.ts +29 -0
- package/dist/core/google-cache.js +180 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/hotel-search.d.ts +122 -0
- package/dist/core/hotel-search.js +382 -0
- package/dist/core/http-fetch.d.ts +72 -0
- package/dist/core/http-fetch.js +820 -0
- package/dist/core/human.d.ts +175 -0
- package/dist/core/human.js +680 -0
- package/dist/core/image-caption.d.ts +44 -0
- package/dist/core/image-caption.js +271 -0
- package/dist/core/jobs.d.ts +75 -0
- package/dist/core/jobs.js +634 -0
- package/dist/core/json-ld.d.ts +15 -0
- package/dist/core/json-ld.js +617 -0
- package/dist/core/language-detect.d.ts +18 -0
- package/dist/core/language-detect.js +135 -0
- package/dist/core/links.d.ts +10 -0
- package/dist/core/links.js +44 -0
- package/dist/core/llm-extract.d.ts +71 -0
- package/dist/core/llm-extract.js +507 -0
- package/dist/core/llm-provider.d.ts +100 -0
- package/dist/core/llm-provider.js +702 -0
- package/dist/core/local-search.d.ts +60 -0
- package/dist/core/local-search.js +308 -0
- package/dist/core/logger.d.ts +28 -0
- package/dist/core/logger.js +104 -0
- package/dist/core/map.d.ts +33 -0
- package/dist/core/map.js +127 -0
- package/dist/core/markdown.d.ts +92 -0
- package/dist/core/markdown.js +809 -0
- package/dist/core/metadata.d.ts +34 -0
- package/dist/core/metadata.js +422 -0
- package/dist/core/observe.d.ts +113 -0
- package/dist/core/observe.js +395 -0
- package/dist/core/ocr.d.ts +12 -0
- package/dist/core/ocr.js +33 -0
- package/dist/core/paginate.d.ts +31 -0
- package/dist/core/paginate.js +106 -0
- package/dist/core/pdf.d.ts +8 -0
- package/dist/core/pdf.js +25 -0
- package/dist/core/peel-tls.d.ts +25 -0
- package/dist/core/peel-tls.js +220 -0
- package/dist/core/pipeline.d.ts +132 -0
- package/dist/core/pipeline.js +1666 -0
- package/dist/core/profiles.d.ts +61 -0
- package/dist/core/profiles.js +350 -0
- package/dist/core/prompt-guard.d.ts +30 -0
- package/dist/core/prompt-guard.js +119 -0
- package/dist/core/proxy-config.d.ts +90 -0
- package/dist/core/proxy-config.js +172 -0
- package/dist/core/quick-answer.d.ts +53 -0
- package/dist/core/quick-answer.js +833 -0
- package/dist/core/rate-governor.d.ts +80 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/readability.d.ts +57 -0
- package/dist/core/readability.js +533 -0
- package/dist/core/research.d.ts +66 -0
- package/dist/core/research.js +270 -0
- package/dist/core/retry.d.ts +60 -0
- package/dist/core/retry.js +119 -0
- package/dist/core/safe-browsing.d.ts +30 -0
- package/dist/core/safe-browsing.js +206 -0
- package/dist/core/schema-extraction.d.ts +66 -0
- package/dist/core/schema-extraction.js +352 -0
- package/dist/core/schema-postprocess.d.ts +32 -0
- package/dist/core/schema-postprocess.js +469 -0
- package/dist/core/schema-templates.d.ts +19 -0
- package/dist/core/schema-templates.js +143 -0
- package/dist/core/screenshot.d.ts +224 -0
- package/dist/core/screenshot.js +207 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +243 -0
- package/dist/core/search-provider.js +1629 -0
- package/dist/core/searxng-provider.d.ts +35 -0
- package/dist/core/searxng-provider.js +105 -0
- package/dist/core/selective-evidence.d.ts +151 -0
- package/dist/core/selective-evidence.js +389 -0
- package/dist/core/site-search.d.ts +44 -0
- package/dist/core/site-search.js +252 -0
- package/dist/core/sitemap.d.ts +23 -0
- package/dist/core/sitemap.js +105 -0
- package/dist/core/source-credibility.d.ts +29 -0
- package/dist/core/source-credibility.js +584 -0
- package/dist/core/source-scoring.d.ts +166 -0
- package/dist/core/source-scoring.js +396 -0
- package/dist/core/stemmer.d.ts +38 -0
- package/dist/core/stemmer.js +509 -0
- package/dist/core/strategies.d.ts +104 -0
- package/dist/core/strategies.js +1044 -0
- package/dist/core/strategy-hooks.d.ts +145 -0
- package/dist/core/strategy-hooks.js +74 -0
- package/dist/core/structured-extract.d.ts +43 -0
- package/dist/core/structured-extract.js +550 -0
- package/dist/core/summarize.d.ts +17 -0
- package/dist/core/summarize.js +78 -0
- package/dist/core/synonyms.d.ts +42 -0
- package/dist/core/synonyms.js +184 -0
- package/dist/core/system-monitor.d.ts +61 -0
- package/dist/core/system-monitor.js +133 -0
- package/dist/core/table-format.d.ts +30 -0
- package/dist/core/table-format.js +146 -0
- package/dist/core/threat-feeds.d.ts +23 -0
- package/dist/core/threat-feeds.js +104 -0
- package/dist/core/timing.d.ts +21 -0
- package/dist/core/timing.js +33 -0
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/core/user-agents.d.ts +82 -0
- package/dist/core/user-agents.js +239 -0
- package/dist/core/vertical-search.d.ts +54 -0
- package/dist/core/vertical-search.js +158 -0
- package/dist/core/watch-manager.d.ts +175 -0
- package/dist/core/watch-manager.js +416 -0
- package/dist/core/watch.d.ts +101 -0
- package/dist/core/watch.js +389 -0
- package/dist/core/youtube.d.ts +130 -0
- package/dist/core/youtube.js +1175 -0
- package/dist/ee/challenge-re-export.d.ts +1 -0
- package/dist/ee/challenge-re-export.js +1 -0
- package/dist/ee/challenge-solver.d.ts +72 -0
- package/dist/ee/challenge-solver.js +720 -0
- package/dist/ee/domain-extractors.d.ts +8 -0
- package/dist/ee/domain-extractors.js +8 -0
- package/dist/ee/domain-intel.d.ts +16 -0
- package/dist/ee/domain-intel.js +133 -0
- package/dist/ee/extractors/allrecipes.d.ts +2 -0
- package/dist/ee/extractors/allrecipes.js +120 -0
- package/dist/ee/extractors/amazon.d.ts +2 -0
- package/dist/ee/extractors/amazon.js +78 -0
- package/dist/ee/extractors/arxiv.d.ts +2 -0
- package/dist/ee/extractors/arxiv.js +137 -0
- package/dist/ee/extractors/bestbuy.d.ts +2 -0
- package/dist/ee/extractors/bestbuy.js +78 -0
- package/dist/ee/extractors/carscom.d.ts +2 -0
- package/dist/ee/extractors/carscom.js +121 -0
- package/dist/ee/extractors/coingecko.d.ts +2 -0
- package/dist/ee/extractors/coingecko.js +134 -0
- package/dist/ee/extractors/craigslist.d.ts +2 -0
- package/dist/ee/extractors/craigslist.js +92 -0
- package/dist/ee/extractors/devto.d.ts +2 -0
- package/dist/ee/extractors/devto.js +135 -0
- package/dist/ee/extractors/ebay.d.ts +2 -0
- package/dist/ee/extractors/ebay.js +90 -0
- package/dist/ee/extractors/espn.d.ts +2 -0
- package/dist/ee/extractors/espn.js +260 -0
- package/dist/ee/extractors/etsy.d.ts +2 -0
- package/dist/ee/extractors/etsy.js +52 -0
- package/dist/ee/extractors/facebook.d.ts +2 -0
- package/dist/ee/extractors/facebook.js +46 -0
- package/dist/ee/extractors/github.d.ts +2 -0
- package/dist/ee/extractors/github.js +196 -0
- package/dist/ee/extractors/google-flights.d.ts +2 -0
- package/dist/ee/extractors/google-flights.js +176 -0
- package/dist/ee/extractors/hackernews.d.ts +2 -0
- package/dist/ee/extractors/hackernews.js +147 -0
- package/dist/ee/extractors/imdb.d.ts +2 -0
- package/dist/ee/extractors/imdb.js +172 -0
- package/dist/ee/extractors/index.d.ts +26 -0
- package/dist/ee/extractors/index.js +247 -0
- package/dist/ee/extractors/instagram.d.ts +2 -0
- package/dist/ee/extractors/instagram.js +102 -0
- package/dist/ee/extractors/kalshi.d.ts +2 -0
- package/dist/ee/extractors/kalshi.js +121 -0
- package/dist/ee/extractors/kayak-cars.d.ts +2 -0
- package/dist/ee/extractors/kayak-cars.js +270 -0
- package/dist/ee/extractors/linkedin.d.ts +2 -0
- package/dist/ee/extractors/linkedin.js +113 -0
- package/dist/ee/extractors/medium.d.ts +2 -0
- package/dist/ee/extractors/medium.js +130 -0
- package/dist/ee/extractors/news.d.ts +4 -0
- package/dist/ee/extractors/news.js +173 -0
- package/dist/ee/extractors/npm.d.ts +2 -0
- package/dist/ee/extractors/npm.js +86 -0
- package/dist/ee/extractors/pdf.d.ts +2 -0
- package/dist/ee/extractors/pdf.js +108 -0
- package/dist/ee/extractors/pinterest.d.ts +2 -0
- package/dist/ee/extractors/pinterest.js +34 -0
- package/dist/ee/extractors/polymarket.d.ts +2 -0
- package/dist/ee/extractors/polymarket.js +358 -0
- package/dist/ee/extractors/producthunt.d.ts +2 -0
- package/dist/ee/extractors/producthunt.js +88 -0
- package/dist/ee/extractors/pubmed.d.ts +2 -0
- package/dist/ee/extractors/pubmed.js +162 -0
- package/dist/ee/extractors/pypi.d.ts +2 -0
- package/dist/ee/extractors/pypi.js +80 -0
- package/dist/ee/extractors/reddit.d.ts +2 -0
- package/dist/ee/extractors/reddit.js +438 -0
- package/dist/ee/extractors/redfin.d.ts +2 -0
- package/dist/ee/extractors/redfin.js +156 -0
- package/dist/ee/extractors/semanticscholar.d.ts +2 -0
- package/dist/ee/extractors/semanticscholar.js +131 -0
- package/dist/ee/extractors/shared.d.ts +12 -0
- package/dist/ee/extractors/shared.js +76 -0
- package/dist/ee/extractors/soundcloud.d.ts +2 -0
- package/dist/ee/extractors/soundcloud.js +34 -0
- package/dist/ee/extractors/sportsbetting.d.ts +2 -0
- package/dist/ee/extractors/sportsbetting.js +37 -0
- package/dist/ee/extractors/spotify.d.ts +2 -0
- package/dist/ee/extractors/spotify.js +34 -0
- package/dist/ee/extractors/stackoverflow.d.ts +2 -0
- package/dist/ee/extractors/stackoverflow.js +61 -0
- package/dist/ee/extractors/substack.d.ts +2 -0
- package/dist/ee/extractors/substack.js +115 -0
- package/dist/ee/extractors/substackroot.d.ts +2 -0
- package/dist/ee/extractors/substackroot.js +46 -0
- package/dist/ee/extractors/tiktok.d.ts +2 -0
- package/dist/ee/extractors/tiktok.js +29 -0
- package/dist/ee/extractors/tradingview.d.ts +2 -0
- package/dist/ee/extractors/tradingview.js +182 -0
- package/dist/ee/extractors/twitch.d.ts +2 -0
- package/dist/ee/extractors/twitch.js +36 -0
- package/dist/ee/extractors/twitter.d.ts +2 -0
- package/dist/ee/extractors/twitter.js +327 -0
- package/dist/ee/extractors/types.d.ts +14 -0
- package/dist/ee/extractors/types.js +1 -0
- package/dist/ee/extractors/walmart.d.ts +2 -0
- package/dist/ee/extractors/walmart.js +50 -0
- package/dist/ee/extractors/weather.d.ts +2 -0
- package/dist/ee/extractors/weather.js +133 -0
- package/dist/ee/extractors/wikipedia.d.ts +4 -0
- package/dist/ee/extractors/wikipedia.js +235 -0
- package/dist/ee/extractors/yelp.d.ts +2 -0
- package/dist/ee/extractors/yelp.js +216 -0
- package/dist/ee/extractors/youtube.d.ts +2 -0
- package/dist/ee/extractors/youtube.js +189 -0
- package/dist/ee/extractors/zillow.d.ts +54 -0
- package/dist/ee/extractors/zillow.js +247 -0
- package/dist/ee/extractors-re-export.d.ts +1 -0
- package/dist/ee/extractors-re-export.js +1 -0
- package/dist/ee/premium-hooks.d.ts +20 -0
- package/dist/ee/premium-hooks.js +50 -0
- package/dist/ee/spa-detection.d.ts +2 -0
- package/dist/ee/spa-detection.js +2 -0
- package/dist/ee/stability.d.ts +4 -0
- package/dist/ee/stability.js +29 -0
- package/dist/ee/swr-cache.d.ts +14 -0
- package/dist/ee/swr-cache.js +34 -0
- package/dist/index.d.ts +143 -0
- package/dist/index.js +291 -0
- package/dist/integrations/index.d.ts +2 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/langchain.d.ts +64 -0
- package/dist/integrations/langchain.js +115 -0
- package/dist/integrations/llamaindex.d.ts +50 -0
- package/dist/integrations/llamaindex.js +91 -0
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +395 -0
- package/dist/mcp/handlers/extract.d.ts +7 -0
- package/dist/mcp/handlers/extract.js +135 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +63 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +40 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/observe.d.ts +8 -0
- package/dist/mcp/handlers/observe.js +37 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +78 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +7 -0
- package/dist/mcp/server.js +108 -0
- package/dist/mcp/smart-router.d.ts +23 -0
- package/dist/mcp/smart-router.js +178 -0
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +632 -0
- package/dist/server/auth-store.d.ts +28 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/bull-queues.d.ts +60 -0
- package/dist/server/bull-queues.js +90 -0
- package/dist/server/email-service.d.ts +55 -0
- package/dist/server/email-service.js +291 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/audit-log.d.ts +14 -0
- package/dist/server/middleware/audit-log.js +73 -0
- package/dist/server/middleware/auth.d.ts +35 -0
- package/dist/server/middleware/auth.js +225 -0
- package/dist/server/middleware/rate-limit.d.ts +50 -0
- package/dist/server/middleware/rate-limit.js +270 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +201 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +146 -0
- package/dist/server/pg-auth-store.js +576 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +79 -0
- package/dist/server/routes/admin-active.d.ts +7 -0
- package/dist/server/routes/admin-active.js +120 -0
- package/dist/server/routes/admin-stats.d.ts +7 -0
- package/dist/server/routes/admin-stats.js +176 -0
- package/dist/server/routes/agent.d.ts +24 -0
- package/dist/server/routes/agent.js +480 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +295 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cache-warm.d.ts +25 -0
- package/dist/server/routes/cache-warm.js +212 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/crawl.d.ts +13 -0
- package/dist/server/routes/crawl.js +287 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/deep-research.d.ts +11 -0
- package/dist/server/routes/deep-research.js +232 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +14 -0
- package/dist/server/routes/extract.js +325 -0
- package/dist/server/routes/feed.d.ts +15 -0
- package/dist/server/routes/feed.js +311 -0
- package/dist/server/routes/fetch-queue.d.ts +13 -0
- package/dist/server/routes/fetch-queue.js +357 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +1274 -0
- package/dist/server/routes/go.d.ts +14 -0
- package/dist/server/routes/go.js +81 -0
- package/dist/server/routes/health.d.ts +11 -0
- package/dist/server/routes/health.js +141 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +574 -0
- package/dist/server/routes/map.d.ts +11 -0
- package/dist/server/routes/map.js +116 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +197 -0
- package/dist/server/routes/metrics.d.ts +37 -0
- package/dist/server/routes/metrics.js +149 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/reader.d.ts +18 -0
- package/dist/server/routes/reader.js +192 -0
- package/dist/server/routes/research.d.ts +14 -0
- package/dist/server/routes/research.js +482 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +820 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +874 -0
- package/dist/server/routes/session.d.ts +17 -0
- package/dist/server/routes/session.js +548 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +102 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +72 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
- package/dist/server/routes/smart-search/handlers/general.js +717 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +1309 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +154 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +546 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +264 -0
- package/dist/server/routes/smart-search/llm.d.ts +16 -0
- package/dist/server/routes/smart-search/llm.js +70 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +81 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +20 -0
- package/dist/server/routes/smart-search/utils.js +146 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +296 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/usage.d.ts +9 -0
- package/dist/server/routes/usage.js +279 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1867 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +14 -0
- package/dist/server/sentry.js +104 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/dist/types.d.ts +552 -0
- package/dist/types.js +39 -0
- package/llms.txt +105 -0
- package/package.json +189 -0
|
@@ -0,0 +1,558 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Autonomous web research agent
|
|
3
|
+
* Searches the web, fetches pages, and extracts structured data based on natural language prompts
|
|
4
|
+
*
|
|
5
|
+
* Supports:
|
|
6
|
+
* - depth: "basic" (1 search, top 3) vs "thorough" (multi-step, up to 3 searches, top 10)
|
|
7
|
+
* - maxSources: control how many sources to include (default 5, max 20)
|
|
8
|
+
* - topic: "general" | "news" | "technical" | "academic" — adjusts queries & prioritization
|
|
9
|
+
* - outputSchema: JSON Schema for structured output with validation
|
|
10
|
+
* - streaming callbacks for SSE support
|
|
11
|
+
*/
|
|
12
|
+
import { load } from 'cheerio';
|
|
13
|
+
import { peel } from '../index.js';
|
|
14
|
+
import { createLogger } from './logger.js';
|
|
15
|
+
// Module-scoped logger created with the 'agent' label.
const log = createLogger('agent');
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
// Helpers
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
/**
 * Search DuckDuckGo's HTML endpoint and parse the result list.
 *
 * Each `.result` element contributes one entry built from its `.result__a`
 * link (href + text) and `.result__snippet` text. Entries without an href or
 * a title are skipped.
 *
 * @param query - Free-text query; URL-encoded before being placed in `?q=`.
 * @param limit - Maximum number of results to return (default 10).
 * @returns Array of `{ url, title, snippet }`. Returns `[]` if the request or
 *          parsing throws (the error is logged, never re-thrown).
 */
async function searchWeb(query, limit = 10) {
    const { fetch: undiciFetch } = await import('undici');
    const encodedQuery = encodeURIComponent(query);
    const url = `https://html.duckduckgo.com/html/?q=${encodedQuery}`;
    // Turn a result href into the real destination URL.
    // The redirect parameter `uddg` must be checked BEFORE the `//` prefix:
    // redirect links are themselves protocol-relative
    // (`//duckduckgo.com/l/?uddg=<encoded target>&...`), so testing the prefix
    // first would return the redirect link instead of the target page.
    const resolveResultUrl = (rawUrl) => {
        const redirect = /[?&]uddg=([^&#]*)/.exec(rawUrl);
        if (redirect && redirect[1]) {
            // May throw on malformed percent-encoding; the caller skips that result.
            return decodeURIComponent(redirect[1]);
        }
        return rawUrl.startsWith('//') ? `https:${rawUrl}` : rawUrl;
    };
    try {
        const response = await undiciFetch(url, {
            headers: {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
            },
        });
        const html = await response.text();
        const $ = load(html);
        const results = [];
        $('.result').each((_, el) => {
            const link = $(el).find('.result__a');
            const snippet = $(el).find('.result__snippet');
            const rawUrl = link.attr('href');
            const title = link.text().trim();
            const desc = snippet.text().trim();
            if (rawUrl && title) {
                try {
                    results.push({ url: resolveResultUrl(rawUrl), title, snippet: desc });
                }
                catch (e) {
                    // A single undecodable link should not discard the other results.
                    log.debug('url decode failed:', e instanceof Error ? e.message : e);
                }
            }
        });
        return results.slice(0, limit);
    }
    catch (error) {
        log.error('Search failed:', error);
        return [];
    }
}
|
|
62
|
+
/**
 * Rank a search result by how well its hostname fits the requested topic.
 *
 * The result URL is lower-cased and parsed; only the hostname is matched.
 * A URL that cannot be parsed is treated as having an empty hostname.
 * For a known topic the first matching tier wins; anything else scores 0.
 *
 * @param result - Search result; only `result.url` is read.
 * @param topic - 'academic' | 'technical' | 'news'; any other value scores 0.
 * @returns Priority score (higher = better): 10, 5, 3 or 0.
 */
function scoreByTopic(result, topic) {
    const loweredUrl = result.url.toLowerCase();
    let host;
    try {
        host = new URL(loweredUrl).hostname;
    }
    catch {
        host = '';
    }
    // Tiers are listed in priority order per topic.
    const tiersByTopic = new Map([
        ['academic', [
                { pattern: /\.edu$|arxiv\.org|scholar\.google|pubmed|ieee\.org|acm\.org|researchgate\.net/, score: 10 },
                { pattern: /\.gov$/, score: 5 },
            ]],
        ['technical', [
                { pattern: /github\.com|stackoverflow\.com|docs\.|developer\.|devdocs\.io|mdn\./, score: 10 },
                { pattern: /\.dev$|\.io$/, score: 3 },
            ]],
        ['news', [
                { pattern: /reuters\.com|apnews\.com|bbc\.com|cnn\.com|nytimes\.com|theguardian\.com|bloomberg\.com|techcrunch\.com|theverge\.com|arstechnica\.com/, score: 10 },
                { pattern: /news|press|blog/, score: 3 },
            ]],
    ]);
    const tiers = tiersByTopic.get(topic) ?? [];
    const hit = tiers.find((tier) => tier.pattern.test(host));
    return hit ? hit.score : 0;
}
|
|
96
|
+
/**
 * Append topic-specific modifiers to a search query.
 *
 * - 'news'      → `<query> latest news <current year>`
 * - 'academic'  → `<query> research paper study`
 * - 'technical' → `<query> documentation tutorial`
 * - anything else → the query unchanged
 *
 * @param query - Original search query.
 * @param topic - Topic hint selecting the modifier.
 * @param now - Date whose (local-time) year is used for the news modifier.
 *              Defaults to the current date, so the year no longer goes stale
 *              the way a hard-coded literal does.
 * @returns The query with the modifier appended, or the query itself.
 */
function enhanceQueryForTopic(query, topic, now = new Date()) {
    switch (topic) {
        case 'news':
            return `${query} latest news ${now.getFullYear()}`;
        case 'academic':
            return `${query} research paper study`;
        case 'technical':
            return `${query} documentation tutorial`;
        default:
            return query;
    }
}
|
|
111
|
+
/**
 * Call an OpenAI-compatible chat-completions endpoint (non-streaming).
 *
 * @param messages - Chat messages, sent as-is in the request body.
 * @param options - `{ apiKey, model?, baseUrl?, jsonMode? }`.
 *   - `model` defaults to 'gpt-4o-mini'.
 *   - `baseUrl` defaults to 'https://api.openai.com/v1'; trailing slashes are
 *     stripped so a value such as `https://host/v1/` does not yield
 *     `/v1//chat/completions`.
 *   - `jsonMode` (truthy) adds `response_format: { type: 'json_object' }`.
 * @returns `{ content, usage: { input, output } }`; usage counts fall back to 0
 *          when the response carries no usage data.
 * @throws Error when the HTTP response is not OK (message includes status and
 *         response body) or when the first choice has no message content.
 */
async function callLLM(messages, options) {
    const { apiKey, model = 'gpt-4o-mini', baseUrl = 'https://api.openai.com/v1', jsonMode } = options;
    const { fetch: undiciFetch } = await import('undici');
    // Normalise the base URL so the joined endpoint never contains `//`.
    const endpoint = `${String(baseUrl).replace(/\/+$/, '')}/chat/completions`;
    const body = {
        model,
        messages,
        temperature: 0,
        ...(jsonMode ? { response_format: { type: 'json_object' } } : {}),
    };
    const response = await undiciFetch(endpoint, {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            Authorization: `Bearer ${apiKey}`,
        },
        body: JSON.stringify(body),
    });
    if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`LLM API error ${response.status}: ${errorText}`);
    }
    const result = (await response.json());
    const content = result.choices?.[0]?.message?.content;
    if (!content) {
        throw new Error('LLM returned empty response');
    }
    const usage = result.usage
        ? { input: result.usage.prompt_tokens ?? 0, output: result.usage.completion_tokens ?? 0 }
        : { input: 0, output: 0 };
    return { content, usage };
}
|
|
147
|
+
/**
 * Call an OpenAI-compatible LLM API with streaming enabled.
 *
 * Invokes `onChunk` for each text delta and resolves with the concatenated
 * content once the stream ends. When `onChunk` is not supplied the call is
 * delegated to the non-streaming `callLLM`.
 *
 * @param messages Chat messages, forwarded as the request's `messages` field.
 * @param options  `apiKey`, optional `model` (default 'gpt-4o-mini'), optional
 *                 `baseUrl` (default 'https://api.openai.com/v1'), optional `jsonMode`.
 * @param onChunk  Callback receiving each incremental piece of text.
 * @returns `{ content, usage }`; `usage` stays `{ input: 0, output: 0 }` unless a
 *          streamed chunk carries a `usage` object.
 * @throws Error when the HTTP status is not OK.
 */
async function callLLMStreaming(messages, options, onChunk) {
    if (!onChunk)
        return callLLM(messages, options);
    const { apiKey, model = 'gpt-4o-mini', baseUrl = 'https://api.openai.com/v1', jsonMode } = options;
    const { fetch: undiciFetch } = await import('undici');
    const body = {
        model,
        messages,
        temperature: 0,
        stream: true,
        // Ask the server to append a usage record to the stream.
        stream_options: { include_usage: true },
        ...(jsonMode ? { response_format: { type: 'json_object' } } : {}),
    };
    const response = await undiciFetch(`${baseUrl}/chat/completions`, {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            Authorization: `Bearer ${apiKey}`,
        },
        body: JSON.stringify(body),
    });
    if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`LLM API error ${response.status}: ${errorText}`);
    }
    let fullContent = '';
    let usage = { input: 0, output: 0 };
    // Read the SSE stream
    const reader = response.body?.getReader?.();
    if (!reader) {
        // Fallback: no readable stream available, consume the entire body as text
        const text = await response.text();
        return { content: text, usage };
    }
    // Interpret a single SSE line. Only `data:` fields are relevant; the space
    // after the colon is optional in the event-stream format.
    const handleLine = (line) => {
        const trimmed = line.trim();
        if (!trimmed.startsWith('data:'))
            return;
        const data = trimmed.slice(5).trim();
        if (!data || data === '[DONE]')
            return;
        try {
            const parsed = JSON.parse(data);
            const delta = parsed.choices?.[0]?.delta?.content;
            if (delta) {
                fullContent += delta;
                onChunk(delta);
            }
            // Final chunk may include usage
            if (parsed.usage) {
                usage = {
                    input: parsed.usage.prompt_tokens ?? 0,
                    output: parsed.usage.completion_tokens ?? 0,
                };
            }
        }
        catch (e) {
            // Best effort: a malformed chunk is logged and skipped, not fatal.
            log.debug('stream chunk parse failed:', e instanceof Error ? e.message : e);
        }
    };
    const decoder = new TextDecoder();
    let buffer = '';
    while (true) {
        const { done, value } = await reader.read();
        if (done)
            break;
        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split('\n');
        // The last element is an incomplete line (or ''); keep it for the next read.
        buffer = lines.pop() ?? '';
        for (const line of lines) {
            handleLine(line);
        }
    }
    // Flush bytes still held by the decoder and process a final line that was
    // not newline-terminated; otherwise its delta/usage would be dropped.
    buffer += decoder.decode();
    if (buffer) {
        handleLine(buffer);
    }
    return { content: fullContent, usage };
}
|
|
225
|
+
/**
 * Validate JSON data against a JSON Schema (best-effort, no extra deps).
 *
 * Only the top level is inspected:
 * - `type: 'object'` requires a non-null, non-array object and, when
 *   `required` is an array, that every listed key is an own property of `data`.
 * - `type: 'array'` requires an array.
 * - Any other schema is accepted without checks.
 *
 * @param data   Parsed JSON value to check.
 * @param schema JSON Schema object; only `type` and `required` are read.
 * @returns `{ valid: true }` or `{ valid: false, errors: string }`.
 */
function validateJsonSchema(data, schema) {
    if (schema.type === 'object') {
        // The type check applies whether or not the schema lists `properties`.
        if (typeof data !== 'object' || data === null || Array.isArray(data)) {
            return { valid: false, errors: 'Expected an object' };
        }
        if (Array.isArray(schema.required)) {
            // Own-property test: `in` would also match inherited names such as
            // "constructor" or "toString" and hide genuinely missing fields.
            const missing = schema.required.filter((k) => !Object.prototype.hasOwnProperty.call(data, k));
            if (missing.length > 0) {
                return { valid: false, errors: `Missing required fields: ${missing.join(', ')}` };
            }
        }
    }
    else if (schema.type === 'array') {
        if (!Array.isArray(data)) {
            return { valid: false, errors: 'Expected an array' };
        }
    }
    return { valid: true };
}
|
|
248
|
+
/**
 * Truncate content to approximately N tokens (rough estimate: 1 token ≈ 4 chars).
 *
 * @param content   Text to shorten.
 * @param maxTokens Approximate token budget; the character budget is `maxTokens * 4`.
 * @returns `content` unchanged when it fits, otherwise its leading part followed
 *          by a "[Content truncated...]" marker.
 */
function truncateContent(content, maxTokens = 3000) {
    const maxChars = maxTokens * 4;
    if (content.length <= maxChars)
        return content;
    let cut = maxChars;
    // Do not cut between the two halves of a surrogate pair: a dangling high
    // surrogate is not valid Unicode text and would be passed on downstream.
    const lastUnit = content.charCodeAt(cut - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        cut -= 1;
    }
    return content.slice(0, cut) + '\n\n[Content truncated...]';
}
|
|
257
|
+
// ---------------------------------------------------------------------------
|
|
258
|
+
// Main agent
|
|
259
|
+
// ---------------------------------------------------------------------------
|
|
260
|
+
/**
 * Run autonomous web research agent.
 *
 * Flow:
 *  1. If no start URLs are given, ask the LLM for search queries, run them and
 *     collect candidate URLs (deduplicated by hostname + pathname).
 *  2. Fetch candidate pages as markdown and keep a truncated copy of each.
 *  2b. (depth 'thorough' only) Ask the LLM for follow-up queries and fetch more pages.
 *  3. Ask the LLM to synthesise the final JSON answer; when `outputSchema` is
 *     given, validate it and make one repair attempt on failure.
 *
 * `creditsUsed` is incremented once per completed LLM call and once per
 * successfully fetched page.
 *
 * @param options Agent options: `prompt` and `llmApiKey` are required; `urls`,
 *   `schema` (legacy), `outputSchema`, `llmApiBase`, `llmModel`, `maxPages`
 *   (legacy), `maxSources` (capped at 20), `depth`, `topic`, `maxCredits`,
 *   `onProgress` and `onEvent` are optional.
 * @returns Result object with `success`, `data`, `sources`, `pagesVisited`,
 *   `creditsUsed`, `tokensUsed` and, where available, `answer` and `sourcesDetailed`.
 *   Failures after argument validation are reported as `success: false`, not thrown.
 * @throws Error when `llmApiKey` or `prompt` is missing.
 */
export async function runAgent(options) {
    const { prompt, urls: startUrls = [], schema: legacySchema, outputSchema, llmApiKey, llmApiBase = 'https://api.openai.com/v1', llmModel = 'gpt-4o-mini', maxPages, maxSources: rawMaxSources, depth = 'basic', topic = 'general', maxCredits, onProgress, onEvent, } = options;
    if (!llmApiKey)
        throw new Error('llmApiKey is required');
    if (!prompt)
        throw new Error('prompt is required');
    // Effective schema = outputSchema || legacy schema
    const effectiveSchema = outputSchema || legacySchema;
    // Determine effective maxSources:
    //   new param > legacy maxPages > depth-based default
    const depthDefaults = depth === 'thorough'
        ? { maxSources: 10, maxQueries: 3, resultsPerQuery: 10 }
        : { maxSources: 3, maxQueries: 1, resultsPerQuery: 5 };
    const maxSourcesLimit = Math.min(rawMaxSources ?? maxPages ?? depthDefaults.maxSources, 20);
    const maxQueries = depthDefaults.maxQueries;
    const visitedUrls = new Set();
    const sources = [];
    const sourcesDetailed = [];
    let pagesVisited = 0;
    let creditsUsed = 0;
    let totalUsage = { input: 0, output: 0 };
    const collectedData = [];
    // Emit both legacy progress and new event
    const reportProgress = (status, message, currentUrl) => {
        if (onProgress) {
            onProgress({ status, currentUrl, pagesVisited, message });
        }
    };
    const emit = (event) => {
        if (onEvent)
            onEvent(event);
    };
    const accUsage = (u) => {
        totalUsage.input += u.input;
        totalUsage.output += u.output;
    };
    // True once a configured (non-zero) credit budget has been spent.
    const creditsExhausted = () => Boolean(maxCredits) && creditsUsed >= maxCredits;
    // Keep only non-empty string queries; anything that is not an array yields [].
    // This guards against the LLM returning e.g. a single string, which would
    // otherwise be iterated character by character.
    const toQueryList = (value) => Array.isArray(value)
        ? value.filter((q) => typeof q === 'string' && q.trim() !== '')
        : [];
    const describeError = (e) => (e instanceof Error ? e.message : e);
    try {
        // -----------------------------------------------------------------------
        // Step 1: Determine search strategy & collect URLs
        // -----------------------------------------------------------------------
        let urlsToVisit = [...startUrls];
        if (urlsToVisit.length === 0) {
            reportProgress('searching', 'Planning research strategy...');
            const queryCount = depth === 'thorough' ? '3-5' : '2-3';
            const topicHint = topic !== 'general'
                ? `\nFocus queries on ${topic} sources.`
                : '';
            const planningMessages = [
                {
                    role: 'system',
                    content: `You are a web research assistant. Generate ${queryCount} specific search queries to find information for the user's request.${topicHint}\nReturn JSON only: {"queries": ["query1", "query2", ...]}`,
                },
                { role: 'user', content: `Research request: ${prompt}` },
            ];
            const planResponse = await callLLM(planningMessages, {
                apiKey: llmApiKey,
                model: llmModel,
                baseUrl: llmApiBase,
                jsonMode: true,
            });
            creditsUsed++;
            accUsage(planResponse.usage);
            let queries = [];
            try {
                queries = toQueryList(JSON.parse(planResponse.content)?.queries);
            }
            catch {
                queries = [];
            }
            // Unparseable or empty plan: search for the prompt itself.
            if (queries.length === 0) {
                queries = [prompt];
            }
            // Limit queries to maxQueries
            const effectiveQueries = queries.slice(0, maxQueries);
            for (const rawQuery of effectiveQueries) {
                const query = topic !== 'general' ? enhanceQueryForTopic(rawQuery, topic) : rawQuery;
                reportProgress('searching', `Searching: ${query}`);
                emit({ type: 'step', action: 'searching', query });
                const results = await searchWeb(query, depthDefaults.resultsPerQuery);
                // Sort by topic relevance
                if (topic !== 'general') {
                    results.sort((a, b) => scoreByTopic(b, topic) - scoreByTopic(a, topic));
                }
                urlsToVisit.push(...results.map(r => r.url));
                if (urlsToVisit.length >= maxSourcesLimit * 2)
                    break; // fetch a bit more than needed to account for failures
            }
            // Deduplicate by hostname+pathname; unparseable URLs are dropped
            const seen = new Set();
            urlsToVisit = urlsToVisit.filter(u => {
                let key;
                try {
                    const parsedUrl = new URL(u);
                    key = parsedUrl.hostname + parsedUrl.pathname;
                }
                catch {
                    return false;
                }
                if (seen.has(key))
                    return false;
                seen.add(key);
                return true;
            });
        }
        // -----------------------------------------------------------------------
        // Step 2: Visit pages and collect data
        // -----------------------------------------------------------------------
        const maxToFetch = Math.min(urlsToVisit.length, maxSourcesLimit);
        // Walk a few extra candidates so failed fetches can be compensated.
        for (const url of urlsToVisit.slice(0, maxToFetch + 5)) {
            // Enough data collected?
            if (collectedData.length >= maxSourcesLimit)
                break;
            if (creditsExhausted()) {
                reportProgress('done', 'Credit limit reached');
                break;
            }
            if (visitedUrls.has(url))
                continue;
            visitedUrls.add(url);
            reportProgress('visiting', `Fetching: ${url}`, url);
            emit({ type: 'step', action: 'fetching', url });
            try {
                const result = await peel(url, { format: 'markdown', timeout: 15000 });
                pagesVisited++;
                creditsUsed++;
                const truncated = truncateContent(result.content, depth === 'thorough' ? 4000 : 3000);
                collectedData.push({ url: result.url, title: result.title, content: truncated });
                sources.push(result.url);
                sourcesDetailed.push({ url: result.url, title: result.title });
                reportProgress('visiting', `Fetched: ${result.title}`, url);
            }
            catch (error) {
                // A single failed page must not abort the run.
                log.error(`Failed to fetch ${url}:`, describeError(error));
            }
        }
        // -----------------------------------------------------------------------
        // Step 2b (thorough only): Cross-reference — ask LLM if more info needed
        // -----------------------------------------------------------------------
        if (depth === 'thorough' && collectedData.length > 0 && collectedData.length < maxSourcesLimit && !creditsExhausted()) {
            reportProgress('searching', 'Cross-referencing — checking for gaps...');
            emit({ type: 'step', action: 'analyzing', summary: 'Cross-referencing collected data for gaps...' });
            const gapMessages = [
                {
                    role: 'system',
                    content: 'You are a web research assistant. Given the user\'s research request and summaries of pages already visited, identify any gaps. If more searches would help, return JSON: {"queries":["q1"]}. If no gaps, return {"queries":[]}.',
                },
                {
                    role: 'user',
                    content: `Research request: ${prompt}\n\nPages visited:\n${collectedData.map(d => `- ${d.title} (${d.url})`).join('\n')}`,
                },
            ];
            try {
                const gapResponse = await callLLM(gapMessages, {
                    apiKey: llmApiKey, model: llmModel, baseUrl: llmApiBase, jsonMode: true,
                });
                creditsUsed++;
                accUsage(gapResponse.usage);
                const gapParsed = JSON.parse(gapResponse.content);
                // At most two follow-up queries are executed.
                const gapQueries = toQueryList(gapParsed?.queries).slice(0, 2);
                for (const q of gapQueries) {
                    // Stop searching once the source quota or credit budget is used up.
                    if (collectedData.length >= maxSourcesLimit || creditsExhausted())
                        break;
                    emit({ type: 'step', action: 'searching', query: q });
                    const results = await searchWeb(q, 5);
                    for (const r of results) {
                        if (collectedData.length >= maxSourcesLimit || creditsExhausted())
                            break;
                        if (visitedUrls.has(r.url))
                            continue;
                        visitedUrls.add(r.url);
                        emit({ type: 'step', action: 'fetching', url: r.url });
                        try {
                            const result = await peel(r.url, { format: 'markdown', timeout: 15000 });
                            pagesVisited++;
                            creditsUsed++;
                            const truncated = truncateContent(result.content, 4000);
                            collectedData.push({ url: result.url, title: result.title, content: truncated });
                            sources.push(result.url);
                            sourcesDetailed.push({ url: result.url, title: result.title });
                        }
                        catch (e) {
                            log.debug('page fetch failed:', describeError(e));
                        }
                    }
                }
            }
            catch (e) {
                // Gap analysis is optional; continue with what was collected.
                log.debug('research batch failed:', describeError(e));
            }
        }
        // -----------------------------------------------------------------------
        // Step 3: Extract / synthesise final answer
        // -----------------------------------------------------------------------
        if (collectedData.length === 0) {
            return {
                success: false,
                data: { error: 'No data could be collected from the web' },
                sources: [],
                pagesVisited,
                creditsUsed,
                tokensUsed: totalUsage,
            };
        }
        reportProgress('extracting', 'Analyzing collected data...');
        emit({ type: 'step', action: 'analyzing', summary: `Synthesizing answer from ${collectedData.length} sources...` });
        const context = collectedData
            .map(d => `Source: ${d.url}\nTitle: ${d.title}\n\n${d.content}`)
            .join('\n\n---\n\n');
        const truncatedContext = truncateContent(context, depth === 'thorough' ? 12000 : 8000);
        // Build system prompt based on schema or free-form
        let systemPrompt;
        if (effectiveSchema) {
            systemPrompt =
                'You are a web research assistant. Extract structured data from the provided web content based on the user\'s request. ' +
                    `Return a JSON object matching this schema:\n${JSON.stringify(effectiveSchema, null, 2)}\n\nReturn ONLY valid JSON, no explanation.`;
        }
        else {
            systemPrompt =
                'You are a web research assistant. Based on the provided web content, answer the user\'s research question. ' +
                    'Provide a comprehensive, well-structured answer. Return a JSON object with:\n' +
                    '- "answer": your detailed answer as a string (use markdown formatting)\n' +
                    '- "keyFindings": array of key facts/findings\n' +
                    'Return ONLY valid JSON, no explanation.';
        }
        const extractMessages = [
            { role: 'system', content: systemPrompt },
            {
                role: 'user',
                content: `Research request: ${prompt}\n\nCollected data from ${collectedData.length} web pages:\n\n${truncatedContext}`,
            },
        ];
        // Use streaming LLM call when onEvent is present
        const hasStreaming = !!onEvent;
        const extractResponse = await callLLMStreaming(extractMessages, { apiKey: llmApiKey, model: llmModel, baseUrl: llmApiBase, jsonMode: true }, hasStreaming ? (text) => emit({ type: 'chunk', text }) : undefined);
        creditsUsed++;
        accUsage(extractResponse.usage);
        // Parse final result
        let finalData;
        try {
            finalData = JSON.parse(extractResponse.content);
        }
        catch {
            // Not JSON: hand back the raw text under `result`.
            finalData = { result: extractResponse.content };
        }
        // Validate against outputSchema if provided
        if (outputSchema) {
            let validation = validateJsonSchema(finalData, outputSchema);
            if (!validation.valid) {
                // Try once more: ask LLM to fix
                try {
                    const fixMessages = [
                        {
                            role: 'system',
                            content: `The previous response did not match the required JSON schema. Fix it.\nSchema: ${JSON.stringify(outputSchema)}\nErrors: ${validation.errors}\nReturn ONLY valid JSON.`,
                        },
                        { role: 'user', content: extractResponse.content },
                    ];
                    const fixResponse = await callLLM(fixMessages, {
                        apiKey: llmApiKey, model: llmModel, baseUrl: llmApiBase, jsonMode: true,
                    });
                    creditsUsed++;
                    accUsage(fixResponse.usage);
                    finalData = JSON.parse(fixResponse.content);
                    // Re-check the repaired output instead of trusting it blindly.
                    validation = validateJsonSchema(finalData, outputSchema);
                }
                catch (e) {
                    log.debug('schema fix attempt failed:', describeError(e));
                }
                if (!validation.valid) {
                    // Return what we have with a warning
                    const warning = `Output did not match schema: ${validation.errors}`;
                    if (typeof finalData === 'object' && finalData !== null && !Array.isArray(finalData)) {
                        finalData._validationWarning = warning;
                    }
                    else {
                        // null, primitives and arrays cannot carry the warning
                        // property themselves, so wrap them.
                        finalData = { result: finalData, _validationWarning: warning };
                    }
                }
            }
        }
        const answerText = typeof finalData?.answer === 'string' ? finalData.answer : undefined;
        reportProgress('done', `Completed: ${pagesVisited} pages visited`);
        emit({
            type: 'done',
            answer: answerText || JSON.stringify(finalData),
            sources: sourcesDetailed,
            tokensUsed: totalUsage,
        });
        return {
            success: true,
            data: finalData,
            answer: answerText,
            sources,
            sourcesDetailed,
            pagesVisited,
            creditsUsed,
            tokensUsed: totalUsage,
        };
    }
    catch (error) {
        log.error('Agent error:', error);
        return {
            success: false,
            // `error` may be any thrown value, including null/undefined.
            data: { error: error?.message || 'Unknown error occurred' },
            sources,
            sourcesDetailed,
            pagesVisited,
            creditsUsed,
            tokensUsed: totalUsage,
        };
    }
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* /answer core implementation
|
|
3
|
+
*
|
|
4
|
+
* Flow:
|
|
5
|
+
* - search the web
|
|
6
|
+
* - fetch top sources via WebPeel
|
|
7
|
+
* - call an LLM (BYOK) to generate a cited answer
|
|
8
|
+
*/
|
|
9
|
+
import { type SearchProviderId } from './search-provider.js';
|
|
10
|
+
/** Identifier of the LLM provider used to generate the answer. */
export type LLMProviderId =
    | 'openai'
    | 'anthropic'
    | 'google';
|
|
11
|
+
/**
 * Token usage reported alongside an answer.
 * NOTE(review): presumably `input` counts prompt tokens and `output`
 * completion tokens — confirm against the implementation.
 */
export interface TokensUsed {
    /** Number of input tokens. */
    input: number;
    /** Number of output tokens. */
    output: number;
}
|
|
15
|
+
/** A single source cited by a generated answer. */
export interface AnswerCitation {
    /** Title of the cited source. */
    title: string;
    /** URL of the cited source. */
    url: string;
    /** Text excerpt associated with the citation. */
    snippet: string;
}
|
|
20
|
+
/** Input for `answerQuestion`. */
export interface AnswerRequest {
    /** The question to answer. */
    question: string;
    /** Search provider to use (optional). */
    searchProvider?: SearchProviderId;
    /** API key for the search provider (optional). */
    searchApiKey?: string;
    /** LLM provider that generates the answer. */
    llmProvider: LLMProviderId;
    /** API key for the LLM provider (bring your own key). */
    llmApiKey: string;
    /** Model name (optional). */
    llmModel?: string;
    /** Limit on the number of sources (optional). */
    maxSources?: number;
    /** Whether to stream the answer (optional). */
    stream?: boolean;
    /** Called with incremental text when stream=true */
    onChunk?: (text: string) => void;
    /** Optional AbortSignal */
    signal?: AbortSignal;
}
|
|
34
|
+
/** Result returned by `answerQuestion`. */
export interface AnswerResponse {
    /** The generated answer text. */
    answer: string;
    /** Sources cited by the answer. */
    citations: AnswerCitation[];
    /** Search provider reported for this answer. */
    searchProvider: SearchProviderId;
    /** LLM provider reported for this answer. */
    llmProvider: LLMProviderId;
    /** Model name reported for this answer. */
    llmModel: string;
    /** Token usage for the request. */
    tokensUsed: TokensUsed;
}
|
|
42
|
+
/**
 * Answer a question per the flow described in this file's header: search the
 * web, fetch top sources via WebPeel, then call an LLM (BYOK) to generate a
 * cited answer.
 *
 * @param req Question, provider selection and credentials.
 * @returns A promise resolving to the answer, its citations and usage metadata.
 */
export declare function answerQuestion(
    req: AnswerRequest,
): Promise<AnswerResponse>;
|