@iflow-mcp/jakeliume-webpeel 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +313 -0
- package/dist/cache.d.ts +30 -0
- package/dist/cache.js +139 -0
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +411 -0
- package/dist/cli/commands/doctor.d.ts +37 -0
- package/dist/cli/commands/doctor.js +371 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1345 -0
- package/dist/cli/commands/guide.d.ts +2 -0
- package/dist/cli/commands/guide.js +183 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +840 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/monitor.d.ts +12 -0
- package/dist/cli/commands/monitor.js +197 -0
- package/dist/cli/commands/observe.d.ts +12 -0
- package/dist/cli/commands/observe.js +158 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +282 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +1021 -0
- package/dist/cli/commands/setup.d.ts +13 -0
- package/dist/cli/commands/setup.js +244 -0
- package/dist/cli/commands/skill.d.ts +15 -0
- package/dist/cli/commands/skill.js +195 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +806 -0
- package/dist/cli-auth.d.ts +75 -0
- package/dist/cli-auth.js +369 -0
- package/dist/cli.d.ts +17 -0
- package/dist/cli.js +99 -0
- package/dist/core/actions.d.ts +69 -0
- package/dist/core/actions.js +495 -0
- package/dist/core/agent.d.ts +98 -0
- package/dist/core/agent.js +558 -0
- package/dist/core/answer.d.ts +42 -0
- package/dist/core/answer.js +395 -0
- package/dist/core/application-tracker.d.ts +84 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/apply.d.ts +162 -0
- package/dist/core/apply.js +816 -0
- package/dist/core/auth-detection.d.ts +35 -0
- package/dist/core/auth-detection.js +358 -0
- package/dist/core/auto-extract.d.ts +82 -0
- package/dist/core/auto-extract.js +604 -0
- package/dist/core/auto-interact.d.ts +23 -0
- package/dist/core/auto-interact.js +246 -0
- package/dist/core/bm25-filter.d.ts +66 -0
- package/dist/core/bm25-filter.js +288 -0
- package/dist/core/branding.d.ts +54 -0
- package/dist/core/branding.js +234 -0
- package/dist/core/browser-fetch.d.ts +323 -0
- package/dist/core/browser-fetch.js +1600 -0
- package/dist/core/browser-pool.d.ts +91 -0
- package/dist/core/browser-pool.js +550 -0
- package/dist/core/budget.d.ts +42 -0
- package/dist/core/budget.js +324 -0
- package/dist/core/business-intel.d.ts +47 -0
- package/dist/core/business-intel.js +279 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +121 -0
- package/dist/core/cf-worker-proxy.d.ts +32 -0
- package/dist/core/cf-worker-proxy.js +87 -0
- package/dist/core/challenge-detection.d.ts +26 -0
- package/dist/core/challenge-detection.js +468 -0
- package/dist/core/change-tracking.d.ts +75 -0
- package/dist/core/change-tracking.js +276 -0
- package/dist/core/chunker.d.ts +46 -0
- package/dist/core/chunker.js +249 -0
- package/dist/core/chunking.d.ts +42 -0
- package/dist/core/chunking.js +181 -0
- package/dist/core/circuit-breaker.d.ts +44 -0
- package/dist/core/circuit-breaker.js +85 -0
- package/dist/core/content-pruner.d.ts +47 -0
- package/dist/core/content-pruner.js +425 -0
- package/dist/core/cookie-cache.d.ts +60 -0
- package/dist/core/cookie-cache.js +163 -0
- package/dist/core/crawl-checkpoint.d.ts +54 -0
- package/dist/core/crawl-checkpoint.js +104 -0
- package/dist/core/crawler.d.ts +84 -0
- package/dist/core/crawler.js +349 -0
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/deep-fetch.d.ts +74 -0
- package/dist/core/deep-fetch.js +405 -0
- package/dist/core/deep-research.d.ts +141 -0
- package/dist/core/deep-research.js +972 -0
- package/dist/core/design-analysis.d.ts +70 -0
- package/dist/core/design-analysis.js +490 -0
- package/dist/core/design-compare.d.ts +38 -0
- package/dist/core/design-compare.js +264 -0
- package/dist/core/diff.d.ts +61 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/dns-cache.d.ts +20 -0
- package/dist/core/dns-cache.js +198 -0
- package/dist/core/documents.d.ts +23 -0
- package/dist/core/documents.js +123 -0
- package/dist/core/domain-memory.d.ts +66 -0
- package/dist/core/domain-memory.js +163 -0
- package/dist/core/domain-verify.d.ts +40 -0
- package/dist/core/domain-verify.js +379 -0
- package/dist/core/engine-ranker.d.ts +112 -0
- package/dist/core/engine-ranker.js +395 -0
- package/dist/core/extract-inline.d.ts +38 -0
- package/dist/core/extract-inline.js +215 -0
- package/dist/core/extract-listings.d.ts +38 -0
- package/dist/core/extract-listings.js +461 -0
- package/dist/core/extract.d.ts +9 -0
- package/dist/core/extract.js +139 -0
- package/dist/core/fetch-cache.d.ts +57 -0
- package/dist/core/fetch-cache.js +95 -0
- package/dist/core/fetcher.d.ts +13 -0
- package/dist/core/fetcher.js +12 -0
- package/dist/core/google-cache.d.ts +29 -0
- package/dist/core/google-cache.js +180 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/hotel-search.d.ts +122 -0
- package/dist/core/hotel-search.js +382 -0
- package/dist/core/http-fetch.d.ts +72 -0
- package/dist/core/http-fetch.js +820 -0
- package/dist/core/human.d.ts +175 -0
- package/dist/core/human.js +680 -0
- package/dist/core/image-caption.d.ts +44 -0
- package/dist/core/image-caption.js +271 -0
- package/dist/core/jobs.d.ts +75 -0
- package/dist/core/jobs.js +634 -0
- package/dist/core/json-ld.d.ts +15 -0
- package/dist/core/json-ld.js +617 -0
- package/dist/core/language-detect.d.ts +18 -0
- package/dist/core/language-detect.js +135 -0
- package/dist/core/links.d.ts +10 -0
- package/dist/core/links.js +44 -0
- package/dist/core/llm-extract.d.ts +71 -0
- package/dist/core/llm-extract.js +507 -0
- package/dist/core/llm-provider.d.ts +100 -0
- package/dist/core/llm-provider.js +702 -0
- package/dist/core/local-search.d.ts +60 -0
- package/dist/core/local-search.js +308 -0
- package/dist/core/logger.d.ts +28 -0
- package/dist/core/logger.js +104 -0
- package/dist/core/map.d.ts +33 -0
- package/dist/core/map.js +127 -0
- package/dist/core/markdown.d.ts +92 -0
- package/dist/core/markdown.js +809 -0
- package/dist/core/metadata.d.ts +34 -0
- package/dist/core/metadata.js +422 -0
- package/dist/core/observe.d.ts +113 -0
- package/dist/core/observe.js +395 -0
- package/dist/core/ocr.d.ts +12 -0
- package/dist/core/ocr.js +33 -0
- package/dist/core/paginate.d.ts +31 -0
- package/dist/core/paginate.js +106 -0
- package/dist/core/pdf.d.ts +8 -0
- package/dist/core/pdf.js +25 -0
- package/dist/core/peel-tls.d.ts +25 -0
- package/dist/core/peel-tls.js +220 -0
- package/dist/core/pipeline.d.ts +132 -0
- package/dist/core/pipeline.js +1666 -0
- package/dist/core/profiles.d.ts +61 -0
- package/dist/core/profiles.js +350 -0
- package/dist/core/prompt-guard.d.ts +30 -0
- package/dist/core/prompt-guard.js +119 -0
- package/dist/core/proxy-config.d.ts +90 -0
- package/dist/core/proxy-config.js +172 -0
- package/dist/core/quick-answer.d.ts +53 -0
- package/dist/core/quick-answer.js +833 -0
- package/dist/core/rate-governor.d.ts +80 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/readability.d.ts +57 -0
- package/dist/core/readability.js +533 -0
- package/dist/core/research.d.ts +66 -0
- package/dist/core/research.js +270 -0
- package/dist/core/retry.d.ts +60 -0
- package/dist/core/retry.js +119 -0
- package/dist/core/safe-browsing.d.ts +30 -0
- package/dist/core/safe-browsing.js +206 -0
- package/dist/core/schema-extraction.d.ts +66 -0
- package/dist/core/schema-extraction.js +352 -0
- package/dist/core/schema-postprocess.d.ts +32 -0
- package/dist/core/schema-postprocess.js +469 -0
- package/dist/core/schema-templates.d.ts +19 -0
- package/dist/core/schema-templates.js +143 -0
- package/dist/core/screenshot.d.ts +224 -0
- package/dist/core/screenshot.js +207 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +243 -0
- package/dist/core/search-provider.js +1629 -0
- package/dist/core/searxng-provider.d.ts +35 -0
- package/dist/core/searxng-provider.js +105 -0
- package/dist/core/selective-evidence.d.ts +151 -0
- package/dist/core/selective-evidence.js +389 -0
- package/dist/core/site-search.d.ts +44 -0
- package/dist/core/site-search.js +252 -0
- package/dist/core/sitemap.d.ts +23 -0
- package/dist/core/sitemap.js +105 -0
- package/dist/core/source-credibility.d.ts +29 -0
- package/dist/core/source-credibility.js +584 -0
- package/dist/core/source-scoring.d.ts +166 -0
- package/dist/core/source-scoring.js +396 -0
- package/dist/core/stemmer.d.ts +38 -0
- package/dist/core/stemmer.js +509 -0
- package/dist/core/strategies.d.ts +104 -0
- package/dist/core/strategies.js +1044 -0
- package/dist/core/strategy-hooks.d.ts +145 -0
- package/dist/core/strategy-hooks.js +74 -0
- package/dist/core/structured-extract.d.ts +43 -0
- package/dist/core/structured-extract.js +550 -0
- package/dist/core/summarize.d.ts +17 -0
- package/dist/core/summarize.js +78 -0
- package/dist/core/synonyms.d.ts +42 -0
- package/dist/core/synonyms.js +184 -0
- package/dist/core/system-monitor.d.ts +61 -0
- package/dist/core/system-monitor.js +133 -0
- package/dist/core/table-format.d.ts +30 -0
- package/dist/core/table-format.js +146 -0
- package/dist/core/threat-feeds.d.ts +23 -0
- package/dist/core/threat-feeds.js +104 -0
- package/dist/core/timing.d.ts +21 -0
- package/dist/core/timing.js +33 -0
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/core/user-agents.d.ts +82 -0
- package/dist/core/user-agents.js +239 -0
- package/dist/core/vertical-search.d.ts +54 -0
- package/dist/core/vertical-search.js +158 -0
- package/dist/core/watch-manager.d.ts +175 -0
- package/dist/core/watch-manager.js +416 -0
- package/dist/core/watch.d.ts +101 -0
- package/dist/core/watch.js +389 -0
- package/dist/core/youtube.d.ts +130 -0
- package/dist/core/youtube.js +1175 -0
- package/dist/ee/challenge-re-export.d.ts +1 -0
- package/dist/ee/challenge-re-export.js +1 -0
- package/dist/ee/challenge-solver.d.ts +72 -0
- package/dist/ee/challenge-solver.js +720 -0
- package/dist/ee/domain-extractors.d.ts +8 -0
- package/dist/ee/domain-extractors.js +8 -0
- package/dist/ee/domain-intel.d.ts +16 -0
- package/dist/ee/domain-intel.js +133 -0
- package/dist/ee/extractors/allrecipes.d.ts +2 -0
- package/dist/ee/extractors/allrecipes.js +120 -0
- package/dist/ee/extractors/amazon.d.ts +2 -0
- package/dist/ee/extractors/amazon.js +78 -0
- package/dist/ee/extractors/arxiv.d.ts +2 -0
- package/dist/ee/extractors/arxiv.js +137 -0
- package/dist/ee/extractors/bestbuy.d.ts +2 -0
- package/dist/ee/extractors/bestbuy.js +78 -0
- package/dist/ee/extractors/carscom.d.ts +2 -0
- package/dist/ee/extractors/carscom.js +121 -0
- package/dist/ee/extractors/coingecko.d.ts +2 -0
- package/dist/ee/extractors/coingecko.js +134 -0
- package/dist/ee/extractors/craigslist.d.ts +2 -0
- package/dist/ee/extractors/craigslist.js +92 -0
- package/dist/ee/extractors/devto.d.ts +2 -0
- package/dist/ee/extractors/devto.js +135 -0
- package/dist/ee/extractors/ebay.d.ts +2 -0
- package/dist/ee/extractors/ebay.js +90 -0
- package/dist/ee/extractors/espn.d.ts +2 -0
- package/dist/ee/extractors/espn.js +260 -0
- package/dist/ee/extractors/etsy.d.ts +2 -0
- package/dist/ee/extractors/etsy.js +52 -0
- package/dist/ee/extractors/facebook.d.ts +2 -0
- package/dist/ee/extractors/facebook.js +46 -0
- package/dist/ee/extractors/github.d.ts +2 -0
- package/dist/ee/extractors/github.js +196 -0
- package/dist/ee/extractors/google-flights.d.ts +2 -0
- package/dist/ee/extractors/google-flights.js +176 -0
- package/dist/ee/extractors/hackernews.d.ts +2 -0
- package/dist/ee/extractors/hackernews.js +147 -0
- package/dist/ee/extractors/imdb.d.ts +2 -0
- package/dist/ee/extractors/imdb.js +172 -0
- package/dist/ee/extractors/index.d.ts +26 -0
- package/dist/ee/extractors/index.js +247 -0
- package/dist/ee/extractors/instagram.d.ts +2 -0
- package/dist/ee/extractors/instagram.js +102 -0
- package/dist/ee/extractors/kalshi.d.ts +2 -0
- package/dist/ee/extractors/kalshi.js +121 -0
- package/dist/ee/extractors/kayak-cars.d.ts +2 -0
- package/dist/ee/extractors/kayak-cars.js +270 -0
- package/dist/ee/extractors/linkedin.d.ts +2 -0
- package/dist/ee/extractors/linkedin.js +113 -0
- package/dist/ee/extractors/medium.d.ts +2 -0
- package/dist/ee/extractors/medium.js +130 -0
- package/dist/ee/extractors/news.d.ts +4 -0
- package/dist/ee/extractors/news.js +173 -0
- package/dist/ee/extractors/npm.d.ts +2 -0
- package/dist/ee/extractors/npm.js +86 -0
- package/dist/ee/extractors/pdf.d.ts +2 -0
- package/dist/ee/extractors/pdf.js +108 -0
- package/dist/ee/extractors/pinterest.d.ts +2 -0
- package/dist/ee/extractors/pinterest.js +34 -0
- package/dist/ee/extractors/polymarket.d.ts +2 -0
- package/dist/ee/extractors/polymarket.js +358 -0
- package/dist/ee/extractors/producthunt.d.ts +2 -0
- package/dist/ee/extractors/producthunt.js +88 -0
- package/dist/ee/extractors/pubmed.d.ts +2 -0
- package/dist/ee/extractors/pubmed.js +162 -0
- package/dist/ee/extractors/pypi.d.ts +2 -0
- package/dist/ee/extractors/pypi.js +80 -0
- package/dist/ee/extractors/reddit.d.ts +2 -0
- package/dist/ee/extractors/reddit.js +438 -0
- package/dist/ee/extractors/redfin.d.ts +2 -0
- package/dist/ee/extractors/redfin.js +156 -0
- package/dist/ee/extractors/semanticscholar.d.ts +2 -0
- package/dist/ee/extractors/semanticscholar.js +131 -0
- package/dist/ee/extractors/shared.d.ts +12 -0
- package/dist/ee/extractors/shared.js +76 -0
- package/dist/ee/extractors/soundcloud.d.ts +2 -0
- package/dist/ee/extractors/soundcloud.js +34 -0
- package/dist/ee/extractors/sportsbetting.d.ts +2 -0
- package/dist/ee/extractors/sportsbetting.js +37 -0
- package/dist/ee/extractors/spotify.d.ts +2 -0
- package/dist/ee/extractors/spotify.js +34 -0
- package/dist/ee/extractors/stackoverflow.d.ts +2 -0
- package/dist/ee/extractors/stackoverflow.js +61 -0
- package/dist/ee/extractors/substack.d.ts +2 -0
- package/dist/ee/extractors/substack.js +115 -0
- package/dist/ee/extractors/substackroot.d.ts +2 -0
- package/dist/ee/extractors/substackroot.js +46 -0
- package/dist/ee/extractors/tiktok.d.ts +2 -0
- package/dist/ee/extractors/tiktok.js +29 -0
- package/dist/ee/extractors/tradingview.d.ts +2 -0
- package/dist/ee/extractors/tradingview.js +182 -0
- package/dist/ee/extractors/twitch.d.ts +2 -0
- package/dist/ee/extractors/twitch.js +36 -0
- package/dist/ee/extractors/twitter.d.ts +2 -0
- package/dist/ee/extractors/twitter.js +327 -0
- package/dist/ee/extractors/types.d.ts +14 -0
- package/dist/ee/extractors/types.js +1 -0
- package/dist/ee/extractors/walmart.d.ts +2 -0
- package/dist/ee/extractors/walmart.js +50 -0
- package/dist/ee/extractors/weather.d.ts +2 -0
- package/dist/ee/extractors/weather.js +133 -0
- package/dist/ee/extractors/wikipedia.d.ts +4 -0
- package/dist/ee/extractors/wikipedia.js +235 -0
- package/dist/ee/extractors/yelp.d.ts +2 -0
- package/dist/ee/extractors/yelp.js +216 -0
- package/dist/ee/extractors/youtube.d.ts +2 -0
- package/dist/ee/extractors/youtube.js +189 -0
- package/dist/ee/extractors/zillow.d.ts +54 -0
- package/dist/ee/extractors/zillow.js +247 -0
- package/dist/ee/extractors-re-export.d.ts +1 -0
- package/dist/ee/extractors-re-export.js +1 -0
- package/dist/ee/premium-hooks.d.ts +20 -0
- package/dist/ee/premium-hooks.js +50 -0
- package/dist/ee/spa-detection.d.ts +2 -0
- package/dist/ee/spa-detection.js +2 -0
- package/dist/ee/stability.d.ts +4 -0
- package/dist/ee/stability.js +29 -0
- package/dist/ee/swr-cache.d.ts +14 -0
- package/dist/ee/swr-cache.js +34 -0
- package/dist/index.d.ts +143 -0
- package/dist/index.js +291 -0
- package/dist/integrations/index.d.ts +2 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/langchain.d.ts +64 -0
- package/dist/integrations/langchain.js +115 -0
- package/dist/integrations/llamaindex.d.ts +50 -0
- package/dist/integrations/llamaindex.js +91 -0
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +395 -0
- package/dist/mcp/handlers/extract.d.ts +7 -0
- package/dist/mcp/handlers/extract.js +135 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +63 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +40 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/observe.d.ts +8 -0
- package/dist/mcp/handlers/observe.js +37 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +78 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +7 -0
- package/dist/mcp/server.js +108 -0
- package/dist/mcp/smart-router.d.ts +23 -0
- package/dist/mcp/smart-router.js +178 -0
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +632 -0
- package/dist/server/auth-store.d.ts +28 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/bull-queues.d.ts +60 -0
- package/dist/server/bull-queues.js +90 -0
- package/dist/server/email-service.d.ts +55 -0
- package/dist/server/email-service.js +291 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/audit-log.d.ts +14 -0
- package/dist/server/middleware/audit-log.js +73 -0
- package/dist/server/middleware/auth.d.ts +35 -0
- package/dist/server/middleware/auth.js +225 -0
- package/dist/server/middleware/rate-limit.d.ts +50 -0
- package/dist/server/middleware/rate-limit.js +270 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +201 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +146 -0
- package/dist/server/pg-auth-store.js +576 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +79 -0
- package/dist/server/routes/admin-active.d.ts +7 -0
- package/dist/server/routes/admin-active.js +120 -0
- package/dist/server/routes/admin-stats.d.ts +7 -0
- package/dist/server/routes/admin-stats.js +176 -0
- package/dist/server/routes/agent.d.ts +24 -0
- package/dist/server/routes/agent.js +480 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +295 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cache-warm.d.ts +25 -0
- package/dist/server/routes/cache-warm.js +212 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/crawl.d.ts +13 -0
- package/dist/server/routes/crawl.js +287 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/deep-research.d.ts +11 -0
- package/dist/server/routes/deep-research.js +232 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +14 -0
- package/dist/server/routes/extract.js +325 -0
- package/dist/server/routes/feed.d.ts +15 -0
- package/dist/server/routes/feed.js +311 -0
- package/dist/server/routes/fetch-queue.d.ts +13 -0
- package/dist/server/routes/fetch-queue.js +357 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +1274 -0
- package/dist/server/routes/go.d.ts +14 -0
- package/dist/server/routes/go.js +81 -0
- package/dist/server/routes/health.d.ts +11 -0
- package/dist/server/routes/health.js +141 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +574 -0
- package/dist/server/routes/map.d.ts +11 -0
- package/dist/server/routes/map.js +116 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +197 -0
- package/dist/server/routes/metrics.d.ts +37 -0
- package/dist/server/routes/metrics.js +149 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/reader.d.ts +18 -0
- package/dist/server/routes/reader.js +192 -0
- package/dist/server/routes/research.d.ts +14 -0
- package/dist/server/routes/research.js +482 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +820 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +874 -0
- package/dist/server/routes/session.d.ts +17 -0
- package/dist/server/routes/session.js +548 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +102 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +72 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
- package/dist/server/routes/smart-search/handlers/general.js +717 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +1309 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +154 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +546 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +264 -0
- package/dist/server/routes/smart-search/llm.d.ts +16 -0
- package/dist/server/routes/smart-search/llm.js +70 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +81 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +20 -0
- package/dist/server/routes/smart-search/utils.js +146 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +296 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/usage.d.ts +9 -0
- package/dist/server/routes/usage.js +279 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1867 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +14 -0
- package/dist/server/sentry.js +104 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/dist/types.d.ts +552 -0
- package/dist/types.js +39 -0
- package/llms.txt +105 -0
- package/package.json +189 -0
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Source Scoring — intelligent ranking for /v1/ask results
|
|
3
|
+
*
|
|
4
|
+
* Ranks search results using:
|
|
5
|
+
* 1. Primary source detection (entity name in domain, official docs paths)
|
|
6
|
+
* 2. Domain authority scoring (tiered: official → institutional → major → general)
|
|
7
|
+
* 3. Freshness scoring (from page metadata — publishDate, published, lastModified)
|
|
8
|
+
* 4. Domain deduplication (max 2 results per domain)
|
|
9
|
+
* 5. Combined score: bm25*0.4 + authority*0.25 + freshness*0.2 + primary*0.15
|
|
10
|
+
* (for factual/pricing queries: bm25*0.35 + authority*0.15 + freshness*0.35 + primary*0.15)
|
|
11
|
+
*
|
|
12
|
+
* No external dependencies — pure TypeScript.
|
|
13
|
+
*/
|
|
14
|
+
export interface SearchResult {
|
|
15
|
+
url: string;
|
|
16
|
+
title: string;
|
|
17
|
+
snippet: string;
|
|
18
|
+
}
|
|
19
|
+
export interface PageMetadataForScoring {
|
|
20
|
+
published?: string;
|
|
21
|
+
publishDate?: string;
|
|
22
|
+
[key: string]: unknown;
|
|
23
|
+
}
|
|
24
|
+
export interface FreshnessData {
|
|
25
|
+
lastModified?: string;
|
|
26
|
+
fetchedAt?: string;
|
|
27
|
+
}
|
|
28
|
+
/** Authority tier label for the response payload */
|
|
29
|
+
export type AuthorityLabel = 'official' | 'institutional' | 'major' | 'general';
|
|
30
|
+
/** Freshness tier label for the response payload */
|
|
31
|
+
export type FreshnessLabel = 'recent' | 'this-month' | 'this-year' | 'older';
|
|
32
|
+
/** Public-facing scored source (returned in API response) */
|
|
33
|
+
export interface ScoredSource {
|
|
34
|
+
url: string;
|
|
35
|
+
title: string;
|
|
36
|
+
snippet: string;
|
|
37
|
+
confidence: number;
|
|
38
|
+
authority: AuthorityLabel;
|
|
39
|
+
freshness: FreshnessLabel;
|
|
40
|
+
isPrimarySource: boolean;
|
|
41
|
+
}
|
|
42
|
+
/** Internal scored source (includes all raw component scores) */
|
|
43
|
+
export interface ScoredSourceInternal extends ScoredSource {
|
|
44
|
+
bm25Score: number;
|
|
45
|
+
authorityScore: number;
|
|
46
|
+
freshnessScore: number;
|
|
47
|
+
primarySourceScore: number;
|
|
48
|
+
finalScore: number;
|
|
49
|
+
}
|
|
50
|
+
/**
|
|
51
|
+
* Extract the hostname from a URL (e.g. "docs.cerebras.ai").
|
|
52
|
+
*/
|
|
53
|
+
export declare function extractHostname(url: string): string;
|
|
54
|
+
/**
|
|
55
|
+
* Extract the registered domain (e.g. "cerebras.ai" from "docs.cerebras.ai").
|
|
56
|
+
* Handles common multi-part TLDs like .co.uk, .com.au, etc.
|
|
57
|
+
*/
|
|
58
|
+
export declare function extractRegisteredDomain(url: string): string;
|
|
59
|
+
/**
|
|
60
|
+
* Extract entity candidates from a query.
|
|
61
|
+
* Returns non-stopword tokens of length >= 3.
|
|
62
|
+
* e.g. "what are cerebras free tier limits" → ["cerebras", "tier", "limits"]
|
|
63
|
+
*/
|
|
64
|
+
export declare function extractEntityCandidates(query: string): string[];
|
|
65
|
+
/**
|
|
66
|
+
* Score a URL as a primary source for a given query.
|
|
67
|
+
* Returns a score in [0, 1.0].
|
|
68
|
+
*
|
|
69
|
+
* Factors:
|
|
70
|
+
* - Domain contains entity name → +0.3
|
|
71
|
+
* - URL path matches official docs/pricing patterns → +0.2
|
|
72
|
+
*/
|
|
73
|
+
export declare function scorePrimarySource(url: string, query: string): number;
|
|
74
|
+
/**
|
|
75
|
+
* Returns true if the URL is a primary source for the query.
|
|
76
|
+
*/
|
|
77
|
+
export declare function isPrimarySource(url: string, query: string): boolean;
|
|
78
|
+
/**
|
|
79
|
+
* Score domain authority for a URL.
|
|
80
|
+
* Returns a score in [0, 1].
|
|
81
|
+
*/
|
|
82
|
+
export declare function scoreDomainAuthority(url: string): number;
|
|
83
|
+
/**
|
|
84
|
+
* Map an authority score to a label.
|
|
85
|
+
*/
|
|
86
|
+
export declare function authorityLabel(score: number): AuthorityLabel;
|
|
87
|
+
/**
|
|
88
|
+
* Extract a publish/modification date from page metadata.
|
|
89
|
+
* Tries multiple metadata fields in order of preference.
|
|
90
|
+
*/
|
|
91
|
+
export declare function extractPageDate(metadata?: PageMetadataForScoring, freshness?: FreshnessData): Date | null;
|
|
92
|
+
/**
|
|
93
|
+
* Score freshness based on page date.
|
|
94
|
+
* Returns a score in [0, 1]:
|
|
95
|
+
* - No date known → 0.5 (neutral)
|
|
96
|
+
* - last 7 days → 1.0
|
|
97
|
+
* - last 30 days → 0.9
|
|
98
|
+
* - last 90 days → 0.8
|
|
99
|
+
* - last year → 0.6
|
|
100
|
+
* - older → 0.4
|
|
101
|
+
*/
|
|
102
|
+
export declare function scoreFreshness(metadata?: PageMetadataForScoring, freshnessData?: FreshnessData): number;
|
|
103
|
+
/**
|
|
104
|
+
* Map freshness metadata to a label.
|
|
105
|
+
*/
|
|
106
|
+
export declare function freshnessLabel(metadata?: PageMetadataForScoring, freshnessData?: FreshnessData): FreshnessLabel;
|
|
107
|
+
/**
|
|
108
|
+
* Returns true if the query is about pricing, limits, rates, or other
|
|
109
|
+
* time-sensitive factual data where freshness is critical.
|
|
110
|
+
*/
|
|
111
|
+
export declare function isFactualQuery(query: string): boolean;
|
|
112
|
+
/**
|
|
113
|
+
* Compute the combined final score for a source.
|
|
114
|
+
*
|
|
115
|
+
* Standard weights:
|
|
116
|
+
* finalScore = bm25*0.40 + authority*0.25 + freshness*0.20 + primary*0.15
|
|
117
|
+
*
|
|
118
|
+
* Factual/pricing query weights (freshness raised from 0.20 to 0.35; authority lowered from 0.25 to 0.15 and bm25 from 0.40 to 0.35):
|
|
119
|
+
* finalScore = bm25*0.35 + authority*0.15 + freshness*0.35 + primary*0.15
|
|
120
|
+
*/
|
|
121
|
+
export declare function computeFinalScore(bm25Score: number, authorityScore: number, freshnessScore: number, primarySourceScore: number, factual: boolean): number;
|
|
122
|
+
/**
|
|
123
|
+
* Deduplicate sources by registered domain.
|
|
124
|
+
* Keeps up to `maxPerDomain` (default: 2) highest-scored results per domain.
|
|
125
|
+
* Input must already be sorted by finalScore descending for correct behavior.
|
|
126
|
+
*/
|
|
127
|
+
export declare function deduplicateByDomain<T extends {
|
|
128
|
+
url: string;
|
|
129
|
+
finalScore: number;
|
|
130
|
+
}>(sources: T[], maxPerDomain?: number): T[];
|
|
131
|
+
export interface ScoreSourceOptions {
|
|
132
|
+
searchResult: SearchResult;
|
|
133
|
+
query: string;
|
|
134
|
+
/** BM25 confidence score from quickAnswer (0-1). Default: 0.5 (neutral pre-fetch) */
|
|
135
|
+
bm25Score?: number;
|
|
136
|
+
metadata?: PageMetadataForScoring;
|
|
137
|
+
freshnessData?: FreshnessData;
|
|
138
|
+
/** Override factual query detection */
|
|
139
|
+
factualQuery?: boolean;
|
|
140
|
+
}
|
|
141
|
+
/**
|
|
142
|
+
* Score a single source with all signals combined.
|
|
143
|
+
*/
|
|
144
|
+
export declare function scoreSource(options: ScoreSourceOptions): ScoredSourceInternal;
|
|
145
|
+
/**
|
|
146
|
+
* Rank search results BEFORE fetching.
|
|
147
|
+
* Uses authority + primary source scores (BM25 and freshness not yet available).
|
|
148
|
+
* Returns deduplicated results sorted by pre-fetch score.
|
|
149
|
+
*
|
|
150
|
+
* Use this to prioritize which URLs to fetch.
|
|
151
|
+
*/
|
|
152
|
+
export declare function rankSearchResults(results: SearchResult[], query: string, options?: {
|
|
153
|
+
maxPerDomain?: number;
|
|
154
|
+
}): SearchResult[];
|
|
155
|
+
/**
|
|
156
|
+
* Score fetched sources AFTER BM25 scoring.
|
|
157
|
+
* Computes the full combined score and returns deduplicated results sorted by finalScore.
|
|
158
|
+
*/
|
|
159
|
+
export declare function scoreFetchedSources(sources: Array<{
|
|
160
|
+
searchResult: SearchResult;
|
|
161
|
+
bm25Score: number;
|
|
162
|
+
metadata?: PageMetadataForScoring;
|
|
163
|
+
freshnessData?: FreshnessData;
|
|
164
|
+
}>, query: string, options?: {
|
|
165
|
+
maxPerDomain?: number;
|
|
166
|
+
}): ScoredSourceInternal[];
|
|
@@ -0,0 +1,396 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Source Scoring — intelligent ranking for /v1/ask results
|
|
3
|
+
*
|
|
4
|
+
* Ranks search results using:
|
|
5
|
+
* 1. Primary source detection (entity name in domain, official docs paths)
|
|
6
|
+
* 2. Domain authority scoring (tiered: official → institutional → major → general)
|
|
7
|
+
* 3. Freshness scoring (from page metadata — publishDate, published, lastModified)
|
|
8
|
+
* 4. Domain deduplication (max 2 results per domain)
|
|
9
|
+
* 5. Combined score: bm25*0.4 + authority*0.25 + freshness*0.2 + primary*0.15
|
|
10
|
+
* (for factual/pricing queries: bm25*0.35 + authority*0.15 + freshness*0.35 + primary*0.15)
|
|
11
|
+
*
|
|
12
|
+
* No external dependencies — pure TypeScript.
|
|
13
|
+
*/
|
|
14
|
+
const AUTHORITY_TIERS = [
|
|
15
|
+
// -----------------------------------------------------------------------
|
|
16
|
+
// Specific known domains — checked FIRST (most precise)
|
|
17
|
+
// -----------------------------------------------------------------------
|
|
18
|
+
// High-quality reference/code (0.85-0.9)
|
|
19
|
+
{ pattern: 'github.com', score: 0.9 },
|
|
20
|
+
{ pattern: 'arxiv.org', score: 0.9 },
|
|
21
|
+
{ pattern: 'stackoverflow.com', score: 0.85 },
|
|
22
|
+
{ pattern: 'wikipedia.org', score: 0.85 },
|
|
23
|
+
// Major news/institutional (0.7-0.8)
|
|
24
|
+
{ pattern: 'reuters.com', score: 0.8 },
|
|
25
|
+
{ pattern: 'apnews.com', score: 0.8 },
|
|
26
|
+
{ pattern: 'bloomberg.com', score: 0.8 },
|
|
27
|
+
{ pattern: 'wsj.com', score: 0.8 },
|
|
28
|
+
{ pattern: 'ft.com', score: 0.8 },
|
|
29
|
+
{ pattern: 'nytimes.com', score: 0.8 },
|
|
30
|
+
{ pattern: 'bbc.com', score: 0.8 },
|
|
31
|
+
{ pattern: 'bbc.co.uk', score: 0.8 },
|
|
32
|
+
{ pattern: 'techcrunch.com', score: 0.75 },
|
|
33
|
+
{ pattern: 'arstechnica.com', score: 0.75 },
|
|
34
|
+
{ pattern: 'theverge.com', score: 0.75 },
|
|
35
|
+
{ pattern: 'wired.com', score: 0.75 },
|
|
36
|
+
{ pattern: 'zdnet.com', score: 0.7 },
|
|
37
|
+
{ pattern: 'cnn.com', score: 0.75 },
|
|
38
|
+
// -----------------------------------------------------------------------
|
|
39
|
+
// Subdomain patterns — regex, checked after specific domains
|
|
40
|
+
// -----------------------------------------------------------------------
|
|
41
|
+
{ pattern: /^docs\./, score: 0.9 },
|
|
42
|
+
{ pattern: /^developer\./, score: 0.9 },
|
|
43
|
+
{ pattern: /^developers\./, score: 0.9 },
|
|
44
|
+
{ pattern: /^api\./, score: 0.85 },
|
|
45
|
+
{ pattern: /^support\./, score: 0.8 },
|
|
46
|
+
{ pattern: /^help\./, score: 0.8 },
|
|
47
|
+
// -----------------------------------------------------------------------
|
|
48
|
+
// Broad TLD patterns — checked LAST (most general)
|
|
49
|
+
// These must come after specific domain rules to avoid overriding them.
|
|
50
|
+
// e.g. wikipedia.org should score 0.85 (specific), not 0.9 (.org TLD)
|
|
51
|
+
// -----------------------------------------------------------------------
|
|
52
|
+
{ pattern: '.gov', score: 1.0 },
|
|
53
|
+
{ pattern: '.edu', score: 0.95 },
|
|
54
|
+
{ pattern: '.org', score: 0.9 },
|
|
55
|
+
// Default for everything else: 0.5
|
|
56
|
+
];
|
|
57
|
+
const AUTHORITY_DEFAULT = 0.5;
|
|
58
|
+
// ---------------------------------------------------------------------------
|
|
59
|
+
// Stopwords (for entity extraction)
|
|
60
|
+
// ---------------------------------------------------------------------------
|
|
61
|
+
/**
 * Lowercase words that extractEntityCandidates() never treats as entity names.
 * Besides ordinary English function words, this deliberately includes generic
 * query vocabulary ("best", "compare", "price", "limit", ...).
 */
const ENTITY_STOPWORDS = new Set([
    // Question words, articles
    'what', 'is', 'the', 'how', 'do', 'a', 'an', 'where', 'when', 'why', 'which',
    // Auxiliary / modal verbs
    'can', 'does', 'are', 'was', 'were', 'be', 'been', 'being',
    'have', 'has', 'had', 'will', 'would', 'could', 'should', 'may', 'might',
    'shall', 'must', 'did',
    // Pronouns and determiners
    'i', 'you', 'he', 'she', 'it', 'we', 'they',
    'me', 'him', 'her', 'us', 'them', 'my', 'your', 'his', 'its', 'our', 'their',
    'this', 'that', 'these', 'those',
    // Prepositions and conjunctions
    'of', 'in', 'on', 'at', 'by', 'for',
    'with', 'about', 'into', 'to', 'from', 'up', 'out', 'and', 'or', 'but',
    'if', 'so', 'as', 'not', 'no', 'than', 'then', 'also',
    // Generic request verbs
    'get', 'use', 'list', 'find', 'tell', 'show', 'give', 'make', 'need', 'want', 'know',
    // Generic qualifiers and comparison words
    'free', 'best', 'good', 'new', 'all', 'any', 'some', 'more', 'most',
    'vs', 'versus', 'compare', 'difference', 'between', 'using', 'used',
    'many', 'much', 'long',
    // Pricing / limit vocabulary (singular forms only)
    'cost', 'price', 'limit', 'rate',
]);
|
|
75
|
+
// Factual query keywords — queries matching these use the freshness-heavy weighting.
// `deprecat\w*` covers "deprecated", "deprecation", "deprecating", ...; a bare
// `deprecat` followed by \b could never match those words.
const FACTUAL_QUERY_PATTERN = /\b(price|pricing|cost|costs|limit|limits|rate|rates|quota|tier|plan|plans|fee|fees|subscription|deprecat\w*|latest|current|version|update)\b/i;
|
|
77
|
+
// Path segments that indicate an official docs/pricing/help page. A segment
// matches when it directly follows a "/" and ends at a word boundary
// (case-insensitive), so "/api-keys" matches but "/guides" does not.
const OFFICIAL_PATH_PATTERN = new RegExp('\\/(' + [
    'docs', 'api', 'pricing', 'help', 'support', 'documentation', 'reference',
    'guide', 'faq', 'changelog', 'release', 'releases', 'download', 'downloads',
    'getting-started', 'quickstart',
].join('|') + ')\\b', 'i');
|
|
79
|
+
// ---------------------------------------------------------------------------
|
|
80
|
+
// URL helpers
|
|
81
|
+
// ---------------------------------------------------------------------------
|
|
82
|
+
/**
 * Extract the lowercase hostname from a URL (e.g. "docs.cerebras.ai").
 *
 * Well-formed URLs are parsed with the `URL` class. If that throws, a regex
 * fallback takes the authority part of an http(s) URL and strips any
 * `user:pass@` prefix and `:port` suffix, so the result is a bare hostname
 * that suffix checks such as `.endsWith('.gov')` can rely on.
 *
 * @param url URL string to inspect.
 * @returns The lowercase hostname, or '' when none can be determined.
 */
export function extractHostname(url) {
    try {
        return new URL(url).hostname.toLowerCase();
    }
    catch {
        // Non-string input has no `.match`; report "no hostname" instead of throwing.
        if (typeof url !== 'string')
            return '';
        const match = url.match(/^https?:\/\/([^/?#]+)/i);
        if (!match)
            return '';
        const authority = match[1];
        // Everything up to the last "@" is userinfo, not part of the host.
        const hostAndPort = authority.slice(authority.lastIndexOf('@') + 1);
        // Drop a trailing ":<digits>" port (also covers an out-of-range port,
        // which is one of the reasons URL parsing can fail).
        return hostAndPort.replace(/:\d*$/, '').toLowerCase();
    }
}
|
|
94
|
+
/**
 * Extract the registered domain (e.g. "cerebras.ai" from "docs.cerebras.ai").
 *
 * Heuristic, not a full public-suffix lookup:
 *   - IPv4 literals are returned unchanged (they have no registrable part).
 *   - Hosts ending in a common second-level country suffix
 *     (co|com|net|org|gov|edu|ac + two-letter country code, e.g. ".co.uk",
 *     ".com.au", ".ac.uk") keep their last three labels.
 *   - Everything else keeps its last two labels.
 *
 * @param url URL string to inspect.
 * @returns The registered domain, or '' when no hostname can be extracted.
 */
export function extractRegisteredDomain(url) {
    const hostname = extractHostname(url);
    if (!hostname)
        return '';
    // An IP address must not be cut down to its last two octets; otherwise
    // unrelated hosts such as 10.0.0.1 and 192.168.0.1 would share "0.1".
    if (/^\d{1,3}(\.\d{1,3}){3}$/.test(hostname))
        return hostname;
    const labels = hostname.split('.');
    // No `i` flag needed: extractHostname() already lowercases its result.
    const hasCountrySecondLevel = /\.(co|com|net|org|gov|edu|ac)\.[a-z]{2}$/.test(hostname);
    const keep = hasCountrySecondLevel ? 3 : 2;
    return labels.length > keep ? labels.slice(-keep).join('.') : hostname;
}
|
|
110
|
+
/**
 * Return the path component of a URL string.
 *
 * Uses the `URL` parser when the input is well-formed; otherwise falls back to
 * a regex that captures the path of an http(s) URL. Returns '' when the
 * fallback finds no path.
 */
function extractPath(url) {
    let parsed = null;
    try {
        parsed = new URL(url);
    }
    catch {
        parsed = null;
    }
    if (parsed !== null) {
        return parsed.pathname;
    }
    const fallback = url.match(/^https?:\/\/[^/?#]+(\/[^?#]*)?/i);
    if (fallback === null || fallback[1] === undefined) {
        return '';
    }
    return fallback[1];
}
|
|
122
|
+
// ---------------------------------------------------------------------------
|
|
123
|
+
// 1. Primary source detection
|
|
124
|
+
// ---------------------------------------------------------------------------
|
|
125
|
+
/**
 * Pull candidate entity names out of a query.
 *
 * The query is lowercased, every character that is neither a word character
 * nor whitespace is turned into a space, and the result is split on
 * whitespace. Tokens shorter than 3 characters or listed in ENTITY_STOPWORDS
 * are dropped; order and duplicates are preserved.
 *
 * e.g. "what are cerebras free tier limits" → ["cerebras", "tier", "limits"]
 */
export function extractEntityCandidates(query) {
    const normalized = query.toLowerCase().replace(/[^\w\s]/g, ' ');
    const candidates = [];
    for (const token of normalized.split(/\s+/)) {
        if (token.length < 3 || ENTITY_STOPWORDS.has(token)) {
            continue;
        }
        candidates.push(token);
    }
    return candidates;
}
|
|
137
|
+
/**
 * Score a URL as a primary source for a given query.
 * Returns a score in [0, 0.5] (the two factors below are the only contributions).
 *
 * Factors:
 *   - A query entity equals a segment of the host name → +0.3
 *   - URL path matches official docs/pricing patterns   → +0.2
 *
 * The final host label (the TLD) is not treated as part of the name:
 * otherwise a query word such as "app", "dev" or "org" would mark every
 * ".app" / ".dev" / ".org" site as a primary source.
 *
 * @param url   URL of the candidate source.
 * @param query The user's query text.
 */
export function scorePrimarySource(url, query) {
    const hostname = extractHostname(url);
    const entities = extractEntityCandidates(query);
    let score = 0;
    // Split the host into labels and drop the TLD when there is more than one
    // label (single-label hosts such as "localhost" are kept whole).
    const labels = hostname.split('.');
    const nameLabels = labels.length > 1 ? labels.slice(0, -1) : labels;
    // Break labels on "-" and "_" as well, and require an exact segment match.
    // This prevents partial matches like 'random' matching 'randomblog.io'.
    const domainSegments = new Set(nameLabels.flatMap(label => label.split(/[-_]/)));
    if (entities.some(entity => domainSegments.has(entity))) {
        score += 0.3;
    }
    // Check for official docs/pricing patterns in the path
    if (OFFICIAL_PATH_PATTERN.test(extractPath(url))) {
        score += 0.2;
    }
    return score;
}
|
|
164
|
+
/**
 * Whether the URL counts as a primary source for the query, i.e. whether
 * scorePrimarySource() gives it any positive score.
 */
export function isPrimarySource(url, query) {
    const primaryScore = scorePrimarySource(url, query);
    return primaryScore > 0;
}
|
|
170
|
+
// ---------------------------------------------------------------------------
|
|
171
|
+
// 2. Domain authority scoring
|
|
172
|
+
// ---------------------------------------------------------------------------
|
|
173
|
+
/**
 * Score domain authority for a URL.
 *
 * Walks AUTHORITY_TIERS in order and returns the score of the first entry
 * whose pattern matches the URL's hostname. Returns AUTHORITY_DEFAULT when the
 * hostname cannot be extracted or nothing matches.
 */
export function scoreDomainAuthority(url) {
    const host = extractHostname(url);
    if (!host) {
        return AUTHORITY_DEFAULT;
    }
    const matchesHost = ({ pattern }) => {
        // RegExp entries are tested against the full hostname.
        if (typeof pattern !== 'string') {
            return pattern.test(host);
        }
        // ".gov"-style entries are suffix (TLD) checks.
        if (pattern.startsWith('.')) {
            return host.endsWith(pattern);
        }
        // Plain domains match the host itself or any of its subdomains.
        return host === pattern || host.endsWith('.' + pattern);
    };
    const firstMatch = AUTHORITY_TIERS.find(matchesHost);
    return firstMatch ? firstMatch.score : AUTHORITY_DEFAULT;
}
|
|
203
|
+
/**
 * Map an authority score to a label.
 *
 *   score >= 0.9 → 'official'
 *   score >= 0.8 → 'institutional'
 *   score >= 0.7 → 'major'
 *   otherwise    → 'general'
 */
export function authorityLabel(score) {
    const bands = [
        { min: 0.9, label: 'official' },
        { min: 0.8, label: 'institutional' },
        { min: 0.7, label: 'major' },
    ];
    for (const band of bands) {
        if (score >= band.min) {
            return band.label;
        }
    }
    return 'general';
}
|
|
215
|
+
// ---------------------------------------------------------------------------
|
|
216
|
+
// 3. Freshness scoring
|
|
217
|
+
// ---------------------------------------------------------------------------
|
|
218
|
+
/**
 * Find a publish/modification date for a page.
 *
 * Candidate fields are tried in a fixed order of preference (publish dates
 * first, then modification dates, then `freshness.lastModified`). The first
 * value that parses to a valid Date with a year after 1990 and no later than
 * next calendar year is returned; `null` if none qualifies.
 */
export function extractPageDate(metadata, freshness) {
    const latestYearAllowed = new Date().getFullYear() + 1;
    const rawValues = [
        metadata?.publishDate,
        metadata?.published,
        metadata?.['article:published_time'],
        metadata?.['og:article:published_time'],
        metadata?.['datePublished'],
        metadata?.['modified'],
        metadata?.['dateModified'],
        freshness?.lastModified,
    ];
    for (const raw of rawValues) {
        if (!raw) {
            continue;
        }
        const parsed = new Date(raw);
        if (Number.isNaN(parsed.getTime())) {
            continue;
        }
        const year = parsed.getFullYear();
        if (year > 1990 && year <= latestYearAllowed) {
            return parsed;
        }
    }
    return null;
}
|
|
243
|
+
/**
 * Score freshness based on page date.
 * Returns a score in [0, 1]:
 *   - No date known                 → 0.5 (neutral)
 *   - dated > 1 day in the future   → 0.5 (neutral; the date is not credible)
 *   - last 7 days                   → 1.0
 *   - last 30 days                  → 0.9
 *   - last 90 days                  → 0.8
 *   - last year                     → 0.6
 *   - older                         → 0.4
 *
 * @param metadata      Page metadata passed through to extractPageDate().
 * @param freshnessData Extra freshness info passed through to extractPageDate().
 */
export function scoreFreshness(metadata, freshnessData) {
    const pageDate = extractPageDate(metadata, freshnessData);
    if (!pageDate)
        return 0.5; // neutral when unknown
    const MS_PER_DAY = 1000 * 60 * 60 * 24;
    const ageDays = (Date.now() - pageDate.getTime()) / MS_PER_DAY;
    // extractPageDate() accepts dates up to next calendar year. Without this
    // guard a future-dated page has a negative age and would get the maximum
    // score. One day of slack absorbs timezone differences in date-only values.
    if (ageDays < -1)
        return 0.5;
    if (ageDays <= 7)
        return 1.0;
    if (ageDays <= 30)
        return 0.9;
    if (ageDays <= 90)
        return 0.8;
    if (ageDays <= 365)
        return 0.6;
    return 0.4;
}
|
|
268
|
+
/**
 * Map freshness metadata to a label, based on the scoreFreshness() result:
 *
 *   score >= 0.85 → 'recent'
 *   score >= 0.75 → 'this-month'
 *   score >= 0.45 → 'this-year'  (includes the neutral 0.5 used for unknown dates)
 *   otherwise     → 'older'
 */
export function freshnessLabel(metadata, freshnessData) {
    const freshness = scoreFreshness(metadata, freshnessData);
    return freshness >= 0.85 ? 'recent'
        : freshness >= 0.75 ? 'this-month'
            : freshness >= 0.45 ? 'this-year'
                : 'older';
}
|
|
281
|
+
// ---------------------------------------------------------------------------
|
|
282
|
+
// 4. Factual query detection
|
|
283
|
+
// ---------------------------------------------------------------------------
|
|
284
|
+
/**
 * Whether the query contains one of the FACTUAL_QUERY_PATTERN keywords
 * (pricing, limits, rates, versions, ...), i.e. asks for time-sensitive
 * factual data where freshness is critical.
 */
export function isFactualQuery(query) {
    const looksFactual = FACTUAL_QUERY_PATTERN.test(query);
    return looksFactual;
}
|
|
291
|
+
// ---------------------------------------------------------------------------
|
|
292
|
+
// 5. Combined scoring
|
|
293
|
+
// ---------------------------------------------------------------------------
|
|
294
|
+
/**
 * Combine the four per-source signals into one final score (weighted sum).
 *
 * Standard weights:
 *   bm25 0.40, authority 0.25, freshness 0.20, primary 0.15
 *
 * Factual/pricing query weights (freshness raised to 0.35; authority and
 * BM25 reduced):
 *   bm25 0.35, authority 0.15, freshness 0.35, primary 0.15
 */
export function computeFinalScore(bm25Score, authorityScore, freshnessScore, primarySourceScore, factual) {
    const weights = factual
        ? { bm25: 0.35, authority: 0.15, freshness: 0.35, primary: 0.15 }
        : { bm25: 0.40, authority: 0.25, freshness: 0.20, primary: 0.15 };
    // Terms are added in the same left-to-right order for both weight sets.
    return bm25Score * weights.bm25
        + authorityScore * weights.authority
        + freshnessScore * weights.freshness
        + primarySourceScore * weights.primary;
}
|
|
309
|
+
// ---------------------------------------------------------------------------
|
|
310
|
+
// 6. Domain deduplication
|
|
311
|
+
// ---------------------------------------------------------------------------
|
|
312
|
+
/**
 * Deduplicate sources by registered domain.
 * Keeps up to `maxPerDomain` (default: 2) highest-scored results per domain.
 *
 * The input array is not modified: a copy is sorted by finalScore descending
 * here, so callers need not pre-sort. The result is in that sorted order.
 *
 * Sources whose registered domain cannot be determined are grouped by their
 * full URL instead, so unrelated unparseable URLs are not capped together.
 *
 * @param sources      Items with at least `url` and `finalScore`.
 * @param maxPerDomain Maximum number of results kept per domain.
 */
export function deduplicateByDomain(sources, maxPerDomain = 2) {
    // Sort a copy by finalScore descending so the best entries are kept.
    const ranked = [...sources].sort((a, b) => b.finalScore - a.finalScore);
    const countsByKey = new Map();
    const kept = [];
    for (const source of ranked) {
        // extractRegisteredDomain() returns '' for URLs it cannot parse; fall
        // back to the URL itself so those do not all share one bucket.
        const key = extractRegisteredDomain(source.url) || source.url;
        const seen = countsByKey.get(key) ?? 0;
        if (seen >= maxPerDomain)
            continue;
        countsByKey.set(key, seen + 1);
        kept.push(source);
    }
    return kept;
}
|
|
332
|
+
/**
 * Score a single source with all signals combined.
 *
 * Computes the authority, freshness and primary-source scores for
 * `options.searchResult`, combines them with the BM25 score (0.5 when not
 * supplied) via computeFinalScore(), and returns the public labels together
 * with the raw per-signal scores. `options.factualQuery`, when given,
 * overrides the keyword-based factual-query detection.
 */
export function scoreSource(options) {
    const { searchResult, query, metadata, freshnessData } = options;
    const { url, title, snippet } = searchResult;
    const bm25Score = options.bm25Score === undefined ? 0.5 : options.bm25Score;
    const factualQuery = options.factualQuery != null
        ? options.factualQuery
        : isFactualQuery(query);
    const authorityScore = scoreDomainAuthority(url);
    const freshnessScore = scoreFreshness(metadata, freshnessData);
    const primarySourceScore = scorePrimarySource(url, query);
    const finalScore = computeFinalScore(bm25Score, authorityScore, freshnessScore, primarySourceScore, factualQuery);
    return {
        // Public fields
        url,
        title,
        snippet,
        confidence: bm25Score,
        authority: authorityLabel(authorityScore),
        freshness: freshnessLabel(metadata, freshnessData),
        isPrimarySource: primarySourceScore > 0,
        // Internal fields
        bm25Score,
        authorityScore,
        freshnessScore,
        primarySourceScore,
        finalScore,
    };
}
|
|
358
|
+
// ---------------------------------------------------------------------------
|
|
359
|
+
// 8. Batch ranking helpers (for ask.ts integration)
|
|
360
|
+
// ---------------------------------------------------------------------------
|
|
361
|
+
/**
 * Rank search results BEFORE fetching, to decide which URLs to fetch first.
 *
 * Only authority and primary-source scores are known at this point, so BM25
 * and freshness are both fed in as the neutral value 0.5. Results are
 * deduplicated per domain (`options.maxPerDomain`, default 2) and returned in
 * ranked order without the temporary `finalScore` field.
 */
export function rankSearchResults(results, query, options) {
    const NEUTRAL = 0.5; // placeholder for the not-yet-available BM25 and freshness scores
    const factual = isFactualQuery(query);
    const perDomainLimit = options?.maxPerDomain ?? 2;
    const withScores = results.map(result => {
        const finalScore = computeFinalScore(NEUTRAL, scoreDomainAuthority(result.url), NEUTRAL, scorePrimarySource(result.url, query), factual);
        return { ...result, finalScore };
    });
    const ranked = deduplicateByDomain(withScores, perDomainLimit);
    // Strip the internal finalScore before handing results back.
    return ranked.map(entry => {
        const { finalScore: _dropped, ...searchResult } = entry;
        return searchResult;
    });
}
|
|
381
|
+
/**
 * Score fetched sources AFTER BM25 scoring.
 *
 * Factual-query detection runs once for the whole batch; each source is then
 * scored with scoreSource() and the batch is passed through
 * deduplicateByDomain() (`options.maxPerDomain`, default 2).
 */
export function scoreFetchedSources(sources, query, options) {
    const factualQuery = isFactualQuery(query);
    const perDomainLimit = options?.maxPerDomain ?? 2;
    const scored = sources.map(({ searchResult, bm25Score, metadata, freshnessData }) => scoreSource({
        searchResult,
        query,
        bm25Score,
        metadata,
        freshnessData,
        factualQuery,
    }));
    return deduplicateByDomain(scored, perDomainLimit);
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Porter Stemmer — Lightweight implementation of the Porter stemming algorithm.
|
|
3
|
+
*
|
|
4
|
+
* Based on: Martin Porter, "An algorithm for suffix stripping", 1980.
|
|
5
|
+
* Reference: https://tartarus.org/martin/PorterStemmer/
|
|
6
|
+
*
|
|
7
|
+
* This is a well-tested, deterministic implementation with no external dependencies.
|
|
8
|
+
* It correctly handles all standard Porter stemmer rules including steps 1a-5b.
|
|
9
|
+
*/
|
|
10
|
+
/**
|
|
11
|
+
* Irregular verb forms → base form.
|
|
12
|
+
* Porter stemmer only handles regular morphology (-ed, -ing, -s).
|
|
13
|
+
* English has ~200 irregular verbs; we cover the most common ones.
|
|
14
|
+
* This table normalizes irregular forms before stemming so that
|
|
15
|
+
* "built" → "build" → stem("build") = "build" matches stem("build").
|
|
16
|
+
*
|
|
17
|
+
* Ambiguous words are intentionally excluded:
|
|
18
|
+
* "found" — could be find (past) OR establish (base form "found a company")
|
|
19
|
+
* "left" — could be leave (past) OR direction
|
|
20
|
+
* "bore"/"borne"/"born" — could be bear (past) OR bore=boring OR born=birth
|
|
21
|
+
* "bound" — could be bind (past) OR boundary (noun)
|
|
22
|
+
*/
|
|
23
|
+
export declare const IRREGULAR_FORMS: Record<string, string>;
|
|
24
|
+
/**
|
|
25
|
+
* Stem a single word using the Porter stemming algorithm.
|
|
26
|
+
*
|
|
27
|
+
* Returns the stemmed word (lowercase). Input is also lowercased.
|
|
28
|
+
* Words shorter than 3 characters are returned as-is.
|
|
29
|
+
*
|
|
30
|
+
* Irregular verb forms (e.g. "built", "ran", "spoke") are first normalized
|
|
31
|
+
* to their base form before Porter steps are applied, ensuring that
|
|
32
|
+
* stem("built") === stem("build"), stem("spoke") === stem("speak"), etc.
|
|
33
|
+
*/
|
|
34
|
+
export declare function stem(word: string): string;
|
|
35
|
+
/**
|
|
36
|
+
* Stem an array of tokens.
|
|
37
|
+
*/
|
|
38
|
+
export declare function stemTokens(tokens: string[]): string[];
|