@iflow-mcp/jakeliume-webpeel 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +313 -0
- package/dist/cache.d.ts +30 -0
- package/dist/cache.js +139 -0
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +411 -0
- package/dist/cli/commands/doctor.d.ts +37 -0
- package/dist/cli/commands/doctor.js +371 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1345 -0
- package/dist/cli/commands/guide.d.ts +2 -0
- package/dist/cli/commands/guide.js +183 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +840 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/monitor.d.ts +12 -0
- package/dist/cli/commands/monitor.js +197 -0
- package/dist/cli/commands/observe.d.ts +12 -0
- package/dist/cli/commands/observe.js +158 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +282 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +1021 -0
- package/dist/cli/commands/setup.d.ts +13 -0
- package/dist/cli/commands/setup.js +244 -0
- package/dist/cli/commands/skill.d.ts +15 -0
- package/dist/cli/commands/skill.js +195 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +806 -0
- package/dist/cli-auth.d.ts +75 -0
- package/dist/cli-auth.js +369 -0
- package/dist/cli.d.ts +17 -0
- package/dist/cli.js +99 -0
- package/dist/core/actions.d.ts +69 -0
- package/dist/core/actions.js +495 -0
- package/dist/core/agent.d.ts +98 -0
- package/dist/core/agent.js +558 -0
- package/dist/core/answer.d.ts +42 -0
- package/dist/core/answer.js +395 -0
- package/dist/core/application-tracker.d.ts +84 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/apply.d.ts +162 -0
- package/dist/core/apply.js +816 -0
- package/dist/core/auth-detection.d.ts +35 -0
- package/dist/core/auth-detection.js +358 -0
- package/dist/core/auto-extract.d.ts +82 -0
- package/dist/core/auto-extract.js +604 -0
- package/dist/core/auto-interact.d.ts +23 -0
- package/dist/core/auto-interact.js +246 -0
- package/dist/core/bm25-filter.d.ts +66 -0
- package/dist/core/bm25-filter.js +288 -0
- package/dist/core/branding.d.ts +54 -0
- package/dist/core/branding.js +234 -0
- package/dist/core/browser-fetch.d.ts +323 -0
- package/dist/core/browser-fetch.js +1600 -0
- package/dist/core/browser-pool.d.ts +91 -0
- package/dist/core/browser-pool.js +550 -0
- package/dist/core/budget.d.ts +42 -0
- package/dist/core/budget.js +324 -0
- package/dist/core/business-intel.d.ts +47 -0
- package/dist/core/business-intel.js +279 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +121 -0
- package/dist/core/cf-worker-proxy.d.ts +32 -0
- package/dist/core/cf-worker-proxy.js +87 -0
- package/dist/core/challenge-detection.d.ts +26 -0
- package/dist/core/challenge-detection.js +468 -0
- package/dist/core/change-tracking.d.ts +75 -0
- package/dist/core/change-tracking.js +276 -0
- package/dist/core/chunker.d.ts +46 -0
- package/dist/core/chunker.js +249 -0
- package/dist/core/chunking.d.ts +42 -0
- package/dist/core/chunking.js +181 -0
- package/dist/core/circuit-breaker.d.ts +44 -0
- package/dist/core/circuit-breaker.js +85 -0
- package/dist/core/content-pruner.d.ts +47 -0
- package/dist/core/content-pruner.js +425 -0
- package/dist/core/cookie-cache.d.ts +60 -0
- package/dist/core/cookie-cache.js +163 -0
- package/dist/core/crawl-checkpoint.d.ts +54 -0
- package/dist/core/crawl-checkpoint.js +104 -0
- package/dist/core/crawler.d.ts +84 -0
- package/dist/core/crawler.js +349 -0
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/deep-fetch.d.ts +74 -0
- package/dist/core/deep-fetch.js +405 -0
- package/dist/core/deep-research.d.ts +141 -0
- package/dist/core/deep-research.js +972 -0
- package/dist/core/design-analysis.d.ts +70 -0
- package/dist/core/design-analysis.js +490 -0
- package/dist/core/design-compare.d.ts +38 -0
- package/dist/core/design-compare.js +264 -0
- package/dist/core/diff.d.ts +61 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/dns-cache.d.ts +20 -0
- package/dist/core/dns-cache.js +198 -0
- package/dist/core/documents.d.ts +23 -0
- package/dist/core/documents.js +123 -0
- package/dist/core/domain-memory.d.ts +66 -0
- package/dist/core/domain-memory.js +163 -0
- package/dist/core/domain-verify.d.ts +40 -0
- package/dist/core/domain-verify.js +379 -0
- package/dist/core/engine-ranker.d.ts +112 -0
- package/dist/core/engine-ranker.js +395 -0
- package/dist/core/extract-inline.d.ts +38 -0
- package/dist/core/extract-inline.js +215 -0
- package/dist/core/extract-listings.d.ts +38 -0
- package/dist/core/extract-listings.js +461 -0
- package/dist/core/extract.d.ts +9 -0
- package/dist/core/extract.js +139 -0
- package/dist/core/fetch-cache.d.ts +57 -0
- package/dist/core/fetch-cache.js +95 -0
- package/dist/core/fetcher.d.ts +13 -0
- package/dist/core/fetcher.js +12 -0
- package/dist/core/google-cache.d.ts +29 -0
- package/dist/core/google-cache.js +180 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/hotel-search.d.ts +122 -0
- package/dist/core/hotel-search.js +382 -0
- package/dist/core/http-fetch.d.ts +72 -0
- package/dist/core/http-fetch.js +820 -0
- package/dist/core/human.d.ts +175 -0
- package/dist/core/human.js +680 -0
- package/dist/core/image-caption.d.ts +44 -0
- package/dist/core/image-caption.js +271 -0
- package/dist/core/jobs.d.ts +75 -0
- package/dist/core/jobs.js +634 -0
- package/dist/core/json-ld.d.ts +15 -0
- package/dist/core/json-ld.js +617 -0
- package/dist/core/language-detect.d.ts +18 -0
- package/dist/core/language-detect.js +135 -0
- package/dist/core/links.d.ts +10 -0
- package/dist/core/links.js +44 -0
- package/dist/core/llm-extract.d.ts +71 -0
- package/dist/core/llm-extract.js +507 -0
- package/dist/core/llm-provider.d.ts +100 -0
- package/dist/core/llm-provider.js +702 -0
- package/dist/core/local-search.d.ts +60 -0
- package/dist/core/local-search.js +308 -0
- package/dist/core/logger.d.ts +28 -0
- package/dist/core/logger.js +104 -0
- package/dist/core/map.d.ts +33 -0
- package/dist/core/map.js +127 -0
- package/dist/core/markdown.d.ts +92 -0
- package/dist/core/markdown.js +809 -0
- package/dist/core/metadata.d.ts +34 -0
- package/dist/core/metadata.js +422 -0
- package/dist/core/observe.d.ts +113 -0
- package/dist/core/observe.js +395 -0
- package/dist/core/ocr.d.ts +12 -0
- package/dist/core/ocr.js +33 -0
- package/dist/core/paginate.d.ts +31 -0
- package/dist/core/paginate.js +106 -0
- package/dist/core/pdf.d.ts +8 -0
- package/dist/core/pdf.js +25 -0
- package/dist/core/peel-tls.d.ts +25 -0
- package/dist/core/peel-tls.js +220 -0
- package/dist/core/pipeline.d.ts +132 -0
- package/dist/core/pipeline.js +1666 -0
- package/dist/core/profiles.d.ts +61 -0
- package/dist/core/profiles.js +350 -0
- package/dist/core/prompt-guard.d.ts +30 -0
- package/dist/core/prompt-guard.js +119 -0
- package/dist/core/proxy-config.d.ts +90 -0
- package/dist/core/proxy-config.js +172 -0
- package/dist/core/quick-answer.d.ts +53 -0
- package/dist/core/quick-answer.js +833 -0
- package/dist/core/rate-governor.d.ts +80 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/readability.d.ts +57 -0
- package/dist/core/readability.js +533 -0
- package/dist/core/research.d.ts +66 -0
- package/dist/core/research.js +270 -0
- package/dist/core/retry.d.ts +60 -0
- package/dist/core/retry.js +119 -0
- package/dist/core/safe-browsing.d.ts +30 -0
- package/dist/core/safe-browsing.js +206 -0
- package/dist/core/schema-extraction.d.ts +66 -0
- package/dist/core/schema-extraction.js +352 -0
- package/dist/core/schema-postprocess.d.ts +32 -0
- package/dist/core/schema-postprocess.js +469 -0
- package/dist/core/schema-templates.d.ts +19 -0
- package/dist/core/schema-templates.js +143 -0
- package/dist/core/screenshot.d.ts +224 -0
- package/dist/core/screenshot.js +207 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +243 -0
- package/dist/core/search-provider.js +1629 -0
- package/dist/core/searxng-provider.d.ts +35 -0
- package/dist/core/searxng-provider.js +105 -0
- package/dist/core/selective-evidence.d.ts +151 -0
- package/dist/core/selective-evidence.js +389 -0
- package/dist/core/site-search.d.ts +44 -0
- package/dist/core/site-search.js +252 -0
- package/dist/core/sitemap.d.ts +23 -0
- package/dist/core/sitemap.js +105 -0
- package/dist/core/source-credibility.d.ts +29 -0
- package/dist/core/source-credibility.js +584 -0
- package/dist/core/source-scoring.d.ts +166 -0
- package/dist/core/source-scoring.js +396 -0
- package/dist/core/stemmer.d.ts +38 -0
- package/dist/core/stemmer.js +509 -0
- package/dist/core/strategies.d.ts +104 -0
- package/dist/core/strategies.js +1044 -0
- package/dist/core/strategy-hooks.d.ts +145 -0
- package/dist/core/strategy-hooks.js +74 -0
- package/dist/core/structured-extract.d.ts +43 -0
- package/dist/core/structured-extract.js +550 -0
- package/dist/core/summarize.d.ts +17 -0
- package/dist/core/summarize.js +78 -0
- package/dist/core/synonyms.d.ts +42 -0
- package/dist/core/synonyms.js +184 -0
- package/dist/core/system-monitor.d.ts +61 -0
- package/dist/core/system-monitor.js +133 -0
- package/dist/core/table-format.d.ts +30 -0
- package/dist/core/table-format.js +146 -0
- package/dist/core/threat-feeds.d.ts +23 -0
- package/dist/core/threat-feeds.js +104 -0
- package/dist/core/timing.d.ts +21 -0
- package/dist/core/timing.js +33 -0
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/core/user-agents.d.ts +82 -0
- package/dist/core/user-agents.js +239 -0
- package/dist/core/vertical-search.d.ts +54 -0
- package/dist/core/vertical-search.js +158 -0
- package/dist/core/watch-manager.d.ts +175 -0
- package/dist/core/watch-manager.js +416 -0
- package/dist/core/watch.d.ts +101 -0
- package/dist/core/watch.js +389 -0
- package/dist/core/youtube.d.ts +130 -0
- package/dist/core/youtube.js +1175 -0
- package/dist/ee/challenge-re-export.d.ts +1 -0
- package/dist/ee/challenge-re-export.js +1 -0
- package/dist/ee/challenge-solver.d.ts +72 -0
- package/dist/ee/challenge-solver.js +720 -0
- package/dist/ee/domain-extractors.d.ts +8 -0
- package/dist/ee/domain-extractors.js +8 -0
- package/dist/ee/domain-intel.d.ts +16 -0
- package/dist/ee/domain-intel.js +133 -0
- package/dist/ee/extractors/allrecipes.d.ts +2 -0
- package/dist/ee/extractors/allrecipes.js +120 -0
- package/dist/ee/extractors/amazon.d.ts +2 -0
- package/dist/ee/extractors/amazon.js +78 -0
- package/dist/ee/extractors/arxiv.d.ts +2 -0
- package/dist/ee/extractors/arxiv.js +137 -0
- package/dist/ee/extractors/bestbuy.d.ts +2 -0
- package/dist/ee/extractors/bestbuy.js +78 -0
- package/dist/ee/extractors/carscom.d.ts +2 -0
- package/dist/ee/extractors/carscom.js +121 -0
- package/dist/ee/extractors/coingecko.d.ts +2 -0
- package/dist/ee/extractors/coingecko.js +134 -0
- package/dist/ee/extractors/craigslist.d.ts +2 -0
- package/dist/ee/extractors/craigslist.js +92 -0
- package/dist/ee/extractors/devto.d.ts +2 -0
- package/dist/ee/extractors/devto.js +135 -0
- package/dist/ee/extractors/ebay.d.ts +2 -0
- package/dist/ee/extractors/ebay.js +90 -0
- package/dist/ee/extractors/espn.d.ts +2 -0
- package/dist/ee/extractors/espn.js +260 -0
- package/dist/ee/extractors/etsy.d.ts +2 -0
- package/dist/ee/extractors/etsy.js +52 -0
- package/dist/ee/extractors/facebook.d.ts +2 -0
- package/dist/ee/extractors/facebook.js +46 -0
- package/dist/ee/extractors/github.d.ts +2 -0
- package/dist/ee/extractors/github.js +196 -0
- package/dist/ee/extractors/google-flights.d.ts +2 -0
- package/dist/ee/extractors/google-flights.js +176 -0
- package/dist/ee/extractors/hackernews.d.ts +2 -0
- package/dist/ee/extractors/hackernews.js +147 -0
- package/dist/ee/extractors/imdb.d.ts +2 -0
- package/dist/ee/extractors/imdb.js +172 -0
- package/dist/ee/extractors/index.d.ts +26 -0
- package/dist/ee/extractors/index.js +247 -0
- package/dist/ee/extractors/instagram.d.ts +2 -0
- package/dist/ee/extractors/instagram.js +102 -0
- package/dist/ee/extractors/kalshi.d.ts +2 -0
- package/dist/ee/extractors/kalshi.js +121 -0
- package/dist/ee/extractors/kayak-cars.d.ts +2 -0
- package/dist/ee/extractors/kayak-cars.js +270 -0
- package/dist/ee/extractors/linkedin.d.ts +2 -0
- package/dist/ee/extractors/linkedin.js +113 -0
- package/dist/ee/extractors/medium.d.ts +2 -0
- package/dist/ee/extractors/medium.js +130 -0
- package/dist/ee/extractors/news.d.ts +4 -0
- package/dist/ee/extractors/news.js +173 -0
- package/dist/ee/extractors/npm.d.ts +2 -0
- package/dist/ee/extractors/npm.js +86 -0
- package/dist/ee/extractors/pdf.d.ts +2 -0
- package/dist/ee/extractors/pdf.js +108 -0
- package/dist/ee/extractors/pinterest.d.ts +2 -0
- package/dist/ee/extractors/pinterest.js +34 -0
- package/dist/ee/extractors/polymarket.d.ts +2 -0
- package/dist/ee/extractors/polymarket.js +358 -0
- package/dist/ee/extractors/producthunt.d.ts +2 -0
- package/dist/ee/extractors/producthunt.js +88 -0
- package/dist/ee/extractors/pubmed.d.ts +2 -0
- package/dist/ee/extractors/pubmed.js +162 -0
- package/dist/ee/extractors/pypi.d.ts +2 -0
- package/dist/ee/extractors/pypi.js +80 -0
- package/dist/ee/extractors/reddit.d.ts +2 -0
- package/dist/ee/extractors/reddit.js +438 -0
- package/dist/ee/extractors/redfin.d.ts +2 -0
- package/dist/ee/extractors/redfin.js +156 -0
- package/dist/ee/extractors/semanticscholar.d.ts +2 -0
- package/dist/ee/extractors/semanticscholar.js +131 -0
- package/dist/ee/extractors/shared.d.ts +12 -0
- package/dist/ee/extractors/shared.js +76 -0
- package/dist/ee/extractors/soundcloud.d.ts +2 -0
- package/dist/ee/extractors/soundcloud.js +34 -0
- package/dist/ee/extractors/sportsbetting.d.ts +2 -0
- package/dist/ee/extractors/sportsbetting.js +37 -0
- package/dist/ee/extractors/spotify.d.ts +2 -0
- package/dist/ee/extractors/spotify.js +34 -0
- package/dist/ee/extractors/stackoverflow.d.ts +2 -0
- package/dist/ee/extractors/stackoverflow.js +61 -0
- package/dist/ee/extractors/substack.d.ts +2 -0
- package/dist/ee/extractors/substack.js +115 -0
- package/dist/ee/extractors/substackroot.d.ts +2 -0
- package/dist/ee/extractors/substackroot.js +46 -0
- package/dist/ee/extractors/tiktok.d.ts +2 -0
- package/dist/ee/extractors/tiktok.js +29 -0
- package/dist/ee/extractors/tradingview.d.ts +2 -0
- package/dist/ee/extractors/tradingview.js +182 -0
- package/dist/ee/extractors/twitch.d.ts +2 -0
- package/dist/ee/extractors/twitch.js +36 -0
- package/dist/ee/extractors/twitter.d.ts +2 -0
- package/dist/ee/extractors/twitter.js +327 -0
- package/dist/ee/extractors/types.d.ts +14 -0
- package/dist/ee/extractors/types.js +1 -0
- package/dist/ee/extractors/walmart.d.ts +2 -0
- package/dist/ee/extractors/walmart.js +50 -0
- package/dist/ee/extractors/weather.d.ts +2 -0
- package/dist/ee/extractors/weather.js +133 -0
- package/dist/ee/extractors/wikipedia.d.ts +4 -0
- package/dist/ee/extractors/wikipedia.js +235 -0
- package/dist/ee/extractors/yelp.d.ts +2 -0
- package/dist/ee/extractors/yelp.js +216 -0
- package/dist/ee/extractors/youtube.d.ts +2 -0
- package/dist/ee/extractors/youtube.js +189 -0
- package/dist/ee/extractors/zillow.d.ts +54 -0
- package/dist/ee/extractors/zillow.js +247 -0
- package/dist/ee/extractors-re-export.d.ts +1 -0
- package/dist/ee/extractors-re-export.js +1 -0
- package/dist/ee/premium-hooks.d.ts +20 -0
- package/dist/ee/premium-hooks.js +50 -0
- package/dist/ee/spa-detection.d.ts +2 -0
- package/dist/ee/spa-detection.js +2 -0
- package/dist/ee/stability.d.ts +4 -0
- package/dist/ee/stability.js +29 -0
- package/dist/ee/swr-cache.d.ts +14 -0
- package/dist/ee/swr-cache.js +34 -0
- package/dist/index.d.ts +143 -0
- package/dist/index.js +291 -0
- package/dist/integrations/index.d.ts +2 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/langchain.d.ts +64 -0
- package/dist/integrations/langchain.js +115 -0
- package/dist/integrations/llamaindex.d.ts +50 -0
- package/dist/integrations/llamaindex.js +91 -0
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +395 -0
- package/dist/mcp/handlers/extract.d.ts +7 -0
- package/dist/mcp/handlers/extract.js +135 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +63 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +40 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/observe.d.ts +8 -0
- package/dist/mcp/handlers/observe.js +37 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +78 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +7 -0
- package/dist/mcp/server.js +108 -0
- package/dist/mcp/smart-router.d.ts +23 -0
- package/dist/mcp/smart-router.js +178 -0
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +632 -0
- package/dist/server/auth-store.d.ts +28 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/bull-queues.d.ts +60 -0
- package/dist/server/bull-queues.js +90 -0
- package/dist/server/email-service.d.ts +55 -0
- package/dist/server/email-service.js +291 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/audit-log.d.ts +14 -0
- package/dist/server/middleware/audit-log.js +73 -0
- package/dist/server/middleware/auth.d.ts +35 -0
- package/dist/server/middleware/auth.js +225 -0
- package/dist/server/middleware/rate-limit.d.ts +50 -0
- package/dist/server/middleware/rate-limit.js +270 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +201 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +146 -0
- package/dist/server/pg-auth-store.js +576 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +79 -0
- package/dist/server/routes/admin-active.d.ts +7 -0
- package/dist/server/routes/admin-active.js +120 -0
- package/dist/server/routes/admin-stats.d.ts +7 -0
- package/dist/server/routes/admin-stats.js +176 -0
- package/dist/server/routes/agent.d.ts +24 -0
- package/dist/server/routes/agent.js +480 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +295 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cache-warm.d.ts +25 -0
- package/dist/server/routes/cache-warm.js +212 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/crawl.d.ts +13 -0
- package/dist/server/routes/crawl.js +287 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/deep-research.d.ts +11 -0
- package/dist/server/routes/deep-research.js +232 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +14 -0
- package/dist/server/routes/extract.js +325 -0
- package/dist/server/routes/feed.d.ts +15 -0
- package/dist/server/routes/feed.js +311 -0
- package/dist/server/routes/fetch-queue.d.ts +13 -0
- package/dist/server/routes/fetch-queue.js +357 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +1274 -0
- package/dist/server/routes/go.d.ts +14 -0
- package/dist/server/routes/go.js +81 -0
- package/dist/server/routes/health.d.ts +11 -0
- package/dist/server/routes/health.js +141 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +574 -0
- package/dist/server/routes/map.d.ts +11 -0
- package/dist/server/routes/map.js +116 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +197 -0
- package/dist/server/routes/metrics.d.ts +37 -0
- package/dist/server/routes/metrics.js +149 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/reader.d.ts +18 -0
- package/dist/server/routes/reader.js +192 -0
- package/dist/server/routes/research.d.ts +14 -0
- package/dist/server/routes/research.js +482 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +820 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +874 -0
- package/dist/server/routes/session.d.ts +17 -0
- package/dist/server/routes/session.js +548 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +102 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +72 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
- package/dist/server/routes/smart-search/handlers/general.js +717 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +1309 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +154 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +546 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +264 -0
- package/dist/server/routes/smart-search/llm.d.ts +16 -0
- package/dist/server/routes/smart-search/llm.js +70 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +81 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +20 -0
- package/dist/server/routes/smart-search/utils.js +146 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +296 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/usage.d.ts +9 -0
- package/dist/server/routes/usage.js +279 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1867 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +14 -0
- package/dist/server/sentry.js +104 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/dist/types.d.ts +552 -0
- package/dist/types.js +39 -0
- package/llms.txt +105 -0
- package/package.json +189 -0
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Smart content distillation for WebPeel
|
|
3
|
+
*
|
|
4
|
+
* Intelligently compresses content to fit within a token budget using
|
|
5
|
+
* heuristic-based techniques — no LLM required.
|
|
6
|
+
*
|
|
7
|
+
* This is NOT simple truncation: it prioritises information-dense content
|
|
8
|
+
* and progressively removes lower-value sections while preserving structure.
|
|
9
|
+
*
|
|
10
|
+
* @module budget
|
|
11
|
+
*/
|
|
12
|
+
import { estimateTokens } from './markdown.js';
|
|
13
|
+
/* ------------------------------------------------------------------ */
|
|
14
|
+
/* Constants */
|
|
15
|
+
/* ------------------------------------------------------------------ */
|
|
16
|
+
/**
|
|
17
|
+
* Heading patterns that indicate low-value boilerplate sections.
|
|
18
|
+
* When a heading matches, its entire section is removed.
|
|
19
|
+
*/
|
|
20
|
+
const BOILERPLATE_HEADING_PATTERNS = [
|
|
21
|
+
/^#{1,3}\s*(cookie(s| notice| policy| banner| consent)?|privacy( policy)?|terms( of (use|service))?|disclaimer|copyright)/i,
|
|
22
|
+
/^#{1,3}\s*(about us|contact( us)?|subscribe|newsletter|follow us|social media)/i,
|
|
23
|
+
/^#{1,3}\s*(related posts?|you may also|more from|popular posts?|trending|recent posts?)/i,
|
|
24
|
+
/^#{1,3}\s*(comments?|leave a (comment|reply)|tags?|categories?|share this)/i,
|
|
25
|
+
/^#{1,3}\s*(table of contents?|toc|index)/i,
|
|
26
|
+
/^#{1,3}\s*(advertisement|sponsored|promoted|ad(s| section)?)/i,
|
|
27
|
+
/^#{1,3}\s*(navigation|menu|sidebar|footer|header)/i,
|
|
28
|
+
/^#{1,3}\s*(sign[\s-]*up|log[\s-]*in|register|create( an)? account|get started)/i,
|
|
29
|
+
];
|
|
30
|
+
/** Maximum data rows to keep when compressing a markdown table */
|
|
31
|
+
// Counts data rows only: per the compressTables doc comment, the header and separator rows are kept in addition to these.
const MAX_TABLE_ROWS = 3;
|
|
32
|
+
/** Tokens per listing item used for budget estimation in extract-all mode */
|
|
33
|
+
// budgetListings divides the token budget by this value (rounding down) to decide how many items fit.
export const TOKENS_PER_LISTING_ITEM = 50;
|
|
34
|
+
/* ------------------------------------------------------------------ */
|
|
35
|
+
/* Public API */
|
|
36
|
+
/* ------------------------------------------------------------------ */
|
|
37
|
+
/**
 * Shrink `content` until its estimated token count fits within `budget`.
 *
 * The input is returned untouched when it is empty, when `budget` is zero or
 * negative, or when `estimateTokens` already reports it within budget.
 * Otherwise JSON input is handed to `distillJson` and every other format goes
 * through the markdown/text pipeline, which applies these steps in order and
 * stops as soon as the budget is met:
 *   1. Remove image markdown — `![alt](url)` → keep meaningful alt text
 *   2. Remove boilerplate sections (cookie banners, nav headings, etc.)
 *   3. Compress tables to MAX_TABLE_ROWS data rows
 *   4. Collapse redundant whitespace
 *   5. Remove low information-density paragraphs
 *   6. Hard-truncate with notice as last resort
 *
 * @param content  The content string to distill
 * @param budget   Maximum token budget (rough: 1 token ≈ 4 chars)
 * @param format   Content format: 'markdown' | 'text' | 'json'
 * @returns        Distilled content within the budget
 */
export function distillToBudget(content, budget, format) {
    // Short-circuit order matters: tokens are only estimated for non-empty
    // content with a positive budget.
    const alreadyFits = !content || budget <= 0 || estimateTokens(content) <= budget;
    if (alreadyFits) {
        return content;
    }
    return format === 'json'
        ? distillJson(content, budget)
        : distillMarkdown(content, budget);
}
|
|
63
|
+
/**
 * Work out how many listing items a token budget can pay for.
 *
 * Each item is assumed to cost TOKENS_PER_LISTING_ITEM tokens. At least one
 * item is always allowed, however small the budget is.
 *
 * @param totalItems  Total available items
 * @param budget      Token budget
 * @returns { maxItems, truncated, totalAvailable } — `maxItems` is the number
 *          of items to emit, `truncated` tells whether that is fewer than
 *          `totalItems`, and `totalAvailable` echoes `totalItems`.
 */
export function budgetListings(totalItems, budget) {
    const affordable = Math.floor(budget / TOKENS_PER_LISTING_ITEM);
    const cap = Math.max(1, affordable);
    const overBudget = cap < totalItems;
    const maxItems = overBudget ? cap : totalItems;
    return {
        maxItems,
        truncated: overBudget,
        totalAvailable: totalItems,
    };
}
|
|
79
|
+
/* ------------------------------------------------------------------ */
|
|
80
|
+
/* Markdown / text distillation */
|
|
81
|
+
/* ------------------------------------------------------------------ */
|
|
82
|
+
/**
 * Run the markdown/text distillation pipeline.
 *
 * Each stage runs only if the text is still over budget when that stage is
 * reached. Stages are ordered from least to most destructive. The final
 * result is trimmed.
 *
 * @param content  Markdown or plain text to shrink
 * @param budget   Maximum token budget
 * @returns        The distilled, trimmed text
 */
function distillMarkdown(content, budget) {
    let distilled = content;
    const overBudget = () => estimateTokens(distilled) > budget;
    // 1. Images carry the least information
    if (overBudget())
        distilled = removeImages(distilled);
    // 2. Boilerplate sections
    if (overBudget())
        distilled = removeBoilerplateSections(distilled);
    // 3. Long tables
    if (overBudget())
        distilled = compressTables(distilled);
    // 4. Redundant whitespace
    if (overBudget())
        distilled = compressWhitespace(distilled);
    // 5. Low-density paragraphs
    if (overBudget())
        distilled = removeWeakParagraphs(distilled, budget);
    // 6. Last resort: hard truncation with a notice
    if (overBudget())
        distilled = hardTruncate(distilled, budget);
    return distilled.trim();
}
|
|
110
|
+
/**
 * Strip markdown image syntax from the content.
 *
 * An image whose trimmed alt text is non-empty and shorter than 60 characters
 * is replaced by the label `[Image: <alt>]`; any other image is dropped
 * entirely. Afterwards, any literal empty `[Image: ]` label (plus the
 * whitespace following it) is removed as well.
 *
 * @param content  Markdown text
 * @returns        The text with image markdown replaced or removed
 */
function removeImages(content) {
    const imagePattern = /!\[([^\]]*)\]\([^)]+\)/g;
    const emptyLabelPattern = /\[Image: \]\s*/g;
    const labelled = content.replace(imagePattern, (_match, altText) => {
        const label = altText.trim();
        // Missing or overly long alt text is treated as decorative.
        if (label.length === 0 || label.length >= 60) {
            return '';
        }
        return `[Image: ${label}]`;
    });
    return labelled.replace(emptyLabelPattern, '');
}
|
|
123
|
+
/**
 * Remove boilerplate sections by matching heading patterns.
 *
 * When a heading matches one of BOILERPLATE_HEADING_PATTERNS, that heading and
 * everything up to (but not including) the next heading of equal or higher
 * importance is removed.
 *
 * Lines inside fenced code blocks (``` or ~~~) are never interpreted as
 * headings, so a `# comment` line in a shell or Python sample can neither
 * start nor end a skipped section. A fenced block that lies inside a skipped
 * section is removed along with it.
 *
 * @param content  Markdown text to filter
 * @returns        The text with boilerplate sections removed
 */
function removeBoilerplateSections(content) {
    const fencePattern = /^ {0,3}(`{3,}|~{3,})/;
    const headingPattern = /^(#{1,6})\s/;
    const lines = content.split('\n');
    const result = [];
    let skipping = false;
    let skipDepth = 0;
    // Marker that opened the current fenced code block, or null outside one.
    let openFence = null;
    for (const line of lines) {
        const fenceMatch = line.match(fencePattern);
        if (fenceMatch) {
            const marker = fenceMatch[1];
            if (openFence === null) {
                openFence = marker;
            }
            else if (marker[0] === openFence[0] &&
                marker.length >= openFence.length &&
                line.trim() === marker) {
                // A closing fence uses the same character, is at least as long
                // as the opener, and carries no info string.
                openFence = null;
            }
        }
        else if (openFence === null) {
            const headingMatch = line.match(headingPattern);
            if (headingMatch) {
                const depth = headingMatch[1].length;
                // Stop skipping when we encounter a heading of equal or higher priority
                if (skipping && depth <= skipDepth) {
                    skipping = false;
                }
                // Check if this heading starts a boilerplate section
                if (!skipping && BOILERPLATE_HEADING_PATTERNS.some(p => p.test(line))) {
                    skipping = true;
                    skipDepth = depth;
                    continue;
                }
            }
        }
        if (!skipping) {
            result.push(line);
        }
    }
    return result.join('\n');
}
|
|
155
|
+
/**
 * Cap every markdown table at its header row, one separator row and
 * MAX_TABLE_ROWS data rows.
 *
 * The first data row over the limit is replaced by a single "rows omitted"
 * note row; later rows of the same table are dropped. A line counts as a
 * table row when, after trimming, it starts and ends with `|`. Any other line
 * ends the current table and is passed through unchanged.
 *
 * @param content Markdown text.
 * @returns The text with long tables shortened.
 */
function compressTables(content) {
    const output = [];
    // State of the table currently being read; null while outside a table.
    let table = null;
    for (const rawLine of content.split('\n')) {
        const text = rawLine.trim();
        const looksLikeRow = text.startsWith('|') && text.endsWith('|');
        if (!looksLikeRow) {
            table = null;
            output.push(rawLine);
            continue;
        }
        if (table === null) {
            // New table begins
            table = { sawHeader: false, sawSeparator: false, kept: 0, noted: false };
        }
        if (!table.sawHeader) {
            // The first row of a table is always kept as its header.
            output.push(rawLine);
            table.sawHeader = true;
            continue;
        }
        if (/^\|[\s|:-]+\|$/.test(text)) {
            // Keep only the first separator row; repeats are dropped.
            if (!table.sawSeparator) {
                output.push(rawLine);
                table.sawSeparator = true;
            }
            continue;
        }
        if (table.kept < MAX_TABLE_ROWS) {
            output.push(rawLine);
            table.kept += 1;
        }
        else if (!table.noted) {
            output.push(`| ... | *(${MAX_TABLE_ROWS}+ rows — additional rows omitted)* | ... |`);
            table.noted = true;
        }
        // Further rows silently dropped
    }
    return output.join('\n');
}
|
|
206
|
+
/**
 * Collapse runs of two or more blank lines into a single blank line.
 *
 * A line counts as blank when it is empty or contains only spaces/tabs, so
 * "visually empty" lines left behind by HTML-to-markdown conversion are
 * collapsed too. Indentation of the following non-blank line is preserved.
 *
 * @param content Text to compact.
 * @returns The text with vertical whitespace collapsed.
 */
function compressWhitespace(content) {
    // One newline ending the previous line, then 2+ blank (possibly
    // whitespace-only) lines -> exactly one blank line.
    return content.replace(/\n(?:[ \t]*\n){2,}/g, '\n\n');
}
|
|
212
|
+
/**
 * Remove paragraphs scored as low information-density until within budget.
 *
 * Paragraphs are the pieces obtained by splitting on a blank line. They are
 * removed weakest-first, stopping as soon as the text fits `budget` or the
 * next candidate scores 8 or more.
 *
 * Scoring heuristics:
 * - Word count is the base score
 * - Very short paragraphs (< 50 chars) are heavily penalised
 * - Unusual avg word length penalised (nav menus, link lists)
 * - Long bullet lists scored slightly lower
 * - Headings, HTML comments and code blocks are never removed. Fence state is
 *   tracked across paragraphs, so every fragment of a fenced code block that
 *   contains blank lines is protected, not just the one holding the opening
 *   fence.
 *
 * @param content Markdown text.
 * @param budget  Maximum number of tokens, as measured by `estimateTokens`.
 * @returns The text with the weakest paragraphs removed.
 */
function removeWeakParagraphs(content, budget) {
    const paragraphs = content.split('\n\n');
    // True while the paragraphs being visited lie inside an open code fence.
    let insideFence = false;
    const scored = paragraphs.map((para, i) => {
        const trimmed = para.trim();
        const fenceLines = para.split('\n').filter(l => /^\s*(?:```|~~~)/.test(l)).length;
        const partOfCodeBlock = insideFence || fenceLines > 0 || trimmed.startsWith('```');
        // An odd number of fence lines opens or closes a block.
        if (fenceLines % 2 === 1) {
            insideFence = !insideFence;
        }
        const isHeading = /^#{1,6}\s/.test(trimmed);
        const isHtmlComment = trimmed.startsWith('<!--');
        // Never remove structural elements
        if (isHeading || partOfCodeBlock || isHtmlComment) {
            return { para, score: Number.MAX_SAFE_INTEGER, i };
        }
        // Strip markdown formatting for text analysis
        const textOnly = trimmed.replace(/[#*_\[\]\(\)\-`|>~]/g, '');
        const words = textOnly.split(/\s+/).filter(w => w.length > 0);
        let score = words.length;
        // Heavily penalise very short paragraphs (likely nav labels / single words)
        if (textOnly.length < 50)
            score *= 0.15;
        // Penalise unusual avg word lengths (short = icon labels, long = data URIs)
        const avgWordLen = words.length > 0 ? textOnly.length / words.length : 0;
        if (avgWordLen < 3 || avgWordLen > 15)
            score *= 0.4;
        // Slightly penalise long bullet lists (repetitive structure)
        const lines = trimmed.split('\n');
        const bulletLines = lines.filter(l => /^[-*]\s/.test(l.trim()));
        if (bulletLines.length > 3 && bulletLines.length === lines.length) {
            score *= 0.7;
        }
        return { para, score, i };
    });
    // Sort ascending — weakest paragraphs first
    const byScore = [...scored].sort((a, b) => a.score - b.score);
    const removed = new Set();
    let current = content;
    for (const item of byScore) {
        if (estimateTokens(current) <= budget)
            break;
        // Don't remove paragraphs with reasonable content
        if (item.score >= 8)
            break;
        removed.add(item.i);
        // Rebuild in original document order from the surviving paragraphs.
        current = scored
            .filter(s => !removed.has(s.i))
            .map(s => s.para)
            .join('\n\n');
    }
    return current;
}
|
|
270
|
+
/**
 * Hard-truncate at a clean line boundary, appending a notice.
 *
 * The character limit is `(budget - 15) * 4`, floored at 0: 15 tokens are
 * reserved for the notice, and each token is taken to be 4 characters.
 * Content already within the limit is returned unchanged. Otherwise the text
 * is cut at the last newline at or before the limit; if there is none (or it
 * is the very first character) the text is cut at the limit itself, backing
 * off one code unit when that would split a surrogate pair.
 *
 * Used as the last resort of markdown distillation and as the fallback for
 * JSON that cannot be shortened structurally.
 *
 * @param content Text to truncate.
 * @param budget  Token budget.
 * @returns The original text, or the truncated text followed by
 *          "\n\n[Content distilled to fit budget]".
 */
function hardTruncate(content, budget) {
    // Leave ~15 tokens for the truncation notice. Flooring keeps the limit a
    // valid string index even for a fractional budget.
    const maxChars = Math.max(Math.floor((budget - 15) * 4), 0);
    if (content.length <= maxChars)
        return content;
    // Find the last newline at or before the character limit
    let cut = content.lastIndexOf('\n', maxChars);
    if (cut <= 0) {
        // No usable newline — hard cut
        cut = maxChars;
        // Never leave a lone high surrogate at the end of the output.
        const lastUnit = cut > 0 ? content.charCodeAt(cut - 1) : 0;
        if (lastUnit >= 0xd800 && lastUnit <= 0xdbff)
            cut -= 1;
    }
    return content.slice(0, cut).trimEnd() + '\n\n[Content distilled to fit budget]';
}
|
|
287
|
+
/* ------------------------------------------------------------------ */
|
|
288
|
+
/* JSON distillation */
|
|
289
|
+
/* ------------------------------------------------------------------ */
|
|
290
|
+
/**
 * Distill JSON content so its pretty-printed form fits `budget` tokens.
 *
 * - Arrays: binary-search for the longest prefix of items that fits, and
 *   return that prefix pretty-printed (possibly `[]`).
 * - Any other JSON value: return it pretty-printed if it fits, otherwise
 *   fall back to text truncation.
 * - Anything that throws along the way (including invalid JSON): truncate
 *   the raw input as plain text.
 *
 * @param content JSON text.
 * @param budget  Maximum number of tokens, as measured by `estimateTokens`.
 * @returns The distilled text.
 */
function distillJson(content, budget) {
    try {
        const data = JSON.parse(content);
        const pretty = (value) => JSON.stringify(value, null, 2);
        if (!Array.isArray(data)) {
            // Non-array JSON — fall back to text truncation
            const text = pretty(data);
            return estimateTokens(text) <= budget ? text : hardTruncate(text, budget);
        }
        // Binary search for max items that fit within budget. `fitting` is the
        // largest prefix length known to fit, `upper` the largest that still might.
        let fitting = 0;
        let upper = data.length;
        while (fitting < upper) {
            // Round up so the probe is always > fitting and the loop makes progress.
            const probe = Math.ceil((fitting + upper) / 2);
            if (estimateTokens(pretty(data.slice(0, probe))) <= budget) {
                fitting = probe;
            }
            else {
                upper = probe - 1;
            }
        }
        return pretty(data.slice(0, fitting));
    }
    catch {
        // Invalid JSON — treat as plain text
        return hardTruncate(content, budget);
    }
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* business-intel.ts — Extract structured business intelligence from a URL.
|
|
3
|
+
*
|
|
4
|
+
* Uses peel() to fetch the website, then extracts:
|
|
5
|
+
* - Name, description, industry from schema.org + OG tags
|
|
6
|
+
* - Products and pricing from /pricing and /plans pages
|
|
7
|
+
* - Tech stack from headers and script patterns
|
|
8
|
+
* - Social media links
|
|
9
|
+
* - Review aggregates
|
|
10
|
+
*/
|
|
11
|
+
/**
 * Structured facts extracted about a business from its website.
 * Every field is optional: it is present only when the corresponding
 * signal was found.
 */
export interface BusinessIntel {
    /** Business name (schema.org JSON-LD, falling back to Open Graph). */
    name?: string;
    /** Short description (schema.org JSON-LD, falling back to meta tags). */
    description?: string;
    /** Best-matching industry label derived from keyword matches. */
    industry?: string;
    /** Product / feature lines picked up from the homepage. */
    products?: string[];
    /** Plan names paired with the price text found near them. */
    pricing?: Array<{
        plan: string;
        price: string;
    }>;
    /** Aggregate review ratings. */
    reviews?: Array<{
        source: string;
        rating: number;
        count: number;
    }>;
    /** Social media profile links. */
    socialMedia?: Array<{
        platform: string;
        url: string;
    }>;
    /** Names of detected technologies. */
    techStack?: string[];
    /** Employee count or range, as text. */
    employees?: string;
    /** Founding date, as text. */
    founded?: string;
}
|
|
33
|
+
/**
 * Extract structured business intelligence from a website URL.
 *
 * Fetches the homepage and optionally the /pricing page, then extracts
 * structured data including tech stack, social media, pricing, and more.
 *
 * @param url Website URL to analyse.
 * @returns A promise resolving to the extracted {@link BusinessIntel};
 *          fields for which nothing was found are omitted.
 *
 * @example
 * ```typescript
 * const intel = await getBusinessIntel('https://stripe.com');
 * console.log(intel.name);       // "Stripe"
 * console.log(intel.techStack);  // ["React", "Cloudflare", ...]
 * console.log(intel.pricing);    // [{plan: "Starter", price: "$0"}, ...]
 * ```
 */
export declare function getBusinessIntel(url: string): Promise<BusinessIntel>;
|
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* business-intel.ts — Extract structured business intelligence from a URL.
|
|
3
|
+
*
|
|
4
|
+
* Uses peel() to fetch the website, then extracts:
|
|
5
|
+
* - Name, description, industry from schema.org + OG tags
|
|
6
|
+
* - Products and pricing from /pricing and /plans pages
|
|
7
|
+
* - Tech stack from headers and script patterns
|
|
8
|
+
* - Social media links
|
|
9
|
+
* - Review aggregates
|
|
10
|
+
*/
|
|
11
|
+
import { peel } from '../index.js';
|
|
12
|
+
// ─── Tech stack detectors ─────────────────────────────────────────────────
|
|
13
|
+
/**
 * Technology fingerprints used by `detectTechStack`.
 *
 * Each entry has the technology's display `name`, a case-insensitive
 * `pattern`, and a `type` selecting what the pattern is tested against:
 * `'script'` entries are matched against page content, `'header'` entries
 * against the flattened response headers.
 */
const TECH_PATTERNS = [
    // Frontend frameworks
    { name: 'React', pattern: /react(?:\.min)?\.js|__reactFiber|react-dom/i, type: 'script' },
    { name: 'Vue.js', pattern: /vue(?:\.min)?\.js|Vue\.component|__vue_/i, type: 'script' },
    { name: 'Angular', pattern: /angular(?:\.min)?\.js|ng-version|ng-app/i, type: 'script' },
    { name: 'Next.js', pattern: /__NEXT_DATA__|next\/dist\/|_next\/static/i, type: 'script' },
    { name: 'Nuxt.js', pattern: /__NUXT__|_nuxt\/|nuxtjs\.org/i, type: 'script' },
    { name: 'Svelte', pattern: /svelte\/internal|SvelteComponent/i, type: 'script' },
    // E-commerce & CMS
    { name: 'Shopify', pattern: /shopify\.com|Shopify\.theme|cdn\.shopify/i, type: 'script' },
    { name: 'WordPress', pattern: /wp-content\/|wp-includes\/|WordPress/i, type: 'script' },
    { name: 'Webflow', pattern: /webflow\.com|Webflow\.require/i, type: 'script' },
    { name: 'Squarespace', pattern: /squarespace\.com|SQUARESPACE_ROLLUPS/i, type: 'script' },
    { name: 'Wix', pattern: /wix\.com|wixstatic\.com/i, type: 'script' },
    // Analytics & marketing
    { name: 'Google Analytics', pattern: /google-analytics\.com|gtag\(|ga\('send/i, type: 'script' },
    { name: 'Segment', pattern: /segment\.com|analytics\.identify/i, type: 'script' },
    { name: 'Mixpanel', pattern: /mixpanel\.com|mixpanel\.track/i, type: 'script' },
    { name: 'Intercom', pattern: /intercom\.io|window\.Intercom/i, type: 'script' },
    { name: 'Hubspot', pattern: /hubspot\.com|hs-scripts\.com/i, type: 'script' },
    { name: 'Stripe', pattern: /js\.stripe\.com|Stripe\(/i, type: 'script' },
    // Server / infrastructure (detected via headers)
    { name: 'Vercel', pattern: /vercel/i, type: 'header' },
    { name: 'Netlify', pattern: /netlify/i, type: 'header' },
    { name: 'Cloudflare', pattern: /cloudflare/i, type: 'header' },
    { name: 'AWS', pattern: /amazonaws\.com|x-amz-/i, type: 'header' },
    { name: 'Nginx', pattern: /nginx/i, type: 'header' },
    { name: 'Apache', pattern: /apache/i, type: 'header' },
];
|
|
42
|
+
const SOCIAL_PATTERNS = [
|
|
43
|
+
{ platform: 'Twitter/X', pattern: /(?:twitter\.com|x\.com)\/([^/"?\s]+)/i },
|
|
44
|
+
{ platform: 'LinkedIn', pattern: /linkedin\.com\/(?:company|in)\/([^/"?\s]+)/i },
|
|
45
|
+
{ platform: 'Facebook', pattern: /facebook\.com\/([^/"?\s]+)/i },
|
|
46
|
+
{ platform: 'Instagram', pattern: /instagram\.com\/([^/"?\s]+)/i },
|
|
47
|
+
{ platform: 'YouTube', pattern: /youtube\.com\/(?:channel|c|@)\/([^/"?\s]+)/i },
|
|
48
|
+
{ platform: 'GitHub', pattern: /github\.com\/([^/"?\s]+)/i },
|
|
49
|
+
{ platform: 'Discord', pattern: /discord\.(?:gg|com\/invite)\/([^/"?\s]+)/i },
|
|
50
|
+
{ platform: 'TikTok', pattern: /tiktok\.com\/@([^/"?\s]+)/i },
|
|
51
|
+
];
|
|
52
|
+
/**
 * Industry label -> lower-case keywords used by `detectIndustry` to score a
 * page's text. Some keywords are multi-word phrases. Key order matters: on a
 * tied score the label listed first wins.
 */
const INDUSTRY_KEYWORDS = {
    'SaaS / Software': ['software', 'saas', 'platform', 'api', 'developer', 'cloud', 'app'],
    'E-commerce': ['shop', 'store', 'buy', 'cart', 'checkout', 'product', 'shipping'],
    'Finance / Fintech': ['payment', 'invoice', 'banking', 'crypto', 'invest', 'finance', 'loan'],
    'Healthcare': ['health', 'medical', 'patient', 'clinic', 'doctor', 'hospital', 'pharma'],
    'Education': ['course', 'learn', 'training', 'education', 'school', 'university', 'tutor'],
    'Marketing': ['marketing', 'seo', 'email campaign', 'crm', 'lead', 'analytics'],
    'AI / Machine Learning': ['ai', 'machine learning', 'nlp', 'model', 'inference', 'llm'],
    'Food & Restaurant': ['restaurant', 'food', 'menu', 'delivery', 'catering', 'dining'],
    'Travel': ['travel', 'hotel', 'flight', 'booking', 'tourism', 'vacation'],
    'Real Estate': ['real estate', 'property', 'mortgage', 'rent', 'apartment', 'lease'],
    'Media / Content': ['news', 'blog', 'podcast', 'video', 'streaming', 'media', 'content'],
};
|
|
65
|
+
/**
 * Heuristically extract plan/price pairs from page text.
 *
 * For every line that mentions a known plan name, a dollar price is looked
 * up in this order: the same line, the next four lines, then the previous
 * line. Searching the plan's own line first prevents a plan from picking up
 * the price of the plan listed just above it. A "free" plan with no price
 * nearby is reported as "$0". Duplicate plan/price pairs are reported once.
 *
 * @param content Page text, one plan/price fragment per line.
 * @returns Up to 8 `{ plan, price }` entries in document order; `plan` keeps
 *          the casing found in the text and `price` includes the `$` sign.
 */
function extractPricing(content) {
    const planPattern = /\b(free|starter|basic|pro|professional|business|enterprise|premium|growth|scale|team|individual|personal)\b/i;
    // "$" followed by digits, optional thousands groups and optional cents.
    // Requiring a digit avoids matching a bare "$," and keeps trailing commas
    // ("$10, billed yearly") out of the price.
    const pricePattern = /\$\d+(?:,\d{3})*(?:\.\d{2})?/;
    const plans = [];
    const seen = new Set();
    const lines = content.split('\n');
    for (let i = 0; i < lines.length; i++) {
        const planMatch = lines[i].match(planPattern);
        if (!planMatch)
            continue;
        const plan = planMatch[1];
        // Search nearby lines for a price: own line, following lines, previous line
        const priceMatch = lines[i].match(pricePattern)
            ?? lines.slice(i + 1, i + 5).join(' ').match(pricePattern)
            ?? (i > 0 ? lines[i - 1].match(pricePattern) : null);
        let price;
        if (priceMatch) {
            price = priceMatch[0];
        }
        else if (plan.toLowerCase() === 'free') {
            price = '$0';
        }
        else {
            continue;
        }
        const key = `${plan.toLowerCase()}:${price}`;
        if (!seen.has(key)) {
            seen.add(key);
            plans.push({ plan, price });
        }
    }
    return plans.slice(0, 8);
}
|
|
95
|
+
/**
 * Detect technologies by testing every TECH_PATTERNS entry against the page.
 *
 * Entries of type 'header' are tested against the response headers, flattened
 * to lower-cased "key: value" lines; all other entries are tested against
 * `content` (HTML/scripts).
 *
 * @param content Page HTML / text.
 * @param headers Response headers as a plain key/value object.
 * @returns Sorted, de-duplicated technology names.
 */
function detectTechStack(content, headers) {
    const headerBlob = Object.entries(headers)
        .map(([key, value]) => `${key}: ${value}`)
        .join('\n')
        .toLowerCase();
    const matchedNames = TECH_PATTERNS
        .filter(({ pattern, type }) => pattern.test(type === 'header' ? headerBlob : content))
        .map(({ name }) => name);
    return [...new Set(matchedNames)].sort();
}
|
|
115
|
+
/**
 * Find at most one social profile link per platform in `content`.
 *
 * For each SOCIAL_PATTERNS entry the first match that does not contain
 * "share" or "intent" (share-widget links) is taken, and "https://" is
 * prepended when the matched text does not already start with "http".
 *
 * @param content Page HTML / text.
 * @returns `{ platform, url }` entries in SOCIAL_PATTERNS order.
 */
function extractSocialMedia(content) {
    const links = [];
    const alreadyUsed = new Set();
    for (const entry of SOCIAL_PATTERNS) {
        // Rebuild the pattern with the global flag so matchAll can iterate.
        const globalPattern = new RegExp(entry.pattern.source, 'gi');
        for (const [hit] of content.matchAll(globalPattern)) {
            const isShareWidget = hit.includes('share') || hit.includes('intent');
            if (isShareWidget || alreadyUsed.has(hit)) {
                continue;
            }
            alreadyUsed.add(hit);
            // Build full URL
            const url = hit.startsWith('http') ? hit : 'https://' + hit;
            links.push({ platform: entry.platform, url });
            break; // one per platform
        }
    }
    return links;
}
|
|
135
|
+
/**
 * Pick the industry whose keywords best match `text`.
 *
 * Each industry in INDUSTRY_KEYWORDS scores one point per keyword found in
 * the lower-cased text. Keywords must start at a word boundary, so "rent"
 * no longer matches "different" and "api" no longer matches "capital".
 * Keywords of three characters or fewer ("ai", "app", "seo", ...) must match
 * a whole word (optionally plural), because as prefixes they hit unrelated
 * words such as "air" or "apply". Longer keywords match as word prefixes so
 * inflections ("investing", "products") still count.
 *
 * @param text Free text describing the business.
 * @returns The best-scoring industry label, or `undefined` when no keyword
 *          matches. On a tie the industry listed first wins.
 */
function detectIndustry(text) {
    const lower = text.toLowerCase();
    const matches = (keyword) => {
        const escaped = keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        const source = keyword.length <= 3 ? `\\b${escaped}s?\\b` : `\\b${escaped}`;
        return new RegExp(source).test(lower);
    };
    let bestIndustry;
    let bestScore = 0;
    for (const [industry, keywords] of Object.entries(INDUSTRY_KEYWORDS)) {
        const score = keywords.filter(matches).length;
        // Strict ">" keeps the earliest industry on ties.
        if (score > bestScore) {
            bestScore = score;
            bestIndustry = industry;
        }
    }
    return bestIndustry;
}
|
|
146
|
+
function extractSchemaOrgData(content) {
|
|
147
|
+
const result = {};
|
|
148
|
+
// JSON-LD: look for Organization or LocalBusiness schema
|
|
149
|
+
const jsonLdMatch = content.match(/<script[^>]*type="application\/ld\+json"[^>]*>([\s\S]*?)<\/script>/gi);
|
|
150
|
+
if (jsonLdMatch) {
|
|
151
|
+
for (const block of jsonLdMatch) {
|
|
152
|
+
try {
|
|
153
|
+
const json = JSON.parse(block.replace(/<script[^>]*>|<\/script>/gi, ''));
|
|
154
|
+
const items = Array.isArray(json) ? json : [json];
|
|
155
|
+
for (const item of items) {
|
|
156
|
+
if (!result.name && item.name)
|
|
157
|
+
result.name = item.name;
|
|
158
|
+
if (!result.description && item.description)
|
|
159
|
+
result.description = item.description;
|
|
160
|
+
if (!result.founded && item.foundingDate)
|
|
161
|
+
result.founded = String(item.foundingDate);
|
|
162
|
+
if (!result.employees && item.numberOfEmployees) {
|
|
163
|
+
const emp = item.numberOfEmployees;
|
|
164
|
+
result.employees = typeof emp === 'object' ? `${emp.minValue ?? ''}–${emp.maxValue ?? ''}` : String(emp);
|
|
165
|
+
}
|
|
166
|
+
// Review aggregate
|
|
167
|
+
if (item.aggregateRating) {
|
|
168
|
+
result.reviews = result.reviews || [];
|
|
169
|
+
result.reviews.push({
|
|
170
|
+
source: 'Schema.org',
|
|
171
|
+
rating: parseFloat(item.aggregateRating.ratingValue) || 0,
|
|
172
|
+
count: parseInt(item.aggregateRating.reviewCount) || 0,
|
|
173
|
+
});
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
catch {
|
|
178
|
+
// Invalid JSON-LD — skip
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
// Open Graph fallback
|
|
183
|
+
if (!result.name) {
|
|
184
|
+
const ogTitle = content.match(/property="og:title"\s+content="([^"]+)"/);
|
|
185
|
+
if (ogTitle)
|
|
186
|
+
result.name = ogTitle[1];
|
|
187
|
+
}
|
|
188
|
+
if (!result.description) {
|
|
189
|
+
const ogDesc = content.match(/(?:property="og:description"|name="description")\s+content="([^"]+)"/);
|
|
190
|
+
if (ogDesc)
|
|
191
|
+
result.description = ogDesc[1];
|
|
192
|
+
}
|
|
193
|
+
return result;
|
|
194
|
+
}
|
|
195
|
+
// ─── Main export ──────────────────────────────────────────────────────────
|
|
196
|
+
/**
 * Extract structured business intelligence from a website URL.
 *
 * Fetches the homepage and optionally the /pricing page, then extracts
 * structured data including tech stack, social media, pricing, and more.
 *
 * Steps:
 *  1. fetch the homepage as HTML (15 s timeout);
 *  2. read schema.org / Open Graph data;
 *  3. detect the tech stack from content and response headers;
 *  4. collect social media links;
 *  5. guess the industry from the description plus page content;
 *  6. try /pricing, /plans, /pricing-plans and /subscribe in turn (8 s
 *     timeout each, failures ignored) until one yields pricing, falling back
 *     to the homepage text;
 *  7. pick up to 8 product/feature lines following a "product(s)",
 *     "feature(s)" or "solution(s)" line on the homepage.
 *
 * @param url Website URL; "https://" is prepended when it has no http(s)
 *            scheme (so bare hosts such as "httpbin.org" work).
 * @returns The extracted intel; fields with no findings are omitted.
 * @throws If the URL cannot be parsed or the homepage fetch fails.
 *
 * @example
 * ```typescript
 * const intel = await getBusinessIntel('https://stripe.com');
 * console.log(intel.name);       // "Stripe"
 * console.log(intel.techStack);  // ["React", "Cloudflare", ...]
 * console.log(intel.pricing);    // [{plan: "Starter", price: "$0"}, ...]
 * ```
 */
export async function getBusinessIntel(url) {
    // Normalize URL. Test for an actual scheme rather than the "http" prefix,
    // which bare hosts like "httpbin.org" also have.
    if (!/^https?:\/\//i.test(url))
        url = 'https://' + url;
    const parsed = new URL(url);
    const origin = parsed.origin;
    // 1. Fetch homepage
    const homeResult = await peel(url, {
        format: 'html',
        timeout: 15000,
    });
    const homeContent = homeResult.content || '';
    const homeHtml = homeResult.rawHtml || homeContent;
    // 2. Extract schema.org / OG data
    const schemaData = extractSchemaOrgData(homeHtml);
    // 3. Detect tech stack from content + headers
    const responseHeaders = homeResult.headers || {};
    const techStack = detectTechStack(homeHtml + homeContent, responseHeaders);
    // 4. Extract social media links
    const socialMedia = extractSocialMedia(homeHtml + homeContent);
    // 5. Detect industry from description + content
    const textForIndustry = [schemaData.description, homeContent].filter(Boolean).join(' ');
    const industry = detectIndustry(textForIndustry);
    // 6. Try to fetch pricing page (best-effort)
    let pricing = [];
    const pricingPaths = ['/pricing', '/plans', '/pricing-plans', '/subscribe'];
    for (const path of pricingPaths) {
        try {
            const pricingUrl = origin + path;
            const pricingResult = await peel(pricingUrl, { timeout: 8000 });
            // Very short bodies are treated as "no real pricing page".
            if (pricingResult.content && pricingResult.content.length > 200) {
                pricing = extractPricing(pricingResult.content);
                if (pricing.length > 0)
                    break;
            }
        }
        catch {
            // Pricing page not found — continue
        }
    }
    // If no pricing found from pricing page, try extracting from homepage
    if (pricing.length === 0) {
        pricing = extractPricing(homeContent);
    }
    // 7. Extract products list from homepage (look for feature/product lists)
    const products = [];
    const productSection = homeContent.match(/(?:products?|features?|solutions?)[^\n]*\n((?:[^\n]+\n){1,10})/i);
    if (productSection) {
        const lines = productSection[1]
            .split('\n')
            .map(l => l.trim())
            .filter(l => l.length > 3 && l.length < 80 && !l.startsWith('#') && !l.startsWith('http'));
        products.push(...lines.slice(0, 8));
    }
    // Build final result
    const intel = {
        ...schemaData,
    };
    if (industry)
        intel.industry = industry;
    if (products.length > 0)
        intel.products = products;
    if (pricing.length > 0)
        intel.pricing = pricing;
    if (socialMedia.length > 0)
        intel.socialMedia = socialMedia;
    if (techStack.length > 0)
        intel.techStack = techStack;
    return intel;
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* In-memory LRU response cache.
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Result of a stale-while-revalidate cache lookup.
 */
export interface CacheResult<T = unknown> {
    /** The cached value. */
    value: T;
    /**
     * Whether the entry is considered stale.
     * NOTE(review): presumably "past its TTL but still served" — confirm against cache.js.
     */
    stale: boolean;
}
|
|
8
|
+
/** Look up the cached value for `url`; the declared return type is `null` when nothing is returned. */
export declare function getCached<T = unknown>(url: string): T | null;
/**
 * Look up `url` and report the value together with a `stale` flag, or `null`.
 * NOTE(review): "SWR" presumably means stale-while-revalidate — confirm against cache.js.
 */
export declare function getCachedWithSWR<T = unknown>(url: string): CacheResult<T> | null;
/**
 * Flag `url` as being revalidated.
 * NOTE(review): the meaning of the boolean result is not visible here; presumably
 * it tells the caller whether it should perform the revalidation — confirm.
 */
export declare function markRevalidating(url: string): boolean;
/** Store `result` in the cache under `url`. */
export declare function setCached<T = unknown>(url: string, result: T): void;
/** Clear the cache. */
export declare function clearCache(): void;
/** Set the cache time-to-live, in milliseconds. */
export declare function setCacheTTL(ms: number): void;
|