@iflow-mcp/jakeliume-webpeel 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +313 -0
- package/dist/cache.d.ts +30 -0
- package/dist/cache.js +139 -0
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +411 -0
- package/dist/cli/commands/doctor.d.ts +37 -0
- package/dist/cli/commands/doctor.js +371 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1345 -0
- package/dist/cli/commands/guide.d.ts +2 -0
- package/dist/cli/commands/guide.js +183 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +840 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/monitor.d.ts +12 -0
- package/dist/cli/commands/monitor.js +197 -0
- package/dist/cli/commands/observe.d.ts +12 -0
- package/dist/cli/commands/observe.js +158 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +282 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +1021 -0
- package/dist/cli/commands/setup.d.ts +13 -0
- package/dist/cli/commands/setup.js +244 -0
- package/dist/cli/commands/skill.d.ts +15 -0
- package/dist/cli/commands/skill.js +195 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +806 -0
- package/dist/cli-auth.d.ts +75 -0
- package/dist/cli-auth.js +369 -0
- package/dist/cli.d.ts +17 -0
- package/dist/cli.js +99 -0
- package/dist/core/actions.d.ts +69 -0
- package/dist/core/actions.js +495 -0
- package/dist/core/agent.d.ts +98 -0
- package/dist/core/agent.js +558 -0
- package/dist/core/answer.d.ts +42 -0
- package/dist/core/answer.js +395 -0
- package/dist/core/application-tracker.d.ts +84 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/apply.d.ts +162 -0
- package/dist/core/apply.js +816 -0
- package/dist/core/auth-detection.d.ts +35 -0
- package/dist/core/auth-detection.js +358 -0
- package/dist/core/auto-extract.d.ts +82 -0
- package/dist/core/auto-extract.js +604 -0
- package/dist/core/auto-interact.d.ts +23 -0
- package/dist/core/auto-interact.js +246 -0
- package/dist/core/bm25-filter.d.ts +66 -0
- package/dist/core/bm25-filter.js +288 -0
- package/dist/core/branding.d.ts +54 -0
- package/dist/core/branding.js +234 -0
- package/dist/core/browser-fetch.d.ts +323 -0
- package/dist/core/browser-fetch.js +1600 -0
- package/dist/core/browser-pool.d.ts +91 -0
- package/dist/core/browser-pool.js +550 -0
- package/dist/core/budget.d.ts +42 -0
- package/dist/core/budget.js +324 -0
- package/dist/core/business-intel.d.ts +47 -0
- package/dist/core/business-intel.js +279 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +121 -0
- package/dist/core/cf-worker-proxy.d.ts +32 -0
- package/dist/core/cf-worker-proxy.js +87 -0
- package/dist/core/challenge-detection.d.ts +26 -0
- package/dist/core/challenge-detection.js +468 -0
- package/dist/core/change-tracking.d.ts +75 -0
- package/dist/core/change-tracking.js +276 -0
- package/dist/core/chunker.d.ts +46 -0
- package/dist/core/chunker.js +249 -0
- package/dist/core/chunking.d.ts +42 -0
- package/dist/core/chunking.js +181 -0
- package/dist/core/circuit-breaker.d.ts +44 -0
- package/dist/core/circuit-breaker.js +85 -0
- package/dist/core/content-pruner.d.ts +47 -0
- package/dist/core/content-pruner.js +425 -0
- package/dist/core/cookie-cache.d.ts +60 -0
- package/dist/core/cookie-cache.js +163 -0
- package/dist/core/crawl-checkpoint.d.ts +54 -0
- package/dist/core/crawl-checkpoint.js +104 -0
- package/dist/core/crawler.d.ts +84 -0
- package/dist/core/crawler.js +349 -0
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/deep-fetch.d.ts +74 -0
- package/dist/core/deep-fetch.js +405 -0
- package/dist/core/deep-research.d.ts +141 -0
- package/dist/core/deep-research.js +972 -0
- package/dist/core/design-analysis.d.ts +70 -0
- package/dist/core/design-analysis.js +490 -0
- package/dist/core/design-compare.d.ts +38 -0
- package/dist/core/design-compare.js +264 -0
- package/dist/core/diff.d.ts +61 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/dns-cache.d.ts +20 -0
- package/dist/core/dns-cache.js +198 -0
- package/dist/core/documents.d.ts +23 -0
- package/dist/core/documents.js +123 -0
- package/dist/core/domain-memory.d.ts +66 -0
- package/dist/core/domain-memory.js +163 -0
- package/dist/core/domain-verify.d.ts +40 -0
- package/dist/core/domain-verify.js +379 -0
- package/dist/core/engine-ranker.d.ts +112 -0
- package/dist/core/engine-ranker.js +395 -0
- package/dist/core/extract-inline.d.ts +38 -0
- package/dist/core/extract-inline.js +215 -0
- package/dist/core/extract-listings.d.ts +38 -0
- package/dist/core/extract-listings.js +461 -0
- package/dist/core/extract.d.ts +9 -0
- package/dist/core/extract.js +139 -0
- package/dist/core/fetch-cache.d.ts +57 -0
- package/dist/core/fetch-cache.js +95 -0
- package/dist/core/fetcher.d.ts +13 -0
- package/dist/core/fetcher.js +12 -0
- package/dist/core/google-cache.d.ts +29 -0
- package/dist/core/google-cache.js +180 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/hotel-search.d.ts +122 -0
- package/dist/core/hotel-search.js +382 -0
- package/dist/core/http-fetch.d.ts +72 -0
- package/dist/core/http-fetch.js +820 -0
- package/dist/core/human.d.ts +175 -0
- package/dist/core/human.js +680 -0
- package/dist/core/image-caption.d.ts +44 -0
- package/dist/core/image-caption.js +271 -0
- package/dist/core/jobs.d.ts +75 -0
- package/dist/core/jobs.js +634 -0
- package/dist/core/json-ld.d.ts +15 -0
- package/dist/core/json-ld.js +617 -0
- package/dist/core/language-detect.d.ts +18 -0
- package/dist/core/language-detect.js +135 -0
- package/dist/core/links.d.ts +10 -0
- package/dist/core/links.js +44 -0
- package/dist/core/llm-extract.d.ts +71 -0
- package/dist/core/llm-extract.js +507 -0
- package/dist/core/llm-provider.d.ts +100 -0
- package/dist/core/llm-provider.js +702 -0
- package/dist/core/local-search.d.ts +60 -0
- package/dist/core/local-search.js +308 -0
- package/dist/core/logger.d.ts +28 -0
- package/dist/core/logger.js +104 -0
- package/dist/core/map.d.ts +33 -0
- package/dist/core/map.js +127 -0
- package/dist/core/markdown.d.ts +92 -0
- package/dist/core/markdown.js +809 -0
- package/dist/core/metadata.d.ts +34 -0
- package/dist/core/metadata.js +422 -0
- package/dist/core/observe.d.ts +113 -0
- package/dist/core/observe.js +395 -0
- package/dist/core/ocr.d.ts +12 -0
- package/dist/core/ocr.js +33 -0
- package/dist/core/paginate.d.ts +31 -0
- package/dist/core/paginate.js +106 -0
- package/dist/core/pdf.d.ts +8 -0
- package/dist/core/pdf.js +25 -0
- package/dist/core/peel-tls.d.ts +25 -0
- package/dist/core/peel-tls.js +220 -0
- package/dist/core/pipeline.d.ts +132 -0
- package/dist/core/pipeline.js +1666 -0
- package/dist/core/profiles.d.ts +61 -0
- package/dist/core/profiles.js +350 -0
- package/dist/core/prompt-guard.d.ts +30 -0
- package/dist/core/prompt-guard.js +119 -0
- package/dist/core/proxy-config.d.ts +90 -0
- package/dist/core/proxy-config.js +172 -0
- package/dist/core/quick-answer.d.ts +53 -0
- package/dist/core/quick-answer.js +833 -0
- package/dist/core/rate-governor.d.ts +80 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/readability.d.ts +57 -0
- package/dist/core/readability.js +533 -0
- package/dist/core/research.d.ts +66 -0
- package/dist/core/research.js +270 -0
- package/dist/core/retry.d.ts +60 -0
- package/dist/core/retry.js +119 -0
- package/dist/core/safe-browsing.d.ts +30 -0
- package/dist/core/safe-browsing.js +206 -0
- package/dist/core/schema-extraction.d.ts +66 -0
- package/dist/core/schema-extraction.js +352 -0
- package/dist/core/schema-postprocess.d.ts +32 -0
- package/dist/core/schema-postprocess.js +469 -0
- package/dist/core/schema-templates.d.ts +19 -0
- package/dist/core/schema-templates.js +143 -0
- package/dist/core/screenshot.d.ts +224 -0
- package/dist/core/screenshot.js +207 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +243 -0
- package/dist/core/search-provider.js +1629 -0
- package/dist/core/searxng-provider.d.ts +35 -0
- package/dist/core/searxng-provider.js +105 -0
- package/dist/core/selective-evidence.d.ts +151 -0
- package/dist/core/selective-evidence.js +389 -0
- package/dist/core/site-search.d.ts +44 -0
- package/dist/core/site-search.js +252 -0
- package/dist/core/sitemap.d.ts +23 -0
- package/dist/core/sitemap.js +105 -0
- package/dist/core/source-credibility.d.ts +29 -0
- package/dist/core/source-credibility.js +584 -0
- package/dist/core/source-scoring.d.ts +166 -0
- package/dist/core/source-scoring.js +396 -0
- package/dist/core/stemmer.d.ts +38 -0
- package/dist/core/stemmer.js +509 -0
- package/dist/core/strategies.d.ts +104 -0
- package/dist/core/strategies.js +1044 -0
- package/dist/core/strategy-hooks.d.ts +145 -0
- package/dist/core/strategy-hooks.js +74 -0
- package/dist/core/structured-extract.d.ts +43 -0
- package/dist/core/structured-extract.js +550 -0
- package/dist/core/summarize.d.ts +17 -0
- package/dist/core/summarize.js +78 -0
- package/dist/core/synonyms.d.ts +42 -0
- package/dist/core/synonyms.js +184 -0
- package/dist/core/system-monitor.d.ts +61 -0
- package/dist/core/system-monitor.js +133 -0
- package/dist/core/table-format.d.ts +30 -0
- package/dist/core/table-format.js +146 -0
- package/dist/core/threat-feeds.d.ts +23 -0
- package/dist/core/threat-feeds.js +104 -0
- package/dist/core/timing.d.ts +21 -0
- package/dist/core/timing.js +33 -0
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/core/user-agents.d.ts +82 -0
- package/dist/core/user-agents.js +239 -0
- package/dist/core/vertical-search.d.ts +54 -0
- package/dist/core/vertical-search.js +158 -0
- package/dist/core/watch-manager.d.ts +175 -0
- package/dist/core/watch-manager.js +416 -0
- package/dist/core/watch.d.ts +101 -0
- package/dist/core/watch.js +389 -0
- package/dist/core/youtube.d.ts +130 -0
- package/dist/core/youtube.js +1175 -0
- package/dist/ee/challenge-re-export.d.ts +1 -0
- package/dist/ee/challenge-re-export.js +1 -0
- package/dist/ee/challenge-solver.d.ts +72 -0
- package/dist/ee/challenge-solver.js +720 -0
- package/dist/ee/domain-extractors.d.ts +8 -0
- package/dist/ee/domain-extractors.js +8 -0
- package/dist/ee/domain-intel.d.ts +16 -0
- package/dist/ee/domain-intel.js +133 -0
- package/dist/ee/extractors/allrecipes.d.ts +2 -0
- package/dist/ee/extractors/allrecipes.js +120 -0
- package/dist/ee/extractors/amazon.d.ts +2 -0
- package/dist/ee/extractors/amazon.js +78 -0
- package/dist/ee/extractors/arxiv.d.ts +2 -0
- package/dist/ee/extractors/arxiv.js +137 -0
- package/dist/ee/extractors/bestbuy.d.ts +2 -0
- package/dist/ee/extractors/bestbuy.js +78 -0
- package/dist/ee/extractors/carscom.d.ts +2 -0
- package/dist/ee/extractors/carscom.js +121 -0
- package/dist/ee/extractors/coingecko.d.ts +2 -0
- package/dist/ee/extractors/coingecko.js +134 -0
- package/dist/ee/extractors/craigslist.d.ts +2 -0
- package/dist/ee/extractors/craigslist.js +92 -0
- package/dist/ee/extractors/devto.d.ts +2 -0
- package/dist/ee/extractors/devto.js +135 -0
- package/dist/ee/extractors/ebay.d.ts +2 -0
- package/dist/ee/extractors/ebay.js +90 -0
- package/dist/ee/extractors/espn.d.ts +2 -0
- package/dist/ee/extractors/espn.js +260 -0
- package/dist/ee/extractors/etsy.d.ts +2 -0
- package/dist/ee/extractors/etsy.js +52 -0
- package/dist/ee/extractors/facebook.d.ts +2 -0
- package/dist/ee/extractors/facebook.js +46 -0
- package/dist/ee/extractors/github.d.ts +2 -0
- package/dist/ee/extractors/github.js +196 -0
- package/dist/ee/extractors/google-flights.d.ts +2 -0
- package/dist/ee/extractors/google-flights.js +176 -0
- package/dist/ee/extractors/hackernews.d.ts +2 -0
- package/dist/ee/extractors/hackernews.js +147 -0
- package/dist/ee/extractors/imdb.d.ts +2 -0
- package/dist/ee/extractors/imdb.js +172 -0
- package/dist/ee/extractors/index.d.ts +26 -0
- package/dist/ee/extractors/index.js +247 -0
- package/dist/ee/extractors/instagram.d.ts +2 -0
- package/dist/ee/extractors/instagram.js +102 -0
- package/dist/ee/extractors/kalshi.d.ts +2 -0
- package/dist/ee/extractors/kalshi.js +121 -0
- package/dist/ee/extractors/kayak-cars.d.ts +2 -0
- package/dist/ee/extractors/kayak-cars.js +270 -0
- package/dist/ee/extractors/linkedin.d.ts +2 -0
- package/dist/ee/extractors/linkedin.js +113 -0
- package/dist/ee/extractors/medium.d.ts +2 -0
- package/dist/ee/extractors/medium.js +130 -0
- package/dist/ee/extractors/news.d.ts +4 -0
- package/dist/ee/extractors/news.js +173 -0
- package/dist/ee/extractors/npm.d.ts +2 -0
- package/dist/ee/extractors/npm.js +86 -0
- package/dist/ee/extractors/pdf.d.ts +2 -0
- package/dist/ee/extractors/pdf.js +108 -0
- package/dist/ee/extractors/pinterest.d.ts +2 -0
- package/dist/ee/extractors/pinterest.js +34 -0
- package/dist/ee/extractors/polymarket.d.ts +2 -0
- package/dist/ee/extractors/polymarket.js +358 -0
- package/dist/ee/extractors/producthunt.d.ts +2 -0
- package/dist/ee/extractors/producthunt.js +88 -0
- package/dist/ee/extractors/pubmed.d.ts +2 -0
- package/dist/ee/extractors/pubmed.js +162 -0
- package/dist/ee/extractors/pypi.d.ts +2 -0
- package/dist/ee/extractors/pypi.js +80 -0
- package/dist/ee/extractors/reddit.d.ts +2 -0
- package/dist/ee/extractors/reddit.js +438 -0
- package/dist/ee/extractors/redfin.d.ts +2 -0
- package/dist/ee/extractors/redfin.js +156 -0
- package/dist/ee/extractors/semanticscholar.d.ts +2 -0
- package/dist/ee/extractors/semanticscholar.js +131 -0
- package/dist/ee/extractors/shared.d.ts +12 -0
- package/dist/ee/extractors/shared.js +76 -0
- package/dist/ee/extractors/soundcloud.d.ts +2 -0
- package/dist/ee/extractors/soundcloud.js +34 -0
- package/dist/ee/extractors/sportsbetting.d.ts +2 -0
- package/dist/ee/extractors/sportsbetting.js +37 -0
- package/dist/ee/extractors/spotify.d.ts +2 -0
- package/dist/ee/extractors/spotify.js +34 -0
- package/dist/ee/extractors/stackoverflow.d.ts +2 -0
- package/dist/ee/extractors/stackoverflow.js +61 -0
- package/dist/ee/extractors/substack.d.ts +2 -0
- package/dist/ee/extractors/substack.js +115 -0
- package/dist/ee/extractors/substackroot.d.ts +2 -0
- package/dist/ee/extractors/substackroot.js +46 -0
- package/dist/ee/extractors/tiktok.d.ts +2 -0
- package/dist/ee/extractors/tiktok.js +29 -0
- package/dist/ee/extractors/tradingview.d.ts +2 -0
- package/dist/ee/extractors/tradingview.js +182 -0
- package/dist/ee/extractors/twitch.d.ts +2 -0
- package/dist/ee/extractors/twitch.js +36 -0
- package/dist/ee/extractors/twitter.d.ts +2 -0
- package/dist/ee/extractors/twitter.js +327 -0
- package/dist/ee/extractors/types.d.ts +14 -0
- package/dist/ee/extractors/types.js +1 -0
- package/dist/ee/extractors/walmart.d.ts +2 -0
- package/dist/ee/extractors/walmart.js +50 -0
- package/dist/ee/extractors/weather.d.ts +2 -0
- package/dist/ee/extractors/weather.js +133 -0
- package/dist/ee/extractors/wikipedia.d.ts +4 -0
- package/dist/ee/extractors/wikipedia.js +235 -0
- package/dist/ee/extractors/yelp.d.ts +2 -0
- package/dist/ee/extractors/yelp.js +216 -0
- package/dist/ee/extractors/youtube.d.ts +2 -0
- package/dist/ee/extractors/youtube.js +189 -0
- package/dist/ee/extractors/zillow.d.ts +54 -0
- package/dist/ee/extractors/zillow.js +247 -0
- package/dist/ee/extractors-re-export.d.ts +1 -0
- package/dist/ee/extractors-re-export.js +1 -0
- package/dist/ee/premium-hooks.d.ts +20 -0
- package/dist/ee/premium-hooks.js +50 -0
- package/dist/ee/spa-detection.d.ts +2 -0
- package/dist/ee/spa-detection.js +2 -0
- package/dist/ee/stability.d.ts +4 -0
- package/dist/ee/stability.js +29 -0
- package/dist/ee/swr-cache.d.ts +14 -0
- package/dist/ee/swr-cache.js +34 -0
- package/dist/index.d.ts +143 -0
- package/dist/index.js +291 -0
- package/dist/integrations/index.d.ts +2 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/langchain.d.ts +64 -0
- package/dist/integrations/langchain.js +115 -0
- package/dist/integrations/llamaindex.d.ts +50 -0
- package/dist/integrations/llamaindex.js +91 -0
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +395 -0
- package/dist/mcp/handlers/extract.d.ts +7 -0
- package/dist/mcp/handlers/extract.js +135 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +63 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +40 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/observe.d.ts +8 -0
- package/dist/mcp/handlers/observe.js +37 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +78 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +7 -0
- package/dist/mcp/server.js +108 -0
- package/dist/mcp/smart-router.d.ts +23 -0
- package/dist/mcp/smart-router.js +178 -0
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +632 -0
- package/dist/server/auth-store.d.ts +28 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/bull-queues.d.ts +60 -0
- package/dist/server/bull-queues.js +90 -0
- package/dist/server/email-service.d.ts +55 -0
- package/dist/server/email-service.js +291 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/audit-log.d.ts +14 -0
- package/dist/server/middleware/audit-log.js +73 -0
- package/dist/server/middleware/auth.d.ts +35 -0
- package/dist/server/middleware/auth.js +225 -0
- package/dist/server/middleware/rate-limit.d.ts +50 -0
- package/dist/server/middleware/rate-limit.js +270 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +201 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +146 -0
- package/dist/server/pg-auth-store.js +576 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +79 -0
- package/dist/server/routes/admin-active.d.ts +7 -0
- package/dist/server/routes/admin-active.js +120 -0
- package/dist/server/routes/admin-stats.d.ts +7 -0
- package/dist/server/routes/admin-stats.js +176 -0
- package/dist/server/routes/agent.d.ts +24 -0
- package/dist/server/routes/agent.js +480 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +295 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cache-warm.d.ts +25 -0
- package/dist/server/routes/cache-warm.js +212 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/crawl.d.ts +13 -0
- package/dist/server/routes/crawl.js +287 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/deep-research.d.ts +11 -0
- package/dist/server/routes/deep-research.js +232 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +14 -0
- package/dist/server/routes/extract.js +325 -0
- package/dist/server/routes/feed.d.ts +15 -0
- package/dist/server/routes/feed.js +311 -0
- package/dist/server/routes/fetch-queue.d.ts +13 -0
- package/dist/server/routes/fetch-queue.js +357 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +1274 -0
- package/dist/server/routes/go.d.ts +14 -0
- package/dist/server/routes/go.js +81 -0
- package/dist/server/routes/health.d.ts +11 -0
- package/dist/server/routes/health.js +141 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +574 -0
- package/dist/server/routes/map.d.ts +11 -0
- package/dist/server/routes/map.js +116 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +197 -0
- package/dist/server/routes/metrics.d.ts +37 -0
- package/dist/server/routes/metrics.js +149 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/reader.d.ts +18 -0
- package/dist/server/routes/reader.js +192 -0
- package/dist/server/routes/research.d.ts +14 -0
- package/dist/server/routes/research.js +482 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +820 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +874 -0
- package/dist/server/routes/session.d.ts +17 -0
- package/dist/server/routes/session.js +548 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +102 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +72 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
- package/dist/server/routes/smart-search/handlers/general.js +717 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +1309 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +154 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +546 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +264 -0
- package/dist/server/routes/smart-search/llm.d.ts +16 -0
- package/dist/server/routes/smart-search/llm.js +70 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +81 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +20 -0
- package/dist/server/routes/smart-search/utils.js +146 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +296 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/usage.d.ts +9 -0
- package/dist/server/routes/usage.js +279 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1867 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +14 -0
- package/dist/server/sentry.js +104 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/dist/types.d.ts +552 -0
- package/dist/types.js +39 -0
- package/llms.txt +105 -0
- package/package.json +189 -0
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Smart content chunking for LLM processing.
|
|
3
|
+
*
|
|
4
|
+
* Splits content into manageable pieces with configurable overlap and strategy.
|
|
5
|
+
* Zero external dependencies; target <1ms for typical page content.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Roughly estimate how many LLM tokens a piece of text occupies.
 *
 * Uses the "four characters per token" rule of thumb and rounds up, so any
 * non-empty string counts as at least one token.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Estimated token count (0 for an empty string).
 */
export function estimateTokens(text) {
    const CHARS_PER_TOKEN = 4;
    const charCount = text.length;
    return Math.ceil(charCount / CHARS_PER_TOKEN);
}
|
|
11
|
+
/**
 * Split content into chunks suitable for LLM processing.
 *
 * @param {string} content - Text to split.
 * @param {object} [options]
 * @param {number} [options.chunkSize=4000] - Target chunk size in estimated tokens.
 * @param {number} [options.overlap=200] - Estimated tokens shared between
 *   consecutive chunks (passed to the 'fixed' and 'semantic' strategies only).
 * @param {string} [options.strategy='semantic'] - 'fixed', 'paragraph' or
 *   'semantic'; any other value is handled as 'semantic'.
 * @returns {{chunks: Array, totalChunks: number, totalTokens: number, originalTokens: number}}
 *   The chunks (each with `index`, `content`, `tokens`, `startOffset`,
 *   `isLast`), how many there are, the sum of their token estimates, and the
 *   token estimate of the untouched input.
 */
export function chunkContent(content, options) {
    const maxTokens = options?.chunkSize ?? 4000;
    const overlapTokens = options?.overlap ?? 200;
    const mode = options?.strategy ?? 'semantic';
    const originalTokens = estimateTokens(content);
    // Empty input yields an empty result rather than a single empty chunk.
    if (content.length === 0) {
        return { chunks: [], totalChunks: 0, totalTokens: 0, originalTokens: 0 };
    }
    let pieces;
    if (mode === 'fixed') {
        pieces = chunkFixed(content, maxTokens, overlapTokens);
    }
    else if (mode === 'paragraph') {
        pieces = chunkParagraph(content, maxTokens);
    }
    else {
        // 'semantic' and every unrecognised strategy name end up here.
        pieces = chunkSemantic(content, maxTokens, overlapTokens);
    }
    const finalPosition = pieces.length - 1;
    const chunks = [];
    let totalTokens = 0;
    pieces.forEach((piece, position) => {
        const tokens = estimateTokens(piece.content);
        // Overlapping chunks are counted in full, so this can exceed originalTokens.
        totalTokens += tokens;
        chunks.push({
            index: position,
            content: piece.content,
            tokens,
            startOffset: piece.startOffset,
            isLast: position === finalPosition,
        });
    });
    return {
        chunks,
        totalChunks: chunks.length,
        totalTokens,
        originalTokens,
    };
}
|
|
50
|
+
// ---------------------------------------------------------------------------
|
|
51
|
+
// Fixed strategy
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
/**
 * Fixed strategy: cut the content into equally sized windows, ignoring any
 * text structure, with each window re-reading the tail of the previous one.
 *
 * Sizes are given in estimated tokens and converted at four characters per
 * token.
 *
 * @param {string} content - Text to split.
 * @param {number} chunkSize - Window size in estimated tokens.
 * @param {number} overlap - Estimated tokens shared by consecutive windows.
 * @returns {Array<{content: string, startOffset: number}>} Windows in document
 *   order; `startOffset` is the character index where each window begins.
 */
function chunkFixed(content, chunkSize, overlap) {
    // A window must cover at least one character, otherwise a zero or negative
    // chunkSize would emit empty slices without ever reaching the end.
    const chunkChars = Math.max(1, chunkSize * 4);
    const overlapChars = overlap * 4;
    const result = [];
    let start = 0;
    while (start < content.length) {
        const end = Math.min(start + chunkChars, content.length);
        result.push({ content: content.slice(start, end), startOffset: start });
        if (end >= content.length)
            break;
        // Always move forward by at least one character. Without this floor an
        // overlap that is >= the chunk size leaves `start` where it was (or
        // moves it backwards) and the loop never terminates.
        start = Math.max(start + 1, end - overlapChars);
    }
    return result;
}
|
|
67
|
+
// ---------------------------------------------------------------------------
|
|
68
|
+
// Semantic strategy
|
|
69
|
+
// ---------------------------------------------------------------------------
|
|
70
|
+
/**
 * Pick the best character position near `target` at which to end a chunk.
 *
 * Matches that start inside `[target - tolerance, min(target + tolerance,
 * content.length)]` are considered, in this order of preference:
 *   1. the newline directly before a markdown heading (cut before the newline),
 *   2. a blank line `\n\n` (cut after it),
 *   3. the end of a sentence (cut after the punctuation and trailing whitespace),
 *   4. a single space (cut after it).
 * The first kind that has any match wins; within that kind the match closest
 * to `target` is used, the earliest one on ties.
 *
 * @param {string} content - Full text being chunked.
 * @param {number} target - Preferred cut position (character index).
 * @param {number} tolerance - How far from `target` a match may start.
 * @returns {number} Cut position, or `min(target, content.length)` when no
 *   boundary of any kind starts inside the window.
 */
function findNaturalBreak(content, target, tolerance) {
    const windowStart = target - tolerance;
    const windowEnd = Math.min(target + tolerance, content.length);
    // Highest priority first. `cut` is the position returned for a match;
    // `measure` is the position whose distance to `target` is used when a new
    // match challenges the current favourite.
    const boundaryKinds = [
        {
            pattern: /\n(?=#{1,6} )/g,
            cut: (hit) => hit.index,
            measure: (hit) => hit.index,
        },
        {
            pattern: /\n\n/g,
            cut: (hit) => hit.index + 2,
            // Measured from the start of the blank line, not from the cut point.
            measure: (hit) => hit.index,
        },
        {
            pattern: /[.!?](?:\s+(?=[A-Z\n])|(?=\n))/g,
            cut: (hit) => hit.index + hit[0].length,
            measure: (hit) => hit.index + hit[0].length,
        },
        {
            pattern: / /g,
            cut: (hit) => hit.index + 1,
            measure: (hit) => hit.index + 1,
        },
    ];
    for (const kind of boundaryKinds) {
        const scanner = kind.pattern;
        scanner.lastIndex = windowStart;
        let chosenCut = null;
        for (let hit = scanner.exec(content); hit && hit.index <= windowEnd; hit = scanner.exec(content)) {
            const challengerDistance = Math.abs(kind.measure(hit) - target);
            if (chosenCut === null || challengerDistance < Math.abs(chosenCut - target)) {
                chosenCut = kind.cut(hit);
            }
        }
        // A match of a higher-priority kind is never displaced by a lower one.
        if (chosenCut !== null) {
            return chosenCut;
        }
    }
    return Math.min(target, content.length);
}
|
|
124
|
+
/**
 * Semantic strategy: aim for chunks of `chunkSize` estimated tokens, but let
 * each cut drift by up to 20% of the chunk size so it can land on a boundary
 * chosen by `findNaturalBreak`.
 *
 * Sizes are given in estimated tokens and converted at four characters per
 * token.
 *
 * @param {string} content - Text to split.
 * @param {number} chunkSize - Target chunk size in estimated tokens.
 * @param {number} overlap - Estimated tokens re-read at the start of the next chunk.
 * @returns {Array<{content: string, startOffset: number}>} Chunks in document
 *   order; `startOffset` is the character index where each chunk begins.
 */
function chunkSemantic(content, chunkSize, overlap) {
    const targetChars = 4 * chunkSize;
    const sharedChars = 4 * overlap;
    const slack = Math.floor(targetChars * 0.2);
    const totalChars = content.length;
    const pieces = [];
    let cursor = 0;
    while (cursor < totalChars) {
        // Whatever is left fits into one (slightly stretched) chunk: emit it and stop.
        if (totalChars - cursor <= targetChars + slack) {
            pieces.push({ content: content.slice(cursor), startOffset: cursor });
            return pieces;
        }
        const proposedCut = findNaturalBreak(content, cursor + targetChars, slack);
        // Never accept a cut that would leave the chunk empty.
        const cut = Math.max(proposedCut, cursor + 1);
        pieces.push({ content: content.slice(cursor, cut), startOffset: cursor });
        // Step back by the overlap, but always advance at least one character.
        cursor = Math.max(cursor + 1, cut - sharedChars);
    }
    return pieces;
}
|
|
146
|
+
// ---------------------------------------------------------------------------
|
|
147
|
+
// Paragraph strategy
|
|
148
|
+
// ---------------------------------------------------------------------------
|
|
149
|
+
/**
 * Group blank-line-separated paragraphs into chunks of at most
 * `chunkSize * 4` characters.
 *
 * Paragraphs are never split: a single paragraph longer than the limit becomes
 * its own oversized chunk. Paragraphs within a chunk are re-joined with the
 * original `\n\n` separator, so each chunk's `content` is a verbatim slice of
 * the input starting at `startOffset`.
 *
 * @param content   Text to split.
 * @param chunkSize Target chunk size (multiplied by 4 to get characters).
 * @returns Array of `{ content, startOffset }`. Empty input yields an empty
 *          array, matching `chunkSemantic`, rather than a single empty chunk.
 */
function chunkParagraph(content, chunkSize) {
    // Without this guard, split() returns [''] and one empty chunk is emitted.
    if (content.length === 0) {
        return [];
    }
    const separator = '\n\n';
    const maxChars = chunkSize * 4;
    const chunks = [];
    let group = [];
    let groupLen = 0;
    let groupStart = 0;
    // Offset of the current paragraph within the original string.
    let cursor = 0;
    const flush = () => {
        chunks.push({ content: group.join(separator), startOffset: groupStart });
    };
    for (const para of content.split(separator)) {
        // A paragraph appended to a non-empty group also costs one separator.
        const addedLen = para.length + (group.length > 0 ? separator.length : 0);
        if (group.length > 0 && groupLen + addedLen > maxChars) {
            // Adding this paragraph would overflow: close the group and start a new one here.
            flush();
            group = [para];
            groupLen = para.length;
            groupStart = cursor;
        }
        else {
            if (group.length === 0) {
                groupStart = cursor;
            }
            group.push(para);
            groupLen += addedLen;
        }
        cursor += para.length + separator.length;
    }
    if (group.length > 0) {
        flush();
    }
    return chunks;
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Circuit breaker for browser/Chromium operations.
|
|
3
|
+
*
|
|
4
|
+
* States:
|
|
5
|
+
* - CLOSED (normal): requests pass through
|
|
6
|
+
* - OPEN (tripped): requests immediately fail, no browser launch attempted
|
|
7
|
+
* - HALF_OPEN (testing): allow 1 request through to test if browser works again
|
|
8
|
+
*
|
|
9
|
+
* Transitions:
|
|
10
|
+
* - CLOSED → OPEN: after `failureThreshold` consecutive failures (default: 3)
|
|
11
|
+
* - OPEN → HALF_OPEN: after `resetTimeoutMs` (default: 60s)
|
|
12
|
+
* - HALF_OPEN → CLOSED: if test request succeeds
|
|
13
|
+
* - HALF_OPEN → OPEN: if test request fails (reset timer)
|
|
14
|
+
*/
|
|
15
|
+
/** The three states a circuit breaker can be in. */
export type CircuitState =
    | 'closed'
    | 'open'
    | 'half_open';
|
|
16
|
+
/** Optional settings accepted by the `CircuitBreaker` constructor. */
export interface CircuitBreakerOptions {
    /** Consecutive failures that trip the breaker (implementation default: 3). */
    failureThreshold?: number;
    /** Milliseconds to stay open before a retry is allowed (implementation default: 60000). */
    resetTimeoutMs?: number;
    /** Label used in log messages (implementation default: 'browser'). */
    name?: string;
}
|
|
21
|
+
/**
 * Closed / open / half-open circuit breaker.
 *
 * Callers ask `canExecute()` before an operation and report the outcome with
 * `recordSuccess()` or `recordFailure()`.
 */
export declare class CircuitBreaker {
    private state;
    private failureCount;
    private lastFailureTime;
    private readonly failureThreshold;
    private readonly resetTimeoutMs;
    private readonly name;
    /** @param options - Overrides for threshold, cooldown and log label. */
    constructor(options?: CircuitBreakerOptions);
    /**
     * Whether a request may proceed right now. May move the breaker from
     * open to half-open once the cooldown has elapsed.
     */
    canExecute(): boolean;
    /** Report a successful operation; closes the breaker and clears the failure count. */
    recordSuccess(): void;
    /**
     * Report a failed operation.
     * @param error - Optional cause; its message is included in the trip log line.
     */
    recordFailure(error?: Error): void;
    /** Snapshot of the breaker's state, e.g. for health checks. */
    getState(): {
        state: CircuitState;
        failureCount: number;
        /** `Date.now()` timestamp of the most recent failure, or 0 if none was recorded. */
        lastFailureTime: number;
    };
    /** Force the breaker closed and clear the failure count (e.g. on manual intervention). */
    reset(): void;
}
|
|
44
|
+
/** Module-level breaker instance named 'browser' (threshold 3, 60 s cooldown). */
export declare const browserCircuitBreaker: CircuitBreaker;
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Circuit breaker for browser/Chromium operations.
|
|
3
|
+
*
|
|
4
|
+
* States:
|
|
5
|
+
* - CLOSED (normal): requests pass through
|
|
6
|
+
* - OPEN (tripped): requests immediately fail, no browser launch attempted
|
|
7
|
+
* - HALF_OPEN (testing): allow 1 request through to test if browser works again
|
|
8
|
+
*
|
|
9
|
+
* Transitions:
|
|
10
|
+
* - CLOSED → OPEN: after `failureThreshold` consecutive failures (default: 3)
|
|
11
|
+
* - OPEN → HALF_OPEN: after `resetTimeoutMs` (default: 60s)
|
|
12
|
+
* - HALF_OPEN → CLOSED: if test request succeeds
|
|
13
|
+
* - HALF_OPEN → OPEN: if test request fails (reset timer)
|
|
14
|
+
*/
|
|
15
|
+
/**
 * Closed / open / half-open circuit breaker.
 *
 * Usage: call `canExecute()` before the guarded operation, then report the
 * outcome with `recordSuccess()` or `recordFailure()`.
 *
 * All diagnostics are written to stderr (`console.error`). stdout is left
 * untouched so that state-change messages cannot corrupt piped CLI output or a
 * stdio-based protocol stream.
 *
 * NOTE(review): while the breaker is half-open, every `canExecute()` call
 * returns true until an outcome is recorded — there is no single-probe gating.
 * Confirm whether that is intended.
 */
export class CircuitBreaker {
    /** Current state: 'closed' | 'open' | 'half_open'. */
    state = 'closed';
    /** Failures recorded since the last success or reset. */
    failureCount = 0;
    /** `Date.now()` timestamp of the most recent failure; 0 if none yet. */
    lastFailureTime = 0;
    failureThreshold;
    resetTimeoutMs;
    name;
    /**
     * @param options Optional `{ failureThreshold, resetTimeoutMs, name }`.
     *   Missing or nullish fields fall back to 3, 60000 and 'browser'.
     */
    constructor({ failureThreshold, resetTimeoutMs, name } = {}) {
        this.failureThreshold = failureThreshold ?? 3;
        this.resetTimeoutMs = resetTimeoutMs ?? 60000;
        this.name = name ?? 'browser';
    }
    /**
     * Check if the circuit allows a request through.
     *
     * Side effect: when the breaker is open and `resetTimeoutMs` has elapsed
     * since the last failure, it moves to half-open and returns true.
     * @returns true when closed or half-open; false while open and cooling down.
     */
    canExecute() {
        if (this.state === 'closed')
            return true;
        if (this.state === 'open') {
            // Check if enough time has passed to try again
            if (Date.now() - this.lastFailureTime >= this.resetTimeoutMs) {
                this.state = 'half_open';
                console.error(`[circuit-breaker:${this.name}] HALF_OPEN — testing browser availability`);
                return true;
            }
            return false;
        }
        // half_open: requests are let through until an outcome is recorded
        return true;
    }
    /** Record a successful operation: clears the failure count and closes the circuit. */
    recordSuccess() {
        if (this.state === 'half_open') {
            console.error(`[circuit-breaker:${this.name}] CLOSED — browser recovered`);
        }
        this.failureCount = 0;
        this.state = 'closed';
    }
    /**
     * Record a failed operation.
     *
     * A failure while half-open re-opens the circuit immediately. Otherwise the
     * circuit opens once `failureCount` reaches `failureThreshold`. Either way
     * `lastFailureTime` is refreshed, which restarts the cooldown.
     * @param error Optional cause; its message is included in the trip log line.
     */
    recordFailure(error) {
        this.failureCount++;
        this.lastFailureTime = Date.now();
        if (this.state === 'half_open') {
            this.state = 'open';
            console.error(`[circuit-breaker:${this.name}] OPEN — test request failed, waiting ${this.resetTimeoutMs / 1000}s`);
            return;
        }
        if (this.failureCount >= this.failureThreshold) {
            this.state = 'open';
            console.error(`[circuit-breaker:${this.name}] OPEN — ${this.failureCount} consecutive failures (${error?.message ?? 'unknown'}). Falling back to HTTP-only for ${this.resetTimeoutMs / 1000}s`);
        }
    }
    /** Get current state for health checks. */
    getState() {
        return {
            state: this.state,
            failureCount: this.failureCount,
            lastFailureTime: this.lastFailureTime,
        };
    }
    /**
     * Force reset (e.g., on manual intervention).
     * Closes the circuit and clears the failure count; `lastFailureTime` is kept.
     */
    reset() {
        this.state = 'closed';
        this.failureCount = 0;
        console.error(`[circuit-breaker:${this.name}] RESET — manually closed`);
    }
}
|
|
80
|
+
// Singleton browser circuit breaker
|
|
81
|
+
// Trips after 3 consecutive failures, then cools down for one minute.
export const browserCircuitBreaker = new CircuitBreaker({
    failureThreshold: 3,
    resetTimeoutMs: 60 * 1000,
    name: 'browser',
});
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Content Density Pruner
|
|
3
|
+
*
|
|
4
|
+
* Two-pass pruning to reduce HTML before markdown conversion:
|
|
5
|
+
*
|
|
6
|
+
* Pass 1 — Semantic removal: strip elements whose tag or class/id clearly
|
|
7
|
+
* mark them as page chrome (nav, footer, sidebar, cookie banners, ads).
|
|
8
|
+
*
|
|
9
|
+
* Pass 2 — Density scoring: score remaining block elements by text density,
|
|
10
|
+
* link density, tag importance, and word count. Remove low-scorers.
|
|
11
|
+
*
|
|
12
|
+
* Inspired by Crawl4AI's PruningContentFilter — targets 40-60% token savings.
|
|
13
|
+
*/
|
|
14
|
+
/** Tuning knobs for `pruneContent`. */
export interface PruneOptions {
    /**
     * Score threshold in the range 0-1; blocks scoring below it are removed.
     * Default: 0.3
     */
    threshold?: number;
    /**
     * Minimum word count for a block to be considered.
     * Default: 3
     */
    minWords?: number;
    /**
     * Whether the threshold adapts to the content distribution.
     * Default: true
     */
    dynamic?: boolean;
}
|
|
22
|
+
/** Outcome of a `pruneContent` call. */
export interface PruneResult {
    /** The HTML that remains after pruning. */
    html: string;
    /** How many nodes were removed. */
    nodesRemoved: number;
    /** Share of content removed, as a percentage of character count. */
    reductionPercent: number;
}
|
|
30
|
+
/**
 * Prune low-value HTML blocks in two passes:
 *   1. removal by semantic tag/class,
 *   2. density scoring of the blocks that remain.
 *
 * @param html - Raw HTML to prune
 * @param options - Pruning configuration
 * @returns The pruned HTML together with removal statistics
 */
export declare function pruneContent(html: string, options?: PruneOptions): PruneResult;
|
|
40
|
+
/**
 * Post-process markdown to strip UI elements that leak through from content
 * scrapers: buttons, empty images, and consecutive hr separators.
 *
 * @param markdown - Raw markdown string
 * @returns The cleaned markdown string
 */
export declare function pruneMarkdown(markdown: string): string;
|