@iflow-mcp/jakeliume-webpeel 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +313 -0
- package/dist/cache.d.ts +30 -0
- package/dist/cache.js +139 -0
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +411 -0
- package/dist/cli/commands/doctor.d.ts +37 -0
- package/dist/cli/commands/doctor.js +371 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1345 -0
- package/dist/cli/commands/guide.d.ts +2 -0
- package/dist/cli/commands/guide.js +183 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +840 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/monitor.d.ts +12 -0
- package/dist/cli/commands/monitor.js +197 -0
- package/dist/cli/commands/observe.d.ts +12 -0
- package/dist/cli/commands/observe.js +158 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +282 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +1021 -0
- package/dist/cli/commands/setup.d.ts +13 -0
- package/dist/cli/commands/setup.js +244 -0
- package/dist/cli/commands/skill.d.ts +15 -0
- package/dist/cli/commands/skill.js +195 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +806 -0
- package/dist/cli-auth.d.ts +75 -0
- package/dist/cli-auth.js +369 -0
- package/dist/cli.d.ts +17 -0
- package/dist/cli.js +99 -0
- package/dist/core/actions.d.ts +69 -0
- package/dist/core/actions.js +495 -0
- package/dist/core/agent.d.ts +98 -0
- package/dist/core/agent.js +558 -0
- package/dist/core/answer.d.ts +42 -0
- package/dist/core/answer.js +395 -0
- package/dist/core/application-tracker.d.ts +84 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/apply.d.ts +162 -0
- package/dist/core/apply.js +816 -0
- package/dist/core/auth-detection.d.ts +35 -0
- package/dist/core/auth-detection.js +358 -0
- package/dist/core/auto-extract.d.ts +82 -0
- package/dist/core/auto-extract.js +604 -0
- package/dist/core/auto-interact.d.ts +23 -0
- package/dist/core/auto-interact.js +246 -0
- package/dist/core/bm25-filter.d.ts +66 -0
- package/dist/core/bm25-filter.js +288 -0
- package/dist/core/branding.d.ts +54 -0
- package/dist/core/branding.js +234 -0
- package/dist/core/browser-fetch.d.ts +323 -0
- package/dist/core/browser-fetch.js +1600 -0
- package/dist/core/browser-pool.d.ts +91 -0
- package/dist/core/browser-pool.js +550 -0
- package/dist/core/budget.d.ts +42 -0
- package/dist/core/budget.js +324 -0
- package/dist/core/business-intel.d.ts +47 -0
- package/dist/core/business-intel.js +279 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +121 -0
- package/dist/core/cf-worker-proxy.d.ts +32 -0
- package/dist/core/cf-worker-proxy.js +87 -0
- package/dist/core/challenge-detection.d.ts +26 -0
- package/dist/core/challenge-detection.js +468 -0
- package/dist/core/change-tracking.d.ts +75 -0
- package/dist/core/change-tracking.js +276 -0
- package/dist/core/chunker.d.ts +46 -0
- package/dist/core/chunker.js +249 -0
- package/dist/core/chunking.d.ts +42 -0
- package/dist/core/chunking.js +181 -0
- package/dist/core/circuit-breaker.d.ts +44 -0
- package/dist/core/circuit-breaker.js +85 -0
- package/dist/core/content-pruner.d.ts +47 -0
- package/dist/core/content-pruner.js +425 -0
- package/dist/core/cookie-cache.d.ts +60 -0
- package/dist/core/cookie-cache.js +163 -0
- package/dist/core/crawl-checkpoint.d.ts +54 -0
- package/dist/core/crawl-checkpoint.js +104 -0
- package/dist/core/crawler.d.ts +84 -0
- package/dist/core/crawler.js +349 -0
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/deep-fetch.d.ts +74 -0
- package/dist/core/deep-fetch.js +405 -0
- package/dist/core/deep-research.d.ts +141 -0
- package/dist/core/deep-research.js +972 -0
- package/dist/core/design-analysis.d.ts +70 -0
- package/dist/core/design-analysis.js +490 -0
- package/dist/core/design-compare.d.ts +38 -0
- package/dist/core/design-compare.js +264 -0
- package/dist/core/diff.d.ts +61 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/dns-cache.d.ts +20 -0
- package/dist/core/dns-cache.js +198 -0
- package/dist/core/documents.d.ts +23 -0
- package/dist/core/documents.js +123 -0
- package/dist/core/domain-memory.d.ts +66 -0
- package/dist/core/domain-memory.js +163 -0
- package/dist/core/domain-verify.d.ts +40 -0
- package/dist/core/domain-verify.js +379 -0
- package/dist/core/engine-ranker.d.ts +112 -0
- package/dist/core/engine-ranker.js +395 -0
- package/dist/core/extract-inline.d.ts +38 -0
- package/dist/core/extract-inline.js +215 -0
- package/dist/core/extract-listings.d.ts +38 -0
- package/dist/core/extract-listings.js +461 -0
- package/dist/core/extract.d.ts +9 -0
- package/dist/core/extract.js +139 -0
- package/dist/core/fetch-cache.d.ts +57 -0
- package/dist/core/fetch-cache.js +95 -0
- package/dist/core/fetcher.d.ts +13 -0
- package/dist/core/fetcher.js +12 -0
- package/dist/core/google-cache.d.ts +29 -0
- package/dist/core/google-cache.js +180 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/hotel-search.d.ts +122 -0
- package/dist/core/hotel-search.js +382 -0
- package/dist/core/http-fetch.d.ts +72 -0
- package/dist/core/http-fetch.js +820 -0
- package/dist/core/human.d.ts +175 -0
- package/dist/core/human.js +680 -0
- package/dist/core/image-caption.d.ts +44 -0
- package/dist/core/image-caption.js +271 -0
- package/dist/core/jobs.d.ts +75 -0
- package/dist/core/jobs.js +634 -0
- package/dist/core/json-ld.d.ts +15 -0
- package/dist/core/json-ld.js +617 -0
- package/dist/core/language-detect.d.ts +18 -0
- package/dist/core/language-detect.js +135 -0
- package/dist/core/links.d.ts +10 -0
- package/dist/core/links.js +44 -0
- package/dist/core/llm-extract.d.ts +71 -0
- package/dist/core/llm-extract.js +507 -0
- package/dist/core/llm-provider.d.ts +100 -0
- package/dist/core/llm-provider.js +702 -0
- package/dist/core/local-search.d.ts +60 -0
- package/dist/core/local-search.js +308 -0
- package/dist/core/logger.d.ts +28 -0
- package/dist/core/logger.js +104 -0
- package/dist/core/map.d.ts +33 -0
- package/dist/core/map.js +127 -0
- package/dist/core/markdown.d.ts +92 -0
- package/dist/core/markdown.js +809 -0
- package/dist/core/metadata.d.ts +34 -0
- package/dist/core/metadata.js +422 -0
- package/dist/core/observe.d.ts +113 -0
- package/dist/core/observe.js +395 -0
- package/dist/core/ocr.d.ts +12 -0
- package/dist/core/ocr.js +33 -0
- package/dist/core/paginate.d.ts +31 -0
- package/dist/core/paginate.js +106 -0
- package/dist/core/pdf.d.ts +8 -0
- package/dist/core/pdf.js +25 -0
- package/dist/core/peel-tls.d.ts +25 -0
- package/dist/core/peel-tls.js +220 -0
- package/dist/core/pipeline.d.ts +132 -0
- package/dist/core/pipeline.js +1666 -0
- package/dist/core/profiles.d.ts +61 -0
- package/dist/core/profiles.js +350 -0
- package/dist/core/prompt-guard.d.ts +30 -0
- package/dist/core/prompt-guard.js +119 -0
- package/dist/core/proxy-config.d.ts +90 -0
- package/dist/core/proxy-config.js +172 -0
- package/dist/core/quick-answer.d.ts +53 -0
- package/dist/core/quick-answer.js +833 -0
- package/dist/core/rate-governor.d.ts +80 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/readability.d.ts +57 -0
- package/dist/core/readability.js +533 -0
- package/dist/core/research.d.ts +66 -0
- package/dist/core/research.js +270 -0
- package/dist/core/retry.d.ts +60 -0
- package/dist/core/retry.js +119 -0
- package/dist/core/safe-browsing.d.ts +30 -0
- package/dist/core/safe-browsing.js +206 -0
- package/dist/core/schema-extraction.d.ts +66 -0
- package/dist/core/schema-extraction.js +352 -0
- package/dist/core/schema-postprocess.d.ts +32 -0
- package/dist/core/schema-postprocess.js +469 -0
- package/dist/core/schema-templates.d.ts +19 -0
- package/dist/core/schema-templates.js +143 -0
- package/dist/core/screenshot.d.ts +224 -0
- package/dist/core/screenshot.js +207 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +243 -0
- package/dist/core/search-provider.js +1629 -0
- package/dist/core/searxng-provider.d.ts +35 -0
- package/dist/core/searxng-provider.js +105 -0
- package/dist/core/selective-evidence.d.ts +151 -0
- package/dist/core/selective-evidence.js +389 -0
- package/dist/core/site-search.d.ts +44 -0
- package/dist/core/site-search.js +252 -0
- package/dist/core/sitemap.d.ts +23 -0
- package/dist/core/sitemap.js +105 -0
- package/dist/core/source-credibility.d.ts +29 -0
- package/dist/core/source-credibility.js +584 -0
- package/dist/core/source-scoring.d.ts +166 -0
- package/dist/core/source-scoring.js +396 -0
- package/dist/core/stemmer.d.ts +38 -0
- package/dist/core/stemmer.js +509 -0
- package/dist/core/strategies.d.ts +104 -0
- package/dist/core/strategies.js +1044 -0
- package/dist/core/strategy-hooks.d.ts +145 -0
- package/dist/core/strategy-hooks.js +74 -0
- package/dist/core/structured-extract.d.ts +43 -0
- package/dist/core/structured-extract.js +550 -0
- package/dist/core/summarize.d.ts +17 -0
- package/dist/core/summarize.js +78 -0
- package/dist/core/synonyms.d.ts +42 -0
- package/dist/core/synonyms.js +184 -0
- package/dist/core/system-monitor.d.ts +61 -0
- package/dist/core/system-monitor.js +133 -0
- package/dist/core/table-format.d.ts +30 -0
- package/dist/core/table-format.js +146 -0
- package/dist/core/threat-feeds.d.ts +23 -0
- package/dist/core/threat-feeds.js +104 -0
- package/dist/core/timing.d.ts +21 -0
- package/dist/core/timing.js +33 -0
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/core/user-agents.d.ts +82 -0
- package/dist/core/user-agents.js +239 -0
- package/dist/core/vertical-search.d.ts +54 -0
- package/dist/core/vertical-search.js +158 -0
- package/dist/core/watch-manager.d.ts +175 -0
- package/dist/core/watch-manager.js +416 -0
- package/dist/core/watch.d.ts +101 -0
- package/dist/core/watch.js +389 -0
- package/dist/core/youtube.d.ts +130 -0
- package/dist/core/youtube.js +1175 -0
- package/dist/ee/challenge-re-export.d.ts +1 -0
- package/dist/ee/challenge-re-export.js +1 -0
- package/dist/ee/challenge-solver.d.ts +72 -0
- package/dist/ee/challenge-solver.js +720 -0
- package/dist/ee/domain-extractors.d.ts +8 -0
- package/dist/ee/domain-extractors.js +8 -0
- package/dist/ee/domain-intel.d.ts +16 -0
- package/dist/ee/domain-intel.js +133 -0
- package/dist/ee/extractors/allrecipes.d.ts +2 -0
- package/dist/ee/extractors/allrecipes.js +120 -0
- package/dist/ee/extractors/amazon.d.ts +2 -0
- package/dist/ee/extractors/amazon.js +78 -0
- package/dist/ee/extractors/arxiv.d.ts +2 -0
- package/dist/ee/extractors/arxiv.js +137 -0
- package/dist/ee/extractors/bestbuy.d.ts +2 -0
- package/dist/ee/extractors/bestbuy.js +78 -0
- package/dist/ee/extractors/carscom.d.ts +2 -0
- package/dist/ee/extractors/carscom.js +121 -0
- package/dist/ee/extractors/coingecko.d.ts +2 -0
- package/dist/ee/extractors/coingecko.js +134 -0
- package/dist/ee/extractors/craigslist.d.ts +2 -0
- package/dist/ee/extractors/craigslist.js +92 -0
- package/dist/ee/extractors/devto.d.ts +2 -0
- package/dist/ee/extractors/devto.js +135 -0
- package/dist/ee/extractors/ebay.d.ts +2 -0
- package/dist/ee/extractors/ebay.js +90 -0
- package/dist/ee/extractors/espn.d.ts +2 -0
- package/dist/ee/extractors/espn.js +260 -0
- package/dist/ee/extractors/etsy.d.ts +2 -0
- package/dist/ee/extractors/etsy.js +52 -0
- package/dist/ee/extractors/facebook.d.ts +2 -0
- package/dist/ee/extractors/facebook.js +46 -0
- package/dist/ee/extractors/github.d.ts +2 -0
- package/dist/ee/extractors/github.js +196 -0
- package/dist/ee/extractors/google-flights.d.ts +2 -0
- package/dist/ee/extractors/google-flights.js +176 -0
- package/dist/ee/extractors/hackernews.d.ts +2 -0
- package/dist/ee/extractors/hackernews.js +147 -0
- package/dist/ee/extractors/imdb.d.ts +2 -0
- package/dist/ee/extractors/imdb.js +172 -0
- package/dist/ee/extractors/index.d.ts +26 -0
- package/dist/ee/extractors/index.js +247 -0
- package/dist/ee/extractors/instagram.d.ts +2 -0
- package/dist/ee/extractors/instagram.js +102 -0
- package/dist/ee/extractors/kalshi.d.ts +2 -0
- package/dist/ee/extractors/kalshi.js +121 -0
- package/dist/ee/extractors/kayak-cars.d.ts +2 -0
- package/dist/ee/extractors/kayak-cars.js +270 -0
- package/dist/ee/extractors/linkedin.d.ts +2 -0
- package/dist/ee/extractors/linkedin.js +113 -0
- package/dist/ee/extractors/medium.d.ts +2 -0
- package/dist/ee/extractors/medium.js +130 -0
- package/dist/ee/extractors/news.d.ts +4 -0
- package/dist/ee/extractors/news.js +173 -0
- package/dist/ee/extractors/npm.d.ts +2 -0
- package/dist/ee/extractors/npm.js +86 -0
- package/dist/ee/extractors/pdf.d.ts +2 -0
- package/dist/ee/extractors/pdf.js +108 -0
- package/dist/ee/extractors/pinterest.d.ts +2 -0
- package/dist/ee/extractors/pinterest.js +34 -0
- package/dist/ee/extractors/polymarket.d.ts +2 -0
- package/dist/ee/extractors/polymarket.js +358 -0
- package/dist/ee/extractors/producthunt.d.ts +2 -0
- package/dist/ee/extractors/producthunt.js +88 -0
- package/dist/ee/extractors/pubmed.d.ts +2 -0
- package/dist/ee/extractors/pubmed.js +162 -0
- package/dist/ee/extractors/pypi.d.ts +2 -0
- package/dist/ee/extractors/pypi.js +80 -0
- package/dist/ee/extractors/reddit.d.ts +2 -0
- package/dist/ee/extractors/reddit.js +438 -0
- package/dist/ee/extractors/redfin.d.ts +2 -0
- package/dist/ee/extractors/redfin.js +156 -0
- package/dist/ee/extractors/semanticscholar.d.ts +2 -0
- package/dist/ee/extractors/semanticscholar.js +131 -0
- package/dist/ee/extractors/shared.d.ts +12 -0
- package/dist/ee/extractors/shared.js +76 -0
- package/dist/ee/extractors/soundcloud.d.ts +2 -0
- package/dist/ee/extractors/soundcloud.js +34 -0
- package/dist/ee/extractors/sportsbetting.d.ts +2 -0
- package/dist/ee/extractors/sportsbetting.js +37 -0
- package/dist/ee/extractors/spotify.d.ts +2 -0
- package/dist/ee/extractors/spotify.js +34 -0
- package/dist/ee/extractors/stackoverflow.d.ts +2 -0
- package/dist/ee/extractors/stackoverflow.js +61 -0
- package/dist/ee/extractors/substack.d.ts +2 -0
- package/dist/ee/extractors/substack.js +115 -0
- package/dist/ee/extractors/substackroot.d.ts +2 -0
- package/dist/ee/extractors/substackroot.js +46 -0
- package/dist/ee/extractors/tiktok.d.ts +2 -0
- package/dist/ee/extractors/tiktok.js +29 -0
- package/dist/ee/extractors/tradingview.d.ts +2 -0
- package/dist/ee/extractors/tradingview.js +182 -0
- package/dist/ee/extractors/twitch.d.ts +2 -0
- package/dist/ee/extractors/twitch.js +36 -0
- package/dist/ee/extractors/twitter.d.ts +2 -0
- package/dist/ee/extractors/twitter.js +327 -0
- package/dist/ee/extractors/types.d.ts +14 -0
- package/dist/ee/extractors/types.js +1 -0
- package/dist/ee/extractors/walmart.d.ts +2 -0
- package/dist/ee/extractors/walmart.js +50 -0
- package/dist/ee/extractors/weather.d.ts +2 -0
- package/dist/ee/extractors/weather.js +133 -0
- package/dist/ee/extractors/wikipedia.d.ts +4 -0
- package/dist/ee/extractors/wikipedia.js +235 -0
- package/dist/ee/extractors/yelp.d.ts +2 -0
- package/dist/ee/extractors/yelp.js +216 -0
- package/dist/ee/extractors/youtube.d.ts +2 -0
- package/dist/ee/extractors/youtube.js +189 -0
- package/dist/ee/extractors/zillow.d.ts +54 -0
- package/dist/ee/extractors/zillow.js +247 -0
- package/dist/ee/extractors-re-export.d.ts +1 -0
- package/dist/ee/extractors-re-export.js +1 -0
- package/dist/ee/premium-hooks.d.ts +20 -0
- package/dist/ee/premium-hooks.js +50 -0
- package/dist/ee/spa-detection.d.ts +2 -0
- package/dist/ee/spa-detection.js +2 -0
- package/dist/ee/stability.d.ts +4 -0
- package/dist/ee/stability.js +29 -0
- package/dist/ee/swr-cache.d.ts +14 -0
- package/dist/ee/swr-cache.js +34 -0
- package/dist/index.d.ts +143 -0
- package/dist/index.js +291 -0
- package/dist/integrations/index.d.ts +2 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/langchain.d.ts +64 -0
- package/dist/integrations/langchain.js +115 -0
- package/dist/integrations/llamaindex.d.ts +50 -0
- package/dist/integrations/llamaindex.js +91 -0
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +395 -0
- package/dist/mcp/handlers/extract.d.ts +7 -0
- package/dist/mcp/handlers/extract.js +135 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +63 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +40 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/observe.d.ts +8 -0
- package/dist/mcp/handlers/observe.js +37 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +78 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +7 -0
- package/dist/mcp/server.js +108 -0
- package/dist/mcp/smart-router.d.ts +23 -0
- package/dist/mcp/smart-router.js +178 -0
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +632 -0
- package/dist/server/auth-store.d.ts +28 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/bull-queues.d.ts +60 -0
- package/dist/server/bull-queues.js +90 -0
- package/dist/server/email-service.d.ts +55 -0
- package/dist/server/email-service.js +291 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/audit-log.d.ts +14 -0
- package/dist/server/middleware/audit-log.js +73 -0
- package/dist/server/middleware/auth.d.ts +35 -0
- package/dist/server/middleware/auth.js +225 -0
- package/dist/server/middleware/rate-limit.d.ts +50 -0
- package/dist/server/middleware/rate-limit.js +270 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +201 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +146 -0
- package/dist/server/pg-auth-store.js +576 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +79 -0
- package/dist/server/routes/admin-active.d.ts +7 -0
- package/dist/server/routes/admin-active.js +120 -0
- package/dist/server/routes/admin-stats.d.ts +7 -0
- package/dist/server/routes/admin-stats.js +176 -0
- package/dist/server/routes/agent.d.ts +24 -0
- package/dist/server/routes/agent.js +480 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +295 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cache-warm.d.ts +25 -0
- package/dist/server/routes/cache-warm.js +212 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/crawl.d.ts +13 -0
- package/dist/server/routes/crawl.js +287 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/deep-research.d.ts +11 -0
- package/dist/server/routes/deep-research.js +232 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +14 -0
- package/dist/server/routes/extract.js +325 -0
- package/dist/server/routes/feed.d.ts +15 -0
- package/dist/server/routes/feed.js +311 -0
- package/dist/server/routes/fetch-queue.d.ts +13 -0
- package/dist/server/routes/fetch-queue.js +357 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +1274 -0
- package/dist/server/routes/go.d.ts +14 -0
- package/dist/server/routes/go.js +81 -0
- package/dist/server/routes/health.d.ts +11 -0
- package/dist/server/routes/health.js +141 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +574 -0
- package/dist/server/routes/map.d.ts +11 -0
- package/dist/server/routes/map.js +116 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +197 -0
- package/dist/server/routes/metrics.d.ts +37 -0
- package/dist/server/routes/metrics.js +149 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/reader.d.ts +18 -0
- package/dist/server/routes/reader.js +192 -0
- package/dist/server/routes/research.d.ts +14 -0
- package/dist/server/routes/research.js +482 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +820 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +874 -0
- package/dist/server/routes/session.d.ts +17 -0
- package/dist/server/routes/session.js +548 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +102 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +72 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
- package/dist/server/routes/smart-search/handlers/general.js +717 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +1309 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +154 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +546 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +264 -0
- package/dist/server/routes/smart-search/llm.d.ts +16 -0
- package/dist/server/routes/smart-search/llm.js +70 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +81 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +20 -0
- package/dist/server/routes/smart-search/utils.js +146 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +296 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/usage.d.ts +9 -0
- package/dist/server/routes/usage.js +279 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1867 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +14 -0
- package/dist/server/sentry.js +104 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/dist/types.d.ts +552 -0
- package/dist/types.js +39 -0
- package/llms.txt +105 -0
- package/package.json +189 -0
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Image alt-text enhancement module.
|
|
3
|
+
*
|
|
4
|
+
* Two strategies:
|
|
5
|
+
* 1. Heuristic (no LLM) — generates captions from filename, URL path, and nearby text context
|
|
6
|
+
* 2. LLM vision (with user's API key) — generates accurate descriptions via vision models
|
|
7
|
+
*/
|
|
8
|
+
/**
 * Heuristically fill in alt text for `<img>` tags that are missing it.
 *
 * A tag qualifies when it has no `alt` attribute or an empty one (`alt=""`).
 * Any non-empty alt text is left exactly as it is.
 *
 * The caption is chosen from the first source that yields something usable:
 *   1. The filename — `/images/team-photo-2024.jpg` → "Team Photo 2024"
 *   2. A path segment — `/products/widget/hero.png` → "Widget image"
 *   3. Nearby heading / figcaption / paragraph text (within 300 chars)
 *   4. The generic fallback "Image"
 *
 * @param html - Raw HTML string to process
 * @returns The HTML with alt text added or replaced on qualifying img tags
 */
export declare function enhanceImageAltText(html: string): string;
|
|
27
|
+
/**
 * Generate image captions with an LLM vision model.
 *
 * The caller must provide their own API key; no key is stored server-side.
 * Images are handled one after another (not in parallel) to avoid rate limiting.
 *
 * @param images - List of {url, context} pairs; `context` is nearby text that improves accuracy
 * @param llmApiKey - API key for the selected provider
 * @param llmProvider - Vision-capable provider to use: 'openai' | 'anthropic' | 'google'
 * @returns List of {url, caption} in the same order as the input
 */
export declare function captionImagesWithLLM(
    images: Array<{
        url: string;
        context: string;
    }>,
    llmApiKey: string,
    llmProvider: 'openai' | 'anthropic' | 'google',
): Promise<Array<{
    url: string;
    caption: string;
}>>;
|
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Image alt-text enhancement module.
|
|
3
|
+
*
|
|
4
|
+
* Two strategies:
|
|
5
|
+
* 1. Heuristic (no LLM) — generates captions from filename, URL path, and nearby text context
|
|
6
|
+
* 2. LLM vision (with user's API key) — generates accurate descriptions via vision models
|
|
7
|
+
*/
|
|
8
|
+
// ---------------------------------------------------------------------------
|
|
9
|
+
// Heuristic helpers
|
|
10
|
+
// ---------------------------------------------------------------------------
|
|
11
|
+
/** Filename stems (lowercase, no extension) that are too generic to use as a caption. */
const GENERIC_FILENAMES = new Set([
    'image',
    'img',
    'photo',
    'picture',
    'thumbnail',
    'thumb',
    'icon',
    'logo',
    'banner',
    'placeholder',
    'default',
    'hero',
    'bg',
    'background',
    'avatar',
    'pic',
    'graphic',
    'figure',
    'shot',
]);
|
|
16
|
+
/** Directory names (lowercase) that carry no descriptive meaning in an image URL path. */
const NOISE_PATH_SEGMENTS = new Set([
    'images',
    'img',
    'imgs',
    'photos',
    'assets',
    'static',
    'media',
    'public',
    'uploads',
    'files',
    'resources',
    'content',
    'cdn',
    'dist',
    'build',
    'src',
    'www',
    'web',
    'site',
]);
|
|
21
|
+
/**
 * Convert a URL slug / camelCase / underscored name into readable title-cased text.
 *
 * Examples:
 *   "team-photo-2024"  → "Team Photo 2024"
 *   "heroImage"        → "Hero Image"
 *   "my_product_shot"  → "My Product Shot"
 *   "HTMLParser"       → "HTML Parser"
 *   "école-photo"      → "École Photo"
 *
 * @param slug - Raw name to humanize
 * @returns The name with separators turned into single spaces and the first
 *          character of every word upper-cased
 */
function slugToTitle(slug) {
    return slug
        .replace(/[-_]+/g, ' ') // hyphens/underscores → spaces
        .replace(/([a-z])([A-Z])/g, '$1 $2') // camelCase split
        .replace(/([A-Z]{2,})([A-Z][a-z])/g, '$1 $2') // HTMLParser → HTML Parser
        .replace(/\s+/g, ' ')
        .trim()
        // Title Case. `\b\w` is ASCII-only, so a non-ASCII first letter used to
        // create a false word boundary ("école" → "éCole"). Match the first
        // letter/digit/underscore that is not preceded by another word character
        // (or a combining mark) instead; for pure-ASCII input this is identical.
        .replace(/(?<![\p{L}\p{M}\p{N}_])[\p{L}\p{N}_]/gu, (c) => c.toUpperCase());
}
|
|
37
|
+
/**
 * Derive a caption from the image src URL.
 *
 * Tries, in order:
 *   1. The filename without its extension, unless it is short (≤ 2 chars) or
 *      listed in GENERIC_FILENAMES.
 *   2. The nearest parent path segment that is longer than 2 chars and not in
 *      NOISE_PATH_SEGMENTS, suffixed with " image".
 *
 * @param src - Value of the img `src` attribute (absolute, protocol-relative, or relative)
 * @returns A caption string, or null if nothing useful can be derived
 */
function captionFromUrl(src) {
    // Inline / ephemeral URIs have no descriptive path; splitting their payload
    // on "/" would only produce junk captions.
    if (/^(?:data|blob|javascript):/i.test(src))
        return null;
    try {
        let pathStr;
        if (/^https?:\/\//i.test(src)) {
            // Absolute URL: the parser drops host, query and fragment for us.
            pathStr = new URL(src).pathname;
        }
        else if (src.startsWith('//')) {
            // Protocol-relative URL: give it a scheme so the host is parsed as
            // a host instead of being mistaken for the first path segment.
            pathStr = new URL(`https:${src}`).pathname;
        }
        else {
            // Relative / root-relative path: strip query string and fragment so
            // they do not end up inside the filename.
            pathStr = src.replace(/[?#][\s\S]*$/, '');
        }
        const parts = pathStr.split('/').filter(Boolean);
        // 1. Try the filename (without extension)
        const filename = parts[parts.length - 1] ?? '';
        const nameWithoutExt = filename.replace(/\.[^.]+$/, '');
        if (nameWithoutExt.length > 2 &&
            !GENERIC_FILENAMES.has(nameWithoutExt.toLowerCase())) {
            const title = slugToTitle(nameWithoutExt);
            if (title.length > 2)
                return title;
        }
        // 2. Try meaningful parent path segments (walk up from the file)
        for (let i = parts.length - 2; i >= 0; i--) {
            const seg = parts[i];
            if (seg && seg.length > 2 && !NOISE_PATH_SEGMENTS.has(seg.toLowerCase())) {
                const title = slugToTitle(seg);
                return `${title} image`;
            }
        }
    }
    catch {
        // URL parse error — fall through
    }
    return null;
}
|
|
69
|
+
/**
 * Extract the nearest meaningful text context surrounding an img tag.
 *
 * Looks at a window of up to 300 chars before the tag and up to 400 chars
 * starting at the tag itself (so the window also covers the tag's own markup).
 * Preference order:
 *   1. The last heading (<h1>–<h6>, plain text content of 3–80 chars) before the image
 *   2. A <figcaption> (plain text content of 3–120 chars) after the image
 *   3. Tag-stripped text from whichever side has more of it, capped at 80 chars
 *
 * @param html - Full HTML document/fragment being processed
 * @param imgStart - Index in `html` where the img tag begins
 * @returns The selected context text; may be an empty string when neither side has text
 */
function extractNearbyText(html, imgStart) {
    const beforeStart = Math.max(0, imgStart - 300);
    const afterEnd = Math.min(html.length, imgStart + 400);
    const beforeHtml = html.slice(beforeStart, imgStart);
    const afterHtml = html.slice(imgStart, afterEnd);
    // Prefer the nearest heading before the image
    const headingMatches = beforeHtml.match(/<h[1-6][^>]*>([^<]{3,80})<\/h[1-6]>/gi);
    if (headingMatches) {
        const lastHeading = headingMatches[headingMatches.length - 1];
        const text = lastHeading.replace(/<[^>]+>/g, '').trim();
        if (text.length > 3)
            return text;
    }
    // Prefer figcaption near the image
    const figMatch = afterHtml.match(/<figcaption[^>]*>([^<]{3,120})<\/figcaption>/i);
    if (figMatch) {
        const text = (figMatch[1] ?? '').trim();
        if (text.length > 3)
            return text;
    }
    // The fixed-size windows can cut a tag in half. A half tag is not matched by
    // the tag-stripping regex, so its attribute text (or a truncated
    // `<img src="…`) would leak into the caption. Drop such fragments first,
    // but only on an edge that was actually truncated.
    const beforeClean = beforeStart > 0
        ? beforeHtml.replace(/^[^<>]*>/, '')
        : beforeHtml;
    const afterClean = afterEnd < html.length
        ? afterHtml.replace(/<[^>]*$/, '')
        : afterHtml;
    // Strip tags, return the richer side
    const stripTags = (s) => s.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
    const beforeText = stripTags(beforeClean);
    const afterText = stripTags(afterClean);
    return afterText.length > beforeText.length
        ? afterText.slice(0, 80)
        : beforeText.slice(-80);
}
|
|
100
|
+
// ---------------------------------------------------------------------------
|
|
101
|
+
// Public: heuristic alt-text enhancement
|
|
102
|
+
// ---------------------------------------------------------------------------
|
|
103
|
+
/**
 * Enhance images that lack alt text with heuristic-based descriptions.
 *
 * Processes <img> tags that have:
 *   - No alt attribute at all
 *   - An empty (or whitespace-only) quoted alt attribute (alt="")
 *
 * Caption priority:
 *   1. Filename analysis: `/images/team-photo-2024.jpg` → "Team Photo 2024"
 *   2. URL path segments: `/products/widget/hero.png` → "Widget image"
 *   3. Nearby heading/figcaption/paragraph text, prefixed with "Image: "
 *   4. Generic fallback: "Image"
 *
 * Non-empty alt text is always preserved unchanged, and <img> tags without a
 * quoted src attribute are left untouched.
 *
 * @param html - Raw HTML string to process
 * @returns HTML with alt text added/replaced on qualifying img tags
 */
export function enhanceImageAltText(html) {
    // The lookbehind keeps `data-alt="..."` (or any `*-alt`) from being
    // mistaken for the real alt attribute.
    const altAttrPattern = /(?<![\w-])alt=["']([^"']*)["']/i;
    return html.replace(/<img(\s[^>]*)>/gi, (match, attrs, offset) => {
        const srcMatch = attrs.match(/\bsrc=["']([^"']*)["']/i);
        if (!srcMatch) {
            return match; // No src — leave unchanged
        }
        const src = srcMatch[1] ?? '';
        const altMatch = attrs.match(altAttrPattern);
        const altValue = altMatch ? (altMatch[1] ?? '') : null;
        // Already has meaningful alt text — preserve as-is
        if (altValue !== null && altValue.trim() !== '') {
            return match;
        }
        // Build caption: URL → nearby text → generic fallback
        let caption = captionFromUrl(src);
        if (!caption) {
            const nearbyText = extractNearbyText(html, offset).trim();
            caption = nearbyText.length > 3
                ? `Image: ${nearbyText.slice(0, 60)}`
                : 'Image';
        }
        // Escape double quotes so the caption cannot terminate the attribute.
        // `&` is deliberately left alone: nearby text is sliced from HTML
        // source and may already contain entities.
        const escaped = caption.replace(/"/g, '&quot;');
        const altAttr = `alt="${escaped}"`;
        if (altMatch) {
            // Replace the empty alt value in-place. A replacer function is used
            // so `$&`, `$1`, `$'` etc. inside the caption are inserted literally
            // instead of being interpreted as replacement patterns.
            const newAttrs = attrs.replace(altAttrPattern, () => altAttr);
            return `<img${newAttrs}>`;
        }
        // No alt attribute at all: prepend one (attribute order is irrelevant)
        return `<img ${altAttr}${attrs}>`;
    });
}
|
|
155
|
+
// ---------------------------------------------------------------------------
|
|
156
|
+
// Public: LLM vision captioning (BYOK)
|
|
157
|
+
// ---------------------------------------------------------------------------
|
|
158
|
+
/**
|
|
159
|
+
* Caption images using LLM vision models.
|
|
160
|
+
*
|
|
161
|
+
* Requires the user to supply their own API key. No key is stored server-side.
|
|
162
|
+
* Processes images sequentially to avoid rate limiting.
|
|
163
|
+
*
|
|
164
|
+
* @param images - Array of {url, context} pairs. `context` is nearby text for better accuracy.
|
|
165
|
+
* @param llmApiKey - API key for the chosen provider
|
|
166
|
+
* @param llmProvider - Vision-capable model to use: 'openai' | 'anthropic' | 'google'
|
|
167
|
+
* @returns Array of {url, caption} — same order as input
|
|
168
|
+
*/
|
|
169
|
+
export async function captionImagesWithLLM(images, llmApiKey, llmProvider) {
|
|
170
|
+
const results = [];
|
|
171
|
+
for (const image of images) {
|
|
172
|
+
try {
|
|
173
|
+
const prompt = `Write a concise, descriptive alt text (1–2 sentences) for this image. Context from the surrounding page: "${image.context || 'none'}". Be specific and informative.`;
|
|
174
|
+
let caption = '';
|
|
175
|
+
if (llmProvider === 'openai') {
|
|
176
|
+
// GPT-4o-mini supports image_url with public URLs
|
|
177
|
+
const response = await fetch('https://api.openai.com/v1/chat/completions', {
|
|
178
|
+
method: 'POST',
|
|
179
|
+
headers: {
|
|
180
|
+
Authorization: `Bearer ${llmApiKey}`,
|
|
181
|
+
'Content-Type': 'application/json',
|
|
182
|
+
},
|
|
183
|
+
body: JSON.stringify({
|
|
184
|
+
model: 'gpt-4o-mini',
|
|
185
|
+
max_tokens: 120,
|
|
186
|
+
messages: [
|
|
187
|
+
{
|
|
188
|
+
role: 'user',
|
|
189
|
+
content: [
|
|
190
|
+
{ type: 'image_url', image_url: { url: image.url, detail: 'low' } },
|
|
191
|
+
{ type: 'text', text: prompt },
|
|
192
|
+
],
|
|
193
|
+
},
|
|
194
|
+
],
|
|
195
|
+
}),
|
|
196
|
+
});
|
|
197
|
+
const data = (await response.json());
|
|
198
|
+
caption = (data?.choices?.[0]?.message?.content ?? '').trim();
|
|
199
|
+
}
|
|
200
|
+
else if (llmProvider === 'anthropic') {
|
|
201
|
+
// claude-haiku-4-5 supports url-type image sources
|
|
202
|
+
const response = await fetch('https://api.anthropic.com/v1/messages', {
|
|
203
|
+
method: 'POST',
|
|
204
|
+
headers: {
|
|
205
|
+
'x-api-key': llmApiKey,
|
|
206
|
+
'anthropic-version': '2023-06-01',
|
|
207
|
+
'Content-Type': 'application/json',
|
|
208
|
+
},
|
|
209
|
+
body: JSON.stringify({
|
|
210
|
+
model: 'claude-haiku-4-5',
|
|
211
|
+
max_tokens: 120,
|
|
212
|
+
messages: [
|
|
213
|
+
{
|
|
214
|
+
role: 'user',
|
|
215
|
+
content: [
|
|
216
|
+
{ type: 'image', source: { type: 'url', url: image.url } },
|
|
217
|
+
{ type: 'text', text: prompt },
|
|
218
|
+
],
|
|
219
|
+
},
|
|
220
|
+
],
|
|
221
|
+
}),
|
|
222
|
+
});
|
|
223
|
+
const data = (await response.json());
|
|
224
|
+
caption = (data?.content?.[0]?.text ?? '').trim();
|
|
225
|
+
}
|
|
226
|
+
else if (llmProvider === 'google') {
|
|
227
|
+
// Gemini requires base64 inlineData — fetch the image first
|
|
228
|
+
let imageData = null;
|
|
229
|
+
let mimeType = 'image/jpeg';
|
|
230
|
+
try {
|
|
231
|
+
const imgResp = await fetch(image.url, {
|
|
232
|
+
headers: { Accept: 'image/*,*/*;q=0.8' },
|
|
233
|
+
});
|
|
234
|
+
if (imgResp.ok) {
|
|
235
|
+
const buffer = await imgResp.arrayBuffer();
|
|
236
|
+
imageData = Buffer.from(buffer).toString('base64');
|
|
237
|
+
const ct = imgResp.headers.get('content-type') ?? 'image/jpeg';
|
|
238
|
+
mimeType = ct.split(';')[0]?.trim() ?? 'image/jpeg';
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
catch {
|
|
242
|
+
// Image download failed — skip this provider for this image
|
|
243
|
+
}
|
|
244
|
+
if (imageData) {
|
|
245
|
+
const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-lite:generateContent?key=${llmApiKey}`, {
|
|
246
|
+
method: 'POST',
|
|
247
|
+
headers: { 'Content-Type': 'application/json' },
|
|
248
|
+
body: JSON.stringify({
|
|
249
|
+
contents: [
|
|
250
|
+
{
|
|
251
|
+
parts: [
|
|
252
|
+
{ inlineData: { mimeType, data: imageData } },
|
|
253
|
+
{ text: prompt },
|
|
254
|
+
],
|
|
255
|
+
},
|
|
256
|
+
],
|
|
257
|
+
}),
|
|
258
|
+
});
|
|
259
|
+
const data = (await response.json());
|
|
260
|
+
caption = (data?.candidates?.[0]?.content?.parts?.[0]?.text ?? '').trim();
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
results.push({ url: image.url, caption: caption || 'Image' });
|
|
264
|
+
}
|
|
265
|
+
catch {
|
|
266
|
+
// Non-fatal — captioning failed for this image
|
|
267
|
+
results.push({ url: image.url, caption: 'Image' });
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
return results;
|
|
271
|
+
}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/**
 * Jobs extraction module — turns job board pages into structured JSON.
 *
 * Two-phase pipeline:
 *   Phase A (Search):  Fetch a job search URL → parse markdown → extract job cards
 *   Phase B (Details): For the top N results, fetch each detail URL → extract the full description
 *
 * Supports LinkedIn, Glassdoor, and Indeed out of the box; the `source`
 * option declared below also accepts 'upwork'.
 * Call `cleanup()` from the main webpeel export when you are done fetching.
 */
|
|
11
|
+
/**
 * A single job listing as extracted from a job board search page.
 *
 * Only `title`, `company`, `location`, `detailUrl` and `source` are required;
 * every other field is optional and present only when it could be extracted.
 */
export interface JobCard {
    /** Job title as listed. */
    title: string;
    /** Hiring company name. */
    company: string;
    /** Job location string. */
    location: string;
    /** Salary text, when listed. Format is not specified by this declaration. */
    salary?: string;
    /** Whether the job is remote, when known. */
    remote?: boolean;
    /** When the job was posted. Format is not specified by this declaration. */
    postedAt?: string;
    /** URL of the job's detail page. */
    detailUrl: string;
    /** Short excerpt of the listing text. */
    snippet?: string;
    /** Skills associated with the listing. */
    skills?: string[];
    /** Rating attached to the listing. NOTE(review): subject and scale are not stated here — confirm. */
    rating?: number;
    /** Job board this card was extracted from. */
    source: 'glassdoor' | 'indeed' | 'linkedin' | 'upwork' | 'generic';
    /** Upwork-specific: budget or hourly rate string */
    budget?: string;
    /** Upwork-specific: job type (hourly / fixed-price) */
    jobType?: string;
    /** Upwork-specific: required experience level */
    experienceLevel?: string;
    /** Upwork-specific: client rating (0–5) */
    clientRating?: number;
    /** Upwork-specific: total amount client has spent on Upwork */
    clientSpend?: string;
}
|
|
34
|
+
/**
 * A job card enriched with data extracted from the job's detail page.
 *
 * Extends {@link JobCard}; `description` is the only additional required field.
 */
export interface JobDetail extends JobCard {
    /** Full job description extracted from the detail page. */
    description: string;
    /** Requirement items, when they could be extracted. */
    requirements?: string[];
    /** Responsibility items, when they could be extracted. */
    responsibilities?: string[];
    /** Benefit items, when they could be extracted. */
    benefits?: string[];
    /** URL to apply for the job, when available. */
    applyUrl?: string;
    /** Employment type string. NOTE(review): allowed values are not stated here — confirm. */
    employmentType?: string;
    /** Experience level. Redeclares the inherited `JobCard.experienceLevel` with the same type. */
    experienceLevel?: string;
}
|
|
43
|
+
/**
 * Options accepted by `searchJobs`.
 *
 * Provide either `url`, or `keywords` (optionally with `location`) from which
 * a search URL is built.
 */
export interface JobSearchOptions {
    /** Search URL to fetch, OR use keywords+location to build URL */
    url?: string;
    /** Job search keywords (e.g. "software engineer") */
    keywords?: string;
    /** Location (e.g. "New York") */
    location?: string;
    /** Which job board to search. Default: 'linkedin' */
    source?: 'glassdoor' | 'indeed' | 'linkedin' | 'upwork';
    /** Max job cards to return from search. Default: 25 */
    limit?: number;
    /** Fetch detail pages for top N jobs. 0 = skip details. Default: 0 */
    fetchDetails?: number;
    /** Timeout per request in ms. Default: 30000 */
    timeout?: number;
}
|
|
59
|
+
/** Result returned by `searchJobs`. */
export interface JobSearchResult {
    /** Extracted jobs; entries are `JobDetail` where a detail page was fetched, `JobCard` otherwise. */
    jobs: (JobCard | JobDetail)[];
    /** Number of jobs found. NOTE(review): whether this is counted before or after `limit` is not stated here — confirm. */
    totalFound: number;
    /** Job board the search ran against. */
    source: string;
    /** Search URL that was fetched. */
    searchUrl: string;
    /** Number of detail pages fetched. */
    detailsFetched: number;
    /** Elapsed time for the search, in milliseconds. */
    timeTakenMs: number;
}
|
|
67
|
+
/**
 * Search job boards and return structured results.
 *
 * Uses `peel()` internally so all smart-escalation / stealth logic applies.
 * Call `cleanup()` from the main webpeel export when you're done with all
 * fetching (this module does **not** call it automatically because the
 * browser instance is shared across the library).
 *
 * @param options - What to search for and how much to fetch; see {@link JobSearchOptions}
 * @returns The extracted jobs together with search metadata
 */
export declare function searchJobs(options: JobSearchOptions): Promise<JobSearchResult>;
|