@iflow-mcp/jakeliume-webpeel 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +313 -0
- package/dist/cache.d.ts +30 -0
- package/dist/cache.js +139 -0
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +411 -0
- package/dist/cli/commands/doctor.d.ts +37 -0
- package/dist/cli/commands/doctor.js +371 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1345 -0
- package/dist/cli/commands/guide.d.ts +2 -0
- package/dist/cli/commands/guide.js +183 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +840 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/monitor.d.ts +12 -0
- package/dist/cli/commands/monitor.js +197 -0
- package/dist/cli/commands/observe.d.ts +12 -0
- package/dist/cli/commands/observe.js +158 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +282 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +1021 -0
- package/dist/cli/commands/setup.d.ts +13 -0
- package/dist/cli/commands/setup.js +244 -0
- package/dist/cli/commands/skill.d.ts +15 -0
- package/dist/cli/commands/skill.js +195 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +806 -0
- package/dist/cli-auth.d.ts +75 -0
- package/dist/cli-auth.js +369 -0
- package/dist/cli.d.ts +17 -0
- package/dist/cli.js +99 -0
- package/dist/core/actions.d.ts +69 -0
- package/dist/core/actions.js +495 -0
- package/dist/core/agent.d.ts +98 -0
- package/dist/core/agent.js +558 -0
- package/dist/core/answer.d.ts +42 -0
- package/dist/core/answer.js +395 -0
- package/dist/core/application-tracker.d.ts +84 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/apply.d.ts +162 -0
- package/dist/core/apply.js +816 -0
- package/dist/core/auth-detection.d.ts +35 -0
- package/dist/core/auth-detection.js +358 -0
- package/dist/core/auto-extract.d.ts +82 -0
- package/dist/core/auto-extract.js +604 -0
- package/dist/core/auto-interact.d.ts +23 -0
- package/dist/core/auto-interact.js +246 -0
- package/dist/core/bm25-filter.d.ts +66 -0
- package/dist/core/bm25-filter.js +288 -0
- package/dist/core/branding.d.ts +54 -0
- package/dist/core/branding.js +234 -0
- package/dist/core/browser-fetch.d.ts +323 -0
- package/dist/core/browser-fetch.js +1600 -0
- package/dist/core/browser-pool.d.ts +91 -0
- package/dist/core/browser-pool.js +550 -0
- package/dist/core/budget.d.ts +42 -0
- package/dist/core/budget.js +324 -0
- package/dist/core/business-intel.d.ts +47 -0
- package/dist/core/business-intel.js +279 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +121 -0
- package/dist/core/cf-worker-proxy.d.ts +32 -0
- package/dist/core/cf-worker-proxy.js +87 -0
- package/dist/core/challenge-detection.d.ts +26 -0
- package/dist/core/challenge-detection.js +468 -0
- package/dist/core/change-tracking.d.ts +75 -0
- package/dist/core/change-tracking.js +276 -0
- package/dist/core/chunker.d.ts +46 -0
- package/dist/core/chunker.js +249 -0
- package/dist/core/chunking.d.ts +42 -0
- package/dist/core/chunking.js +181 -0
- package/dist/core/circuit-breaker.d.ts +44 -0
- package/dist/core/circuit-breaker.js +85 -0
- package/dist/core/content-pruner.d.ts +47 -0
- package/dist/core/content-pruner.js +425 -0
- package/dist/core/cookie-cache.d.ts +60 -0
- package/dist/core/cookie-cache.js +163 -0
- package/dist/core/crawl-checkpoint.d.ts +54 -0
- package/dist/core/crawl-checkpoint.js +104 -0
- package/dist/core/crawler.d.ts +84 -0
- package/dist/core/crawler.js +349 -0
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/deep-fetch.d.ts +74 -0
- package/dist/core/deep-fetch.js +405 -0
- package/dist/core/deep-research.d.ts +141 -0
- package/dist/core/deep-research.js +972 -0
- package/dist/core/design-analysis.d.ts +70 -0
- package/dist/core/design-analysis.js +490 -0
- package/dist/core/design-compare.d.ts +38 -0
- package/dist/core/design-compare.js +264 -0
- package/dist/core/diff.d.ts +61 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/dns-cache.d.ts +20 -0
- package/dist/core/dns-cache.js +198 -0
- package/dist/core/documents.d.ts +23 -0
- package/dist/core/documents.js +123 -0
- package/dist/core/domain-memory.d.ts +66 -0
- package/dist/core/domain-memory.js +163 -0
- package/dist/core/domain-verify.d.ts +40 -0
- package/dist/core/domain-verify.js +379 -0
- package/dist/core/engine-ranker.d.ts +112 -0
- package/dist/core/engine-ranker.js +395 -0
- package/dist/core/extract-inline.d.ts +38 -0
- package/dist/core/extract-inline.js +215 -0
- package/dist/core/extract-listings.d.ts +38 -0
- package/dist/core/extract-listings.js +461 -0
- package/dist/core/extract.d.ts +9 -0
- package/dist/core/extract.js +139 -0
- package/dist/core/fetch-cache.d.ts +57 -0
- package/dist/core/fetch-cache.js +95 -0
- package/dist/core/fetcher.d.ts +13 -0
- package/dist/core/fetcher.js +12 -0
- package/dist/core/google-cache.d.ts +29 -0
- package/dist/core/google-cache.js +180 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/hotel-search.d.ts +122 -0
- package/dist/core/hotel-search.js +382 -0
- package/dist/core/http-fetch.d.ts +72 -0
- package/dist/core/http-fetch.js +820 -0
- package/dist/core/human.d.ts +175 -0
- package/dist/core/human.js +680 -0
- package/dist/core/image-caption.d.ts +44 -0
- package/dist/core/image-caption.js +271 -0
- package/dist/core/jobs.d.ts +75 -0
- package/dist/core/jobs.js +634 -0
- package/dist/core/json-ld.d.ts +15 -0
- package/dist/core/json-ld.js +617 -0
- package/dist/core/language-detect.d.ts +18 -0
- package/dist/core/language-detect.js +135 -0
- package/dist/core/links.d.ts +10 -0
- package/dist/core/links.js +44 -0
- package/dist/core/llm-extract.d.ts +71 -0
- package/dist/core/llm-extract.js +507 -0
- package/dist/core/llm-provider.d.ts +100 -0
- package/dist/core/llm-provider.js +702 -0
- package/dist/core/local-search.d.ts +60 -0
- package/dist/core/local-search.js +308 -0
- package/dist/core/logger.d.ts +28 -0
- package/dist/core/logger.js +104 -0
- package/dist/core/map.d.ts +33 -0
- package/dist/core/map.js +127 -0
- package/dist/core/markdown.d.ts +92 -0
- package/dist/core/markdown.js +809 -0
- package/dist/core/metadata.d.ts +34 -0
- package/dist/core/metadata.js +422 -0
- package/dist/core/observe.d.ts +113 -0
- package/dist/core/observe.js +395 -0
- package/dist/core/ocr.d.ts +12 -0
- package/dist/core/ocr.js +33 -0
- package/dist/core/paginate.d.ts +31 -0
- package/dist/core/paginate.js +106 -0
- package/dist/core/pdf.d.ts +8 -0
- package/dist/core/pdf.js +25 -0
- package/dist/core/peel-tls.d.ts +25 -0
- package/dist/core/peel-tls.js +220 -0
- package/dist/core/pipeline.d.ts +132 -0
- package/dist/core/pipeline.js +1666 -0
- package/dist/core/profiles.d.ts +61 -0
- package/dist/core/profiles.js +350 -0
- package/dist/core/prompt-guard.d.ts +30 -0
- package/dist/core/prompt-guard.js +119 -0
- package/dist/core/proxy-config.d.ts +90 -0
- package/dist/core/proxy-config.js +172 -0
- package/dist/core/quick-answer.d.ts +53 -0
- package/dist/core/quick-answer.js +833 -0
- package/dist/core/rate-governor.d.ts +80 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/readability.d.ts +57 -0
- package/dist/core/readability.js +533 -0
- package/dist/core/research.d.ts +66 -0
- package/dist/core/research.js +270 -0
- package/dist/core/retry.d.ts +60 -0
- package/dist/core/retry.js +119 -0
- package/dist/core/safe-browsing.d.ts +30 -0
- package/dist/core/safe-browsing.js +206 -0
- package/dist/core/schema-extraction.d.ts +66 -0
- package/dist/core/schema-extraction.js +352 -0
- package/dist/core/schema-postprocess.d.ts +32 -0
- package/dist/core/schema-postprocess.js +469 -0
- package/dist/core/schema-templates.d.ts +19 -0
- package/dist/core/schema-templates.js +143 -0
- package/dist/core/screenshot.d.ts +224 -0
- package/dist/core/screenshot.js +207 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +243 -0
- package/dist/core/search-provider.js +1629 -0
- package/dist/core/searxng-provider.d.ts +35 -0
- package/dist/core/searxng-provider.js +105 -0
- package/dist/core/selective-evidence.d.ts +151 -0
- package/dist/core/selective-evidence.js +389 -0
- package/dist/core/site-search.d.ts +44 -0
- package/dist/core/site-search.js +252 -0
- package/dist/core/sitemap.d.ts +23 -0
- package/dist/core/sitemap.js +105 -0
- package/dist/core/source-credibility.d.ts +29 -0
- package/dist/core/source-credibility.js +584 -0
- package/dist/core/source-scoring.d.ts +166 -0
- package/dist/core/source-scoring.js +396 -0
- package/dist/core/stemmer.d.ts +38 -0
- package/dist/core/stemmer.js +509 -0
- package/dist/core/strategies.d.ts +104 -0
- package/dist/core/strategies.js +1044 -0
- package/dist/core/strategy-hooks.d.ts +145 -0
- package/dist/core/strategy-hooks.js +74 -0
- package/dist/core/structured-extract.d.ts +43 -0
- package/dist/core/structured-extract.js +550 -0
- package/dist/core/summarize.d.ts +17 -0
- package/dist/core/summarize.js +78 -0
- package/dist/core/synonyms.d.ts +42 -0
- package/dist/core/synonyms.js +184 -0
- package/dist/core/system-monitor.d.ts +61 -0
- package/dist/core/system-monitor.js +133 -0
- package/dist/core/table-format.d.ts +30 -0
- package/dist/core/table-format.js +146 -0
- package/dist/core/threat-feeds.d.ts +23 -0
- package/dist/core/threat-feeds.js +104 -0
- package/dist/core/timing.d.ts +21 -0
- package/dist/core/timing.js +33 -0
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/core/user-agents.d.ts +82 -0
- package/dist/core/user-agents.js +239 -0
- package/dist/core/vertical-search.d.ts +54 -0
- package/dist/core/vertical-search.js +158 -0
- package/dist/core/watch-manager.d.ts +175 -0
- package/dist/core/watch-manager.js +416 -0
- package/dist/core/watch.d.ts +101 -0
- package/dist/core/watch.js +389 -0
- package/dist/core/youtube.d.ts +130 -0
- package/dist/core/youtube.js +1175 -0
- package/dist/ee/challenge-re-export.d.ts +1 -0
- package/dist/ee/challenge-re-export.js +1 -0
- package/dist/ee/challenge-solver.d.ts +72 -0
- package/dist/ee/challenge-solver.js +720 -0
- package/dist/ee/domain-extractors.d.ts +8 -0
- package/dist/ee/domain-extractors.js +8 -0
- package/dist/ee/domain-intel.d.ts +16 -0
- package/dist/ee/domain-intel.js +133 -0
- package/dist/ee/extractors/allrecipes.d.ts +2 -0
- package/dist/ee/extractors/allrecipes.js +120 -0
- package/dist/ee/extractors/amazon.d.ts +2 -0
- package/dist/ee/extractors/amazon.js +78 -0
- package/dist/ee/extractors/arxiv.d.ts +2 -0
- package/dist/ee/extractors/arxiv.js +137 -0
- package/dist/ee/extractors/bestbuy.d.ts +2 -0
- package/dist/ee/extractors/bestbuy.js +78 -0
- package/dist/ee/extractors/carscom.d.ts +2 -0
- package/dist/ee/extractors/carscom.js +121 -0
- package/dist/ee/extractors/coingecko.d.ts +2 -0
- package/dist/ee/extractors/coingecko.js +134 -0
- package/dist/ee/extractors/craigslist.d.ts +2 -0
- package/dist/ee/extractors/craigslist.js +92 -0
- package/dist/ee/extractors/devto.d.ts +2 -0
- package/dist/ee/extractors/devto.js +135 -0
- package/dist/ee/extractors/ebay.d.ts +2 -0
- package/dist/ee/extractors/ebay.js +90 -0
- package/dist/ee/extractors/espn.d.ts +2 -0
- package/dist/ee/extractors/espn.js +260 -0
- package/dist/ee/extractors/etsy.d.ts +2 -0
- package/dist/ee/extractors/etsy.js +52 -0
- package/dist/ee/extractors/facebook.d.ts +2 -0
- package/dist/ee/extractors/facebook.js +46 -0
- package/dist/ee/extractors/github.d.ts +2 -0
- package/dist/ee/extractors/github.js +196 -0
- package/dist/ee/extractors/google-flights.d.ts +2 -0
- package/dist/ee/extractors/google-flights.js +176 -0
- package/dist/ee/extractors/hackernews.d.ts +2 -0
- package/dist/ee/extractors/hackernews.js +147 -0
- package/dist/ee/extractors/imdb.d.ts +2 -0
- package/dist/ee/extractors/imdb.js +172 -0
- package/dist/ee/extractors/index.d.ts +26 -0
- package/dist/ee/extractors/index.js +247 -0
- package/dist/ee/extractors/instagram.d.ts +2 -0
- package/dist/ee/extractors/instagram.js +102 -0
- package/dist/ee/extractors/kalshi.d.ts +2 -0
- package/dist/ee/extractors/kalshi.js +121 -0
- package/dist/ee/extractors/kayak-cars.d.ts +2 -0
- package/dist/ee/extractors/kayak-cars.js +270 -0
- package/dist/ee/extractors/linkedin.d.ts +2 -0
- package/dist/ee/extractors/linkedin.js +113 -0
- package/dist/ee/extractors/medium.d.ts +2 -0
- package/dist/ee/extractors/medium.js +130 -0
- package/dist/ee/extractors/news.d.ts +4 -0
- package/dist/ee/extractors/news.js +173 -0
- package/dist/ee/extractors/npm.d.ts +2 -0
- package/dist/ee/extractors/npm.js +86 -0
- package/dist/ee/extractors/pdf.d.ts +2 -0
- package/dist/ee/extractors/pdf.js +108 -0
- package/dist/ee/extractors/pinterest.d.ts +2 -0
- package/dist/ee/extractors/pinterest.js +34 -0
- package/dist/ee/extractors/polymarket.d.ts +2 -0
- package/dist/ee/extractors/polymarket.js +358 -0
- package/dist/ee/extractors/producthunt.d.ts +2 -0
- package/dist/ee/extractors/producthunt.js +88 -0
- package/dist/ee/extractors/pubmed.d.ts +2 -0
- package/dist/ee/extractors/pubmed.js +162 -0
- package/dist/ee/extractors/pypi.d.ts +2 -0
- package/dist/ee/extractors/pypi.js +80 -0
- package/dist/ee/extractors/reddit.d.ts +2 -0
- package/dist/ee/extractors/reddit.js +438 -0
- package/dist/ee/extractors/redfin.d.ts +2 -0
- package/dist/ee/extractors/redfin.js +156 -0
- package/dist/ee/extractors/semanticscholar.d.ts +2 -0
- package/dist/ee/extractors/semanticscholar.js +131 -0
- package/dist/ee/extractors/shared.d.ts +12 -0
- package/dist/ee/extractors/shared.js +76 -0
- package/dist/ee/extractors/soundcloud.d.ts +2 -0
- package/dist/ee/extractors/soundcloud.js +34 -0
- package/dist/ee/extractors/sportsbetting.d.ts +2 -0
- package/dist/ee/extractors/sportsbetting.js +37 -0
- package/dist/ee/extractors/spotify.d.ts +2 -0
- package/dist/ee/extractors/spotify.js +34 -0
- package/dist/ee/extractors/stackoverflow.d.ts +2 -0
- package/dist/ee/extractors/stackoverflow.js +61 -0
- package/dist/ee/extractors/substack.d.ts +2 -0
- package/dist/ee/extractors/substack.js +115 -0
- package/dist/ee/extractors/substackroot.d.ts +2 -0
- package/dist/ee/extractors/substackroot.js +46 -0
- package/dist/ee/extractors/tiktok.d.ts +2 -0
- package/dist/ee/extractors/tiktok.js +29 -0
- package/dist/ee/extractors/tradingview.d.ts +2 -0
- package/dist/ee/extractors/tradingview.js +182 -0
- package/dist/ee/extractors/twitch.d.ts +2 -0
- package/dist/ee/extractors/twitch.js +36 -0
- package/dist/ee/extractors/twitter.d.ts +2 -0
- package/dist/ee/extractors/twitter.js +327 -0
- package/dist/ee/extractors/types.d.ts +14 -0
- package/dist/ee/extractors/types.js +1 -0
- package/dist/ee/extractors/walmart.d.ts +2 -0
- package/dist/ee/extractors/walmart.js +50 -0
- package/dist/ee/extractors/weather.d.ts +2 -0
- package/dist/ee/extractors/weather.js +133 -0
- package/dist/ee/extractors/wikipedia.d.ts +4 -0
- package/dist/ee/extractors/wikipedia.js +235 -0
- package/dist/ee/extractors/yelp.d.ts +2 -0
- package/dist/ee/extractors/yelp.js +216 -0
- package/dist/ee/extractors/youtube.d.ts +2 -0
- package/dist/ee/extractors/youtube.js +189 -0
- package/dist/ee/extractors/zillow.d.ts +54 -0
- package/dist/ee/extractors/zillow.js +247 -0
- package/dist/ee/extractors-re-export.d.ts +1 -0
- package/dist/ee/extractors-re-export.js +1 -0
- package/dist/ee/premium-hooks.d.ts +20 -0
- package/dist/ee/premium-hooks.js +50 -0
- package/dist/ee/spa-detection.d.ts +2 -0
- package/dist/ee/spa-detection.js +2 -0
- package/dist/ee/stability.d.ts +4 -0
- package/dist/ee/stability.js +29 -0
- package/dist/ee/swr-cache.d.ts +14 -0
- package/dist/ee/swr-cache.js +34 -0
- package/dist/index.d.ts +143 -0
- package/dist/index.js +291 -0
- package/dist/integrations/index.d.ts +2 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/langchain.d.ts +64 -0
- package/dist/integrations/langchain.js +115 -0
- package/dist/integrations/llamaindex.d.ts +50 -0
- package/dist/integrations/llamaindex.js +91 -0
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +395 -0
- package/dist/mcp/handlers/extract.d.ts +7 -0
- package/dist/mcp/handlers/extract.js +135 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +63 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +40 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/observe.d.ts +8 -0
- package/dist/mcp/handlers/observe.js +37 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +78 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +7 -0
- package/dist/mcp/server.js +108 -0
- package/dist/mcp/smart-router.d.ts +23 -0
- package/dist/mcp/smart-router.js +178 -0
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +632 -0
- package/dist/server/auth-store.d.ts +28 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/bull-queues.d.ts +60 -0
- package/dist/server/bull-queues.js +90 -0
- package/dist/server/email-service.d.ts +55 -0
- package/dist/server/email-service.js +291 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/audit-log.d.ts +14 -0
- package/dist/server/middleware/audit-log.js +73 -0
- package/dist/server/middleware/auth.d.ts +35 -0
- package/dist/server/middleware/auth.js +225 -0
- package/dist/server/middleware/rate-limit.d.ts +50 -0
- package/dist/server/middleware/rate-limit.js +270 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +201 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +146 -0
- package/dist/server/pg-auth-store.js +576 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +79 -0
- package/dist/server/routes/admin-active.d.ts +7 -0
- package/dist/server/routes/admin-active.js +120 -0
- package/dist/server/routes/admin-stats.d.ts +7 -0
- package/dist/server/routes/admin-stats.js +176 -0
- package/dist/server/routes/agent.d.ts +24 -0
- package/dist/server/routes/agent.js +480 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +295 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cache-warm.d.ts +25 -0
- package/dist/server/routes/cache-warm.js +212 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/crawl.d.ts +13 -0
- package/dist/server/routes/crawl.js +287 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/deep-research.d.ts +11 -0
- package/dist/server/routes/deep-research.js +232 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +14 -0
- package/dist/server/routes/extract.js +325 -0
- package/dist/server/routes/feed.d.ts +15 -0
- package/dist/server/routes/feed.js +311 -0
- package/dist/server/routes/fetch-queue.d.ts +13 -0
- package/dist/server/routes/fetch-queue.js +357 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +1274 -0
- package/dist/server/routes/go.d.ts +14 -0
- package/dist/server/routes/go.js +81 -0
- package/dist/server/routes/health.d.ts +11 -0
- package/dist/server/routes/health.js +141 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +574 -0
- package/dist/server/routes/map.d.ts +11 -0
- package/dist/server/routes/map.js +116 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +197 -0
- package/dist/server/routes/metrics.d.ts +37 -0
- package/dist/server/routes/metrics.js +149 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/reader.d.ts +18 -0
- package/dist/server/routes/reader.js +192 -0
- package/dist/server/routes/research.d.ts +14 -0
- package/dist/server/routes/research.js +482 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +820 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +874 -0
- package/dist/server/routes/session.d.ts +17 -0
- package/dist/server/routes/session.js +548 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +102 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +72 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
- package/dist/server/routes/smart-search/handlers/general.js +717 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +1309 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +154 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +546 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +264 -0
- package/dist/server/routes/smart-search/llm.d.ts +16 -0
- package/dist/server/routes/smart-search/llm.js +70 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +81 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +20 -0
- package/dist/server/routes/smart-search/utils.js +146 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +296 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/usage.d.ts +9 -0
- package/dist/server/routes/usage.js +279 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1867 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +14 -0
- package/dist/server/sentry.js +104 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/dist/types.d.ts +552 -0
- package/dist/types.js +39 -0
- package/llms.txt +105 -0
- package/package.json +189 -0
|
@@ -0,0 +1,634 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Jobs extraction module — turns job board pages into structured JSON
|
|
3
|
+
*
|
|
4
|
+
* Two-phase pipeline:
|
|
5
|
+
* Phase A (Search): Fetch a job search URL → parse markdown → extract job cards
|
|
6
|
+
* Phase B (Details): For top N results, fetch each detail URL → extract full description
|
|
7
|
+
*
|
|
8
|
+
* Supports LinkedIn, Glassdoor, and Indeed out of the box.
|
|
9
|
+
* Call `cleanup()` from the main webpeel export when you are done fetching.
|
|
10
|
+
*/
|
|
11
|
+
import { peel } from '../index.js';
|
|
12
|
+
/**
 * Classify a job-board URL by the site it mentions.
 *
 * The check is a case-insensitive substring test against the whole URL
 * string, tried in a fixed order; the first marker found wins.
 *
 * @param url URL (or URL-like string) to classify.
 * @returns 'linkedin', 'glassdoor', 'indeed' or 'upwork'; 'generic' when no marker is found.
 */
function detectSource(url) {
    const haystack = url.toLowerCase();
    // [marker, source] pairs, listed in match-priority order.
    const knownSites = [
        ['linkedin.com', 'linkedin'],
        ['glassdoor.com', 'glassdoor'],
        ['indeed.com', 'indeed'],
        ['upwork.com', 'upwork'],
    ];
    const hit = knownSites.find(([marker]) => haystack.includes(marker));
    return hit ? hit[1] : 'generic';
}
|
|
24
|
+
/**
 * Report whether a source is one of the boards flagged for "stealth" handling.
 *
 * @param src Source identifier such as the value returned by detectSource.
 * @returns true for 'indeed', 'glassdoor' and 'upwork'; false otherwise.
 */
function stealthNeeded(src) {
    const stealthSources = ['indeed', 'glassdoor', 'upwork'];
    return stealthSources.includes(src);
}
|
|
27
|
+
/** Short alias for the percent-encoder used on query-string values. */
const enc = encodeURIComponent;
/**
 * Build the search-results URL for a known job board.
 *
 * A missing keyword or location (null/undefined) is encoded as an empty
 * value; previously it ended up in the URL as the literal text "undefined".
 *
 * @param src Source identifier: 'linkedin', 'glassdoor', 'indeed' or 'upwork'.
 * @param kw  Search keywords (free text).
 * @param loc Location (free text). Not used for 'upwork'.
 * @returns The absolute search URL.
 * @throws Error when src is any other value (e.g. 'generic').
 */
function buildSearchUrl(src, kw, loc) {
    const keywords = enc(kw == null ? '' : kw);
    const place = enc(loc == null ? '' : loc);
    switch (src) {
        case 'linkedin':
            return `https://www.linkedin.com/jobs/search/?keywords=${keywords}&location=${place}`;
        case 'glassdoor':
            // NOTE(review): locT/locId are fixed values regardless of `loc` — confirm this is intended.
            return `https://www.glassdoor.com/Job/jobs.htm?sc.keyword=${keywords}&locT=C&locId=1132348&sc.location=${place}`;
        case 'indeed':
            return `https://www.indeed.com/jobs?q=${keywords}&l=${place}`;
        case 'upwork':
            // The Upwork URL carries no location parameter; results are sorted by recency.
            return `https://www.upwork.com/nx/search/jobs/?q=${keywords}&sort=recency`;
        default:
            throw new Error('Cannot build URL for generic source — provide a url');
    }
}
|
|
42
|
+
/**
 * Normalize a text fragment scraped from markdown/HTML.
 *
 * Decodes a small set of HTML entities (&amp;amp; &amp;hellip; &amp;nbsp; &amp;#39; &amp;apos;
 * &amp;quot;), collapses every whitespace run to a single space, and trims.
 * Decoding is done in one pass so an already-decoded "&" is never
 * re-interpreted as the start of another entity.
 *
 * @param s Raw text.
 * @returns The decoded, whitespace-normalized text.
 */
function clean(s) {
    return s
        .replace(/&(?:amp|hellip|nbsp|#39|apos|quot);/g, (entity) => {
            switch (entity) {
                case '&amp;':
                    return '&';
                case '&hellip;':
                    return '…';
                case '&nbsp;':
                    return ' ';
                case '&#39;':
                case '&apos;':
                    return "'";
                case '&quot;':
                    return '"';
                default:
                    return entity;
            }
        })
        .replace(/\s+/g, ' ')
        .trim();
}
|
|
52
|
+
/**
 * Resolve a possibly-relative link against a base URL.
 *
 * @param href Link target as found in the page.
 * @param base URL the link is relative to.
 * @returns The absolute URL string, or `href` unchanged when it cannot be resolved.
 */
function absUrl(href, base) {
    let resolved = href;
    try {
        resolved = new URL(href, base).href;
    }
    catch {
        // Unparseable href/base: fall through and hand back the input as-is.
    }
    return resolved;
}
|
|
60
|
+
/**
 * Find the first dollar amount or dollar range in a piece of text.
 *
 * Recognized shape: `$N[K] [- $N[K]] [a year | per hour | an hour | /hr | /yr]`,
 * where N may contain thousands separators and a decimal part. The `K`
 * suffix is accepted after each amount (so "$120K - $150K" is kept whole)
 * but only when followed by whitespace, end of text, light punctuation, or
 * the dash of a `- $…` range, so words starting with "K" are not swallowed.
 *
 * @param text Text to search.
 * @returns The matched salary text (trimmed), or undefined when none is found.
 */
function findSalary(text) {
    const m = text.match(/\$[\d,]+(?:\.\d+)?(?:\s*K(?=\s|$|[,.;)\/]|[-–]\s*\$))?(?:\s*[-–]\s*\$[\d,]+(?:\.\d+)?(?:\s*K(?=\s|$|[,.;)\/]|[-–]\s*\$))?)?(?:\s*(?:a\s+year|per\s+hour|an\s+hour|\/hr|\/yr))?/i);
    return m ? m[0].trim() : undefined;
}
|
|
64
|
+
/**
 * Extract a relative posting age from text.
 *
 * Two notations are tried in priority order: the compact form ("3d", "5h",
 * "12m") and then the long form ("2 weeks ago"). The compact form wins
 * whenever it occurs anywhere in the text, even after a long-form match.
 *
 * @param text Text to search.
 * @returns The matched age string, or undefined when neither form is present.
 */
function findDate(text) {
    const notations = [
        /(\d+[dhm])\b/,
        /(\d+\s+(?:day|week|month|hour|minute)s?\s+ago)/i,
    ];
    for (const notation of notations) {
        const found = text.match(notation);
        if (found) {
            return found[1].trim();
        }
    }
    return undefined;
}
|
|
68
|
+
/**
 * Check whether the text contains the standalone word "remote" (any letter case).
 *
 * @param text Text to inspect.
 * @returns true when "remote" appears as a whole word.
 */
function hasRemote(text) {
    const remoteWord = /\bremote\b/i;
    return remoteWord.test(text);
}
|
|
71
|
+
/**
 * Simple concurrency limiter — runs at most `n` tasks in parallel.
 *
 * Workers pull the next task index from a shared cursor, so results are
 * stored at the same index as their task regardless of completion order.
 * If a task rejects, the returned promise rejects with that error; nothing
 * here cancels workers that are already running.
 *
 * @param tasks Array of zero-argument functions, each returning a value or promise.
 * @param n     Maximum number of tasks in flight. Values below 1 (or non-numeric
 *              values) are treated as 1 so that every task still runs.
 * @returns Promise of the results array, index-aligned with `tasks`.
 */
async function pLimited(tasks, n) {
    const results = new Array(tasks.length);
    // Guard against 0 / negative / NaN limits, which would otherwise start no
    // workers and silently return an array of holes.
    let width = Math.floor(Number(n));
    if (!(width >= 1)) {
        width = 1;
    }
    let cursor = 0;
    async function worker() {
        while (cursor < tasks.length) {
            // Claim the index synchronously before awaiting so no two workers share a task.
            const idx = cursor++;
            results[idx] = await tasks[idx]();
        }
    }
    await Promise.all(Array.from({ length: Math.min(width, tasks.length) }, () => worker()));
    return results;
}
|
|
84
|
+
/**
 * Read a result count from the first level-1 markdown heading that starts
 * with a number, e.g. "# 1,234+ jobs in …" yields 1234.
 *
 * @param content Markdown text of the page.
 * @returns The parsed count, or 0 when no such heading exists or the
 *          captured token holds no digits (e.g. only commas).
 */
function parseTotalFromHeading(content) {
    const heading = content.match(/^#\s+([\d,]+)\+?\s+/m);
    if (!heading) {
        return 0;
    }
    const total = parseInt(heading[1].replace(/,/g, ''), 10);
    // `[\d,]+` can match commas alone; never hand NaN back to callers.
    return Number.isNaN(total) ? 0 : total;
}
|
|
88
|
+
// ── LinkedIn Parser ────────────────────────────────────────────────────
|
|
89
|
+
/**
 * Parse LinkedIn job-search markdown into job cards.
 *
 * The markdown is split on list items of the form "- [Title](url)". A card
 * is kept only when its leading link points at "linkedin.com/jobs/view/".
 * Within a card:
 *   - title:    the "### …" heading if present, otherwise the link text;
 *   - company:  the "#### [Company](url)" or "#### Company" heading;
 *   - location: the first line after the company heading that looks like
 *               "City, ST" (capitalized word … comma … capital letter);
 *   - postedAt: a relative age found on the location line or on a short
 *               (< 30 chars) standalone line.
 *
 * @param content   Markdown of the search-results page.
 * @param searchUrl URL the page was fetched from; base for resolving detail links.
 * @param limit     Maximum number of jobs to return.
 * @returns `{ jobs, totalFound }`, where totalFound is the count from the
 *          page heading, or the number of parsed jobs when that count is
 *          missing or zero.
 */
function parseLinkedIn(content, searchUrl, limit) {
    const jobs = [];
    const totalFound = parseTotalFromHeading(content);
    // Each card starts with "- [Title](url)" in the markdown list
    const blocks = content.split(/\n-\s+\[/).slice(1);
    for (const block of blocks) {
        if (jobs.length >= limit)
            break;
        // Link: the split consumed the leading "- [", so block starts with "Title](url)…"
        const lm = block.match(/^([^\]]+)\]\(([^)]+)\)/);
        if (!lm)
            continue;
        const detailUrl = lm[2];
        if (!detailUrl.includes('linkedin.com/jobs/view/'))
            continue;
        // Title from ### heading (preferred) or link text. The lookbehind keeps
        // this from matching the tail of a "####" company heading, which would
        // otherwise make the company line the title when no ### heading exists.
        const hm = block.match(/(?<!#)###\s+(.+)/);
        const title = clean(hm ? hm[1] : lm[1]);
        if (!title)
            continue;
        // Company from #### [Company](url) or #### Company
        const cm = block.match(/####\s+\[([^\]]+)\]/) || block.match(/####\s+(.+)/);
        const company = cm ? clean(cm[1]) : '';
        // Scan the lines AFTER the #### company heading for location + date
        let location = '';
        let postedAt;
        let pastCompany = false;
        for (const raw of block.split('\n')) {
            const l = raw.trim();
            if (!l)
                continue;
            // Skip everything until we're past the company heading
            if (l.startsWith('####')) {
                pastCompany = true;
                continue;
            }
            if (!pastCompany)
                continue;
            // Skip headings, links, nested list items and badge lines
            if (l.startsWith('#') || l.startsWith('[') || l.startsWith('-') || l === 'Actively Hiring' || l === 'Promoted')
                continue;
            // Skip lines that contain URLs
            if (l.includes('http://') || l.includes('https://'))
                continue;
            // Relative age on this line, if any (computed once, used by both branches)
            const dateInLine = findDate(l);
            // Location line that may have date appended: "New York, NY 2 weeks ago"
            if (!location && /^[A-Z][a-z]+.*,\s*[A-Z]/.test(l)) {
                if (dateInLine) {
                    postedAt = dateInLine;
                    // Strip the age (long or compact form) so only the place remains
                    location = clean(l.replace(/\d+\s+(?:week|day|month|hour|minute)s?\s+ago/i, '').replace(/\d+[dhm]\s*$/i, ''));
                }
                else {
                    location = clean(l);
                }
                continue;
            }
            // Date-only line: short enough that it is unlikely to be description text
            if (dateInLine && l.length < 30) {
                postedAt = dateInLine;
            }
        }
        jobs.push({
            title, company, location,
            salary: findSalary(block),
            remote: hasRemote(block),
            postedAt,
            detailUrl: absUrl(detailUrl, searchUrl),
            source: 'linkedin',
        });
    }
    return { jobs, totalFound: totalFound || jobs.length };
}
|
|
163
|
+
// ── Glassdoor Parser ───────────────────────────────────────────────────
|
|
164
|
+
/**
 * Parse Glassdoor search-result markdown into job cards.
 *
 * Each card is expected to be a top-level list item laid out roughly as:
 * company, rating, [Title](url), location, salary, snippet, skills, date.
 *
 * @param content   Markdown of the search-results page.
 * @param searchUrl URL the page was fetched from; passed to `absUrl` when
 *                  building each card's `detailUrl`.
 * @param limit     Maximum number of cards to return.
 * @returns `{ jobs, totalFound }`; `totalFound` falls back to `jobs.length`
 *          when `parseTotalFromHeading` yields a falsy value.
 */
function parseGlassdoor(content, searchUrl, limit) {
    const TITLE_LINK = /\[([^\]]+)\]\((https?:\/\/[^\s)]*glassdoor\.com\/job-listing\/[^)]+)\)/;
    const LISTING_PATH = 'glassdoor.com/job-listing/';
    const results = [];
    const totalFound = parseTotalFromHeading(content);
    // Everything before the first "\n- " is page chrome, hence slice(1).
    for (const card of content.split(/\n-\s+/).slice(1)) {
        if (results.length >= limit)
            break;
        const rows = card.split('\n').map((row) => row.trim()).filter(Boolean);
        if (rows.length < 2)
            continue;
        const link = TITLE_LINK.exec(card);
        if (link === null)
            continue;
        const title = clean(link[1]);
        const detailUrl = link[2];
        // Rows in front of the title link carry the company name and rating.
        let company = '';
        let rating;
        for (const row of rows) {
            if (row.includes('[') && row.includes('glassdoor.com'))
                break;
            const score = /^(\d\.\d)$/.exec(row);
            if (score)
                rating = parseFloat(score[1]);
            else if (!company && row.length > 1 && !/^\d/.test(row))
                company = clean(row);
        }
        // Rows behind the first title/link row carry the remaining fields.
        // Any later row that repeats the title or listing URL is skipped too.
        const isLinkRow = (row) => row.includes(title) || row.includes(LISTING_PATH);
        const firstLinkRow = rows.findIndex(isLinkRow);
        const trailing = firstLinkRow === -1 ? [] : rows.slice(firstLinkRow + 1);
        let location = '';
        let salary;
        let snippet;
        let skills;
        let postedAt;
        for (const row of trailing) {
            if (isLinkRow(row))
                continue;
            const skillLine = row.match(/\*\*Skills?:\*\*\s*(.+)/i);
            if (skillLine) {
                skills = skillLine[1].split(',').map((s) => s.trim()).filter(Boolean);
            }
            else if (/^\d+[dwm]$/.test(row)) {
                // Compact age marker such as "3d" or "2w".
                postedAt = row;
            }
            else if (!salary && /\$/.test(row)) {
                salary = findSalary(row) || clean(row);
            }
            else if (!location && /^[A-Z][a-z]+.*,\s*[A-Z]{2}/.test(row)) {
                location = clean(row);
            }
            else if (!snippet && row.length > 40 && !row.startsWith('**')) {
                snippet = clean(row);
            }
        }
        results.push({
            title, company, location, salary,
            remote: hasRemote(card), postedAt,
            detailUrl: absUrl(detailUrl, searchUrl),
            snippet, skills, rating,
            source: 'glassdoor',
        });
    }
    return { jobs: results, totalFound: totalFound || results.length };
}
|
|
239
|
+
// ── Indeed Parser ──────────────────────────────────────────────────────
|
|
240
|
+
/**
 * Parse an Indeed search-results page into job cards.
 *
 * Two input shapes are handled:
 *  - HTML mode: raw HTML leaked through, recognised by `id="job_<jk>"`
 *    anchors followed by a `<span title="...">`. Companies and locations are
 *    paired with jobs by their position in the document.
 *  - Markdown mode: list items containing a `[Title](indeed.com/...viewjob|rc/clk...)`
 *    link followed by company / location / salary lines.
 *
 * @param content    Page content (markdown, possibly with raw HTML).
 * @param _searchUrl Unused; kept for signature parity with the other parsers.
 * @param limit      Maximum number of cards to return.
 * @returns `{ jobs, totalFound }`. `totalFound` is read from an
 *          "of/about N jobs" or "N jobs" phrase and falls back to
 *          `jobs.length` when no numeric count is present.
 */
function parseIndeed(content, _searchUrl, limit) {
    const jobs = [];
    // Indeed markdown: job listings as list items with [Title](url), company, location, salary
    // Also try HTML attribute patterns in case raw HTML leaks through
    const htmlJobRe = /id="job_([a-f0-9]+)"[^>]*>.*?<span\s+title="([^"]+)"[^>]*>[^<]*<\/span>/gs;
    const htmlJobs = [...content.matchAll(htmlJobRe)];
    if (htmlJobs.length > 0) {
        // HTML mode — parse HTML attributes directly
        const companyRe = /data-testid="company-name"[^>]*>([^<]+)<\/span>/g;
        const locRe = /data-testid="text-location"[^>]*>([^<]+)<\/div>/g;
        const cm = [...content.matchAll(companyRe)];
        const lm = [...content.matchAll(locRe)];
        for (let i = 0; i < htmlJobs.length && jobs.length < limit; i++) {
            const jk = htmlJobs[i][1];
            const title = clean(htmlJobs[i][2]);
            jobs.push({
                title,
                company: cm[i] ? clean(cm[i][1]) : '',
                location: lm[i] ? clean(lm[i][1]) : '',
                // Salary is searched only between this job anchor and the next one.
                salary: findSalary(content.slice(htmlJobs[i].index || 0, (htmlJobs[i + 1]?.index) || content.length)),
                remote: false,
                detailUrl: `https://www.indeed.com/viewjob?jk=${jk}`,
                source: 'indeed',
            });
        }
    }
    else {
        // Markdown mode — parse the converted markdown output
        // Indeed search results have title links followed by company, location, salary lines
        const blocks = content.split(/\n-\s+/).slice(1);
        for (const block of blocks) {
            if (jobs.length >= limit)
                break;
            // Title link: [Job Title](url)
            const lm = block.match(/\[([^\]]+)\]\((https?:\/\/[^\s)]*indeed\.com\/[^)]*(?:viewjob|rc\/clk)[^)]*)\)/);
            if (!lm)
                continue;
            const title = clean(lm[1]);
            let detailUrl = lm[2];
            // Extract jk parameter from URL for clean detail URL
            const jkMatch = detailUrl.match(/[?&]jk=([a-f0-9]+)/);
            if (jkMatch)
                detailUrl = `https://www.indeed.com/viewjob?jk=${jkMatch[1]}`;
            // Parse remaining lines for company, location, salary
            const lines = block.split('\n').map((l) => l.trim()).filter(Boolean);
            let company = '';
            let location = '';
            let salary;
            for (const l of lines) {
                if (l.includes(title) || l.includes('indeed.com'))
                    continue;
                if (!salary) {
                    const s = findSalary(l);
                    if (s) {
                        salary = s;
                        continue;
                    }
                }
                // Company is usually the first non-title, non-link, non-salary text
                if (!company && l.length > 2 && !l.startsWith('[') && !l.startsWith('#') && !/^\d/.test(l)) {
                    company = clean(l);
                    continue;
                }
                // Location matches City, ST pattern
                if (!location && /^[A-Z][a-z]+.*,\s*[A-Z]{2}/.test(l)) {
                    location = clean(l);
                }
            }
            if (title) {
                jobs.push({
                    title, company, location, salary,
                    remote: hasRemote(block),
                    detailUrl,
                    source: 'indeed',
                });
            }
        }
    }
    // The count must start with a digit: a bare "[\d,]+" also matches a lone
    // comma ("hybrid, jobs"), which parses to NaN.
    const totalRe = content.match(/(?:of|about)\s+(\d[\d,]*)\s+jobs/i) || content.match(/(\d[\d,]*)\s+jobs/i);
    const parsedTotal = totalRe ? parseInt(totalRe[1].replace(/,/g, ''), 10) : NaN;
    const totalFound = Number.isNaN(parsedTotal) ? jobs.length : parsedTotal;
    return { jobs, totalFound };
}
|
|
322
|
+
// ── Upwork Parser ──────────────────────────────────────────────────────
|
|
323
|
+
/**
 * Parse Upwork job search results from converted markdown.
 *
 * Upwork search URL pattern:
 *   https://www.upwork.com/nx/search/jobs/?q=AI+engineer&sort=recency
 *
 * A job is recognised by a markdown link into `/jobs/`; when none exist, a
 * looser pattern (`/nx/jobs`, `/freelance-jobs`, ...) is tried. The text
 * between one job link and the next is treated as that job's block, and all
 * other fields are extracted from it with heuristics.
 *
 * @param content   Markdown of the search-results page.
 * @param searchUrl URL the page was fetched from; passed to `absUrl`.
 * @param limit     Maximum number of cards to return.
 * @returns `{ jobs, totalFound }`; `totalFound` comes from an
 *          "N+ jobs found"-style phrase, else `jobs.length`.
 */
function parseUpwork(content, searchUrl, limit) {
    // Total count — Upwork often shows "X+ jobs found"
    const countHit = /(\d[\d,]*)\+?\s+(?:jobs?|results?)\s+(?:found|available|match)/i.exec(content);
    const totalFound = countHit ? parseInt(countHit[1].replace(/,/g, ''), 10) : 0;
    // Primary pattern: [Job Title](https://www.upwork.com/jobs/...)
    let links = Array.from(content.matchAll(/\[([^\]]+)\]\((https:\/\/www\.upwork\.com\/jobs\/[^)]+)\)/g));
    if (links.length === 0) {
        // Fallback: /nx/jobs/ links (search page variant)
        links = Array.from(content.matchAll(/\[([^\]]+)\]\((https:\/\/www\.upwork\.com\/(?:nx\/)?(?:jobs?|freelance-jobs?)[^)]*)\)/g));
        if (links.length === 0) {
            return { jobs: [], totalFound };
        }
    }
    const jobs = [];
    for (const [i, link] of links.entries()) {
        if (jobs.length >= limit)
            break;
        const title = clean(link[1]);
        const detailUrl = absUrl(link[2], searchUrl);
        // This job's text runs from its own link up to the next job link.
        const from = link.index ?? 0;
        const to = (links[i + 1]?.index) ?? content.length;
        const block = content.slice(from, to);
        // Budget / hourly rate — first $ amount, else an amount after a keyword
        const budgetHit = block.match(/\$[\d,]+(?:\.\d+)?(?:\s*[-–]\s*\$[\d,]+(?:\.\d+)?)?\s*(?:\/\s*hr|per\s+hour|hourly)?/i) ||
            block.match(/(?:budget|fixed[\s-]?price|hourly\s+rate)[:\s]+\$[\d,]+(?:\s*[-–]\s*\$[\d,]+)?/i);
        const budget = budgetHit ? budgetHit[0].trim() : undefined;
        // Job type ("hourly" wins when both words occur)
        const jobType = /\bhourly\b/i.test(block)
            ? 'hourly'
            : /\bfixed[\s-]?price\b/i.test(block)
                ? 'fixed-price'
                : undefined;
        // Experience level
        const levelHit = block.match(/\b(entry[- ]?level|intermediate|expert|beginner)\b/i);
        const experienceLevel = levelHit ? levelHit[1] : undefined;
        // Client rating — accepted only inside the 0..5 range
        const starsHit = block.match(/(\d+(?:\.\d+)?)\s*(?:of\s+5\s+)?(?:stars?|★)/i);
        const stars = starsHit ? parseFloat(starsHit[1]) : NaN;
        const clientRating = stars >= 0 && stars <= 5 ? stars : undefined;
        // Client spend, with the trailing "spent" wording removed
        const spendHit = block.match(/\$[\d,.]+[KkMm]?\+?\s*(?:spent|total\s+spent)/i);
        const clientSpend = spendHit
            ? spendHit[0].replace(/\s*(?:spent|total\s+spent)/i, '').trim()
            : undefined;
        // Skills (look for "Skills:" or "Tags:" followed by a delimited list)
        const skillsHit = block.match(/(?:skills?|tags?)[:\s]+([^\n]+)/i);
        const skills = skillsHit
            ? skillsHit[1].split(/[,;]/).map((s) => s.trim()).filter((s) => s.length > 1 && s.length < 40)
            : undefined;
        // Posted time
        const postedAt = findDate(block);
        // Description snippet — first line over 60 chars that is not the title,
        // a link, a price, or a "<number> star/hour/day/..." metadata line
        const rows = block.split('\n').map((row) => row.trim()).filter(Boolean);
        const prose = rows.find((row) => row !== title &&
            !row.startsWith('[') &&
            !row.startsWith('http') &&
            !/^\$/.test(row) &&
            !/^\d+\s*(?:star|hour|day|week|month|review)/i.test(row) &&
            row.length > 60);
        const snippet = prose === undefined ? undefined : clean(prose).slice(0, 200);
        if (!title)
            continue;
        jobs.push({
            title,
            company: '', // Upwork jobs don't surface a company on the search page
            location: 'Remote', // Upwork is inherently remote
            remote: true,
            salary: budget, // Reuse salary field for budget display
            budget,
            jobType,
            experienceLevel,
            clientRating,
            clientSpend,
            skills,
            snippet,
            postedAt,
            detailUrl,
            source: 'upwork',
        });
    }
    return { jobs, totalFound: totalFound || jobs.length };
}
|
|
433
|
+
// Section-heading detectors, each tested against a single line.
// Match both heading markers (## Section) and bold markers (**Section**,
// **Section**: and **Section:**). The trailing `:?\*{0,2}:?` accepts the colon
// on either side of the closing asterisks; the previous `\*?\*?:?` rejected
// the common "**Section:**" form.
const SEC_DESC = /(?:#{1,4}\s*|^\*\*)(?:(?:full\s+)?job\s+description|about\s+(?:the\s+)?(?:role|position|job|opportunity)|overview|summary):?\*{0,2}:?\s*$/im;
const SEC_REQ = /(?:#{1,4}\s*|^\*\*)(?:requirements?|qualifications?|what\s+(?:you(?:'ll)?\s+)?(?:need|bring)|minimum\s+qualifications?|must\s+have|what\s+we(?:'re)?\s+look(?:ing)?\s+for|nice\s+to\s+have):?\*{0,2}:?\s*$/im;
const SEC_RESP = /(?:#{1,4}\s*|^\*\*)(?:responsibilities|what\s+you(?:'ll)?\s+do|duties|key\s+responsibilities|your\s+role|in\s+this\s+role):?\*{0,2}:?\s*$/im;
const SEC_BEN = /(?:#{1,4}\s*|^\*\*)(?:benefits?|perks?|what\s+we\s+offer|compensation(?:\s+and\s+benefits)?|why\s+(?:join|work)|our\s+offer):?\*{0,2}:?\s*$/im;
|
|
438
|
+
/**
 * Turn a block of text into a list of bullet strings.
 *
 * Each line is trimmed and a leading list marker is removed: `-`, `*`, `•`,
 * `+`, or an ordered marker such as `1.` / `2)`. Trimming happens before the
 * marker is stripped, so indented or nested bullets lose their marker as well.
 * Lines of five characters or fewer after stripping are dropped.
 *
 * @param text Multi-line text of one section.
 * @returns The cleaned lines, or `undefined` when nothing survives.
 */
function extractBullets(text) {
    const out = [];
    for (const line of text.split('\n')) {
        const t = line.trim().replace(/^(?:[-*•+]|\d{1,3}[.)])\s+/, '').trim();
        if (t.length > 5)
            out.push(t);
    }
    return out.length > 0 ? out : undefined;
}
|
|
447
|
+
/**
 * Split a job-detail page into description / requirements /
 * responsibilities / benefits.
 *
 * The content is scanned line by line. A line matching one of the `SEC_*`
 * patterns opens that section; any other markdown heading (`#`..`####`)
 * closes the open section; lines in between are collected for it.
 *
 * Several headings can map to the same kind (e.g. "Requirements" and
 * "Nice to have" both match `SEC_REQ`). Their contents are merged in document
 * order rather than letting the last one overwrite the earlier ones.
 *
 * @param content Markdown of the detail page.
 * @returns `{ description, requirements, responsibilities, benefits }`.
 *          `description` falls back to the first 2000 characters of `content`
 *          when no description section was found; each list is `undefined`
 *          when it has no entries.
 */
function splitSections(content) {
    const headings = [
        { pattern: SEC_DESC, kind: 'desc' },
        { pattern: SEC_REQ, kind: 'req' },
        { pattern: SEC_RESP, kind: 'resp' },
        { pattern: SEC_BEN, kind: 'ben' },
    ];
    const descriptionParts = [];
    const bullets = { req: [], resp: [], ben: [] };
    let cur = null;
    let buf = [];
    // Commit the buffered lines to the currently open section, then reset.
    function flush() {
        const txt = buf.join('\n').trim();
        buf = [];
        if (!txt || !cur)
            return;
        if (cur === 'desc')
            descriptionParts.push(txt);
        else
            bullets[cur].push(...(extractBullets(txt) ?? []));
    }
    for (const line of content.split('\n')) {
        // Patterns are tried in the fixed order desc, req, resp, ben.
        const opened = headings.find((h) => h.pattern.test(line));
        if (opened) {
            flush();
            cur = opened.kind;
            continue;
        }
        // An unrecognised heading ends whatever section is open.
        if (cur && /^#{1,4}\s+/.test(line)) {
            flush();
            cur = null;
            continue;
        }
        if (cur)
            buf.push(line);
    }
    flush();
    const listOrUndefined = (items) => (items.length > 0 ? items : undefined);
    return {
        description: descriptionParts.join('\n\n') || content.slice(0, 2000).trim(),
        requirements: listOrUndefined(bullets.req),
        responsibilities: listOrUndefined(bullets.resp),
        benefits: listOrUndefined(bullets.ben),
    };
}
|
|
499
|
+
/**
 * Cut trailing noise sections (similar jobs, people also viewed, etc.) off a
 * job-detail page.
 *
 * Each heading pattern is applied in turn to the text left by the previous
 * one. The text is truncated at the first match of a pattern only when that
 * match lies beyond the first 30% of the current text, so a noise-like
 * heading near the top never wipes out the page.
 */
function stripDetailNoise(content) {
    const noiseHeadings = [
        /^#{1,3}\s*similar\s+jobs/im,
        /^#{1,3}\s*people\s+also\s+viewed/im,
        /^#{1,3}\s*similar\s+searches/im,
        /^#{1,3}\s*explore\s+collaborative/im,
        /^#{1,3}\s*seniority\s+level/im,
        /^#{1,3}\s*company\s+and\s+salary/im,
        /^#{1,3}\s*career\s+guide/im,
        /^#{1,3}\s*jobs\s+with\s+similar/im,
        /^#{1,3}\s*similar\s+jobs?\s+nearby/im,
    ];
    return noiseHeadings.reduce((text, heading) => {
        // search() yields -1 when absent, which can never exceed the threshold.
        const at = text.search(heading);
        return at > text.length * 0.3 ? text.slice(0, at).trim() : text;
    }, content);
}
|
|
521
|
+
/**
 * Enrich a search-result card with fields parsed from its detail page.
 *
 * @param content Markdown of the job detail page.
 * @param card    The card produced by one of the search parsers; all of its
 *                fields are carried over into the result.
 * @returns A new object: the card plus `description`, `requirements`,
 *          `responsibilities`, `benefits`, `applyUrl`, `employmentType`
 *          and `experienceLevel`. The card's own `salary` is kept when it has
 *          one, and its `experienceLevel` is kept when the detail text does
 *          not name a level.
 */
function parseJobDetail(content, card) {
    // Strip noise sections before parsing
    const cleaned = stripDetailNoise(content);
    const sections = splitSections(cleaned);
    const empMatch = cleaned.match(/\b(full[- ]?time|part[- ]?time|contract|internship|freelance|temporary)\b/i);
    const expMatch = cleaned.match(/\b(entry[- ]?level|mid[- ]?level|senior|lead|principal|staff|junior|intern)\b/i);
    // Prefer a markdown "[Apply ...](url)" link, else an href mentioning apply/submit/careers.
    const applyMatch = cleaned.match(/\[(?:apply|submit)[^\]]*\]\(([^)]+)\)/i) ||
        cleaned.match(/href="([^"]*(?:apply|submit|careers)[^"]*)"/i);
    // The card's salary wins; the detail page is only searched as a fallback.
    const salary = card.salary || findSalary(cleaned);
    return {
        ...card,
        salary: salary || card.salary,
        description: sections.description || cleaned.slice(0, 3000),
        requirements: sections.requirements,
        responsibilities: sections.responsibilities,
        benefits: sections.benefits,
        applyUrl: applyMatch ? applyMatch[1] : undefined,
        // "Full time" -> "full-time": lower-case, whitespace turned into a hyphen.
        employmentType: empMatch ? empMatch[1].toLowerCase().replace(/\s+/g, '-') : undefined,
        // This key comes after the spread, so fall back to the card's value
        // instead of overwriting it with undefined when nothing matched.
        experienceLevel: expMatch ? expMatch[1] : card.experienceLevel,
    };
}
|
|
543
|
+
// ── Main ───────────────────────────────────────────────────────────────
|
|
544
|
+
/**
 * Search a job board and return structured results.
 *
 * Pages are fetched through `peel()`, so its escalation / stealth logic
 * applies. This function does **not** call `cleanup()`; the caller is
 * expected to invoke it from the main webpeel export once all fetching is
 * done, because the browser instance is shared across the library.
 *
 * @param options `url` (explicit search URL) or `keywords` (+ optional
 *                `location` and `source`, default 'linkedin'); `limit`
 *                (default 25), `fetchDetails` (number of leading cards whose
 *                detail page is fetched, default 0), `timeout` in ms
 *                (default 30000).
 * @returns `{ jobs, totalFound, source, searchUrl, detailsFetched, timeTakenMs }`.
 * @throws Error when neither `url` nor `keywords` is provided.
 */
export async function searchJobs(options) {
    const startedAt = Date.now();
    const { url, keywords = '', location = '', source: reqSource = 'linkedin', limit = 25, fetchDetails = 0, timeout = 30000, } = options;
    // 1. Determine source & URL
    let searchUrl;
    let source;
    if (url) {
        searchUrl = url;
        source = detectSource(url);
    }
    else {
        if (!keywords)
            throw new Error('Either url or keywords must be provided');
        source = reqSource;
        searchUrl = buildSearchUrl(source, keywords, location);
    }
    // 2. Fetch search page
    const needsStealth = stealthNeeded(source);
    const page = await peel(searchUrl, {
        stealth: needsStealth,
        render: needsStealth, // Stealth sites are usually SPAs requiring browser rendering
        timeout,
        format: 'markdown',
    });
    // 3. Parse job cards
    const parserBySource = new Map([
        ['linkedin', parseLinkedIn],
        ['glassdoor', parseGlassdoor],
        ['indeed', parseIndeed],
        ['upwork', parseUpwork],
    ]);
    const knownParser = parserBySource.get(source);
    let parsed;
    if (knownParser) {
        parsed = knownParser(page.content, searchUrl, limit);
    }
    else {
        // Unknown source: try each parser in turn and keep the first that
        // yields any job (or the last attempt when none do).
        for (const attempt of [parseLinkedIn, parseGlassdoor, parseIndeed, parseUpwork]) {
            parsed = attempt(page.content, searchUrl, limit);
            if (parsed.jobs.length)
                break;
        }
    }
    // 4. Optionally fetch detail pages (max 3 concurrent)
    let detailsFetched = 0;
    let jobs = parsed.jobs;
    if (fetchDetails > 0 && parsed.jobs.length > 0) {
        const enrich = (card) => async () => {
            try {
                const detailPage = await peel(card.detailUrl, {
                    stealth: stealthNeeded(source),
                    timeout,
                    format: 'markdown',
                });
                detailsFetched++;
                return parseJobDetail(detailPage.content, card);
            }
            catch {
                return card; // graceful fallback: keep the bare card
            }
        };
        const head = parsed.jobs.slice(0, fetchDetails);
        const tail = parsed.jobs.slice(fetchDetails);
        const detailed = await pLimited(head.map(enrich), 3);
        jobs = [...detailed, ...tail];
    }
    return {
        jobs,
        totalFound: parsed.totalFound,
        source,
        searchUrl,
        detailsFetched,
        timeTakenMs: Date.now() - startedAt,
    };
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JSON-LD Structured Data Extractor
|
|
3
|
+
*
|
|
4
|
+
* Extracts and converts JSON-LD (schema.org) data to clean markdown.
|
|
5
|
+
* Handles Recipe, Product, Article, FAQPage, HowTo, Event, LocalBusiness, Review.
|
|
6
|
+
* This is a FIRST-CLASS content source — tried before HTML DOM parsing.
|
|
7
|
+
*/
|
|
8
|
+
/**
 * Result returned by {@link extractJsonLd}.
 *
 * NOTE(review): field meanings below are inferred from the names and the file
 * header only — confirm against the implementation.
 */
export interface JsonLdResult {
    /** Presumably whether usable JSON-LD was found. */
    found: boolean;
    /** Presumably the schema.org type that was handled (e.g. Recipe, Product). */
    type: string;
    /** Presumably the markdown rendering of the structured data. */
    content: string;
    /** Title associated with the extracted item. */
    title: string;
    /** Untyped payload; presumably the parsed JSON-LD object. */
    data: any;
}
|
|
15
|
+
/**
 * Extract JSON-LD structured data from an HTML document string.
 *
 * @param html HTML source to inspect.
 * @returns A `JsonLdResult`, or `null` (presumably when no usable JSON-LD is
 *          present — confirm against the implementation).
 */
export declare function extractJsonLd(html: string): JsonLdResult | null;
|