@iflow-mcp/jakeliume-webpeel 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (547) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +313 -0
  3. package/dist/cache.d.ts +30 -0
  4. package/dist/cache.js +139 -0
  5. package/dist/cli/commands/auth.d.ts +5 -0
  6. package/dist/cli/commands/auth.js +411 -0
  7. package/dist/cli/commands/doctor.d.ts +37 -0
  8. package/dist/cli/commands/doctor.js +371 -0
  9. package/dist/cli/commands/fetch.d.ts +6 -0
  10. package/dist/cli/commands/fetch.js +1345 -0
  11. package/dist/cli/commands/guide.d.ts +2 -0
  12. package/dist/cli/commands/guide.js +183 -0
  13. package/dist/cli/commands/interact.d.ts +5 -0
  14. package/dist/cli/commands/interact.js +840 -0
  15. package/dist/cli/commands/jobs.d.ts +5 -0
  16. package/dist/cli/commands/jobs.js +997 -0
  17. package/dist/cli/commands/monitor.d.ts +12 -0
  18. package/dist/cli/commands/monitor.js +197 -0
  19. package/dist/cli/commands/observe.d.ts +12 -0
  20. package/dist/cli/commands/observe.js +158 -0
  21. package/dist/cli/commands/screenshot.d.ts +5 -0
  22. package/dist/cli/commands/screenshot.js +282 -0
  23. package/dist/cli/commands/search.d.ts +5 -0
  24. package/dist/cli/commands/search.js +1021 -0
  25. package/dist/cli/commands/setup.d.ts +13 -0
  26. package/dist/cli/commands/setup.js +244 -0
  27. package/dist/cli/commands/skill.d.ts +15 -0
  28. package/dist/cli/commands/skill.js +195 -0
  29. package/dist/cli/utils.d.ts +84 -0
  30. package/dist/cli/utils.js +806 -0
  31. package/dist/cli-auth.d.ts +75 -0
  32. package/dist/cli-auth.js +369 -0
  33. package/dist/cli.d.ts +17 -0
  34. package/dist/cli.js +99 -0
  35. package/dist/core/actions.d.ts +69 -0
  36. package/dist/core/actions.js +495 -0
  37. package/dist/core/agent.d.ts +98 -0
  38. package/dist/core/agent.js +558 -0
  39. package/dist/core/answer.d.ts +42 -0
  40. package/dist/core/answer.js +395 -0
  41. package/dist/core/application-tracker.d.ts +84 -0
  42. package/dist/core/application-tracker.js +184 -0
  43. package/dist/core/apply.d.ts +162 -0
  44. package/dist/core/apply.js +816 -0
  45. package/dist/core/auth-detection.d.ts +35 -0
  46. package/dist/core/auth-detection.js +358 -0
  47. package/dist/core/auto-extract.d.ts +82 -0
  48. package/dist/core/auto-extract.js +604 -0
  49. package/dist/core/auto-interact.d.ts +23 -0
  50. package/dist/core/auto-interact.js +246 -0
  51. package/dist/core/bm25-filter.d.ts +66 -0
  52. package/dist/core/bm25-filter.js +288 -0
  53. package/dist/core/branding.d.ts +54 -0
  54. package/dist/core/branding.js +234 -0
  55. package/dist/core/browser-fetch.d.ts +323 -0
  56. package/dist/core/browser-fetch.js +1600 -0
  57. package/dist/core/browser-pool.d.ts +91 -0
  58. package/dist/core/browser-pool.js +550 -0
  59. package/dist/core/budget.d.ts +42 -0
  60. package/dist/core/budget.js +324 -0
  61. package/dist/core/business-intel.d.ts +47 -0
  62. package/dist/core/business-intel.js +279 -0
  63. package/dist/core/cache.d.ts +13 -0
  64. package/dist/core/cache.js +121 -0
  65. package/dist/core/cf-worker-proxy.d.ts +32 -0
  66. package/dist/core/cf-worker-proxy.js +87 -0
  67. package/dist/core/challenge-detection.d.ts +26 -0
  68. package/dist/core/challenge-detection.js +468 -0
  69. package/dist/core/change-tracking.d.ts +75 -0
  70. package/dist/core/change-tracking.js +276 -0
  71. package/dist/core/chunker.d.ts +46 -0
  72. package/dist/core/chunker.js +249 -0
  73. package/dist/core/chunking.d.ts +42 -0
  74. package/dist/core/chunking.js +181 -0
  75. package/dist/core/circuit-breaker.d.ts +44 -0
  76. package/dist/core/circuit-breaker.js +85 -0
  77. package/dist/core/content-pruner.d.ts +47 -0
  78. package/dist/core/content-pruner.js +425 -0
  79. package/dist/core/cookie-cache.d.ts +60 -0
  80. package/dist/core/cookie-cache.js +163 -0
  81. package/dist/core/crawl-checkpoint.d.ts +54 -0
  82. package/dist/core/crawl-checkpoint.js +104 -0
  83. package/dist/core/crawler.d.ts +84 -0
  84. package/dist/core/crawler.js +349 -0
  85. package/dist/core/cross-verify.d.ts +27 -0
  86. package/dist/core/cross-verify.js +93 -0
  87. package/dist/core/deep-fetch.d.ts +74 -0
  88. package/dist/core/deep-fetch.js +405 -0
  89. package/dist/core/deep-research.d.ts +141 -0
  90. package/dist/core/deep-research.js +972 -0
  91. package/dist/core/design-analysis.d.ts +70 -0
  92. package/dist/core/design-analysis.js +490 -0
  93. package/dist/core/design-compare.d.ts +38 -0
  94. package/dist/core/design-compare.js +264 -0
  95. package/dist/core/diff.d.ts +61 -0
  96. package/dist/core/diff.js +289 -0
  97. package/dist/core/dns-cache.d.ts +20 -0
  98. package/dist/core/dns-cache.js +198 -0
  99. package/dist/core/documents.d.ts +23 -0
  100. package/dist/core/documents.js +123 -0
  101. package/dist/core/domain-memory.d.ts +66 -0
  102. package/dist/core/domain-memory.js +163 -0
  103. package/dist/core/domain-verify.d.ts +40 -0
  104. package/dist/core/domain-verify.js +379 -0
  105. package/dist/core/engine-ranker.d.ts +112 -0
  106. package/dist/core/engine-ranker.js +395 -0
  107. package/dist/core/extract-inline.d.ts +38 -0
  108. package/dist/core/extract-inline.js +215 -0
  109. package/dist/core/extract-listings.d.ts +38 -0
  110. package/dist/core/extract-listings.js +461 -0
  111. package/dist/core/extract.d.ts +9 -0
  112. package/dist/core/extract.js +139 -0
  113. package/dist/core/fetch-cache.d.ts +57 -0
  114. package/dist/core/fetch-cache.js +95 -0
  115. package/dist/core/fetcher.d.ts +13 -0
  116. package/dist/core/fetcher.js +12 -0
  117. package/dist/core/google-cache.d.ts +29 -0
  118. package/dist/core/google-cache.js +180 -0
  119. package/dist/core/google-serp-parser.d.ts +82 -0
  120. package/dist/core/google-serp-parser.js +287 -0
  121. package/dist/core/hotel-search.d.ts +122 -0
  122. package/dist/core/hotel-search.js +382 -0
  123. package/dist/core/http-fetch.d.ts +72 -0
  124. package/dist/core/http-fetch.js +820 -0
  125. package/dist/core/human.d.ts +175 -0
  126. package/dist/core/human.js +680 -0
  127. package/dist/core/image-caption.d.ts +44 -0
  128. package/dist/core/image-caption.js +271 -0
  129. package/dist/core/jobs.d.ts +75 -0
  130. package/dist/core/jobs.js +634 -0
  131. package/dist/core/json-ld.d.ts +15 -0
  132. package/dist/core/json-ld.js +617 -0
  133. package/dist/core/language-detect.d.ts +18 -0
  134. package/dist/core/language-detect.js +135 -0
  135. package/dist/core/links.d.ts +10 -0
  136. package/dist/core/links.js +44 -0
  137. package/dist/core/llm-extract.d.ts +71 -0
  138. package/dist/core/llm-extract.js +507 -0
  139. package/dist/core/llm-provider.d.ts +100 -0
  140. package/dist/core/llm-provider.js +702 -0
  141. package/dist/core/local-search.d.ts +60 -0
  142. package/dist/core/local-search.js +308 -0
  143. package/dist/core/logger.d.ts +28 -0
  144. package/dist/core/logger.js +104 -0
  145. package/dist/core/map.d.ts +33 -0
  146. package/dist/core/map.js +127 -0
  147. package/dist/core/markdown.d.ts +92 -0
  148. package/dist/core/markdown.js +809 -0
  149. package/dist/core/metadata.d.ts +34 -0
  150. package/dist/core/metadata.js +422 -0
  151. package/dist/core/observe.d.ts +113 -0
  152. package/dist/core/observe.js +395 -0
  153. package/dist/core/ocr.d.ts +12 -0
  154. package/dist/core/ocr.js +33 -0
  155. package/dist/core/paginate.d.ts +31 -0
  156. package/dist/core/paginate.js +106 -0
  157. package/dist/core/pdf.d.ts +8 -0
  158. package/dist/core/pdf.js +25 -0
  159. package/dist/core/peel-tls.d.ts +25 -0
  160. package/dist/core/peel-tls.js +220 -0
  161. package/dist/core/pipeline.d.ts +132 -0
  162. package/dist/core/pipeline.js +1666 -0
  163. package/dist/core/profiles.d.ts +61 -0
  164. package/dist/core/profiles.js +350 -0
  165. package/dist/core/prompt-guard.d.ts +30 -0
  166. package/dist/core/prompt-guard.js +119 -0
  167. package/dist/core/proxy-config.d.ts +90 -0
  168. package/dist/core/proxy-config.js +172 -0
  169. package/dist/core/quick-answer.d.ts +53 -0
  170. package/dist/core/quick-answer.js +833 -0
  171. package/dist/core/rate-governor.d.ts +80 -0
  172. package/dist/core/rate-governor.js +238 -0
  173. package/dist/core/readability.d.ts +57 -0
  174. package/dist/core/readability.js +533 -0
  175. package/dist/core/research.d.ts +66 -0
  176. package/dist/core/research.js +270 -0
  177. package/dist/core/retry.d.ts +60 -0
  178. package/dist/core/retry.js +119 -0
  179. package/dist/core/safe-browsing.d.ts +30 -0
  180. package/dist/core/safe-browsing.js +206 -0
  181. package/dist/core/schema-extraction.d.ts +66 -0
  182. package/dist/core/schema-extraction.js +352 -0
  183. package/dist/core/schema-postprocess.d.ts +32 -0
  184. package/dist/core/schema-postprocess.js +469 -0
  185. package/dist/core/schema-templates.d.ts +19 -0
  186. package/dist/core/schema-templates.js +143 -0
  187. package/dist/core/screenshot.d.ts +224 -0
  188. package/dist/core/screenshot.js +207 -0
  189. package/dist/core/search-engines.d.ts +25 -0
  190. package/dist/core/search-engines.js +182 -0
  191. package/dist/core/search-provider.d.ts +243 -0
  192. package/dist/core/search-provider.js +1629 -0
  193. package/dist/core/searxng-provider.d.ts +35 -0
  194. package/dist/core/searxng-provider.js +105 -0
  195. package/dist/core/selective-evidence.d.ts +151 -0
  196. package/dist/core/selective-evidence.js +389 -0
  197. package/dist/core/site-search.d.ts +44 -0
  198. package/dist/core/site-search.js +252 -0
  199. package/dist/core/sitemap.d.ts +23 -0
  200. package/dist/core/sitemap.js +105 -0
  201. package/dist/core/source-credibility.d.ts +29 -0
  202. package/dist/core/source-credibility.js +584 -0
  203. package/dist/core/source-scoring.d.ts +166 -0
  204. package/dist/core/source-scoring.js +396 -0
  205. package/dist/core/stemmer.d.ts +38 -0
  206. package/dist/core/stemmer.js +509 -0
  207. package/dist/core/strategies.d.ts +104 -0
  208. package/dist/core/strategies.js +1044 -0
  209. package/dist/core/strategy-hooks.d.ts +145 -0
  210. package/dist/core/strategy-hooks.js +74 -0
  211. package/dist/core/structured-extract.d.ts +43 -0
  212. package/dist/core/structured-extract.js +550 -0
  213. package/dist/core/summarize.d.ts +17 -0
  214. package/dist/core/summarize.js +78 -0
  215. package/dist/core/synonyms.d.ts +42 -0
  216. package/dist/core/synonyms.js +184 -0
  217. package/dist/core/system-monitor.d.ts +61 -0
  218. package/dist/core/system-monitor.js +133 -0
  219. package/dist/core/table-format.d.ts +30 -0
  220. package/dist/core/table-format.js +146 -0
  221. package/dist/core/threat-feeds.d.ts +23 -0
  222. package/dist/core/threat-feeds.js +104 -0
  223. package/dist/core/timing.d.ts +21 -0
  224. package/dist/core/timing.js +33 -0
  225. package/dist/core/transcript-export.d.ts +47 -0
  226. package/dist/core/transcript-export.js +107 -0
  227. package/dist/core/user-agents.d.ts +82 -0
  228. package/dist/core/user-agents.js +239 -0
  229. package/dist/core/vertical-search.d.ts +54 -0
  230. package/dist/core/vertical-search.js +158 -0
  231. package/dist/core/watch-manager.d.ts +175 -0
  232. package/dist/core/watch-manager.js +416 -0
  233. package/dist/core/watch.d.ts +101 -0
  234. package/dist/core/watch.js +389 -0
  235. package/dist/core/youtube.d.ts +130 -0
  236. package/dist/core/youtube.js +1175 -0
  237. package/dist/ee/challenge-re-export.d.ts +1 -0
  238. package/dist/ee/challenge-re-export.js +1 -0
  239. package/dist/ee/challenge-solver.d.ts +72 -0
  240. package/dist/ee/challenge-solver.js +720 -0
  241. package/dist/ee/domain-extractors.d.ts +8 -0
  242. package/dist/ee/domain-extractors.js +8 -0
  243. package/dist/ee/domain-intel.d.ts +16 -0
  244. package/dist/ee/domain-intel.js +133 -0
  245. package/dist/ee/extractors/allrecipes.d.ts +2 -0
  246. package/dist/ee/extractors/allrecipes.js +120 -0
  247. package/dist/ee/extractors/amazon.d.ts +2 -0
  248. package/dist/ee/extractors/amazon.js +78 -0
  249. package/dist/ee/extractors/arxiv.d.ts +2 -0
  250. package/dist/ee/extractors/arxiv.js +137 -0
  251. package/dist/ee/extractors/bestbuy.d.ts +2 -0
  252. package/dist/ee/extractors/bestbuy.js +78 -0
  253. package/dist/ee/extractors/carscom.d.ts +2 -0
  254. package/dist/ee/extractors/carscom.js +121 -0
  255. package/dist/ee/extractors/coingecko.d.ts +2 -0
  256. package/dist/ee/extractors/coingecko.js +134 -0
  257. package/dist/ee/extractors/craigslist.d.ts +2 -0
  258. package/dist/ee/extractors/craigslist.js +92 -0
  259. package/dist/ee/extractors/devto.d.ts +2 -0
  260. package/dist/ee/extractors/devto.js +135 -0
  261. package/dist/ee/extractors/ebay.d.ts +2 -0
  262. package/dist/ee/extractors/ebay.js +90 -0
  263. package/dist/ee/extractors/espn.d.ts +2 -0
  264. package/dist/ee/extractors/espn.js +260 -0
  265. package/dist/ee/extractors/etsy.d.ts +2 -0
  266. package/dist/ee/extractors/etsy.js +52 -0
  267. package/dist/ee/extractors/facebook.d.ts +2 -0
  268. package/dist/ee/extractors/facebook.js +46 -0
  269. package/dist/ee/extractors/github.d.ts +2 -0
  270. package/dist/ee/extractors/github.js +196 -0
  271. package/dist/ee/extractors/google-flights.d.ts +2 -0
  272. package/dist/ee/extractors/google-flights.js +176 -0
  273. package/dist/ee/extractors/hackernews.d.ts +2 -0
  274. package/dist/ee/extractors/hackernews.js +147 -0
  275. package/dist/ee/extractors/imdb.d.ts +2 -0
  276. package/dist/ee/extractors/imdb.js +172 -0
  277. package/dist/ee/extractors/index.d.ts +26 -0
  278. package/dist/ee/extractors/index.js +247 -0
  279. package/dist/ee/extractors/instagram.d.ts +2 -0
  280. package/dist/ee/extractors/instagram.js +102 -0
  281. package/dist/ee/extractors/kalshi.d.ts +2 -0
  282. package/dist/ee/extractors/kalshi.js +121 -0
  283. package/dist/ee/extractors/kayak-cars.d.ts +2 -0
  284. package/dist/ee/extractors/kayak-cars.js +270 -0
  285. package/dist/ee/extractors/linkedin.d.ts +2 -0
  286. package/dist/ee/extractors/linkedin.js +113 -0
  287. package/dist/ee/extractors/medium.d.ts +2 -0
  288. package/dist/ee/extractors/medium.js +130 -0
  289. package/dist/ee/extractors/news.d.ts +4 -0
  290. package/dist/ee/extractors/news.js +173 -0
  291. package/dist/ee/extractors/npm.d.ts +2 -0
  292. package/dist/ee/extractors/npm.js +86 -0
  293. package/dist/ee/extractors/pdf.d.ts +2 -0
  294. package/dist/ee/extractors/pdf.js +108 -0
  295. package/dist/ee/extractors/pinterest.d.ts +2 -0
  296. package/dist/ee/extractors/pinterest.js +34 -0
  297. package/dist/ee/extractors/polymarket.d.ts +2 -0
  298. package/dist/ee/extractors/polymarket.js +358 -0
  299. package/dist/ee/extractors/producthunt.d.ts +2 -0
  300. package/dist/ee/extractors/producthunt.js +88 -0
  301. package/dist/ee/extractors/pubmed.d.ts +2 -0
  302. package/dist/ee/extractors/pubmed.js +162 -0
  303. package/dist/ee/extractors/pypi.d.ts +2 -0
  304. package/dist/ee/extractors/pypi.js +80 -0
  305. package/dist/ee/extractors/reddit.d.ts +2 -0
  306. package/dist/ee/extractors/reddit.js +438 -0
  307. package/dist/ee/extractors/redfin.d.ts +2 -0
  308. package/dist/ee/extractors/redfin.js +156 -0
  309. package/dist/ee/extractors/semanticscholar.d.ts +2 -0
  310. package/dist/ee/extractors/semanticscholar.js +131 -0
  311. package/dist/ee/extractors/shared.d.ts +12 -0
  312. package/dist/ee/extractors/shared.js +76 -0
  313. package/dist/ee/extractors/soundcloud.d.ts +2 -0
  314. package/dist/ee/extractors/soundcloud.js +34 -0
  315. package/dist/ee/extractors/sportsbetting.d.ts +2 -0
  316. package/dist/ee/extractors/sportsbetting.js +37 -0
  317. package/dist/ee/extractors/spotify.d.ts +2 -0
  318. package/dist/ee/extractors/spotify.js +34 -0
  319. package/dist/ee/extractors/stackoverflow.d.ts +2 -0
  320. package/dist/ee/extractors/stackoverflow.js +61 -0
  321. package/dist/ee/extractors/substack.d.ts +2 -0
  322. package/dist/ee/extractors/substack.js +115 -0
  323. package/dist/ee/extractors/substackroot.d.ts +2 -0
  324. package/dist/ee/extractors/substackroot.js +46 -0
  325. package/dist/ee/extractors/tiktok.d.ts +2 -0
  326. package/dist/ee/extractors/tiktok.js +29 -0
  327. package/dist/ee/extractors/tradingview.d.ts +2 -0
  328. package/dist/ee/extractors/tradingview.js +182 -0
  329. package/dist/ee/extractors/twitch.d.ts +2 -0
  330. package/dist/ee/extractors/twitch.js +36 -0
  331. package/dist/ee/extractors/twitter.d.ts +2 -0
  332. package/dist/ee/extractors/twitter.js +327 -0
  333. package/dist/ee/extractors/types.d.ts +14 -0
  334. package/dist/ee/extractors/types.js +1 -0
  335. package/dist/ee/extractors/walmart.d.ts +2 -0
  336. package/dist/ee/extractors/walmart.js +50 -0
  337. package/dist/ee/extractors/weather.d.ts +2 -0
  338. package/dist/ee/extractors/weather.js +133 -0
  339. package/dist/ee/extractors/wikipedia.d.ts +4 -0
  340. package/dist/ee/extractors/wikipedia.js +235 -0
  341. package/dist/ee/extractors/yelp.d.ts +2 -0
  342. package/dist/ee/extractors/yelp.js +216 -0
  343. package/dist/ee/extractors/youtube.d.ts +2 -0
  344. package/dist/ee/extractors/youtube.js +189 -0
  345. package/dist/ee/extractors/zillow.d.ts +54 -0
  346. package/dist/ee/extractors/zillow.js +247 -0
  347. package/dist/ee/extractors-re-export.d.ts +1 -0
  348. package/dist/ee/extractors-re-export.js +1 -0
  349. package/dist/ee/premium-hooks.d.ts +20 -0
  350. package/dist/ee/premium-hooks.js +50 -0
  351. package/dist/ee/spa-detection.d.ts +2 -0
  352. package/dist/ee/spa-detection.js +2 -0
  353. package/dist/ee/stability.d.ts +4 -0
  354. package/dist/ee/stability.js +29 -0
  355. package/dist/ee/swr-cache.d.ts +14 -0
  356. package/dist/ee/swr-cache.js +34 -0
  357. package/dist/index.d.ts +143 -0
  358. package/dist/index.js +291 -0
  359. package/dist/integrations/index.d.ts +2 -0
  360. package/dist/integrations/index.js +2 -0
  361. package/dist/integrations/langchain.d.ts +64 -0
  362. package/dist/integrations/langchain.js +115 -0
  363. package/dist/integrations/llamaindex.d.ts +50 -0
  364. package/dist/integrations/llamaindex.js +91 -0
  365. package/dist/mcp/handlers/act.d.ts +5 -0
  366. package/dist/mcp/handlers/act.js +34 -0
  367. package/dist/mcp/handlers/definitions.d.ts +6 -0
  368. package/dist/mcp/handlers/definitions.js +395 -0
  369. package/dist/mcp/handlers/extract.d.ts +7 -0
  370. package/dist/mcp/handlers/extract.js +135 -0
  371. package/dist/mcp/handlers/fetch.d.ts +6 -0
  372. package/dist/mcp/handlers/fetch.js +98 -0
  373. package/dist/mcp/handlers/find.d.ts +5 -0
  374. package/dist/mcp/handlers/find.js +137 -0
  375. package/dist/mcp/handlers/index.d.ts +13 -0
  376. package/dist/mcp/handlers/index.js +63 -0
  377. package/dist/mcp/handlers/legacy.d.ts +25 -0
  378. package/dist/mcp/handlers/legacy.js +450 -0
  379. package/dist/mcp/handlers/meta.d.ts +6 -0
  380. package/dist/mcp/handlers/meta.js +40 -0
  381. package/dist/mcp/handlers/monitor.d.ts +5 -0
  382. package/dist/mcp/handlers/monitor.js +41 -0
  383. package/dist/mcp/handlers/observe.d.ts +8 -0
  384. package/dist/mcp/handlers/observe.js +37 -0
  385. package/dist/mcp/handlers/read.d.ts +6 -0
  386. package/dist/mcp/handlers/read.js +78 -0
  387. package/dist/mcp/handlers/see.d.ts +5 -0
  388. package/dist/mcp/handlers/see.js +75 -0
  389. package/dist/mcp/handlers/types.d.ts +29 -0
  390. package/dist/mcp/handlers/types.js +28 -0
  391. package/dist/mcp/server.d.ts +7 -0
  392. package/dist/mcp/server.js +108 -0
  393. package/dist/mcp/smart-router.d.ts +23 -0
  394. package/dist/mcp/smart-router.js +178 -0
  395. package/dist/server/app.d.ts +14 -0
  396. package/dist/server/app.js +632 -0
  397. package/dist/server/auth-store.d.ts +28 -0
  398. package/dist/server/auth-store.js +88 -0
  399. package/dist/server/bull-queues.d.ts +60 -0
  400. package/dist/server/bull-queues.js +90 -0
  401. package/dist/server/email-service.d.ts +55 -0
  402. package/dist/server/email-service.js +291 -0
  403. package/dist/server/job-queue.d.ts +100 -0
  404. package/dist/server/job-queue.js +145 -0
  405. package/dist/server/logger.d.ts +10 -0
  406. package/dist/server/logger.js +37 -0
  407. package/dist/server/middleware/audit-log.d.ts +14 -0
  408. package/dist/server/middleware/audit-log.js +73 -0
  409. package/dist/server/middleware/auth.d.ts +35 -0
  410. package/dist/server/middleware/auth.js +225 -0
  411. package/dist/server/middleware/rate-limit.d.ts +50 -0
  412. package/dist/server/middleware/rate-limit.js +270 -0
  413. package/dist/server/middleware/scope-guard.d.ts +25 -0
  414. package/dist/server/middleware/scope-guard.js +45 -0
  415. package/dist/server/middleware/url-validator.d.ts +15 -0
  416. package/dist/server/middleware/url-validator.js +201 -0
  417. package/dist/server/openapi.yaml +6418 -0
  418. package/dist/server/pg-auth-store.d.ts +146 -0
  419. package/dist/server/pg-auth-store.js +576 -0
  420. package/dist/server/pg-job-queue.d.ts +59 -0
  421. package/dist/server/pg-job-queue.js +375 -0
  422. package/dist/server/routes/activity.d.ts +6 -0
  423. package/dist/server/routes/activity.js +79 -0
  424. package/dist/server/routes/admin-active.d.ts +7 -0
  425. package/dist/server/routes/admin-active.js +120 -0
  426. package/dist/server/routes/admin-stats.d.ts +7 -0
  427. package/dist/server/routes/admin-stats.js +176 -0
  428. package/dist/server/routes/agent.d.ts +24 -0
  429. package/dist/server/routes/agent.js +480 -0
  430. package/dist/server/routes/answer.d.ts +5 -0
  431. package/dist/server/routes/answer.js +125 -0
  432. package/dist/server/routes/ask.d.ts +28 -0
  433. package/dist/server/routes/ask.js +295 -0
  434. package/dist/server/routes/batch.d.ts +6 -0
  435. package/dist/server/routes/batch.js +493 -0
  436. package/dist/server/routes/cache-warm.d.ts +25 -0
  437. package/dist/server/routes/cache-warm.js +212 -0
  438. package/dist/server/routes/cli-usage.d.ts +6 -0
  439. package/dist/server/routes/cli-usage.js +127 -0
  440. package/dist/server/routes/compat.d.ts +23 -0
  441. package/dist/server/routes/compat.js +652 -0
  442. package/dist/server/routes/crawl.d.ts +13 -0
  443. package/dist/server/routes/crawl.js +287 -0
  444. package/dist/server/routes/deep-fetch.d.ts +8 -0
  445. package/dist/server/routes/deep-fetch.js +57 -0
  446. package/dist/server/routes/deep-research.d.ts +11 -0
  447. package/dist/server/routes/deep-research.js +232 -0
  448. package/dist/server/routes/demo.d.ts +24 -0
  449. package/dist/server/routes/demo.js +517 -0
  450. package/dist/server/routes/do.d.ts +8 -0
  451. package/dist/server/routes/do.js +72 -0
  452. package/dist/server/routes/extract.d.ts +14 -0
  453. package/dist/server/routes/extract.js +325 -0
  454. package/dist/server/routes/feed.d.ts +15 -0
  455. package/dist/server/routes/feed.js +311 -0
  456. package/dist/server/routes/fetch-queue.d.ts +13 -0
  457. package/dist/server/routes/fetch-queue.js +357 -0
  458. package/dist/server/routes/fetch.d.ts +7 -0
  459. package/dist/server/routes/fetch.js +1274 -0
  460. package/dist/server/routes/go.d.ts +14 -0
  461. package/dist/server/routes/go.js +81 -0
  462. package/dist/server/routes/health.d.ts +11 -0
  463. package/dist/server/routes/health.js +141 -0
  464. package/dist/server/routes/jobs.d.ts +7 -0
  465. package/dist/server/routes/jobs.js +574 -0
  466. package/dist/server/routes/map.d.ts +11 -0
  467. package/dist/server/routes/map.js +116 -0
  468. package/dist/server/routes/mcp.d.ts +14 -0
  469. package/dist/server/routes/mcp.js +197 -0
  470. package/dist/server/routes/metrics.d.ts +37 -0
  471. package/dist/server/routes/metrics.js +149 -0
  472. package/dist/server/routes/oauth.d.ts +9 -0
  473. package/dist/server/routes/oauth.js +396 -0
  474. package/dist/server/routes/playground.d.ts +17 -0
  475. package/dist/server/routes/playground.js +283 -0
  476. package/dist/server/routes/reader.d.ts +18 -0
  477. package/dist/server/routes/reader.js +192 -0
  478. package/dist/server/routes/research.d.ts +14 -0
  479. package/dist/server/routes/research.js +482 -0
  480. package/dist/server/routes/screenshot.d.ts +22 -0
  481. package/dist/server/routes/screenshot.js +820 -0
  482. package/dist/server/routes/search.d.ts +6 -0
  483. package/dist/server/routes/search.js +874 -0
  484. package/dist/server/routes/session.d.ts +17 -0
  485. package/dist/server/routes/session.js +548 -0
  486. package/dist/server/routes/share.d.ts +18 -0
  487. package/dist/server/routes/share.js +462 -0
  488. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  489. package/dist/server/routes/smart-search/handlers/cars.js +102 -0
  490. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  491. package/dist/server/routes/smart-search/handlers/flights.js +72 -0
  492. package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
  493. package/dist/server/routes/smart-search/handlers/general.js +717 -0
  494. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  495. package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
  496. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  497. package/dist/server/routes/smart-search/handlers/products.js +1309 -0
  498. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  499. package/dist/server/routes/smart-search/handlers/rental.js +154 -0
  500. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  501. package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
  502. package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
  503. package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
  504. package/dist/server/routes/smart-search/index.d.ts +19 -0
  505. package/dist/server/routes/smart-search/index.js +546 -0
  506. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  507. package/dist/server/routes/smart-search/intent.js +264 -0
  508. package/dist/server/routes/smart-search/llm.d.ts +16 -0
  509. package/dist/server/routes/smart-search/llm.js +70 -0
  510. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  511. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  512. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  513. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  514. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  515. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  516. package/dist/server/routes/smart-search/types.d.ts +81 -0
  517. package/dist/server/routes/smart-search/types.js +1 -0
  518. package/dist/server/routes/smart-search/utils.d.ts +20 -0
  519. package/dist/server/routes/smart-search/utils.js +146 -0
  520. package/dist/server/routes/stats.d.ts +6 -0
  521. package/dist/server/routes/stats.js +71 -0
  522. package/dist/server/routes/stripe.d.ts +15 -0
  523. package/dist/server/routes/stripe.js +296 -0
  524. package/dist/server/routes/transcript-export.d.ts +10 -0
  525. package/dist/server/routes/transcript-export.js +178 -0
  526. package/dist/server/routes/usage.d.ts +9 -0
  527. package/dist/server/routes/usage.js +279 -0
  528. package/dist/server/routes/users.d.ts +8 -0
  529. package/dist/server/routes/users.js +1867 -0
  530. package/dist/server/routes/watch.d.ts +15 -0
  531. package/dist/server/routes/watch.js +309 -0
  532. package/dist/server/routes/webhooks.d.ts +26 -0
  533. package/dist/server/routes/webhooks.js +170 -0
  534. package/dist/server/routes/youtube.d.ts +6 -0
  535. package/dist/server/routes/youtube.js +130 -0
  536. package/dist/server/sentry.d.ts +14 -0
  537. package/dist/server/sentry.js +104 -0
  538. package/dist/server/types.d.ts +15 -0
  539. package/dist/server/types.js +7 -0
  540. package/dist/server/utils/response.d.ts +44 -0
  541. package/dist/server/utils/response.js +69 -0
  542. package/dist/server/utils/sse.d.ts +22 -0
  543. package/dist/server/utils/sse.js +38 -0
  544. package/dist/types.d.ts +552 -0
  545. package/dist/types.js +39 -0
  546. package/llms.txt +105 -0
  547. package/package.json +189 -0
@@ -0,0 +1,634 @@
1
+ /**
2
+ * Jobs extraction module — turns job board pages into structured JSON
3
+ *
4
+ * Two-phase pipeline:
5
+ * Phase A (Search): Fetch a job search URL → parse markdown → extract job cards
6
+ * Phase B (Details): For top N results, fetch each detail URL → extract full description
7
+ *
8
+ * Supports LinkedIn, Glassdoor, Indeed, and Upwork out of the box.
9
+ * Call `cleanup()` from the main webpeel export when you are done fetching.
10
+ */
11
+ import { peel } from '../index.js';
12
/**
 * Identify which supported job board a URL belongs to.
 *
 * The board domain is looked for in the URL's hostname, so a domain that only
 * appears in the path or query string (e.g. `?ref=linkedin.com`) does not
 * cause a misclassification. Strings that cannot be parsed as absolute URLs
 * (e.g. `"indeed.com/jobs"`) fall back to a substring match on the whole input.
 *
 * @param url - Search or listing URL supplied by the caller.
 * @returns `'linkedin'`, `'glassdoor'`, `'indeed'`, `'upwork'`, or `'generic'`.
 */
function detectSource(url) {
    let haystack;
    try {
        haystack = new URL(url).hostname.toLowerCase();
    }
    catch {
        // Not an absolute URL — keep the legacy whole-string behaviour.
        haystack = url.toLowerCase();
    }
    // `includes` (rather than `endsWith`) keeps regional hosts such as
    // "www.glassdoor.com.au" or "uk.indeed.com" recognised.
    const boards = [
        ['linkedin.com', 'linkedin'],
        ['glassdoor.com', 'glassdoor'],
        ['indeed.com', 'indeed'],
        ['upwork.com', 'upwork'],
    ];
    for (const [domain, name] of boards) {
        if (haystack.includes(domain))
            return name;
    }
    return 'generic';
}
24
/**
 * Tell whether a source is one of the boards this module fetches in stealth mode.
 *
 * @param src - Source identifier as returned by `detectSource`.
 * @returns `true` for Indeed, Glassdoor and Upwork; `false` otherwise.
 */
function stealthNeeded(src) {
    const stealthBoards = ['indeed', 'glassdoor', 'upwork'];
    return stealthBoards.includes(src);
}
27
/**
 * Build the search-results URL for a known job board.
 *
 * @param src - Board identifier (`'linkedin'`, `'glassdoor'`, `'indeed'`, `'upwork'`).
 * @param kw  - Search keywords; URI-encoded before insertion.
 * @param loc - Free-text location; URI-encoded before insertion (unused for Upwork).
 * @returns The absolute search URL.
 * @throws Error when `src` is not one of the known boards.
 */
function buildSearchUrl(src, kw, loc) {
    if (src === 'linkedin') {
        return `https://www.linkedin.com/jobs/search/?keywords=${enc(kw)}&location=${enc(loc)}`;
    }
    if (src === 'glassdoor') {
        // NOTE(review): `locT=C&locId=1132348` is a fixed location id that is sent
        // regardless of `loc` — confirm it does not override `sc.location`.
        return `https://www.glassdoor.com/Job/jobs.htm?sc.keyword=${enc(kw)}&locT=C&locId=1132348&sc.location=${enc(loc)}`;
    }
    if (src === 'indeed') {
        return `https://www.indeed.com/jobs?q=${enc(kw)}&l=${enc(loc)}`;
    }
    if (src === 'upwork') {
        return `https://www.upwork.com/nx/search/jobs/?q=${enc(kw)}&sort=recency`;
    }
    throw new Error('Cannot build URL for generic source — provide a url');
}
41
/** Shorthand for URI-encoding a single query-string component. */
const enc = (value) => encodeURIComponent(value);
42
/**
 * Normalise a text fragment scraped from a page: decode the handful of HTML
 * entities that commonly survive markdown conversion, collapse every run of
 * whitespace (including non-breaking spaces) to a single space, and trim.
 *
 * Entities are decoded in one pass so that an escaped entity such as
 * "&" + "amp;quot;" yields the literal text "&" + "quot;" rather than being
 * decoded twice.
 *
 * @param s - Raw text.
 * @returns The cleaned, single-line text.
 */
function clean(s) {
    // Keyed by entity name (without the surrounding "&" and ";").
    const decoded = {
        'amp': '&',
        'hellip': '\u2026',
        'nbsp': ' ',
        '#39': "'",
        'apos': "'",
        'quot': '"',
    };
    return s
        .replace(/&(amp|hellip|nbsp|#39|apos|quot);/g, (_whole, name) => decoded[name])
        .replace(/\s+/g, ' ')
        .trim();
}
52
/**
 * Resolve `href` against `base` into an absolute URL string.
 *
 * @param href - Possibly relative link.
 * @param base - URL the link was found on.
 * @returns The absolute URL, or `href` unchanged when resolution fails.
 */
function absUrl(href, base) {
    let resolved = href;
    try {
        resolved = new URL(href, base).href;
    }
    catch {
        // Unparseable input: hand back the raw href untouched.
    }
    return resolved;
}
60
/**
 * Extract the first dollar amount (or dollar range) from free text.
 *
 * Recognises thousands separators, decimals, an optional `K` suffix on either
 * end of a range ("$100K - $150K"), and an optional period suffix
 * ("a year", "per hour", "an hour", "/hr", "/yr"). A comma that merely follows
 * the amount ("$5,000, negotiable") is not captured.
 *
 * @param text - Text to scan.
 * @returns The matched salary string, or `undefined` when none is present.
 */
function findSalary(text) {
    const m = text.match(/\$\d+(?:,\d+)*(?:\.\d+)?(?:K\b)?(?:\s*[-–]\s*\$\d+(?:,\d+)*(?:\.\d+)?(?:K\b)?)?(?:\s*(?:a\s+year|per\s+hour|an\s+hour|\/hr|\/yr))?/i);
    return m ? m[0].trim() : undefined;
}
64
/**
 * Extract a relative posting age from free text.
 *
 * The explicit long form ("3 days ago", "2 weeks ago") is preferred; the
 * compact form ("2d", "5h", "30m") is used only when no long form is present,
 * so unrelated tokens such as "40h/week" do not shadow a real posting date.
 * The compact form must start on a word boundary.
 *
 * @param text - Text to scan.
 * @returns The matched age string, or `undefined` when none is found.
 */
function findDate(text) {
    const m = text.match(/(\d+\s+(?:day|week|month|hour|minute)s?\s+ago)/i) || text.match(/\b(\d+[dhm])\b/);
    return m ? m[1].trim() : undefined;
}
68
/**
 * Report whether the text mentions the standalone word "remote" (any case).
 *
 * @param text - Text to scan.
 * @returns `true` when the word is present.
 */
function hasRemote(text) {
    return text.search(/\bremote\b/i) !== -1;
}
71
/**
 * Simple concurrency limiter — runs at most `n` tasks in parallel.
 *
 * Tasks are started in order and results are returned in the same order as
 * `tasks`. A limit that is zero, negative or `NaN` is treated as 1 so that
 * every task still runs (previously no worker was started and an array of
 * empty slots was returned).
 *
 * @param tasks - Zero-argument functions, each returning a promise.
 * @param n     - Maximum number of tasks in flight at once.
 * @returns Resolved task values, index-aligned with `tasks`. Rejects with the
 *          first task rejection.
 */
async function pLimited(tasks, n) {
    const results = new Array(tasks.length);
    const requested = Math.floor(n);
    const workerCount = Math.max(1, Math.min(Number.isNaN(requested) ? 1 : requested, tasks.length));
    let cursor = 0;
    async function worker() {
        // `cursor` is shared: each worker claims the next unclaimed index.
        while (cursor < tasks.length) {
            const idx = cursor++;
            results[idx] = await tasks[idx]();
        }
    }
    await Promise.all(Array.from({ length: workerCount }, () => worker()));
    return results;
}
84
/**
 * Read a result count from the first level-1 markdown heading that starts
 * with a number, e.g. "# 1,234+ Software Engineer jobs".
 *
 * The number must begin with a digit, so a heading such as "# , NY jobs"
 * yields 0 instead of `NaN`.
 *
 * @param content - Markdown of the search page.
 * @returns The parsed count, or 0 when no such heading exists.
 */
function parseTotalFromHeading(content) {
    const m = content.match(/^#\s+(\d[\d,]*)\+?\s+/m);
    return m ? parseInt(m[1].replace(/,/g, ''), 10) : 0;
}
88
+ // ── LinkedIn Parser ────────────────────────────────────────────────────
89
/**
 * Parse LinkedIn search-result markdown into job cards.
 *
 * Cards are the list items that begin with "- [Title](url)" whose link points
 * at `linkedin.com/jobs/view/`. Title comes from the first "### " heading in
 * the card (falling back to the link text), company from the "#### " heading,
 * and location / posting age from the plain lines after the company heading.
 *
 * @param content   - Markdown of the search page.
 * @param searchUrl - URL the page was fetched from; base for relative links.
 * @param limit     - Maximum number of cards to return.
 * @returns `{ jobs, totalFound }`; `totalFound` falls back to `jobs.length`.
 */
function parseLinkedIn(content, searchUrl, limit) {
    const BADGES = ['Actively Hiring', 'Promoted'];
    const LOCATION_RE = /^[A-Z][a-z]+.*,\s*[A-Z]/;
    // Walk the lines that follow the "####" company heading and pick out
    // the location and the posting age.
    const readMeta = (cardText) => {
        let location = '';
        let postedAt;
        let seenCompany = false;
        for (const rawLine of cardText.split('\n')) {
            const line = rawLine.trim();
            if (line.startsWith('####')) {
                seenCompany = true;
                continue;
            }
            if (!line || !seenCompany)
                continue;
            // Headings, links, nested bullets, badges and URL-bearing lines carry no metadata.
            if (/^[#\[-]/.test(line) || BADGES.includes(line) || /https?:\/\//.test(line))
                continue;
            const when = findDate(line);
            if (!location && LOCATION_RE.test(line)) {
                // A location line may have the age appended: "New York, NY 2 weeks ago".
                let place = line;
                if (when) {
                    postedAt = when;
                    place = line.replace(/\d+\s+(?:week|day|month|hour|minute)s?\s+ago/i, '').replace(/\d+[dhm]\s*$/i, '');
                }
                location = clean(place);
                continue;
            }
            if (when && line.length < 30)
                postedAt = when;
        }
        return { location, postedAt };
    };
    const jobs = [];
    const totalFound = parseTotalFromHeading(content);
    // Splitting on "\n- [" leaves each card starting with "Title](url)…".
    for (const card of content.split(/\n-\s+\[/).slice(1)) {
        if (jobs.length >= limit)
            break;
        const link = card.match(/^([^\]]+)\]\(([^)]+)\)/);
        if (!link || !link[2].includes('linkedin.com/jobs/view/'))
            continue;
        const heading = card.match(/###\s+(.+)/);
        const title = clean(heading ? heading[1] : link[1]);
        if (!title)
            continue;
        const companyMatch = card.match(/####\s+\[([^\]]+)\]/) || card.match(/####\s+(.+)/);
        const company = companyMatch ? clean(companyMatch[1]) : '';
        const { location, postedAt } = readMeta(card);
        jobs.push({
            title, company, location,
            salary: findSalary(card),
            remote: hasRemote(card),
            postedAt,
            detailUrl: absUrl(link[2], searchUrl),
            source: 'linkedin',
        });
    }
    return { jobs, totalFound: totalFound || jobs.length };
}
163
+ // ── Glassdoor Parser ───────────────────────────────────────────────────
164
/**
 * Parse Glassdoor search-result markdown into job cards.
 *
 * Each top-level list item is treated as one card. Lines before the
 * job-listing link supply company and rating; lines after it supply location,
 * salary, skills, posting age and a snippet.
 *
 * @param content   - Markdown of the search page.
 * @param searchUrl - URL the page was fetched from; base for relative links.
 * @param limit     - Maximum number of cards to return.
 * @returns `{ jobs, totalFound }`; `totalFound` falls back to `jobs.length`.
 */
function parseGlassdoor(content, searchUrl, limit) {
    const LISTING_LINK = /\[([^\]]+)\]\((https?:\/\/[^\s)]*glassdoor\.com\/job-listing\/[^)]+)\)/;
    const jobs = [];
    const totalFound = parseTotalFromHeading(content);
    const cards = content.split(/\n-\s+/).slice(1);
    for (let c = 0; c < cards.length && jobs.length < limit; c++) {
        const card = cards[c];
        const rows = card.split('\n').map((row) => row.trim()).filter(Boolean);
        if (rows.length < 2)
            continue;
        const link = card.match(LISTING_LINK);
        if (!link)
            continue;
        const title = clean(link[1]);
        const detailUrl = link[2];
        // Header rows (everything before the first Glassdoor link): company + rating.
        let company = '';
        let rating;
        for (const row of rows) {
            if (row.includes('[') && row.includes('glassdoor.com'))
                break;
            const score = row.match(/^(\d\.\d)$/);
            if (score) {
                rating = parseFloat(score[1]);
            }
            else if (!company && row.length > 1 && !/^\d/.test(row)) {
                company = clean(row);
            }
        }
        // Rows after the title link: each row is claimed by the first matching field.
        let location = '';
        let salary;
        let snippet;
        let skills;
        let postedAt;
        let afterTitle = false;
        for (const row of rows) {
            if (row.includes(title) || row.includes('glassdoor.com/job-listing/')) {
                afterTitle = true;
                continue;
            }
            if (!afterTitle)
                continue;
            const skillList = row.match(/\*\*Skills?:\*\*\s*(.+)/i);
            if (skillList) {
                skills = skillList[1].split(',').map((s) => s.trim()).filter(Boolean);
            }
            else if (/^\d+[dwm]$/.test(row)) {
                postedAt = row;
            }
            else if (!salary && /\$/.test(row)) {
                salary = findSalary(row) || clean(row);
            }
            else if (!location && /^[A-Z][a-z]+.*,\s*[A-Z]{2}/.test(row)) {
                location = clean(row);
            }
            else if (!snippet && row.length > 40 && !row.startsWith('**')) {
                snippet = clean(row);
            }
        }
        jobs.push({
            title, company, location, salary,
            remote: hasRemote(card), postedAt,
            detailUrl: absUrl(detailUrl, searchUrl),
            snippet, skills, rating,
            source: 'glassdoor',
        });
    }
    return { jobs, totalFound: totalFound || jobs.length };
}
239
+ // ── Indeed Parser ──────────────────────────────────────────────────────
240
/**
 * Parse an Indeed search page into job cards.
 *
 * Two input shapes are handled:
 *  - HTML mode: raw HTML leaked through conversion; cards are located by
 *    `id="job_<jk>"` followed by a `<span title="…">`. Company, location and
 *    salary are read from the slice of HTML belonging to that card (from its
 *    `id` up to the next card's `id`), so a card that lacks one of the fields
 *    cannot shift the values of the cards after it.
 *  - Markdown mode: each list item with a `viewjob` / `rc/clk` link is a card.
 *
 * @param content    - Page content (markdown, possibly containing raw HTML).
 * @param _searchUrl - Unused; kept for signature parity with the other parsers.
 * @param limit      - Maximum number of cards to return.
 * @returns `{ jobs, totalFound }`; `totalFound` is read from an "N jobs"
 *          phrase when present, otherwise `jobs.length`.
 */
function parseIndeed(content, _searchUrl, limit) {
    const jobs = [];
    const htmlJobRe = /id="job_([a-f0-9]+)"[^>]*>.*?<span\s+title="([^"]+)"[^>]*>[^<]*<\/span>/gs;
    const htmlJobs = [...content.matchAll(htmlJobRe)];
    if (htmlJobs.length > 0) {
        // HTML mode — parse HTML attributes directly, one card slice at a time.
        const companyRe = /data-testid="company-name"[^>]*>([^<]+)<\/span>/;
        const locRe = /data-testid="text-location"[^>]*>([^<]+)<\/div>/;
        for (let i = 0; i < htmlJobs.length && jobs.length < limit; i++) {
            const start = htmlJobs[i].index ?? 0;
            const end = htmlJobs[i + 1]?.index ?? content.length;
            const cardHtml = content.slice(start, end);
            const jk = htmlJobs[i][1];
            const title = clean(htmlJobs[i][2]);
            const cm = cardHtml.match(companyRe);
            const lm = cardHtml.match(locRe);
            const location = lm ? clean(lm[1]) : '';
            jobs.push({
                title,
                company: cm ? clean(cm[1]) : '',
                location,
                salary: findSalary(cardHtml),
                // Only visible text is checked; raw markup may contain "remote"
                // in attribute values unrelated to the job.
                remote: hasRemote(`${title} ${location}`),
                detailUrl: `https://www.indeed.com/viewjob?jk=${jk}`,
                source: 'indeed',
            });
        }
    }
    else {
        // Markdown mode — title link followed by company, location, salary lines.
        const blocks = content.split(/\n-\s+/).slice(1);
        for (const block of blocks) {
            if (jobs.length >= limit)
                break;
            const lm = block.match(/\[([^\]]+)\]\((https?:\/\/[^\s)]*indeed\.com\/[^)]*(?:viewjob|rc\/clk)[^)]*)\)/);
            if (!lm)
                continue;
            const title = clean(lm[1]);
            let detailUrl = lm[2];
            // Rebuild a canonical detail URL from the `jk` job key when present.
            const jkMatch = detailUrl.match(/[?&]jk=([a-f0-9]+)/);
            if (jkMatch)
                detailUrl = `https://www.indeed.com/viewjob?jk=${jkMatch[1]}`;
            const lines = block.split('\n').map((l) => l.trim()).filter(Boolean);
            let company = '';
            let location = '';
            let salary;
            for (const l of lines) {
                if (l.includes(title) || l.includes('indeed.com'))
                    continue;
                if (!salary) {
                    const s = findSalary(l);
                    if (s) {
                        salary = s;
                        continue;
                    }
                }
                // Company is usually the first non-title, non-link, non-salary text.
                if (!company && l.length > 2 && !l.startsWith('[') && !l.startsWith('#') && !/^\d/.test(l)) {
                    company = clean(l);
                    continue;
                }
                // Location matches the "City, ST" pattern.
                if (!location && /^[A-Z][a-z]+.*,\s*[A-Z]{2}/.test(l)) {
                    location = clean(l);
                }
            }
            if (title) {
                jobs.push({
                    title, company, location, salary,
                    remote: hasRemote(block),
                    detailUrl,
                    source: 'indeed',
                });
            }
        }
    }
    const totalRe = content.match(/(?:of|about)\s+([\d,]+)\s+jobs/i) || content.match(/([\d,]+)\s+jobs/i);
    const totalFound = totalRe ? parseInt(totalRe[1].replace(/,/g, ''), 10) : jobs.length;
    return { jobs, totalFound };
}
322
+ // ── Upwork Parser ──────────────────────────────────────────────────────
323
/**
 * Parse Upwork job search results from converted markdown.
 *
 * Upwork search URL pattern:
 *   https://www.upwork.com/nx/search/jobs/?q=AI+engineer&sort=recency
 *
 * Cards are located by their title link (`[Title](https://www.upwork.com/jobs/…)`,
 * with a looser `/nx/jobs`, `/freelance-jobs` fallback). The text between one
 * title link and the next is treated as that card's block and scanned for
 * budget, job type, experience level, client rating / spend, skills, posting
 * age and a description snippet.
 *
 * The budget is the first dollar amount in the block that is NOT a client
 * spend figure (an amount followed by "spent" / "total spent"), so a card that
 * lists "$10K+ spent" before its budget no longer reports "$10" as the budget.
 *
 * @param content   - Markdown of the search page.
 * @param searchUrl - URL the page was fetched from; base for relative links.
 * @param limit     - Maximum number of cards to return.
 * @returns `{ jobs, totalFound }`; `totalFound` falls back to `jobs.length`.
 */
function parseUpwork(content, searchUrl, limit) {
    const jobs = [];
    // Total count — e.g. "1,234+ jobs found".
    const totalMatch = content.match(/(\d[\d,]*)\+?\s+(?:jobs?|results?)\s+(?:found|available|match)/i);
    const totalFound = totalMatch ? parseInt(totalMatch[1].replace(/,/g, ''), 10) : 0;
    const jobLinkRe = /\[([^\]]+)\]\((https:\/\/www\.upwork\.com\/jobs\/[^)]+)\)/g;
    const titleMatches = [...content.matchAll(jobLinkRe)];
    if (titleMatches.length === 0) {
        // Fallback: try /nx/jobs/ and /freelance-jobs/ links (search page variant).
        const altLinkRe = /\[([^\]]+)\]\((https:\/\/www\.upwork\.com\/(?:nx\/)?(?:jobs?|freelance-jobs?)[^)]*)\)/g;
        const altMatches = [...content.matchAll(altLinkRe)];
        if (altMatches.length === 0) {
            return { jobs, totalFound };
        }
        titleMatches.push(...altMatches);
    }
    const amountRe = /\$[\d,]+(?:\.\d+)?(?:\s*[-–]\s*\$[\d,]+(?:\.\d+)?)?\s*(?:\/\s*hr|per\s+hour|hourly)?/gi;
    const spendTailRe = /^[KkMm]?\+?\s*(?:total\s+)?spent/i;
    for (let i = 0; i < titleMatches.length && jobs.length < limit; i++) {
        const match = titleMatches[i];
        const title = clean(match[1]);
        const detailUrl = absUrl(match[2], searchUrl);
        // The block of text between this title link and the next one.
        const blockStart = match.index ?? 0;
        const blockEnd = titleMatches[i + 1]?.index ?? content.length;
        const block = content.slice(blockStart, blockEnd);
        // Budget / hourly rate — first $ amount that is not the client's spend.
        let budget;
        for (const amount of block.matchAll(amountRe)) {
            const tail = block.slice((amount.index ?? 0) + amount[0].length);
            if (spendTailRe.test(tail))
                continue;
            budget = amount[0].trim();
            break;
        }
        // Job type
        let jobType;
        if (/\bhourly\b/i.test(block))
            jobType = 'hourly';
        else if (/\bfixed[\s-]?price\b/i.test(block))
            jobType = 'fixed-price';
        // Experience level
        let experienceLevel;
        const expMatch = block.match(/\b(entry[- ]?level|intermediate|expert|beginner)\b/i);
        if (expMatch)
            experienceLevel = expMatch[1];
        // Client rating (only values within 0–5 are accepted)
        let clientRating;
        const ratingMatch = block.match(/(\d+(?:\.\d+)?)\s*(?:of\s+5\s+)?(?:stars?|★)/i);
        if (ratingMatch) {
            const r = parseFloat(ratingMatch[1]);
            if (r >= 0 && r <= 5)
                clientRating = r;
        }
        // Client spend, e.g. "$10K+ spent" → "$10K+"
        let clientSpend;
        const spendMatch = block.match(/\$[\d,.]+[KkMm]?\+?\s*(?:spent|total\s+spent)/i);
        if (spendMatch)
            clientSpend = spendMatch[0].replace(/\s*(?:spent|total\s+spent)/i, '').trim();
        // Skills: text following a "skills"/"tags" label, split on , or ;
        let skills;
        const skillsMatch = block.match(/(?:skills?|tags?)[:\s]+([^\n]+)/i);
        if (skillsMatch) {
            skills = skillsMatch[1].split(/[,;]/).map((s) => s.trim()).filter((s) => s.length > 1 && s.length < 40);
        }
        // Posted time
        const postedAt = findDate(block);
        // Description snippet — first substantial non-metadata line after the title.
        let snippet;
        const lines = block.split('\n').map((l) => l.trim()).filter(Boolean);
        for (const line of lines) {
            if (line === title)
                continue;
            if (line.startsWith('[') || line.startsWith('http'))
                continue;
            if (/^\$/.test(line) || /^\d+\s*(?:star|hour|day|week|month|review)/i.test(line))
                continue;
            if (line.length > 60) {
                snippet = clean(line).slice(0, 200);
                break;
            }
        }
        if (!title)
            continue;
        jobs.push({
            title,
            company: '', // Upwork jobs don't surface a company on the search page
            location: 'Remote', // Upwork is inherently remote
            remote: true,
            salary: budget, // Reuse salary field for budget display
            budget,
            jobType,
            experienceLevel,
            clientRating,
            clientSpend,
            skills,
            snippet,
            postedAt,
            detailUrl,
            source: 'upwork',
        });
    }
    return { jobs, totalFound: totalFound || jobs.length };
}
433
// Section-header detectors, applied to ONE line at a time.
// A header is either a markdown heading ("## Requirements", optionally with
// bold text: "### **Requirements**") or a bold-only line. All common bold
// spellings are accepted: "**Requirements**", "**Requirements**:" and
// "**Requirements:**". The marker must open the line (leading whitespace
// allowed) so a "#" inside prose — e.g. "… C# overview" — is not a header.
const SEC_DESC = /^\s*(?:#{1,4}\s*(?:\*\*)?|\*\*)(?:(?:full\s+)?job\s+description|about\s+(?:the\s+)?(?:role|position|job|opportunity)|overview|summary):?\*{0,2}:?\s*$/im;
const SEC_REQ = /^\s*(?:#{1,4}\s*(?:\*\*)?|\*\*)(?:requirements?|qualifications?|what\s+(?:you(?:'ll)?\s+)?(?:need|bring)|minimum\s+qualifications?|must\s+have|what\s+we(?:'re)?\s+look(?:ing)?\s+for|nice\s+to\s+have):?\*{0,2}:?\s*$/im;
const SEC_RESP = /^\s*(?:#{1,4}\s*(?:\*\*)?|\*\*)(?:responsibilities|what\s+you(?:'ll)?\s+do|duties|key\s+responsibilities|your\s+role|in\s+this\s+role):?\*{0,2}:?\s*$/im;
const SEC_BEN = /^\s*(?:#{1,4}\s*(?:\*\*)?|\*\*)(?:benefits?|perks?|what\s+we\s+offer|compensation(?:\s+and\s+benefits)?|why\s+(?:join|work)|our\s+offer):?\*{0,2}:?\s*$/im;
438
/**
 * Turn a section's text into a list of bullet strings.
 *
 * Every line has its list marker removed — "-", "*", "•", or a number
 * followed by "." or ")" — including when the marker is indented, and is then
 * trimmed. Lines of 5 characters or fewer are dropped as noise.
 *
 * @param text - Raw section body.
 * @returns The non-trivial lines, or `undefined` when none remain.
 */
function extractBullets(text) {
    const out = [];
    for (const line of text.split('\n')) {
        const t = line.replace(/^\s*(?:[-*•]|\d+[.)])\s+/, '').trim();
        if (t.length > 5)
            out.push(t);
    }
    return out.length > 0 ? out : undefined;
}
447
/**
 * Split a job-detail page into description / requirements / responsibilities /
 * benefits.
 *
 * Lines are scanned in order. A line matching one of the SEC_* header
 * patterns opens that section; any other markdown heading closes the open
 * section; everything else is appended to the open section. The description
 * is stored as text, the other three as bullet arrays. When no description
 * section was found, the first 2000 characters of the page are used.
 *
 * @param content - Markdown of the detail page.
 * @returns Object with `description` and, when found, `requirements`,
 *          `responsibilities`, `benefits`.
 */
function splitSections(content) {
    const out = {};
    // Order matters: the first pattern that matches a line wins.
    const headers = [
        [SEC_DESC, 'desc'],
        [SEC_REQ, 'req'],
        [SEC_RESP, 'resp'],
        [SEC_BEN, 'ben'],
    ];
    let active = null;
    let pending = [];
    // Store what has been collected for the open section and reset the buffer.
    const commit = () => {
        const text = pending.join('\n').trim();
        pending = [];
        if (!text || !active)
            return;
        switch (active) {
            case 'desc':
                out.description = text;
                break;
            case 'req':
                out.requirements = extractBullets(text);
                break;
            case 'resp':
                out.responsibilities = extractBullets(text);
                break;
            case 'ben':
                out.benefits = extractBullets(text);
                break;
        }
    };
    for (const line of content.split('\n')) {
        const opened = headers.find(([pattern]) => pattern.test(line));
        if (opened) {
            commit();
            active = opened[1];
            continue;
        }
        if (!active)
            continue;
        if (/^#{1,4}\s+/.test(line)) {
            // An unrelated heading ends the current section.
            commit();
            active = null;
            continue;
        }
        pending.push(line);
    }
    commit();
    if (!out.description)
        out.description = content.slice(0, 2000).trim();
    return out;
}
499
/**
 * Strip trailing noise sections (similar jobs, people also viewed, etc.).
 *
 * For each noise heading, the content is cut at the first occurrence that
 * lies beyond the first 30% of the (current) text. Occurrences inside the
 * first 30% are assumed to be navigation and are skipped — but, unlike a
 * first-match-only check, they do not stop a later occurrence of the same
 * heading from being used as the cut point.
 *
 * @param content - Markdown of the detail page.
 * @returns The content with trailing noise removed and trimmed at the cut;
 *          unchanged when no cut applies.
 */
function stripDetailNoise(content) {
    const cutPatterns = [
        /^#{1,3}\s*similar\s+jobs/gim,
        /^#{1,3}\s*people\s+also\s+viewed/gim,
        /^#{1,3}\s*similar\s+searches/gim,
        /^#{1,3}\s*explore\s+collaborative/gim,
        /^#{1,3}\s*seniority\s+level/gim,
        /^#{1,3}\s*company\s+and\s+salary/gim,
        /^#{1,3}\s*career\s+guide/gim,
        /^#{1,3}\s*jobs\s+with\s+similar/gim,
        /^#{1,3}\s*similar\s+jobs?\s+nearby/gim,
    ];
    let result = content;
    for (const pattern of cutPatterns) {
        const threshold = result.length * 0.3;
        // matchAll works on a private copy of the regex, so `lastIndex` never leaks.
        for (const m of result.matchAll(pattern)) {
            const at = m.index ?? 0;
            if (at > threshold) {
                result = result.slice(0, at).trim();
                break;
            }
        }
    }
    return result;
}
521
/**
 * Enrich a search-result card with fields parsed from its detail page.
 *
 * Values already present on the card take precedence for `salary` and
 * `experienceLevel`; the detail page only fills them in when the card has
 * none. (Previously `experienceLevel` from the card was overwritten — with
 * `undefined` when the page had no match.)
 *
 * @param content - Markdown of the detail page.
 * @param card    - Job card produced by one of the search parsers.
 * @returns A new object: the card plus `description`, `requirements`,
 *          `responsibilities`, `benefits`, `applyUrl`, `employmentType`,
 *          `experienceLevel` and `salary`.
 */
function parseJobDetail(content, card) {
    // Strip noise sections before parsing.
    const cleaned = stripDetailNoise(content);
    const sections = splitSections(cleaned);
    const empMatch = cleaned.match(/\b(full[- ]?time|part[- ]?time|contract|internship|freelance|temporary)\b/i);
    const expMatch = cleaned.match(/\b(entry[- ]?level|mid[- ]?level|senior|lead|principal|staff|junior|intern)\b/i);
    // Markdown "[Apply …](url)" link first, then a raw href that looks like an apply link.
    const applyMatch = cleaned.match(/\[(?:apply|submit)[^\]]*\]\(([^)]+)\)/i) ||
        cleaned.match(/href="([^"]*(?:apply|submit|careers)[^"]*)"/i);
    return {
        ...card,
        salary: card.salary || findSalary(cleaned),
        description: sections.description || cleaned.slice(0, 3000),
        requirements: sections.requirements,
        responsibilities: sections.responsibilities,
        benefits: sections.benefits,
        applyUrl: applyMatch ? applyMatch[1] : undefined,
        // "Full time" → "full-time"
        employmentType: empMatch ? empMatch[1].toLowerCase().replace(/\s+/g, '-') : undefined,
        experienceLevel: card.experienceLevel || (expMatch ? expMatch[1] : undefined),
    };
}
543
+ // ── Main ───────────────────────────────────────────────────────────────
544
/**
 * Search a job board and return structured results.
 *
 * Pipeline: resolve the source and search URL → fetch the search page with
 * `peel()` → parse job cards with the board's parser (or try every parser in
 * turn for an unrecognised URL) → optionally fetch and parse the detail page
 * of the first `fetchDetails` cards, at most 3 at a time. A detail page that
 * fails to fetch or parse leaves its card unchanged.
 *
 * This function does not call `cleanup()`; callers do that through the main
 * webpeel export once all fetching is finished.
 *
 * @param options - `url` or `keywords` (one is required), plus optional
 *                  `location`, `source` (default 'linkedin'), `limit` (25),
 *                  `fetchDetails` (0) and `timeout` in ms (30000).
 * @returns `{ jobs, totalFound, source, searchUrl, detailsFetched, timeTakenMs }`.
 * @throws Error when neither `url` nor `keywords` is given, or when a URL
 *         cannot be built for the requested source.
 */
export async function searchJobs(options) {
    const startedAt = Date.now();
    const { url, keywords = '', location = '', source: requestedSource = 'linkedin', limit = 25, fetchDetails = 0, timeout = 30000, } = options;
    // 1. Determine source & URL
    if (!url && !keywords)
        throw new Error('Either url or keywords must be provided');
    const source = url ? detectSource(url) : requestedSource;
    const searchUrl = url ? url : buildSearchUrl(source, keywords, location);
    // 2. Fetch search page (stealth sites are also rendered in a browser)
    const useStealth = stealthNeeded(source);
    const page = await peel(searchUrl, {
        stealth: useStealth,
        render: useStealth,
        timeout,
        format: 'markdown',
    });
    // 3. Parse job cards
    const parserFor = new Map([
        ['linkedin', parseLinkedIn],
        ['glassdoor', parseGlassdoor],
        ['indeed', parseIndeed],
        ['upwork', parseUpwork],
    ]);
    let parsed;
    const chosen = parserFor.get(source);
    if (chosen) {
        parsed = chosen(page.content, searchUrl, limit);
    }
    else {
        // Unknown board: try each parser in turn until one yields cards.
        for (const attempt of [parseLinkedIn, parseGlassdoor, parseIndeed, parseUpwork]) {
            parsed = attempt(page.content, searchUrl, limit);
            if (parsed.jobs.length)
                break;
        }
    }
    // 4. Optionally fetch detail pages (max 3 concurrent)
    let detailsFetched = 0;
    let jobs = parsed.jobs;
    if (fetchDetails > 0 && jobs.length > 0) {
        const enrich = async (card) => {
            try {
                const detailPage = await peel(card.detailUrl, {
                    stealth: useStealth,
                    timeout,
                    format: 'markdown',
                });
                detailsFetched += 1;
                return parseJobDetail(detailPage.content, card);
            }
            catch {
                return card; // graceful fallback
            }
        };
        const tasks = parsed.jobs.slice(0, fetchDetails).map((card) => () => enrich(card));
        const detailed = await pLimited(tasks, 3);
        jobs = [...detailed, ...parsed.jobs.slice(fetchDetails)];
    }
    return {
        jobs,
        totalFound: parsed.totalFound,
        source,
        searchUrl,
        detailsFetched,
        timeTakenMs: Date.now() - startedAt,
    };
}
@@ -0,0 +1,15 @@
1
+ /**
2
+ * JSON-LD Structured Data Extractor
3
+ *
4
+ * Extracts and converts JSON-LD (schema.org) data to clean markdown.
5
+ * Handles Recipe, Product, Article, FAQPage, HowTo, Event, LocalBusiness, Review.
6
+ * This is a FIRST-CLASS content source — tried before HTML DOM parsing.
7
+ */
8
+ export interface JsonLdResult { // Result of a JSON-LD extraction attempt.
9
+ found: boolean; // presumably whether usable JSON-LD was present — TODO confirm against implementation
10
+ type: string; // presumably the schema.org type (e.g. Recipe, Product) — TODO confirm
11
+ content: string; // presumably the markdown rendering of the data — TODO confirm
12
+ title: string; // NOTE(review): source of the title is not visible in this declaration
13
+ data: any; // untyped payload; presumably the parsed JSON-LD object — TODO confirm
14
+ }
15
+ export declare function extractJsonLd(html: string): JsonLdResult | null; // takes an HTML string; may return null (condition not visible here)