@iflow-mcp/jakeliume-webpeel 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (547)
  1. package/LICENSE +15 -0
  2. package/README.md +313 -0
  3. package/dist/cache.d.ts +30 -0
  4. package/dist/cache.js +139 -0
  5. package/dist/cli/commands/auth.d.ts +5 -0
  6. package/dist/cli/commands/auth.js +411 -0
  7. package/dist/cli/commands/doctor.d.ts +37 -0
  8. package/dist/cli/commands/doctor.js +371 -0
  9. package/dist/cli/commands/fetch.d.ts +6 -0
  10. package/dist/cli/commands/fetch.js +1345 -0
  11. package/dist/cli/commands/guide.d.ts +2 -0
  12. package/dist/cli/commands/guide.js +183 -0
  13. package/dist/cli/commands/interact.d.ts +5 -0
  14. package/dist/cli/commands/interact.js +840 -0
  15. package/dist/cli/commands/jobs.d.ts +5 -0
  16. package/dist/cli/commands/jobs.js +997 -0
  17. package/dist/cli/commands/monitor.d.ts +12 -0
  18. package/dist/cli/commands/monitor.js +197 -0
  19. package/dist/cli/commands/observe.d.ts +12 -0
  20. package/dist/cli/commands/observe.js +158 -0
  21. package/dist/cli/commands/screenshot.d.ts +5 -0
  22. package/dist/cli/commands/screenshot.js +282 -0
  23. package/dist/cli/commands/search.d.ts +5 -0
  24. package/dist/cli/commands/search.js +1021 -0
  25. package/dist/cli/commands/setup.d.ts +13 -0
  26. package/dist/cli/commands/setup.js +244 -0
  27. package/dist/cli/commands/skill.d.ts +15 -0
  28. package/dist/cli/commands/skill.js +195 -0
  29. package/dist/cli/utils.d.ts +84 -0
  30. package/dist/cli/utils.js +806 -0
  31. package/dist/cli-auth.d.ts +75 -0
  32. package/dist/cli-auth.js +369 -0
  33. package/dist/cli.d.ts +17 -0
  34. package/dist/cli.js +99 -0
  35. package/dist/core/actions.d.ts +69 -0
  36. package/dist/core/actions.js +495 -0
  37. package/dist/core/agent.d.ts +98 -0
  38. package/dist/core/agent.js +558 -0
  39. package/dist/core/answer.d.ts +42 -0
  40. package/dist/core/answer.js +395 -0
  41. package/dist/core/application-tracker.d.ts +84 -0
  42. package/dist/core/application-tracker.js +184 -0
  43. package/dist/core/apply.d.ts +162 -0
  44. package/dist/core/apply.js +816 -0
  45. package/dist/core/auth-detection.d.ts +35 -0
  46. package/dist/core/auth-detection.js +358 -0
  47. package/dist/core/auto-extract.d.ts +82 -0
  48. package/dist/core/auto-extract.js +604 -0
  49. package/dist/core/auto-interact.d.ts +23 -0
  50. package/dist/core/auto-interact.js +246 -0
  51. package/dist/core/bm25-filter.d.ts +66 -0
  52. package/dist/core/bm25-filter.js +288 -0
  53. package/dist/core/branding.d.ts +54 -0
  54. package/dist/core/branding.js +234 -0
  55. package/dist/core/browser-fetch.d.ts +323 -0
  56. package/dist/core/browser-fetch.js +1600 -0
  57. package/dist/core/browser-pool.d.ts +91 -0
  58. package/dist/core/browser-pool.js +550 -0
  59. package/dist/core/budget.d.ts +42 -0
  60. package/dist/core/budget.js +324 -0
  61. package/dist/core/business-intel.d.ts +47 -0
  62. package/dist/core/business-intel.js +279 -0
  63. package/dist/core/cache.d.ts +13 -0
  64. package/dist/core/cache.js +121 -0
  65. package/dist/core/cf-worker-proxy.d.ts +32 -0
  66. package/dist/core/cf-worker-proxy.js +87 -0
  67. package/dist/core/challenge-detection.d.ts +26 -0
  68. package/dist/core/challenge-detection.js +468 -0
  69. package/dist/core/change-tracking.d.ts +75 -0
  70. package/dist/core/change-tracking.js +276 -0
  71. package/dist/core/chunker.d.ts +46 -0
  72. package/dist/core/chunker.js +249 -0
  73. package/dist/core/chunking.d.ts +42 -0
  74. package/dist/core/chunking.js +181 -0
  75. package/dist/core/circuit-breaker.d.ts +44 -0
  76. package/dist/core/circuit-breaker.js +85 -0
  77. package/dist/core/content-pruner.d.ts +47 -0
  78. package/dist/core/content-pruner.js +425 -0
  79. package/dist/core/cookie-cache.d.ts +60 -0
  80. package/dist/core/cookie-cache.js +163 -0
  81. package/dist/core/crawl-checkpoint.d.ts +54 -0
  82. package/dist/core/crawl-checkpoint.js +104 -0
  83. package/dist/core/crawler.d.ts +84 -0
  84. package/dist/core/crawler.js +349 -0
  85. package/dist/core/cross-verify.d.ts +27 -0
  86. package/dist/core/cross-verify.js +93 -0
  87. package/dist/core/deep-fetch.d.ts +74 -0
  88. package/dist/core/deep-fetch.js +405 -0
  89. package/dist/core/deep-research.d.ts +141 -0
  90. package/dist/core/deep-research.js +972 -0
  91. package/dist/core/design-analysis.d.ts +70 -0
  92. package/dist/core/design-analysis.js +490 -0
  93. package/dist/core/design-compare.d.ts +38 -0
  94. package/dist/core/design-compare.js +264 -0
  95. package/dist/core/diff.d.ts +61 -0
  96. package/dist/core/diff.js +289 -0
  97. package/dist/core/dns-cache.d.ts +20 -0
  98. package/dist/core/dns-cache.js +198 -0
  99. package/dist/core/documents.d.ts +23 -0
  100. package/dist/core/documents.js +123 -0
  101. package/dist/core/domain-memory.d.ts +66 -0
  102. package/dist/core/domain-memory.js +163 -0
  103. package/dist/core/domain-verify.d.ts +40 -0
  104. package/dist/core/domain-verify.js +379 -0
  105. package/dist/core/engine-ranker.d.ts +112 -0
  106. package/dist/core/engine-ranker.js +395 -0
  107. package/dist/core/extract-inline.d.ts +38 -0
  108. package/dist/core/extract-inline.js +215 -0
  109. package/dist/core/extract-listings.d.ts +38 -0
  110. package/dist/core/extract-listings.js +461 -0
  111. package/dist/core/extract.d.ts +9 -0
  112. package/dist/core/extract.js +139 -0
  113. package/dist/core/fetch-cache.d.ts +57 -0
  114. package/dist/core/fetch-cache.js +95 -0
  115. package/dist/core/fetcher.d.ts +13 -0
  116. package/dist/core/fetcher.js +12 -0
  117. package/dist/core/google-cache.d.ts +29 -0
  118. package/dist/core/google-cache.js +180 -0
  119. package/dist/core/google-serp-parser.d.ts +82 -0
  120. package/dist/core/google-serp-parser.js +287 -0
  121. package/dist/core/hotel-search.d.ts +122 -0
  122. package/dist/core/hotel-search.js +382 -0
  123. package/dist/core/http-fetch.d.ts +72 -0
  124. package/dist/core/http-fetch.js +820 -0
  125. package/dist/core/human.d.ts +175 -0
  126. package/dist/core/human.js +680 -0
  127. package/dist/core/image-caption.d.ts +44 -0
  128. package/dist/core/image-caption.js +271 -0
  129. package/dist/core/jobs.d.ts +75 -0
  130. package/dist/core/jobs.js +634 -0
  131. package/dist/core/json-ld.d.ts +15 -0
  132. package/dist/core/json-ld.js +617 -0
  133. package/dist/core/language-detect.d.ts +18 -0
  134. package/dist/core/language-detect.js +135 -0
  135. package/dist/core/links.d.ts +10 -0
  136. package/dist/core/links.js +44 -0
  137. package/dist/core/llm-extract.d.ts +71 -0
  138. package/dist/core/llm-extract.js +507 -0
  139. package/dist/core/llm-provider.d.ts +100 -0
  140. package/dist/core/llm-provider.js +702 -0
  141. package/dist/core/local-search.d.ts +60 -0
  142. package/dist/core/local-search.js +308 -0
  143. package/dist/core/logger.d.ts +28 -0
  144. package/dist/core/logger.js +104 -0
  145. package/dist/core/map.d.ts +33 -0
  146. package/dist/core/map.js +127 -0
  147. package/dist/core/markdown.d.ts +92 -0
  148. package/dist/core/markdown.js +809 -0
  149. package/dist/core/metadata.d.ts +34 -0
  150. package/dist/core/metadata.js +422 -0
  151. package/dist/core/observe.d.ts +113 -0
  152. package/dist/core/observe.js +395 -0
  153. package/dist/core/ocr.d.ts +12 -0
  154. package/dist/core/ocr.js +33 -0
  155. package/dist/core/paginate.d.ts +31 -0
  156. package/dist/core/paginate.js +106 -0
  157. package/dist/core/pdf.d.ts +8 -0
  158. package/dist/core/pdf.js +25 -0
  159. package/dist/core/peel-tls.d.ts +25 -0
  160. package/dist/core/peel-tls.js +220 -0
  161. package/dist/core/pipeline.d.ts +132 -0
  162. package/dist/core/pipeline.js +1666 -0
  163. package/dist/core/profiles.d.ts +61 -0
  164. package/dist/core/profiles.js +350 -0
  165. package/dist/core/prompt-guard.d.ts +30 -0
  166. package/dist/core/prompt-guard.js +119 -0
  167. package/dist/core/proxy-config.d.ts +90 -0
  168. package/dist/core/proxy-config.js +172 -0
  169. package/dist/core/quick-answer.d.ts +53 -0
  170. package/dist/core/quick-answer.js +833 -0
  171. package/dist/core/rate-governor.d.ts +80 -0
  172. package/dist/core/rate-governor.js +238 -0
  173. package/dist/core/readability.d.ts +57 -0
  174. package/dist/core/readability.js +533 -0
  175. package/dist/core/research.d.ts +66 -0
  176. package/dist/core/research.js +270 -0
  177. package/dist/core/retry.d.ts +60 -0
  178. package/dist/core/retry.js +119 -0
  179. package/dist/core/safe-browsing.d.ts +30 -0
  180. package/dist/core/safe-browsing.js +206 -0
  181. package/dist/core/schema-extraction.d.ts +66 -0
  182. package/dist/core/schema-extraction.js +352 -0
  183. package/dist/core/schema-postprocess.d.ts +32 -0
  184. package/dist/core/schema-postprocess.js +469 -0
  185. package/dist/core/schema-templates.d.ts +19 -0
  186. package/dist/core/schema-templates.js +143 -0
  187. package/dist/core/screenshot.d.ts +224 -0
  188. package/dist/core/screenshot.js +207 -0
  189. package/dist/core/search-engines.d.ts +25 -0
  190. package/dist/core/search-engines.js +182 -0
  191. package/dist/core/search-provider.d.ts +243 -0
  192. package/dist/core/search-provider.js +1629 -0
  193. package/dist/core/searxng-provider.d.ts +35 -0
  194. package/dist/core/searxng-provider.js +105 -0
  195. package/dist/core/selective-evidence.d.ts +151 -0
  196. package/dist/core/selective-evidence.js +389 -0
  197. package/dist/core/site-search.d.ts +44 -0
  198. package/dist/core/site-search.js +252 -0
  199. package/dist/core/sitemap.d.ts +23 -0
  200. package/dist/core/sitemap.js +105 -0
  201. package/dist/core/source-credibility.d.ts +29 -0
  202. package/dist/core/source-credibility.js +584 -0
  203. package/dist/core/source-scoring.d.ts +166 -0
  204. package/dist/core/source-scoring.js +396 -0
  205. package/dist/core/stemmer.d.ts +38 -0
  206. package/dist/core/stemmer.js +509 -0
  207. package/dist/core/strategies.d.ts +104 -0
  208. package/dist/core/strategies.js +1044 -0
  209. package/dist/core/strategy-hooks.d.ts +145 -0
  210. package/dist/core/strategy-hooks.js +74 -0
  211. package/dist/core/structured-extract.d.ts +43 -0
  212. package/dist/core/structured-extract.js +550 -0
  213. package/dist/core/summarize.d.ts +17 -0
  214. package/dist/core/summarize.js +78 -0
  215. package/dist/core/synonyms.d.ts +42 -0
  216. package/dist/core/synonyms.js +184 -0
  217. package/dist/core/system-monitor.d.ts +61 -0
  218. package/dist/core/system-monitor.js +133 -0
  219. package/dist/core/table-format.d.ts +30 -0
  220. package/dist/core/table-format.js +146 -0
  221. package/dist/core/threat-feeds.d.ts +23 -0
  222. package/dist/core/threat-feeds.js +104 -0
  223. package/dist/core/timing.d.ts +21 -0
  224. package/dist/core/timing.js +33 -0
  225. package/dist/core/transcript-export.d.ts +47 -0
  226. package/dist/core/transcript-export.js +107 -0
  227. package/dist/core/user-agents.d.ts +82 -0
  228. package/dist/core/user-agents.js +239 -0
  229. package/dist/core/vertical-search.d.ts +54 -0
  230. package/dist/core/vertical-search.js +158 -0
  231. package/dist/core/watch-manager.d.ts +175 -0
  232. package/dist/core/watch-manager.js +416 -0
  233. package/dist/core/watch.d.ts +101 -0
  234. package/dist/core/watch.js +389 -0
  235. package/dist/core/youtube.d.ts +130 -0
  236. package/dist/core/youtube.js +1175 -0
  237. package/dist/ee/challenge-re-export.d.ts +1 -0
  238. package/dist/ee/challenge-re-export.js +1 -0
  239. package/dist/ee/challenge-solver.d.ts +72 -0
  240. package/dist/ee/challenge-solver.js +720 -0
  241. package/dist/ee/domain-extractors.d.ts +8 -0
  242. package/dist/ee/domain-extractors.js +8 -0
  243. package/dist/ee/domain-intel.d.ts +16 -0
  244. package/dist/ee/domain-intel.js +133 -0
  245. package/dist/ee/extractors/allrecipes.d.ts +2 -0
  246. package/dist/ee/extractors/allrecipes.js +120 -0
  247. package/dist/ee/extractors/amazon.d.ts +2 -0
  248. package/dist/ee/extractors/amazon.js +78 -0
  249. package/dist/ee/extractors/arxiv.d.ts +2 -0
  250. package/dist/ee/extractors/arxiv.js +137 -0
  251. package/dist/ee/extractors/bestbuy.d.ts +2 -0
  252. package/dist/ee/extractors/bestbuy.js +78 -0
  253. package/dist/ee/extractors/carscom.d.ts +2 -0
  254. package/dist/ee/extractors/carscom.js +121 -0
  255. package/dist/ee/extractors/coingecko.d.ts +2 -0
  256. package/dist/ee/extractors/coingecko.js +134 -0
  257. package/dist/ee/extractors/craigslist.d.ts +2 -0
  258. package/dist/ee/extractors/craigslist.js +92 -0
  259. package/dist/ee/extractors/devto.d.ts +2 -0
  260. package/dist/ee/extractors/devto.js +135 -0
  261. package/dist/ee/extractors/ebay.d.ts +2 -0
  262. package/dist/ee/extractors/ebay.js +90 -0
  263. package/dist/ee/extractors/espn.d.ts +2 -0
  264. package/dist/ee/extractors/espn.js +260 -0
  265. package/dist/ee/extractors/etsy.d.ts +2 -0
  266. package/dist/ee/extractors/etsy.js +52 -0
  267. package/dist/ee/extractors/facebook.d.ts +2 -0
  268. package/dist/ee/extractors/facebook.js +46 -0
  269. package/dist/ee/extractors/github.d.ts +2 -0
  270. package/dist/ee/extractors/github.js +196 -0
  271. package/dist/ee/extractors/google-flights.d.ts +2 -0
  272. package/dist/ee/extractors/google-flights.js +176 -0
  273. package/dist/ee/extractors/hackernews.d.ts +2 -0
  274. package/dist/ee/extractors/hackernews.js +147 -0
  275. package/dist/ee/extractors/imdb.d.ts +2 -0
  276. package/dist/ee/extractors/imdb.js +172 -0
  277. package/dist/ee/extractors/index.d.ts +26 -0
  278. package/dist/ee/extractors/index.js +247 -0
  279. package/dist/ee/extractors/instagram.d.ts +2 -0
  280. package/dist/ee/extractors/instagram.js +102 -0
  281. package/dist/ee/extractors/kalshi.d.ts +2 -0
  282. package/dist/ee/extractors/kalshi.js +121 -0
  283. package/dist/ee/extractors/kayak-cars.d.ts +2 -0
  284. package/dist/ee/extractors/kayak-cars.js +270 -0
  285. package/dist/ee/extractors/linkedin.d.ts +2 -0
  286. package/dist/ee/extractors/linkedin.js +113 -0
  287. package/dist/ee/extractors/medium.d.ts +2 -0
  288. package/dist/ee/extractors/medium.js +130 -0
  289. package/dist/ee/extractors/news.d.ts +4 -0
  290. package/dist/ee/extractors/news.js +173 -0
  291. package/dist/ee/extractors/npm.d.ts +2 -0
  292. package/dist/ee/extractors/npm.js +86 -0
  293. package/dist/ee/extractors/pdf.d.ts +2 -0
  294. package/dist/ee/extractors/pdf.js +108 -0
  295. package/dist/ee/extractors/pinterest.d.ts +2 -0
  296. package/dist/ee/extractors/pinterest.js +34 -0
  297. package/dist/ee/extractors/polymarket.d.ts +2 -0
  298. package/dist/ee/extractors/polymarket.js +358 -0
  299. package/dist/ee/extractors/producthunt.d.ts +2 -0
  300. package/dist/ee/extractors/producthunt.js +88 -0
  301. package/dist/ee/extractors/pubmed.d.ts +2 -0
  302. package/dist/ee/extractors/pubmed.js +162 -0
  303. package/dist/ee/extractors/pypi.d.ts +2 -0
  304. package/dist/ee/extractors/pypi.js +80 -0
  305. package/dist/ee/extractors/reddit.d.ts +2 -0
  306. package/dist/ee/extractors/reddit.js +438 -0
  307. package/dist/ee/extractors/redfin.d.ts +2 -0
  308. package/dist/ee/extractors/redfin.js +156 -0
  309. package/dist/ee/extractors/semanticscholar.d.ts +2 -0
  310. package/dist/ee/extractors/semanticscholar.js +131 -0
  311. package/dist/ee/extractors/shared.d.ts +12 -0
  312. package/dist/ee/extractors/shared.js +76 -0
  313. package/dist/ee/extractors/soundcloud.d.ts +2 -0
  314. package/dist/ee/extractors/soundcloud.js +34 -0
  315. package/dist/ee/extractors/sportsbetting.d.ts +2 -0
  316. package/dist/ee/extractors/sportsbetting.js +37 -0
  317. package/dist/ee/extractors/spotify.d.ts +2 -0
  318. package/dist/ee/extractors/spotify.js +34 -0
  319. package/dist/ee/extractors/stackoverflow.d.ts +2 -0
  320. package/dist/ee/extractors/stackoverflow.js +61 -0
  321. package/dist/ee/extractors/substack.d.ts +2 -0
  322. package/dist/ee/extractors/substack.js +115 -0
  323. package/dist/ee/extractors/substackroot.d.ts +2 -0
  324. package/dist/ee/extractors/substackroot.js +46 -0
  325. package/dist/ee/extractors/tiktok.d.ts +2 -0
  326. package/dist/ee/extractors/tiktok.js +29 -0
  327. package/dist/ee/extractors/tradingview.d.ts +2 -0
  328. package/dist/ee/extractors/tradingview.js +182 -0
  329. package/dist/ee/extractors/twitch.d.ts +2 -0
  330. package/dist/ee/extractors/twitch.js +36 -0
  331. package/dist/ee/extractors/twitter.d.ts +2 -0
  332. package/dist/ee/extractors/twitter.js +327 -0
  333. package/dist/ee/extractors/types.d.ts +14 -0
  334. package/dist/ee/extractors/types.js +1 -0
  335. package/dist/ee/extractors/walmart.d.ts +2 -0
  336. package/dist/ee/extractors/walmart.js +50 -0
  337. package/dist/ee/extractors/weather.d.ts +2 -0
  338. package/dist/ee/extractors/weather.js +133 -0
  339. package/dist/ee/extractors/wikipedia.d.ts +4 -0
  340. package/dist/ee/extractors/wikipedia.js +235 -0
  341. package/dist/ee/extractors/yelp.d.ts +2 -0
  342. package/dist/ee/extractors/yelp.js +216 -0
  343. package/dist/ee/extractors/youtube.d.ts +2 -0
  344. package/dist/ee/extractors/youtube.js +189 -0
  345. package/dist/ee/extractors/zillow.d.ts +54 -0
  346. package/dist/ee/extractors/zillow.js +247 -0
  347. package/dist/ee/extractors-re-export.d.ts +1 -0
  348. package/dist/ee/extractors-re-export.js +1 -0
  349. package/dist/ee/premium-hooks.d.ts +20 -0
  350. package/dist/ee/premium-hooks.js +50 -0
  351. package/dist/ee/spa-detection.d.ts +2 -0
  352. package/dist/ee/spa-detection.js +2 -0
  353. package/dist/ee/stability.d.ts +4 -0
  354. package/dist/ee/stability.js +29 -0
  355. package/dist/ee/swr-cache.d.ts +14 -0
  356. package/dist/ee/swr-cache.js +34 -0
  357. package/dist/index.d.ts +143 -0
  358. package/dist/index.js +291 -0
  359. package/dist/integrations/index.d.ts +2 -0
  360. package/dist/integrations/index.js +2 -0
  361. package/dist/integrations/langchain.d.ts +64 -0
  362. package/dist/integrations/langchain.js +115 -0
  363. package/dist/integrations/llamaindex.d.ts +50 -0
  364. package/dist/integrations/llamaindex.js +91 -0
  365. package/dist/mcp/handlers/act.d.ts +5 -0
  366. package/dist/mcp/handlers/act.js +34 -0
  367. package/dist/mcp/handlers/definitions.d.ts +6 -0
  368. package/dist/mcp/handlers/definitions.js +395 -0
  369. package/dist/mcp/handlers/extract.d.ts +7 -0
  370. package/dist/mcp/handlers/extract.js +135 -0
  371. package/dist/mcp/handlers/fetch.d.ts +6 -0
  372. package/dist/mcp/handlers/fetch.js +98 -0
  373. package/dist/mcp/handlers/find.d.ts +5 -0
  374. package/dist/mcp/handlers/find.js +137 -0
  375. package/dist/mcp/handlers/index.d.ts +13 -0
  376. package/dist/mcp/handlers/index.js +63 -0
  377. package/dist/mcp/handlers/legacy.d.ts +25 -0
  378. package/dist/mcp/handlers/legacy.js +450 -0
  379. package/dist/mcp/handlers/meta.d.ts +6 -0
  380. package/dist/mcp/handlers/meta.js +40 -0
  381. package/dist/mcp/handlers/monitor.d.ts +5 -0
  382. package/dist/mcp/handlers/monitor.js +41 -0
  383. package/dist/mcp/handlers/observe.d.ts +8 -0
  384. package/dist/mcp/handlers/observe.js +37 -0
  385. package/dist/mcp/handlers/read.d.ts +6 -0
  386. package/dist/mcp/handlers/read.js +78 -0
  387. package/dist/mcp/handlers/see.d.ts +5 -0
  388. package/dist/mcp/handlers/see.js +75 -0
  389. package/dist/mcp/handlers/types.d.ts +29 -0
  390. package/dist/mcp/handlers/types.js +28 -0
  391. package/dist/mcp/server.d.ts +7 -0
  392. package/dist/mcp/server.js +108 -0
  393. package/dist/mcp/smart-router.d.ts +23 -0
  394. package/dist/mcp/smart-router.js +178 -0
  395. package/dist/server/app.d.ts +14 -0
  396. package/dist/server/app.js +632 -0
  397. package/dist/server/auth-store.d.ts +28 -0
  398. package/dist/server/auth-store.js +88 -0
  399. package/dist/server/bull-queues.d.ts +60 -0
  400. package/dist/server/bull-queues.js +90 -0
  401. package/dist/server/email-service.d.ts +55 -0
  402. package/dist/server/email-service.js +291 -0
  403. package/dist/server/job-queue.d.ts +100 -0
  404. package/dist/server/job-queue.js +145 -0
  405. package/dist/server/logger.d.ts +10 -0
  406. package/dist/server/logger.js +37 -0
  407. package/dist/server/middleware/audit-log.d.ts +14 -0
  408. package/dist/server/middleware/audit-log.js +73 -0
  409. package/dist/server/middleware/auth.d.ts +35 -0
  410. package/dist/server/middleware/auth.js +225 -0
  411. package/dist/server/middleware/rate-limit.d.ts +50 -0
  412. package/dist/server/middleware/rate-limit.js +270 -0
  413. package/dist/server/middleware/scope-guard.d.ts +25 -0
  414. package/dist/server/middleware/scope-guard.js +45 -0
  415. package/dist/server/middleware/url-validator.d.ts +15 -0
  416. package/dist/server/middleware/url-validator.js +201 -0
  417. package/dist/server/openapi.yaml +6418 -0
  418. package/dist/server/pg-auth-store.d.ts +146 -0
  419. package/dist/server/pg-auth-store.js +576 -0
  420. package/dist/server/pg-job-queue.d.ts +59 -0
  421. package/dist/server/pg-job-queue.js +375 -0
  422. package/dist/server/routes/activity.d.ts +6 -0
  423. package/dist/server/routes/activity.js +79 -0
  424. package/dist/server/routes/admin-active.d.ts +7 -0
  425. package/dist/server/routes/admin-active.js +120 -0
  426. package/dist/server/routes/admin-stats.d.ts +7 -0
  427. package/dist/server/routes/admin-stats.js +176 -0
  428. package/dist/server/routes/agent.d.ts +24 -0
  429. package/dist/server/routes/agent.js +480 -0
  430. package/dist/server/routes/answer.d.ts +5 -0
  431. package/dist/server/routes/answer.js +125 -0
  432. package/dist/server/routes/ask.d.ts +28 -0
  433. package/dist/server/routes/ask.js +295 -0
  434. package/dist/server/routes/batch.d.ts +6 -0
  435. package/dist/server/routes/batch.js +493 -0
  436. package/dist/server/routes/cache-warm.d.ts +25 -0
  437. package/dist/server/routes/cache-warm.js +212 -0
  438. package/dist/server/routes/cli-usage.d.ts +6 -0
  439. package/dist/server/routes/cli-usage.js +127 -0
  440. package/dist/server/routes/compat.d.ts +23 -0
  441. package/dist/server/routes/compat.js +652 -0
  442. package/dist/server/routes/crawl.d.ts +13 -0
  443. package/dist/server/routes/crawl.js +287 -0
  444. package/dist/server/routes/deep-fetch.d.ts +8 -0
  445. package/dist/server/routes/deep-fetch.js +57 -0
  446. package/dist/server/routes/deep-research.d.ts +11 -0
  447. package/dist/server/routes/deep-research.js +232 -0
  448. package/dist/server/routes/demo.d.ts +24 -0
  449. package/dist/server/routes/demo.js +517 -0
  450. package/dist/server/routes/do.d.ts +8 -0
  451. package/dist/server/routes/do.js +72 -0
  452. package/dist/server/routes/extract.d.ts +14 -0
  453. package/dist/server/routes/extract.js +325 -0
  454. package/dist/server/routes/feed.d.ts +15 -0
  455. package/dist/server/routes/feed.js +311 -0
  456. package/dist/server/routes/fetch-queue.d.ts +13 -0
  457. package/dist/server/routes/fetch-queue.js +357 -0
  458. package/dist/server/routes/fetch.d.ts +7 -0
  459. package/dist/server/routes/fetch.js +1274 -0
  460. package/dist/server/routes/go.d.ts +14 -0
  461. package/dist/server/routes/go.js +81 -0
  462. package/dist/server/routes/health.d.ts +11 -0
  463. package/dist/server/routes/health.js +141 -0
  464. package/dist/server/routes/jobs.d.ts +7 -0
  465. package/dist/server/routes/jobs.js +574 -0
  466. package/dist/server/routes/map.d.ts +11 -0
  467. package/dist/server/routes/map.js +116 -0
  468. package/dist/server/routes/mcp.d.ts +14 -0
  469. package/dist/server/routes/mcp.js +197 -0
  470. package/dist/server/routes/metrics.d.ts +37 -0
  471. package/dist/server/routes/metrics.js +149 -0
  472. package/dist/server/routes/oauth.d.ts +9 -0
  473. package/dist/server/routes/oauth.js +396 -0
  474. package/dist/server/routes/playground.d.ts +17 -0
  475. package/dist/server/routes/playground.js +283 -0
  476. package/dist/server/routes/reader.d.ts +18 -0
  477. package/dist/server/routes/reader.js +192 -0
  478. package/dist/server/routes/research.d.ts +14 -0
  479. package/dist/server/routes/research.js +482 -0
  480. package/dist/server/routes/screenshot.d.ts +22 -0
  481. package/dist/server/routes/screenshot.js +820 -0
  482. package/dist/server/routes/search.d.ts +6 -0
  483. package/dist/server/routes/search.js +874 -0
  484. package/dist/server/routes/session.d.ts +17 -0
  485. package/dist/server/routes/session.js +548 -0
  486. package/dist/server/routes/share.d.ts +18 -0
  487. package/dist/server/routes/share.js +462 -0
  488. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  489. package/dist/server/routes/smart-search/handlers/cars.js +102 -0
  490. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  491. package/dist/server/routes/smart-search/handlers/flights.js +72 -0
  492. package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
  493. package/dist/server/routes/smart-search/handlers/general.js +717 -0
  494. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  495. package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
  496. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  497. package/dist/server/routes/smart-search/handlers/products.js +1309 -0
  498. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  499. package/dist/server/routes/smart-search/handlers/rental.js +154 -0
  500. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  501. package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
  502. package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
  503. package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
  504. package/dist/server/routes/smart-search/index.d.ts +19 -0
  505. package/dist/server/routes/smart-search/index.js +546 -0
  506. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  507. package/dist/server/routes/smart-search/intent.js +264 -0
  508. package/dist/server/routes/smart-search/llm.d.ts +16 -0
  509. package/dist/server/routes/smart-search/llm.js +70 -0
  510. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  511. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  512. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  513. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  514. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  515. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  516. package/dist/server/routes/smart-search/types.d.ts +81 -0
  517. package/dist/server/routes/smart-search/types.js +1 -0
  518. package/dist/server/routes/smart-search/utils.d.ts +20 -0
  519. package/dist/server/routes/smart-search/utils.js +146 -0
  520. package/dist/server/routes/stats.d.ts +6 -0
  521. package/dist/server/routes/stats.js +71 -0
  522. package/dist/server/routes/stripe.d.ts +15 -0
  523. package/dist/server/routes/stripe.js +296 -0
  524. package/dist/server/routes/transcript-export.d.ts +10 -0
  525. package/dist/server/routes/transcript-export.js +178 -0
  526. package/dist/server/routes/usage.d.ts +9 -0
  527. package/dist/server/routes/usage.js +279 -0
  528. package/dist/server/routes/users.d.ts +8 -0
  529. package/dist/server/routes/users.js +1867 -0
  530. package/dist/server/routes/watch.d.ts +15 -0
  531. package/dist/server/routes/watch.js +309 -0
  532. package/dist/server/routes/webhooks.d.ts +26 -0
  533. package/dist/server/routes/webhooks.js +170 -0
  534. package/dist/server/routes/youtube.d.ts +6 -0
  535. package/dist/server/routes/youtube.js +130 -0
  536. package/dist/server/sentry.d.ts +14 -0
  537. package/dist/server/sentry.js +104 -0
  538. package/dist/server/types.d.ts +15 -0
  539. package/dist/server/types.js +7 -0
  540. package/dist/server/utils/response.d.ts +44 -0
  541. package/dist/server/utils/response.js +69 -0
  542. package/dist/server/utils/sse.d.ts +22 -0
  543. package/dist/server/utils/sse.js +38 -0
  544. package/dist/types.d.ts +552 -0
  545. package/dist/types.js +39 -0
  546. package/llms.txt +105 -0
  547. package/package.json +189 -0
@@ -0,0 +1,156 @@
1
+ import { simpleFetch } from '../../core/fetcher.js';
2
+ import { fetchRedfinListings, formatRedfinListings } from './zillow.js';
3
+ // ---------------------------------------------------------------------------
4
+ // Redfin extractor — live listings via Redfin's internal stingray API
5
+ // ---------------------------------------------------------------------------
6
/**
 * Redfin domain extractor.
 *
 * Dispatches on the URL path shape:
 *   1. `/city/{id}/{ST}/{City}`            — listings for the region ID in the URL.
 *   2. `/{ST}/{City}/{address}/home/{id}`  — a single property page.
 *   3. `/{ST}/{City}[/...]`                — listings via a small built-in city → region-ID table,
 *                                            otherwise a hint on how to build a city URL.
 *   4. anything else                       — static usage help.
 *
 * The property pattern is tested BEFORE the generic state/city pattern: the
 * state/city regex also matches property URLs (they start with `/{ST}/{City}/`)
 * and that branch always returns, so the property branch would otherwise be
 * unreachable.
 *
 * @param _html Unused; kept for the common extractor signature.
 * @param url   Absolute Redfin URL.
 * @returns A `{ domain, type, structured, cleanContent }` result, or `null`
 *          when an unexpected error occurs (e.g. `url` cannot be parsed).
 */
export async function redfinExtractor(_html, url) {
    try {
        const u = new URL(url);
        const path = u.pathname;
        // ── Pattern 1: /city/{id}/{state}/{city-name} ───────────────────────────
        // e.g. redfin.com/city/30749/NY/New-York
        const cityMatch = path.match(/^\/city\/(\d+)\/([A-Z]{2})\/([^/]+)/);
        if (cityMatch) {
            const regionId = cityMatch[1];
            const stateCode = cityMatch[2];
            const citySlug = cityMatch[3];
            const cityName = citySlug.replace(/-/g, ' ');
            const locationLabel = `${cityName}, ${stateCode}`;
            const payload = await fetchRedfinListings(regionId, 6 /* city */);
            if (payload?.homes && payload.homes.length > 0) {
                return formatRedfinListings(payload.homes, locationLabel, url, payload.searchMedian);
            }
            // No homes returned: fall through to the generic help at the bottom.
        }
        // ── Pattern 2: Individual property page ─────────────────────────────────
        // e.g. /NY/New-York/123-Main-St-10001/home/12345678
        // Must be checked before the state/city pattern below (see function doc).
        const propMatch = path.match(/^\/([A-Z]{2})\/([^/]+)\/(.+?)\/home\/(\d+)/);
        if (propMatch) {
            const stateCode = propMatch[1];
            const citySlug = propMatch[2];
            const addressSlug = propMatch[3];
            const propertyId = propMatch[4];
            const address = addressSlug.replace(/-/g, ' ');
            const city = citySlug.replace(/-/g, ' ');
            // Query Redfin's stingray home-details endpoint for this property ID.
            const apiUrl = `https://www.redfin.com/stingray/api/home/details/aboveTheFold?propertyId=${propertyId}&accessLevel=1`;
            try {
                const resp = await simpleFetch(apiUrl, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36', 30000, { 'Accept': 'application/json', 'Referer': 'https://www.redfin.com/' });
                if (resp && (!resp.statusCode || resp.statusCode < 400)) {
                    // The response body is prefixed with `{}&&`; strip it before parsing.
                    const raw = resp.html.replace(/^\{\}&&/, '');
                    const data = JSON.parse(raw);
                    if (data.resultCode === 0 && data.payload) {
                        const p = data.payload;
                        const price = p.basicInfo?.price?.amount;
                        const beds = p.basicInfo?.beds;
                        const baths = p.basicInfo?.baths;
                        const sqft = p.basicInfo?.sqFt;
                        const status = p.basicInfo?.status;
                        const desc = p.basicInfo?.description;
                        // filter(Boolean) drops every empty entry, including the '' spacers.
                        const cleanContent = [
                            `# 🏠 ${address}, ${city}, ${stateCode}`,
                            '',
                            price ? `**Price:** $${Number(price).toLocaleString()}` : '',
                            [beds && `${beds} beds`, baths && `${baths} baths`, sqft && `${Number(sqft).toLocaleString()} sqft`].filter(Boolean).join(' · '),
                            status ? `**Status:** ${status}` : '',
                            '',
                            desc ? `## Description\n\n${desc.slice(0, 800)}${desc.length > 800 ? '…' : ''}` : '',
                            '',
                            `[View on Redfin](${url})`,
                        ].filter(Boolean).join('\n');
                        return {
                            domain: 'redfin.com',
                            type: 'property',
                            structured: { address, city, state: stateCode, propertyId, price, beds, baths, sqFt: sqft, status },
                            cleanContent,
                        };
                    }
                }
            }
            catch (e) {
                // Best effort: a failed detail lookup degrades to the URL-derived fallback below.
                if (process.env.DEBUG)
                    console.debug('[webpeel]', 'Redfin property detail error:', e instanceof Error ? e.message : e);
            }
            // Fallback for property pages: only what can be derived from the URL itself.
            return {
                domain: 'redfin.com',
                type: 'property',
                structured: { address, city, state: stateCode, propertyId },
                cleanContent: `# 🏠 ${address}, ${city}, ${stateCode}\n\n[View on Redfin](${url})`,
            };
        }
        // ── Pattern 3: /{state}/{city} or /{state}/{city}/filter/... ───────────
        // e.g. redfin.com/NY/New-York or redfin.com/NY/Brooklyn
        const stateCity = path.match(/^\/([A-Z]{2})\/([^/]+)(?:\/|$)/);
        if (stateCity) {
            const stateCode = stateCity[1];
            const citySlug = stateCity[2];
            const cityName = citySlug.replace(/-/g, ' ');
            const locationLabel = `${cityName}, ${stateCode}`;
            // No region ID in URL — use known Redfin city region IDs (region_type=6)
            const cityRegionMap = {
                'NY-New-York': 30749, 'NY-Brooklyn': 30749, 'NY-Queens': 30749, 'NY-Bronx': 30749,
                'NY-Staten-Island': 30749, 'NY-Manhattan': 30749,
                'CA-Los-Angeles': 11203, 'CA-San-Francisco': 17151, 'CA-San-Diego': 18142,
                'CA-San-Jose': 17420,
                'TX-Houston': 30772, 'TX-Dallas': 35799, 'TX-Austin': 30818,
                'FL-Miami': 10201, 'FL-Orlando': 13140, 'FL-Tampa': 18280,
                'IL-Chicago': 29470, 'WA-Seattle': 16163, 'MA-Boston': 1826,
                'AZ-Phoenix': 14240, 'PA-Philadelphia': 13364, 'GA-Atlanta': 30756,
                'CO-Denver': 11093, 'MN-Minneapolis': 18959, 'OR-Portland': 14941,
                'NV-Las-Vegas': 32820, 'NC-Charlotte': 3105, 'OH-Columbus': 8528,
            };
            const marketKey = `${stateCode}-${citySlug}`;
            const marketId = cityRegionMap[marketKey];
            if (marketId) {
                const payload = await fetchRedfinListings(marketId, 6 /* city */);
                if (payload?.homes && payload.homes.length > 0) {
                    return formatRedfinListings(payload.homes, locationLabel, url, payload.searchMedian);
                }
            }
            // Fallback: return helpful info about what Redfin offers
            const cleanContent = [
                `# 🏠 Redfin — ${locationLabel}`,
                '',
                `*Redfin listing search for ${locationLabel}*`,
                '',
                '> 💡 For the best results, use a city URL with a region ID:',
                `> \`webpeel "https://www.redfin.com/city/{id}/${stateCode}/${citySlug}"\``,
                '',
                `**[Browse ${cityName} on Redfin](${url})**`,
            ].join('\n');
            return {
                domain: 'redfin.com',
                type: 'real-estate-search',
                structured: { city: cityName, state: stateCode },
                cleanContent,
            };
        }
        // ── Pattern 4: Homepage or general search ───────────────────────────────
        // Return info about how to use Redfin extractor
        return {
            domain: 'redfin.com',
            type: 'homepage',
            structured: {},
            cleanContent: [
                '# 🏠 Redfin — Real Estate Listings',
                '',
                'For live MLS listings, use a city or neighborhood URL:',
                '',
                '**City search:**',
                '- `webpeel "https://www.redfin.com/city/30749/NY/New-York"` — NYC listings',
                '- `webpeel "https://www.redfin.com/city/17184/CA/Los-Angeles"` — LA listings',
                '',
                '**State/city search:**',
                '- `webpeel "https://www.redfin.com/NY/New-York"` — NYC',
                '- `webpeel "https://www.redfin.com/CA/San-Francisco"` — SF',
                '',
                '*Redfin uses live MLS data — no bot detection blocks WebPeel.*',
            ].join('\n'),
        };
    }
    catch (e) {
        if (process.env.DEBUG)
            console.debug('[webpeel]', 'Redfin extractor error:', e instanceof Error ? e.message : e);
        return null;
    }
}
@@ -0,0 +1,2 @@
1
+ import type { DomainExtractResult } from './types.js';
2
+ export declare function semanticScholarExtractor(_html: string, url: string): Promise<DomainExtractResult | null>;
@@ -0,0 +1,131 @@
1
+ import { fetchJson } from './shared.js';
2
+ // ---------------------------------------------------------------------------
3
+ // Semantic Scholar extractor (Semantic Scholar API — free, no key needed)
4
+ // ---------------------------------------------------------------------------
5
/**
 * Semantic Scholar domain extractor backed by the public Graph API.
 *
 * Handles two URL shapes:
 *   - `/paper/[<slug>/]<40-hex-id>` → a single paper summary (`type: 'paper'`)
 *   - `/search?q=...` (or `?query=...`) → a table of the first 10 hits (`type: 'search'`)
 *
 * @param _html Unused; kept for the common extractor signature.
 * @param url   Absolute Semantic Scholar URL (must be parseable by `new URL`).
 * @returns The extraction result, or `null` when the URL is not recognised,
 *          the API call fails, the API reports rate limiting, or the payload
 *          lacks the expected fields.
 */
export async function semanticScholarExtractor(_html, url) {
    const parsedUrl = new URL(url);
    const pathname = parsedUrl.pathname;
    const domain = 'semanticscholar.org';
    // Rate limiting may be reported inside the JSON body; callers treat it as "no result".
    const isThrottled = (body) => body.code === '429' || (body.message && String(body.message).includes('Too Many Requests'));

    // --- Paper page: /paper/<title-slug>/<paperId> ---
    const paperHit = /^\/paper\/(?:[^/]+\/)?([a-f0-9]{40})/i.exec(pathname);
    if (paperHit) {
        const paperId = paperHit[1];
        try {
            const fields = 'title,abstract,authors,year,citationCount,referenceCount,url,openAccessPdf,venue,publicationDate,tldr';
            const data = await fetchJson(`https://api.semanticscholar.org/graph/v1/paper/${paperId}?fields=${fields}`);
            if (!data || isThrottled(data) || !data.title) {
                return null;
            }
            const authorNames = (data.authors || []).map((author) => author.name);
            // Show at most five names, then a "+N more" suffix.
            const hiddenAuthors = authorNames.length - 5;
            const authorLine = hiddenAuthors > 0
                ? `${authorNames.slice(0, 5).join(', ')} (+${hiddenAuthors} more)`
                : authorNames.join(', ');
            const pdfUrl = data.openAccessPdf?.url || null;
            const tldrText = data.tldr?.text || null;
            const citStr = data.citationCount != null ? data.citationCount.toLocaleString() : '?';
            const lines = [
                `# 📄 ${data.title}`,
                '',
                `**Authors:** ${authorLine}`,
                `**Year:** ${data.year || '?'} | **Venue:** ${data.venue || 'N/A'} | **Citations:** ${citStr}`,
            ];
            if (data.referenceCount != null) {
                lines.push(`**References:** ${data.referenceCount.toLocaleString()}`);
            }
            if (tldrText) {
                lines.push('', '## TL;DR', '', tldrText);
            }
            if (data.abstract) {
                lines.push('', '## Abstract', '', data.abstract);
            }
            lines.push('');
            if (pdfUrl) {
                lines.push(`**PDF:** [Open Access](${pdfUrl})`);
            }
            lines.push(`**Link:** [Semantic Scholar](${data.url || `https://www.semanticscholar.org/paper/${paperId}`})`);
            return {
                domain,
                type: 'paper',
                structured: {
                    paperId,
                    title: data.title,
                    authors: authorNames,
                    year: data.year,
                    venue: data.venue,
                    citationCount: data.citationCount,
                    referenceCount: data.referenceCount,
                    abstract: data.abstract,
                    tldr: tldrText,
                    pdfUrl,
                    url: data.url,
                    publicationDate: data.publicationDate,
                },
                cleanContent: lines.join('\n'),
            };
        }
        catch (e) {
            if (process.env.DEBUG)
                console.debug('[webpeel]', 'Semantic Scholar paper API failed:', e instanceof Error ? e.message : e);
            return null;
        }
    }

    // --- Search page: /search?q=... ---
    const query = parsedUrl.searchParams.get('q') || parsedUrl.searchParams.get('query');
    const onSearchPage = pathname === '/search' || pathname.startsWith('/search/');
    if (!onSearchPage || !query) {
        return null;
    }
    try {
        const fields = 'title,authors,year,citationCount,url,openAccessPdf';
        const data = await fetchJson(`https://api.semanticscholar.org/graph/v1/paper/search?query=${encodeURIComponent(query)}&limit=10&fields=${fields}`);
        // Rate limited or no data — return null so pipeline falls back to browser rendering
        if (!data || isThrottled(data) || !Array.isArray(data.data)) {
            return null;
        }
        const papers = data.data;
        const total = data.total || 0;
        // One markdown table row per hit; only the first author is named.
        const toRow = (p, i) => {
            const authors = p.authors || [];
            let authorLine = '—';
            if (authors.length === 1) {
                authorLine = authors[0].name;
            }
            else if (authors.length > 1) {
                authorLine = `${authors[0].name} et al.`;
            }
            const paperUrl = p.url || `https://www.semanticscholar.org/paper/${p.paperId}`;
            const cits = p.citationCount != null ? p.citationCount.toLocaleString() : '?';
            return `| ${i + 1} | [${p.title}](${paperUrl}) | ${p.year || '?'} | ${cits} | ${authorLine} |`;
        };
        const cleanContent = [
            `# 🔍 Semantic Scholar — "${query}"`,
            '',
            '| # | Paper | Year | Citations | Authors |',
            '|---|-------|------|-----------|---------|',
            papers.map(toRow).join('\n'),
            '',
            `*Source: Semantic Scholar API · Total results: ${total.toLocaleString()}*`,
        ].join('\n');
        return {
            domain,
            type: 'search',
            structured: { query, total, papers },
            cleanContent,
        };
    }
    catch (e) {
        if (process.env.DEBUG)
            console.debug('[webpeel]', 'Semantic Scholar search API failed:', e instanceof Error ? e.message : e);
        return null;
    }
}
@@ -0,0 +1,12 @@
1
+ import { simpleFetch } from '../../core/fetcher.js';
2
+ export { simpleFetch };
3
+ export declare function tryParseJson(text: string): any;
4
+ /** Strip HTML tags from a string. */
5
+ export declare function stripHtml(str: string): string;
6
+ /** Format a Unix timestamp (seconds) as ISO 8601. */
7
+ export declare function unixToIso(sec: number): string;
8
+ /** Fetch JSON from a URL using simpleFetch (reuses WebPeel's HTTP stack). */
9
+ export declare function fetchJson(url: string, customHeaders?: Record<string, string>): Promise<any>;
10
+ /** Fetch JSON with exponential backoff retry on 429 / rate-limit errors. */
11
+ export declare function fetchJsonWithRetry(url: string, headers?: Record<string, string>, retries?: number, baseDelayMs?: number): Promise<any>;
12
+ /** Recursively search an object for a value matching predicate (BFS). */
@@ -0,0 +1,76 @@
1
+ import { simpleFetch } from '../../core/fetcher.js';
2
+ export { simpleFetch };
3
/**
 * Parse `text` as JSON without throwing.
 *
 * @param text Raw JSON text.
 * @returns The parsed value, or `null` when `text` is not valid JSON
 *          (note that the JSON literal `null` also yields `null`).
 */
export function tryParseJson(text) {
    let value = null;
    try {
        value = JSON.parse(text);
    }
    catch {
        // Invalid JSON: keep the null default.
    }
    return value;
}
11
/**
 * Strip HTML tags from a string and decode a small set of common entities
 * (`&lt;`, `&gt;`, `&quot;`, `&#39;`, `&nbsp;`, `&amp;`), then trim.
 *
 * `&amp;` is decoded LAST so that already-escaped text such as `&amp;lt;`
 * becomes the literal `&lt;` rather than being decoded twice into `<`.
 *
 * @param str HTML fragment.
 * @returns Plain text with tags removed and the entities above decoded.
 */
export function stripHtml(str) {
    return str
        .replace(/<[^>]+>/g, '')
        .replace(/&lt;/g, '<')
        .replace(/&gt;/g, '>')
        .replace(/&quot;/g, '"')
        .replace(/&#39;/g, "'")
        .replace(/&nbsp;/g, ' ')
        // Must stay last: decoding it earlier would create new entities for the steps above.
        .replace(/&amp;/g, '&')
        .trim();
}
15
/**
 * Convert a Unix timestamp expressed in seconds to an ISO 8601 string.
 *
 * @param sec Seconds since the Unix epoch.
 * @returns The UTC timestamp as produced by `Date.prototype.toISOString`.
 */
export function unixToIso(sec) {
    const millis = sec * 1000;
    const instant = new Date(millis);
    return instant.toISOString();
}
19
/**
 * Fetch a URL with the platform `fetch` and parse the response body as JSON.
 *
 * Plain `fetch` is used on purpose instead of `simpleFetch`: simpleFetch adds
 * stealth browser headers (Sec-CH-UA, Sec-Fetch-*, etc.) which confuse API
 * endpoints like api.github.com into returning HTML.
 *
 * The whole exchange, including reading the body, is bounded by a 15 s abort
 * timer; the timer is released in `finally` on every exit path.
 *
 * @param url           Absolute URL to request.
 * @param customHeaders Optional headers merged over the default
 *                      `User-Agent` / `Accept` headers.
 * @returns The parsed JSON value, or `null` when the body is not valid JSON
 *          (a warning is logged if the body was non-empty).
 * @throws Error with `statusCode = 429` when the server answers 429, and
 *         whatever `fetch` / body reading rejects with (network failure, abort).
 */
export async function fetchJson(url, customHeaders) {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), 15000);
    try {
        const resp = await fetch(url, {
            headers: {
                // NOTE(review): the version in this User-Agent is hard-coded.
                'User-Agent': 'webpeel/0.21 (https://webpeel.dev)',
                'Accept': 'application/json',
                ...customHeaders,
            },
            signal: controller.signal,
            redirect: 'follow',
        });
        // Surface 429 as a thrown error so callers can detect rate-limiting
        // and the cache wrapper can serve stale results instead of garbage.
        if (resp.status === 429) {
            throw Object.assign(new Error(`429 Too Many Requests: ${url}`), { statusCode: 429 });
        }
        // The abort timer is still armed here so a stalled body cannot hang forever.
        const text = await resp.text();
        const parsed = tryParseJson(text);
        if (parsed === null && text.length > 0) {
            console.warn(`[webpeel:fetchJson] Non-JSON response from ${url} (${text.length} bytes, status: ${resp.status}): ${text.slice(0, 120)}`);
        }
        return parsed;
    }
    finally {
        clearTimeout(timer);
    }
}
56
/**
 * Call `fetchJson`, retrying with exponential backoff when the failure looks
 * like rate limiting (error message contains `429`, `rate` or `Too Many`).
 *
 * @param url         Absolute URL to request.
 * @param headers     Optional headers forwarded to `fetchJson`.
 * @param retries     Maximum number of retries after the first attempt (default 2).
 * @param baseDelayMs Delay before the first retry; doubled on each further retry (default 1000).
 * @returns Whatever `fetchJson` resolves with.
 * @throws The last error once retries are exhausted, or immediately for
 *         errors that do not look like rate limiting.
 */
export async function fetchJsonWithRetry(url, headers, retries = 2, baseDelayMs = 1000) {
    let attempt = 0;
    while (attempt <= retries) {
        try {
            return await fetchJson(url, headers);
        }
        catch (e) {
            // Out of retries: propagate without inspecting the error.
            if (attempt >= retries) {
                throw e;
            }
            const message = e.message;
            const looksRateLimited = message?.includes('429') || message?.includes('rate') || message?.includes('Too Many');
            if (!looksRateLimited) {
                throw e;
            }
            const waitMs = baseDelayMs * 2 ** attempt;
            await new Promise((resolve) => setTimeout(resolve, waitMs));
            attempt += 1;
        }
    }
}
73
+ // ---------------------------------------------------------------------------
74
+ // 1. Twitter / X extractor
75
+ // ---------------------------------------------------------------------------
76
+ /** Recursively search an object for a value matching predicate (BFS). */
@@ -0,0 +1,2 @@
1
+ import type { DomainExtractResult } from './types.js';
2
+ export declare function soundcloudExtractor(_html: string, url: string): Promise<DomainExtractResult | null>;
@@ -0,0 +1,34 @@
1
+ import { fetchJson } from './shared.js';
2
+ // ---------------------------------------------------------------------------
3
+ // 28. SoundCloud extractor (oEmbed)
4
+ // ---------------------------------------------------------------------------
5
/**
 * SoundCloud domain extractor backed by the oEmbed endpoint.
 *
 * The content type is derived from the URL path: any `sets` segment means
 * `playlist`, otherwise two or more segments mean `track`, else `profile`.
 *
 * @param _html Unused; kept for the common extractor signature.
 * @param url   SoundCloud page URL.
 * @returns The extraction result, or `null` when oEmbed returns no title or
 *          any step throws.
 */
export async function soundcloudExtractor(_html, url) {
    try {
        const endpoint = `https://soundcloud.com/oembed?url=${encodeURIComponent(url)}&format=json`;
        const data = await fetchJson(endpoint);
        if (!data?.title) {
            return null;
        }
        const segments = new URL(url).pathname.split('/').filter((part) => part.length > 0);
        let contentType = 'profile';
        if (segments.includes('sets')) {
            contentType = 'playlist';
        }
        else if (segments.length >= 2) {
            contentType = 'track';
        }
        const structured = {
            title: data.title,
            author: data.author_name || '',
            authorUrl: data.author_url || '',
            thumbnailUrl: data.thumbnail_url || '',
            description: data.description || '',
            contentType,
            provider: 'SoundCloud',
        };
        const emojiByType = { track: '🎵', playlist: '📋', profile: '🎤' };
        const typeEmoji = emojiByType[contentType];
        // Description is optional and capped at 500 characters.
        let descLine = '';
        if (structured.description) {
            descLine = `\n\n${structured.description.substring(0, 500)}`;
        }
        const cleanContent = `## ${typeEmoji} SoundCloud ${contentType}: ${structured.title}${descLine}\n\n**Artist:** [${structured.author}](${structured.authorUrl})\n**URL:** ${url}`;
        return { domain: 'soundcloud.com', type: contentType, structured, cleanContent };
    }
    catch (e) {
        if (process.env.DEBUG)
            console.debug('[webpeel]', 'SoundCloud oEmbed failed:', e instanceof Error ? e.message : e);
        return null;
    }
}
@@ -0,0 +1,2 @@
1
+ import type { DomainExtractResult } from './types.js';
2
+ export declare function sportsBettingExtractor(_html: string, url: string): Promise<DomainExtractResult | null>;
@@ -0,0 +1,37 @@
1
+ // ---------------------------------------------------------------------------
2
+ // 37. Sports betting sites — helpful redirect message
3
+ // ---------------------------------------------------------------------------
4
/**
 * Sports-betting "extractor": performs no scraping and always returns a
 * `blocked` result that explains why and lists alternative sources.
 *
 * The brand name and `domain` are derived from the URL hostname (with the first
 * `www.` and `sportsbook.` occurrences removed); an unparseable URL keeps the
 * generic defaults.
 *
 * @param _html Unused; kept for the common extractor signature.
 * @param url   URL of the sportsbook page.
 * @returns A `{ domain, type: 'blocked', structured, cleanContent }` result.
 */
export async function sportsBettingExtractor(_html, url) {
    let brandName = 'Sports Betting Site';
    let domain = 'sportsbook';
    try {
        const host = new URL(url).hostname.replace('www.', '').replace('sportsbook.', '');
        domain = host;
        // First matching needle wins, in this order.
        const knownBrands = [
            ['draftkings', 'DraftKings Sportsbook'],
            ['fanduel', 'FanDuel Sportsbook'],
            ['betmgm', 'BetMGM Sportsbook'],
        ];
        const hit = knownBrands.find(([needle]) => host.includes(needle));
        if (hit) {
            brandName = hit[1];
        }
    }
    catch { /* ignore */ }
    const cleanContent = [
        `# ⚠️ ${brandName}`,
        '',
        `${brandName} requires authentication and geo-verification. WebPeel cannot scrape live odds directly.`,
        '',
        '**For live sports odds, use these alternatives:**',
        '- `webpeel "https://espn.com/nba/scoreboard"` — Live scores and schedules',
        '- `webpeel "https://polymarket.com"` — Prediction market prices',
        '- The Odds API (theOddsApi.com) — Aggregated odds from all sportsbooks (requires API key)',
        '',
        '**For team schedules and standings:**',
        '- `webpeel "https://espn.com/nba/standings"` — NBA standings',
        '- `webpeel "https://espn.com/nfl/scoreboard"` — NFL scores',
        '- `webpeel "https://espn.com/mlb/scoreboard"` — MLB scores',
    ].join('\n');
    return {
        domain,
        type: 'blocked',
        structured: { site: brandName, reason: 'authentication and geo-verification required' },
        cleanContent,
    };
}
@@ -0,0 +1,2 @@
1
+ import type { DomainExtractResult } from './types.js';
2
+ export declare function spotifyExtractor(_html: string, url: string): Promise<DomainExtractResult | null>;
@@ -0,0 +1,34 @@
1
+ import { fetchJson } from './shared.js';
2
+ // ---------------------------------------------------------------------------
3
+ // 21. Spotify extractor (oEmbed)
4
+ // ---------------------------------------------------------------------------
5
/**
 * Spotify domain extractor backed by the oEmbed endpoint.
 *
 * The content type and Spotify ID are parsed from the page URL. Share links
 * may carry a locale segment (`open.spotify.com/intl-de/album/...`); that
 * segment is skipped so such links are not misreported as `track` with an
 * empty ID. URLs that match no known type still default to `track`.
 *
 * @param _html Unused; kept for the common extractor signature.
 * @param url   Spotify page URL.
 * @returns The extraction result, or `null` when oEmbed returns no title or
 *          any step throws.
 */
export async function spotifyExtractor(_html, url) {
    try {
        const oembedUrl = `https://open.spotify.com/oembed?url=${encodeURIComponent(url)}`;
        const data = await fetchJson(oembedUrl);
        if (!data || !data.title)
            return null;
        // Detect type from URL path: track, album, playlist, episode, show, artist.
        // An optional `intl-xx/` locale prefix is tolerated before the type segment.
        const pathMatch = url.match(/open\.spotify\.com\/(?:intl-[A-Za-z-]+\/)?(track|album|playlist|episode|show|artist)\/([A-Za-z0-9]+)/);
        const contentType = pathMatch?.[1] || 'track';
        const spotifyId = pathMatch?.[2] || '';
        const structured = {
            title: data.title,
            type: contentType,
            spotifyId,
            provider: 'Spotify',
            thumbnailUrl: data.thumbnail_url || '',
            thumbnailWidth: data.thumbnail_width || 0,
            thumbnailHeight: data.thumbnail_height || 0,
            embedHtml: data.html || '',
        };
        // Types without a dedicated emoji (e.g. `show`) use the track emoji.
        const emojiByType = { track: '🎵', album: '💿', playlist: '📋', episode: '🎙️', artist: '🎤' };
        const typeEmoji = emojiByType[contentType] ?? '🎵';
        const cleanContent = `## ${typeEmoji} Spotify ${contentType.charAt(0).toUpperCase() + contentType.slice(1)}: ${data.title}\n\n**Platform:** Spotify\n**Type:** ${contentType}\n**URL:** ${url}`;
        return { domain: 'open.spotify.com', type: contentType, structured, cleanContent };
    }
    catch (e) {
        if (process.env.DEBUG)
            console.debug('[webpeel]', 'Spotify oEmbed failed:', e instanceof Error ? e.message : e);
        return null;
    }
}
@@ -0,0 +1,2 @@
1
+ import type { DomainExtractResult } from './types.js';
2
+ export declare function stackOverflowExtractor(_html: string, url: string): Promise<DomainExtractResult | null>;
@@ -0,0 +1,61 @@
1
+ import { stripHtml, fetchJson } from './shared.js';
2
+ // ---------------------------------------------------------------------------
3
+ // 8. Stack Overflow extractor (StackExchange API)
4
+ // ---------------------------------------------------------------------------
5
/**
 * Stack Overflow domain extractor backed by the StackExchange API (v2.3).
 *
 * Only question URLs (`/questions/<id>/...`) are handled. The question is
 * fetched first; up to five answers (sorted by votes) are fetched as an
 * optional second step whose failure is ignored.
 *
 * @param _html Unused; kept for the common extractor signature.
 * @param url   Absolute question URL (must be parseable by `new URL`).
 * @returns The extraction result, or `null` when the URL is not a question
 *          page, the API returns no item, or the question request throws.
 */
export async function stackOverflowExtractor(_html, url) {
    const { pathname } = new URL(url);
    // Match /questions/12345/optional-slug
    const idMatch = /\/questions\/(\d+)/.exec(pathname);
    if (idMatch === null) {
        return null;
    }
    const questionId = idMatch[1];
    try {
        const data = await fetchJson(`https://api.stackexchange.com/2.3/questions/${questionId}?order=desc&sort=votes&site=stackoverflow&filter=withbody`);
        const q = data?.items?.[0];
        if (!q) {
            return null;
        }
        // Answers are a nice-to-have: any failure here leaves the list empty.
        let rawAnswers = [];
        try {
            const answersData = await fetchJson(`https://api.stackexchange.com/2.3/questions/${questionId}/answers?order=desc&sort=votes&site=stackoverflow&filter=withbody&pagesize=5`);
            rawAnswers = answersData?.items || [];
        }
        catch { /* answers optional */ }
        const toAnswer = (a) => ({
            id: a.answer_id,
            score: a.score,
            isAccepted: a.is_accepted || false,
            body: stripHtml(a.body || '').substring(0, 2000),
            author: a.owner?.display_name || 'anonymous',
        });
        const structured = {
            title: stripHtml(q.title || ''),
            questionId: q.question_id,
            score: q.score || 0,
            views: q.view_count || 0,
            answerCount: q.answer_count || 0,
            isAnswered: q.is_answered || false,
            tags: q.tags || [],
            askedBy: q.owner?.display_name || 'anonymous',
            askedDate: q.creation_date ? new Date(q.creation_date * 1000).toISOString() : undefined,
            acceptedAnswerId: q.accepted_answer_id || null,
            answers: rawAnswers.map(toAnswer),
        };
        const questionBody = stripHtml(q.body || '').substring(0, 3000);
        const tagLine = structured.tags.length ? `**Tags:** ${structured.tags.join(', ')}` : '';
        // Only the top three answers are rendered into the markdown.
        const answersContent = structured.answers
            .slice(0, 3)
            .map((a) => {
                const acceptedMark = a.isAccepted ? ' ✅ Accepted' : '';
                return `\n\n---\n\n### Answer by ${a.author} (Score: ${a.score}${acceptedMark})\n\n${a.body}`;
            })
            .join('');
        const cleanContent = `# ${structured.title}\n\n**Score:** ${structured.score} | **Views:** ${structured.views?.toLocaleString()} | **Answers:** ${structured.answerCount}\n${tagLine}\n**Asked by:** ${structured.askedBy}\n\n## Question\n\n${questionBody}${answersContent}`;
        return { domain: 'stackoverflow.com', type: 'question', structured, cleanContent };
    }
    catch (e) {
        if (process.env.DEBUG)
            console.debug('[webpeel]', 'StackOverflow API failed:', e instanceof Error ? e.message : e);
        return null;
    }
}
@@ -0,0 +1,2 @@
1
+ import type { DomainExtractResult } from './types.js';
2
+ export declare function substackExtractor(html: string, url: string): Promise<DomainExtractResult | null>;