@iflow-mcp/jakeliume-webpeel 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (547) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +313 -0
  3. package/dist/cache.d.ts +30 -0
  4. package/dist/cache.js +139 -0
  5. package/dist/cli/commands/auth.d.ts +5 -0
  6. package/dist/cli/commands/auth.js +411 -0
  7. package/dist/cli/commands/doctor.d.ts +37 -0
  8. package/dist/cli/commands/doctor.js +371 -0
  9. package/dist/cli/commands/fetch.d.ts +6 -0
  10. package/dist/cli/commands/fetch.js +1345 -0
  11. package/dist/cli/commands/guide.d.ts +2 -0
  12. package/dist/cli/commands/guide.js +183 -0
  13. package/dist/cli/commands/interact.d.ts +5 -0
  14. package/dist/cli/commands/interact.js +840 -0
  15. package/dist/cli/commands/jobs.d.ts +5 -0
  16. package/dist/cli/commands/jobs.js +997 -0
  17. package/dist/cli/commands/monitor.d.ts +12 -0
  18. package/dist/cli/commands/monitor.js +197 -0
  19. package/dist/cli/commands/observe.d.ts +12 -0
  20. package/dist/cli/commands/observe.js +158 -0
  21. package/dist/cli/commands/screenshot.d.ts +5 -0
  22. package/dist/cli/commands/screenshot.js +282 -0
  23. package/dist/cli/commands/search.d.ts +5 -0
  24. package/dist/cli/commands/search.js +1021 -0
  25. package/dist/cli/commands/setup.d.ts +13 -0
  26. package/dist/cli/commands/setup.js +244 -0
  27. package/dist/cli/commands/skill.d.ts +15 -0
  28. package/dist/cli/commands/skill.js +195 -0
  29. package/dist/cli/utils.d.ts +84 -0
  30. package/dist/cli/utils.js +806 -0
  31. package/dist/cli-auth.d.ts +75 -0
  32. package/dist/cli-auth.js +369 -0
  33. package/dist/cli.d.ts +17 -0
  34. package/dist/cli.js +99 -0
  35. package/dist/core/actions.d.ts +69 -0
  36. package/dist/core/actions.js +495 -0
  37. package/dist/core/agent.d.ts +98 -0
  38. package/dist/core/agent.js +558 -0
  39. package/dist/core/answer.d.ts +42 -0
  40. package/dist/core/answer.js +395 -0
  41. package/dist/core/application-tracker.d.ts +84 -0
  42. package/dist/core/application-tracker.js +184 -0
  43. package/dist/core/apply.d.ts +162 -0
  44. package/dist/core/apply.js +816 -0
  45. package/dist/core/auth-detection.d.ts +35 -0
  46. package/dist/core/auth-detection.js +358 -0
  47. package/dist/core/auto-extract.d.ts +82 -0
  48. package/dist/core/auto-extract.js +604 -0
  49. package/dist/core/auto-interact.d.ts +23 -0
  50. package/dist/core/auto-interact.js +246 -0
  51. package/dist/core/bm25-filter.d.ts +66 -0
  52. package/dist/core/bm25-filter.js +288 -0
  53. package/dist/core/branding.d.ts +54 -0
  54. package/dist/core/branding.js +234 -0
  55. package/dist/core/browser-fetch.d.ts +323 -0
  56. package/dist/core/browser-fetch.js +1600 -0
  57. package/dist/core/browser-pool.d.ts +91 -0
  58. package/dist/core/browser-pool.js +550 -0
  59. package/dist/core/budget.d.ts +42 -0
  60. package/dist/core/budget.js +324 -0
  61. package/dist/core/business-intel.d.ts +47 -0
  62. package/dist/core/business-intel.js +279 -0
  63. package/dist/core/cache.d.ts +13 -0
  64. package/dist/core/cache.js +121 -0
  65. package/dist/core/cf-worker-proxy.d.ts +32 -0
  66. package/dist/core/cf-worker-proxy.js +87 -0
  67. package/dist/core/challenge-detection.d.ts +26 -0
  68. package/dist/core/challenge-detection.js +468 -0
  69. package/dist/core/change-tracking.d.ts +75 -0
  70. package/dist/core/change-tracking.js +276 -0
  71. package/dist/core/chunker.d.ts +46 -0
  72. package/dist/core/chunker.js +249 -0
  73. package/dist/core/chunking.d.ts +42 -0
  74. package/dist/core/chunking.js +181 -0
  75. package/dist/core/circuit-breaker.d.ts +44 -0
  76. package/dist/core/circuit-breaker.js +85 -0
  77. package/dist/core/content-pruner.d.ts +47 -0
  78. package/dist/core/content-pruner.js +425 -0
  79. package/dist/core/cookie-cache.d.ts +60 -0
  80. package/dist/core/cookie-cache.js +163 -0
  81. package/dist/core/crawl-checkpoint.d.ts +54 -0
  82. package/dist/core/crawl-checkpoint.js +104 -0
  83. package/dist/core/crawler.d.ts +84 -0
  84. package/dist/core/crawler.js +349 -0
  85. package/dist/core/cross-verify.d.ts +27 -0
  86. package/dist/core/cross-verify.js +93 -0
  87. package/dist/core/deep-fetch.d.ts +74 -0
  88. package/dist/core/deep-fetch.js +405 -0
  89. package/dist/core/deep-research.d.ts +141 -0
  90. package/dist/core/deep-research.js +972 -0
  91. package/dist/core/design-analysis.d.ts +70 -0
  92. package/dist/core/design-analysis.js +490 -0
  93. package/dist/core/design-compare.d.ts +38 -0
  94. package/dist/core/design-compare.js +264 -0
  95. package/dist/core/diff.d.ts +61 -0
  96. package/dist/core/diff.js +289 -0
  97. package/dist/core/dns-cache.d.ts +20 -0
  98. package/dist/core/dns-cache.js +198 -0
  99. package/dist/core/documents.d.ts +23 -0
  100. package/dist/core/documents.js +123 -0
  101. package/dist/core/domain-memory.d.ts +66 -0
  102. package/dist/core/domain-memory.js +163 -0
  103. package/dist/core/domain-verify.d.ts +40 -0
  104. package/dist/core/domain-verify.js +379 -0
  105. package/dist/core/engine-ranker.d.ts +112 -0
  106. package/dist/core/engine-ranker.js +395 -0
  107. package/dist/core/extract-inline.d.ts +38 -0
  108. package/dist/core/extract-inline.js +215 -0
  109. package/dist/core/extract-listings.d.ts +38 -0
  110. package/dist/core/extract-listings.js +461 -0
  111. package/dist/core/extract.d.ts +9 -0
  112. package/dist/core/extract.js +139 -0
  113. package/dist/core/fetch-cache.d.ts +57 -0
  114. package/dist/core/fetch-cache.js +95 -0
  115. package/dist/core/fetcher.d.ts +13 -0
  116. package/dist/core/fetcher.js +12 -0
  117. package/dist/core/google-cache.d.ts +29 -0
  118. package/dist/core/google-cache.js +180 -0
  119. package/dist/core/google-serp-parser.d.ts +82 -0
  120. package/dist/core/google-serp-parser.js +287 -0
  121. package/dist/core/hotel-search.d.ts +122 -0
  122. package/dist/core/hotel-search.js +382 -0
  123. package/dist/core/http-fetch.d.ts +72 -0
  124. package/dist/core/http-fetch.js +820 -0
  125. package/dist/core/human.d.ts +175 -0
  126. package/dist/core/human.js +680 -0
  127. package/dist/core/image-caption.d.ts +44 -0
  128. package/dist/core/image-caption.js +271 -0
  129. package/dist/core/jobs.d.ts +75 -0
  130. package/dist/core/jobs.js +634 -0
  131. package/dist/core/json-ld.d.ts +15 -0
  132. package/dist/core/json-ld.js +617 -0
  133. package/dist/core/language-detect.d.ts +18 -0
  134. package/dist/core/language-detect.js +135 -0
  135. package/dist/core/links.d.ts +10 -0
  136. package/dist/core/links.js +44 -0
  137. package/dist/core/llm-extract.d.ts +71 -0
  138. package/dist/core/llm-extract.js +507 -0
  139. package/dist/core/llm-provider.d.ts +100 -0
  140. package/dist/core/llm-provider.js +702 -0
  141. package/dist/core/local-search.d.ts +60 -0
  142. package/dist/core/local-search.js +308 -0
  143. package/dist/core/logger.d.ts +28 -0
  144. package/dist/core/logger.js +104 -0
  145. package/dist/core/map.d.ts +33 -0
  146. package/dist/core/map.js +127 -0
  147. package/dist/core/markdown.d.ts +92 -0
  148. package/dist/core/markdown.js +809 -0
  149. package/dist/core/metadata.d.ts +34 -0
  150. package/dist/core/metadata.js +422 -0
  151. package/dist/core/observe.d.ts +113 -0
  152. package/dist/core/observe.js +395 -0
  153. package/dist/core/ocr.d.ts +12 -0
  154. package/dist/core/ocr.js +33 -0
  155. package/dist/core/paginate.d.ts +31 -0
  156. package/dist/core/paginate.js +106 -0
  157. package/dist/core/pdf.d.ts +8 -0
  158. package/dist/core/pdf.js +25 -0
  159. package/dist/core/peel-tls.d.ts +25 -0
  160. package/dist/core/peel-tls.js +220 -0
  161. package/dist/core/pipeline.d.ts +132 -0
  162. package/dist/core/pipeline.js +1666 -0
  163. package/dist/core/profiles.d.ts +61 -0
  164. package/dist/core/profiles.js +350 -0
  165. package/dist/core/prompt-guard.d.ts +30 -0
  166. package/dist/core/prompt-guard.js +119 -0
  167. package/dist/core/proxy-config.d.ts +90 -0
  168. package/dist/core/proxy-config.js +172 -0
  169. package/dist/core/quick-answer.d.ts +53 -0
  170. package/dist/core/quick-answer.js +833 -0
  171. package/dist/core/rate-governor.d.ts +80 -0
  172. package/dist/core/rate-governor.js +238 -0
  173. package/dist/core/readability.d.ts +57 -0
  174. package/dist/core/readability.js +533 -0
  175. package/dist/core/research.d.ts +66 -0
  176. package/dist/core/research.js +270 -0
  177. package/dist/core/retry.d.ts +60 -0
  178. package/dist/core/retry.js +119 -0
  179. package/dist/core/safe-browsing.d.ts +30 -0
  180. package/dist/core/safe-browsing.js +206 -0
  181. package/dist/core/schema-extraction.d.ts +66 -0
  182. package/dist/core/schema-extraction.js +352 -0
  183. package/dist/core/schema-postprocess.d.ts +32 -0
  184. package/dist/core/schema-postprocess.js +469 -0
  185. package/dist/core/schema-templates.d.ts +19 -0
  186. package/dist/core/schema-templates.js +143 -0
  187. package/dist/core/screenshot.d.ts +224 -0
  188. package/dist/core/screenshot.js +207 -0
  189. package/dist/core/search-engines.d.ts +25 -0
  190. package/dist/core/search-engines.js +182 -0
  191. package/dist/core/search-provider.d.ts +243 -0
  192. package/dist/core/search-provider.js +1629 -0
  193. package/dist/core/searxng-provider.d.ts +35 -0
  194. package/dist/core/searxng-provider.js +105 -0
  195. package/dist/core/selective-evidence.d.ts +151 -0
  196. package/dist/core/selective-evidence.js +389 -0
  197. package/dist/core/site-search.d.ts +44 -0
  198. package/dist/core/site-search.js +252 -0
  199. package/dist/core/sitemap.d.ts +23 -0
  200. package/dist/core/sitemap.js +105 -0
  201. package/dist/core/source-credibility.d.ts +29 -0
  202. package/dist/core/source-credibility.js +584 -0
  203. package/dist/core/source-scoring.d.ts +166 -0
  204. package/dist/core/source-scoring.js +396 -0
  205. package/dist/core/stemmer.d.ts +38 -0
  206. package/dist/core/stemmer.js +509 -0
  207. package/dist/core/strategies.d.ts +104 -0
  208. package/dist/core/strategies.js +1044 -0
  209. package/dist/core/strategy-hooks.d.ts +145 -0
  210. package/dist/core/strategy-hooks.js +74 -0
  211. package/dist/core/structured-extract.d.ts +43 -0
  212. package/dist/core/structured-extract.js +550 -0
  213. package/dist/core/summarize.d.ts +17 -0
  214. package/dist/core/summarize.js +78 -0
  215. package/dist/core/synonyms.d.ts +42 -0
  216. package/dist/core/synonyms.js +184 -0
  217. package/dist/core/system-monitor.d.ts +61 -0
  218. package/dist/core/system-monitor.js +133 -0
  219. package/dist/core/table-format.d.ts +30 -0
  220. package/dist/core/table-format.js +146 -0
  221. package/dist/core/threat-feeds.d.ts +23 -0
  222. package/dist/core/threat-feeds.js +104 -0
  223. package/dist/core/timing.d.ts +21 -0
  224. package/dist/core/timing.js +33 -0
  225. package/dist/core/transcript-export.d.ts +47 -0
  226. package/dist/core/transcript-export.js +107 -0
  227. package/dist/core/user-agents.d.ts +82 -0
  228. package/dist/core/user-agents.js +239 -0
  229. package/dist/core/vertical-search.d.ts +54 -0
  230. package/dist/core/vertical-search.js +158 -0
  231. package/dist/core/watch-manager.d.ts +175 -0
  232. package/dist/core/watch-manager.js +416 -0
  233. package/dist/core/watch.d.ts +101 -0
  234. package/dist/core/watch.js +389 -0
  235. package/dist/core/youtube.d.ts +130 -0
  236. package/dist/core/youtube.js +1175 -0
  237. package/dist/ee/challenge-re-export.d.ts +1 -0
  238. package/dist/ee/challenge-re-export.js +1 -0
  239. package/dist/ee/challenge-solver.d.ts +72 -0
  240. package/dist/ee/challenge-solver.js +720 -0
  241. package/dist/ee/domain-extractors.d.ts +8 -0
  242. package/dist/ee/domain-extractors.js +8 -0
  243. package/dist/ee/domain-intel.d.ts +16 -0
  244. package/dist/ee/domain-intel.js +133 -0
  245. package/dist/ee/extractors/allrecipes.d.ts +2 -0
  246. package/dist/ee/extractors/allrecipes.js +120 -0
  247. package/dist/ee/extractors/amazon.d.ts +2 -0
  248. package/dist/ee/extractors/amazon.js +78 -0
  249. package/dist/ee/extractors/arxiv.d.ts +2 -0
  250. package/dist/ee/extractors/arxiv.js +137 -0
  251. package/dist/ee/extractors/bestbuy.d.ts +2 -0
  252. package/dist/ee/extractors/bestbuy.js +78 -0
  253. package/dist/ee/extractors/carscom.d.ts +2 -0
  254. package/dist/ee/extractors/carscom.js +121 -0
  255. package/dist/ee/extractors/coingecko.d.ts +2 -0
  256. package/dist/ee/extractors/coingecko.js +134 -0
  257. package/dist/ee/extractors/craigslist.d.ts +2 -0
  258. package/dist/ee/extractors/craigslist.js +92 -0
  259. package/dist/ee/extractors/devto.d.ts +2 -0
  260. package/dist/ee/extractors/devto.js +135 -0
  261. package/dist/ee/extractors/ebay.d.ts +2 -0
  262. package/dist/ee/extractors/ebay.js +90 -0
  263. package/dist/ee/extractors/espn.d.ts +2 -0
  264. package/dist/ee/extractors/espn.js +260 -0
  265. package/dist/ee/extractors/etsy.d.ts +2 -0
  266. package/dist/ee/extractors/etsy.js +52 -0
  267. package/dist/ee/extractors/facebook.d.ts +2 -0
  268. package/dist/ee/extractors/facebook.js +46 -0
  269. package/dist/ee/extractors/github.d.ts +2 -0
  270. package/dist/ee/extractors/github.js +196 -0
  271. package/dist/ee/extractors/google-flights.d.ts +2 -0
  272. package/dist/ee/extractors/google-flights.js +176 -0
  273. package/dist/ee/extractors/hackernews.d.ts +2 -0
  274. package/dist/ee/extractors/hackernews.js +147 -0
  275. package/dist/ee/extractors/imdb.d.ts +2 -0
  276. package/dist/ee/extractors/imdb.js +172 -0
  277. package/dist/ee/extractors/index.d.ts +26 -0
  278. package/dist/ee/extractors/index.js +247 -0
  279. package/dist/ee/extractors/instagram.d.ts +2 -0
  280. package/dist/ee/extractors/instagram.js +102 -0
  281. package/dist/ee/extractors/kalshi.d.ts +2 -0
  282. package/dist/ee/extractors/kalshi.js +121 -0
  283. package/dist/ee/extractors/kayak-cars.d.ts +2 -0
  284. package/dist/ee/extractors/kayak-cars.js +270 -0
  285. package/dist/ee/extractors/linkedin.d.ts +2 -0
  286. package/dist/ee/extractors/linkedin.js +113 -0
  287. package/dist/ee/extractors/medium.d.ts +2 -0
  288. package/dist/ee/extractors/medium.js +130 -0
  289. package/dist/ee/extractors/news.d.ts +4 -0
  290. package/dist/ee/extractors/news.js +173 -0
  291. package/dist/ee/extractors/npm.d.ts +2 -0
  292. package/dist/ee/extractors/npm.js +86 -0
  293. package/dist/ee/extractors/pdf.d.ts +2 -0
  294. package/dist/ee/extractors/pdf.js +108 -0
  295. package/dist/ee/extractors/pinterest.d.ts +2 -0
  296. package/dist/ee/extractors/pinterest.js +34 -0
  297. package/dist/ee/extractors/polymarket.d.ts +2 -0
  298. package/dist/ee/extractors/polymarket.js +358 -0
  299. package/dist/ee/extractors/producthunt.d.ts +2 -0
  300. package/dist/ee/extractors/producthunt.js +88 -0
  301. package/dist/ee/extractors/pubmed.d.ts +2 -0
  302. package/dist/ee/extractors/pubmed.js +162 -0
  303. package/dist/ee/extractors/pypi.d.ts +2 -0
  304. package/dist/ee/extractors/pypi.js +80 -0
  305. package/dist/ee/extractors/reddit.d.ts +2 -0
  306. package/dist/ee/extractors/reddit.js +438 -0
  307. package/dist/ee/extractors/redfin.d.ts +2 -0
  308. package/dist/ee/extractors/redfin.js +156 -0
  309. package/dist/ee/extractors/semanticscholar.d.ts +2 -0
  310. package/dist/ee/extractors/semanticscholar.js +131 -0
  311. package/dist/ee/extractors/shared.d.ts +12 -0
  312. package/dist/ee/extractors/shared.js +76 -0
  313. package/dist/ee/extractors/soundcloud.d.ts +2 -0
  314. package/dist/ee/extractors/soundcloud.js +34 -0
  315. package/dist/ee/extractors/sportsbetting.d.ts +2 -0
  316. package/dist/ee/extractors/sportsbetting.js +37 -0
  317. package/dist/ee/extractors/spotify.d.ts +2 -0
  318. package/dist/ee/extractors/spotify.js +34 -0
  319. package/dist/ee/extractors/stackoverflow.d.ts +2 -0
  320. package/dist/ee/extractors/stackoverflow.js +61 -0
  321. package/dist/ee/extractors/substack.d.ts +2 -0
  322. package/dist/ee/extractors/substack.js +115 -0
  323. package/dist/ee/extractors/substackroot.d.ts +2 -0
  324. package/dist/ee/extractors/substackroot.js +46 -0
  325. package/dist/ee/extractors/tiktok.d.ts +2 -0
  326. package/dist/ee/extractors/tiktok.js +29 -0
  327. package/dist/ee/extractors/tradingview.d.ts +2 -0
  328. package/dist/ee/extractors/tradingview.js +182 -0
  329. package/dist/ee/extractors/twitch.d.ts +2 -0
  330. package/dist/ee/extractors/twitch.js +36 -0
  331. package/dist/ee/extractors/twitter.d.ts +2 -0
  332. package/dist/ee/extractors/twitter.js +327 -0
  333. package/dist/ee/extractors/types.d.ts +14 -0
  334. package/dist/ee/extractors/types.js +1 -0
  335. package/dist/ee/extractors/walmart.d.ts +2 -0
  336. package/dist/ee/extractors/walmart.js +50 -0
  337. package/dist/ee/extractors/weather.d.ts +2 -0
  338. package/dist/ee/extractors/weather.js +133 -0
  339. package/dist/ee/extractors/wikipedia.d.ts +4 -0
  340. package/dist/ee/extractors/wikipedia.js +235 -0
  341. package/dist/ee/extractors/yelp.d.ts +2 -0
  342. package/dist/ee/extractors/yelp.js +216 -0
  343. package/dist/ee/extractors/youtube.d.ts +2 -0
  344. package/dist/ee/extractors/youtube.js +189 -0
  345. package/dist/ee/extractors/zillow.d.ts +54 -0
  346. package/dist/ee/extractors/zillow.js +247 -0
  347. package/dist/ee/extractors-re-export.d.ts +1 -0
  348. package/dist/ee/extractors-re-export.js +1 -0
  349. package/dist/ee/premium-hooks.d.ts +20 -0
  350. package/dist/ee/premium-hooks.js +50 -0
  351. package/dist/ee/spa-detection.d.ts +2 -0
  352. package/dist/ee/spa-detection.js +2 -0
  353. package/dist/ee/stability.d.ts +4 -0
  354. package/dist/ee/stability.js +29 -0
  355. package/dist/ee/swr-cache.d.ts +14 -0
  356. package/dist/ee/swr-cache.js +34 -0
  357. package/dist/index.d.ts +143 -0
  358. package/dist/index.js +291 -0
  359. package/dist/integrations/index.d.ts +2 -0
  360. package/dist/integrations/index.js +2 -0
  361. package/dist/integrations/langchain.d.ts +64 -0
  362. package/dist/integrations/langchain.js +115 -0
  363. package/dist/integrations/llamaindex.d.ts +50 -0
  364. package/dist/integrations/llamaindex.js +91 -0
  365. package/dist/mcp/handlers/act.d.ts +5 -0
  366. package/dist/mcp/handlers/act.js +34 -0
  367. package/dist/mcp/handlers/definitions.d.ts +6 -0
  368. package/dist/mcp/handlers/definitions.js +395 -0
  369. package/dist/mcp/handlers/extract.d.ts +7 -0
  370. package/dist/mcp/handlers/extract.js +135 -0
  371. package/dist/mcp/handlers/fetch.d.ts +6 -0
  372. package/dist/mcp/handlers/fetch.js +98 -0
  373. package/dist/mcp/handlers/find.d.ts +5 -0
  374. package/dist/mcp/handlers/find.js +137 -0
  375. package/dist/mcp/handlers/index.d.ts +13 -0
  376. package/dist/mcp/handlers/index.js +63 -0
  377. package/dist/mcp/handlers/legacy.d.ts +25 -0
  378. package/dist/mcp/handlers/legacy.js +450 -0
  379. package/dist/mcp/handlers/meta.d.ts +6 -0
  380. package/dist/mcp/handlers/meta.js +40 -0
  381. package/dist/mcp/handlers/monitor.d.ts +5 -0
  382. package/dist/mcp/handlers/monitor.js +41 -0
  383. package/dist/mcp/handlers/observe.d.ts +8 -0
  384. package/dist/mcp/handlers/observe.js +37 -0
  385. package/dist/mcp/handlers/read.d.ts +6 -0
  386. package/dist/mcp/handlers/read.js +78 -0
  387. package/dist/mcp/handlers/see.d.ts +5 -0
  388. package/dist/mcp/handlers/see.js +75 -0
  389. package/dist/mcp/handlers/types.d.ts +29 -0
  390. package/dist/mcp/handlers/types.js +28 -0
  391. package/dist/mcp/server.d.ts +7 -0
  392. package/dist/mcp/server.js +108 -0
  393. package/dist/mcp/smart-router.d.ts +23 -0
  394. package/dist/mcp/smart-router.js +178 -0
  395. package/dist/server/app.d.ts +14 -0
  396. package/dist/server/app.js +632 -0
  397. package/dist/server/auth-store.d.ts +28 -0
  398. package/dist/server/auth-store.js +88 -0
  399. package/dist/server/bull-queues.d.ts +60 -0
  400. package/dist/server/bull-queues.js +90 -0
  401. package/dist/server/email-service.d.ts +55 -0
  402. package/dist/server/email-service.js +291 -0
  403. package/dist/server/job-queue.d.ts +100 -0
  404. package/dist/server/job-queue.js +145 -0
  405. package/dist/server/logger.d.ts +10 -0
  406. package/dist/server/logger.js +37 -0
  407. package/dist/server/middleware/audit-log.d.ts +14 -0
  408. package/dist/server/middleware/audit-log.js +73 -0
  409. package/dist/server/middleware/auth.d.ts +35 -0
  410. package/dist/server/middleware/auth.js +225 -0
  411. package/dist/server/middleware/rate-limit.d.ts +50 -0
  412. package/dist/server/middleware/rate-limit.js +270 -0
  413. package/dist/server/middleware/scope-guard.d.ts +25 -0
  414. package/dist/server/middleware/scope-guard.js +45 -0
  415. package/dist/server/middleware/url-validator.d.ts +15 -0
  416. package/dist/server/middleware/url-validator.js +201 -0
  417. package/dist/server/openapi.yaml +6418 -0
  418. package/dist/server/pg-auth-store.d.ts +146 -0
  419. package/dist/server/pg-auth-store.js +576 -0
  420. package/dist/server/pg-job-queue.d.ts +59 -0
  421. package/dist/server/pg-job-queue.js +375 -0
  422. package/dist/server/routes/activity.d.ts +6 -0
  423. package/dist/server/routes/activity.js +79 -0
  424. package/dist/server/routes/admin-active.d.ts +7 -0
  425. package/dist/server/routes/admin-active.js +120 -0
  426. package/dist/server/routes/admin-stats.d.ts +7 -0
  427. package/dist/server/routes/admin-stats.js +176 -0
  428. package/dist/server/routes/agent.d.ts +24 -0
  429. package/dist/server/routes/agent.js +480 -0
  430. package/dist/server/routes/answer.d.ts +5 -0
  431. package/dist/server/routes/answer.js +125 -0
  432. package/dist/server/routes/ask.d.ts +28 -0
  433. package/dist/server/routes/ask.js +295 -0
  434. package/dist/server/routes/batch.d.ts +6 -0
  435. package/dist/server/routes/batch.js +493 -0
  436. package/dist/server/routes/cache-warm.d.ts +25 -0
  437. package/dist/server/routes/cache-warm.js +212 -0
  438. package/dist/server/routes/cli-usage.d.ts +6 -0
  439. package/dist/server/routes/cli-usage.js +127 -0
  440. package/dist/server/routes/compat.d.ts +23 -0
  441. package/dist/server/routes/compat.js +652 -0
  442. package/dist/server/routes/crawl.d.ts +13 -0
  443. package/dist/server/routes/crawl.js +287 -0
  444. package/dist/server/routes/deep-fetch.d.ts +8 -0
  445. package/dist/server/routes/deep-fetch.js +57 -0
  446. package/dist/server/routes/deep-research.d.ts +11 -0
  447. package/dist/server/routes/deep-research.js +232 -0
  448. package/dist/server/routes/demo.d.ts +24 -0
  449. package/dist/server/routes/demo.js +517 -0
  450. package/dist/server/routes/do.d.ts +8 -0
  451. package/dist/server/routes/do.js +72 -0
  452. package/dist/server/routes/extract.d.ts +14 -0
  453. package/dist/server/routes/extract.js +325 -0
  454. package/dist/server/routes/feed.d.ts +15 -0
  455. package/dist/server/routes/feed.js +311 -0
  456. package/dist/server/routes/fetch-queue.d.ts +13 -0
  457. package/dist/server/routes/fetch-queue.js +357 -0
  458. package/dist/server/routes/fetch.d.ts +7 -0
  459. package/dist/server/routes/fetch.js +1274 -0
  460. package/dist/server/routes/go.d.ts +14 -0
  461. package/dist/server/routes/go.js +81 -0
  462. package/dist/server/routes/health.d.ts +11 -0
  463. package/dist/server/routes/health.js +141 -0
  464. package/dist/server/routes/jobs.d.ts +7 -0
  465. package/dist/server/routes/jobs.js +574 -0
  466. package/dist/server/routes/map.d.ts +11 -0
  467. package/dist/server/routes/map.js +116 -0
  468. package/dist/server/routes/mcp.d.ts +14 -0
  469. package/dist/server/routes/mcp.js +197 -0
  470. package/dist/server/routes/metrics.d.ts +37 -0
  471. package/dist/server/routes/metrics.js +149 -0
  472. package/dist/server/routes/oauth.d.ts +9 -0
  473. package/dist/server/routes/oauth.js +396 -0
  474. package/dist/server/routes/playground.d.ts +17 -0
  475. package/dist/server/routes/playground.js +283 -0
  476. package/dist/server/routes/reader.d.ts +18 -0
  477. package/dist/server/routes/reader.js +192 -0
  478. package/dist/server/routes/research.d.ts +14 -0
  479. package/dist/server/routes/research.js +482 -0
  480. package/dist/server/routes/screenshot.d.ts +22 -0
  481. package/dist/server/routes/screenshot.js +820 -0
  482. package/dist/server/routes/search.d.ts +6 -0
  483. package/dist/server/routes/search.js +874 -0
  484. package/dist/server/routes/session.d.ts +17 -0
  485. package/dist/server/routes/session.js +548 -0
  486. package/dist/server/routes/share.d.ts +18 -0
  487. package/dist/server/routes/share.js +462 -0
  488. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  489. package/dist/server/routes/smart-search/handlers/cars.js +102 -0
  490. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  491. package/dist/server/routes/smart-search/handlers/flights.js +72 -0
  492. package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
  493. package/dist/server/routes/smart-search/handlers/general.js +717 -0
  494. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  495. package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
  496. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  497. package/dist/server/routes/smart-search/handlers/products.js +1309 -0
  498. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  499. package/dist/server/routes/smart-search/handlers/rental.js +154 -0
  500. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  501. package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
  502. package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
  503. package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
  504. package/dist/server/routes/smart-search/index.d.ts +19 -0
  505. package/dist/server/routes/smart-search/index.js +546 -0
  506. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  507. package/dist/server/routes/smart-search/intent.js +264 -0
  508. package/dist/server/routes/smart-search/llm.d.ts +16 -0
  509. package/dist/server/routes/smart-search/llm.js +70 -0
  510. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  511. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  512. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  513. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  514. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  515. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  516. package/dist/server/routes/smart-search/types.d.ts +81 -0
  517. package/dist/server/routes/smart-search/types.js +1 -0
  518. package/dist/server/routes/smart-search/utils.d.ts +20 -0
  519. package/dist/server/routes/smart-search/utils.js +146 -0
  520. package/dist/server/routes/stats.d.ts +6 -0
  521. package/dist/server/routes/stats.js +71 -0
  522. package/dist/server/routes/stripe.d.ts +15 -0
  523. package/dist/server/routes/stripe.js +296 -0
  524. package/dist/server/routes/transcript-export.d.ts +10 -0
  525. package/dist/server/routes/transcript-export.js +178 -0
  526. package/dist/server/routes/usage.d.ts +9 -0
  527. package/dist/server/routes/usage.js +279 -0
  528. package/dist/server/routes/users.d.ts +8 -0
  529. package/dist/server/routes/users.js +1867 -0
  530. package/dist/server/routes/watch.d.ts +15 -0
  531. package/dist/server/routes/watch.js +309 -0
  532. package/dist/server/routes/webhooks.d.ts +26 -0
  533. package/dist/server/routes/webhooks.js +170 -0
  534. package/dist/server/routes/youtube.d.ts +6 -0
  535. package/dist/server/routes/youtube.js +130 -0
  536. package/dist/server/sentry.d.ts +14 -0
  537. package/dist/server/sentry.js +104 -0
  538. package/dist/server/types.d.ts +15 -0
  539. package/dist/server/types.js +7 -0
  540. package/dist/server/utils/response.d.ts +44 -0
  541. package/dist/server/utils/response.js +69 -0
  542. package/dist/server/utils/sse.d.ts +22 -0
  543. package/dist/server/utils/sse.js +38 -0
  544. package/dist/types.d.ts +552 -0
  545. package/dist/types.js +39 -0
  546. package/llms.txt +105 -0
  547. package/package.json +189 -0
@@ -0,0 +1,260 @@
1
+ import { fetchJson } from './shared.js';
2
+ // ---------------------------------------------------------------------------
3
+ // 36. ESPN extractor — live scores, standings, schedules via ESPN public API
4
+ // ---------------------------------------------------------------------------
5
+ /** Map ESPN URL path prefixes to sport/league identifiers for the API. */
6
+ function matchESPN(url) {
7
+ let u;
8
+ try {
9
+ u = new URL(url);
10
+ }
11
+ catch {
12
+ return null;
13
+ }
14
+ if (!u.hostname.includes('espn.com'))
15
+ return null;
16
+ const path = u.pathname.toLowerCase();
17
+ // Map URL path prefixes to [sport, league]
18
+ const sportMap = {
19
+ '/nba': ['basketball', 'nba'],
20
+ '/wnba': ['basketball', 'wnba'],
21
+ '/nfl': ['football', 'nfl'],
22
+ '/mlb': ['baseball', 'mlb'],
23
+ '/nhl': ['hockey', 'nhl'],
24
+ '/college-football': ['football', 'college-football'],
25
+ '/mens-college-basketball': ['basketball', 'mens-college-basketball'],
26
+ '/womens-college-basketball': ['basketball', 'womens-college-basketball'],
27
+ '/soccer': ['soccer', 'eng.1'],
28
+ '/mma': ['mma', 'ufc'],
29
+ };
30
+ for (const [prefix, [sport, league]] of Object.entries(sportMap)) {
31
+ if (path.startsWith(prefix)) {
32
+ // Override soccer league if explicitly in URL path (e.g. /soccer/scoreboard/_/league/usa.1)
33
+ let resolvedLeague = league;
34
+ if (sport === 'soccer') {
35
+ const leagueMatch = path.match(/\/league\/([^/?#]+)/);
36
+ if (leagueMatch)
37
+ resolvedLeague = leagueMatch[1];
38
+ }
39
+ if (path.includes('standings'))
40
+ return { sport, league: resolvedLeague, type: 'standings' };
41
+ if (path.includes('/team/') || path.includes('/teams/')) {
42
+ const nameMatch = path.split('/name/')[1]?.split('/')[0];
43
+ return { sport, league: resolvedLeague, type: 'team', param: nameMatch };
44
+ }
45
+ if (path.includes('scores') || path.includes('scoreboard'))
46
+ return { sport, league: resolvedLeague, type: 'scoreboard' };
47
+ return { sport, league: resolvedLeague, type: 'scoreboard' }; // default to scoreboard
48
+ }
49
+ }
50
+ // Unknown path (e.g. /about, /fantasy, /watch) — return null so pipeline
51
+ // falls through to browser rendering instead of showing wrong sport data.
52
+ // Only the root path / is treated as NBA scoreboard.
53
+ if (path === '/' || path === '') {
54
+ return { sport: 'basketball', league: 'nba', type: 'scoreboard' };
55
+ }
56
+ return null;
57
+ }
58
+ /** Sport emoji mapping. */
59
+ function espnSportEmoji(sport, league) {
60
+ if (league === 'nba' || league === 'wnba')
61
+ return '๐Ÿ€';
62
+ if (sport === 'football')
63
+ return '๐Ÿˆ';
64
+ if (sport === 'baseball')
65
+ return 'โšพ';
66
+ if (sport === 'hockey')
67
+ return '๐Ÿ’';
68
+ if (sport === 'soccer')
69
+ return 'โšฝ';
70
+ if (sport === 'mma' || league === 'ufc')
71
+ return '๐ŸฅŠ';
72
+ return '๐Ÿ†';
73
+ }
74
+ /** Format a UTC ISO date string to "7:30 PM ET" style. */
75
+ function fmtEspnTime(isoDate) {
76
+ try {
77
+ const d = new Date(isoDate);
78
+ return d.toLocaleTimeString('en-US', {
79
+ timeZone: 'America/New_York',
80
+ hour: 'numeric',
81
+ minute: '2-digit',
82
+ hour12: true,
83
+ }) + ' ET';
84
+ }
85
+ catch {
86
+ return isoDate;
87
+ }
88
+ }
89
+ /** Format today's date nicely: "March 18, 2026". */
90
+ function fmtTodayESPN() {
91
+ return new Date().toLocaleDateString('en-US', {
92
+ timeZone: 'America/New_York',
93
+ month: 'long',
94
+ day: 'numeric',
95
+ year: 'numeric',
96
+ });
97
+ }
98
+ async function fetchEspnScoreboard(sport, league) {
99
+ try {
100
+ const apiUrl = `https://site.api.espn.com/apis/site/v2/sports/${sport}/${league}/scoreboard`;
101
+ const data = await fetchJson(apiUrl);
102
+ const events = data?.events || [];
103
+ const emoji = espnSportEmoji(sport, league);
104
+ const leagueName = data?.leagues?.[0]?.name || league.toUpperCase();
105
+ const today = fmtTodayESPN();
106
+ if (events.length === 0) {
107
+ return `# ${emoji} ${leagueName} Scoreboard โ€” ${today}\n\n*No games scheduled today.*`;
108
+ }
109
+ const rows = events.map((e) => {
110
+ const comp = e.competitions?.[0] || {};
111
+ const status = comp.status?.type || {};
112
+ const competitors = comp.competitors || [];
113
+ // Away team first, home team second (standard display)
114
+ const away = competitors.find((c) => c.homeAway === 'away') || competitors[0];
115
+ const home = competitors.find((c) => c.homeAway === 'home') || competitors[1];
116
+ const awayName = away?.team?.displayName || away?.team?.name || '?';
117
+ const homeName = home?.team?.displayName || home?.team?.name || '?';
118
+ const gameLabel = `${awayName} at ${homeName}`;
119
+ let scoreStr = '-';
120
+ let statusStr = '';
121
+ const state = status.state || 'pre';
122
+ const description = status.description || 'Scheduled';
123
+ if (state === 'pre') {
124
+ scoreStr = '-';
125
+ statusStr = fmtEspnTime(comp.startDate || e.date || '');
126
+ }
127
+ else if (state === 'in') {
128
+ const awayScore = away?.score ?? '0';
129
+ const homeScore = home?.score ?? '0';
130
+ const awayAbbr = away?.team?.abbreviation || '?';
131
+ const homeAbbr = home?.team?.abbreviation || '?';
132
+ scoreStr = `${awayAbbr} ${awayScore}, ${homeAbbr} ${homeScore}`;
133
+ const period = comp.status?.period ?? '';
134
+ const clock = comp.status?.displayClock ?? '';
135
+ statusStr = period && clock ? `Q${period} ${clock}` : 'Live';
136
+ }
137
+ else {
138
+ const awayScore = away?.score ?? '0';
139
+ const homeScore = home?.score ?? '0';
140
+ const awayAbbr = away?.team?.abbreviation || '?';
141
+ const homeAbbr = home?.team?.abbreviation || '?';
142
+ scoreStr = `${awayAbbr} ${awayScore}, ${homeAbbr} ${homeScore}`;
143
+ statusStr = description || 'Final';
144
+ }
145
+ return `| ${gameLabel} | ${scoreStr} | ${statusStr} |`;
146
+ }).join('\n');
147
+ return `# ${emoji} ${leagueName} Scoreboard โ€” ${today}\n\n| Game | Score | Status |\n|------|-------|--------|\n${rows}`;
148
+ }
149
+ catch (e) {
150
+ if (process.env.DEBUG)
151
+ console.debug('[webpeel]', 'ESPN scoreboard fetch failed:', e instanceof Error ? e.message : e);
152
+ return null;
153
+ }
154
+ }
155
+ async function fetchEspnStandings(sport, league) {
156
+ try {
157
+ const apiUrl = `https://site.web.api.espn.com/apis/v2/sports/${sport}/${league}/standings?sort=winpercent:desc`;
158
+ const data = await fetchJson(apiUrl);
159
+ const children = data?.children || [];
160
+ const emoji = espnSportEmoji(sport, league);
161
+ const leagueName = data?.name || league.toUpperCase();
162
+ const today = fmtTodayESPN();
163
+ if (children.length === 0)
164
+ return null;
165
+ let output = `# ${emoji} ${leagueName} Standings โ€” ${today}\n\n`;
166
+ for (const conf of children) {
167
+ const confName = conf.name || conf.abbreviation || 'Conference';
168
+ const entries = conf.standings?.entries || [];
169
+ output += `## ${confName}\n\n`;
170
+ output += `| # | Team | W | L | PCT | Streak |\n`;
171
+ output += `|---|------|---|---|-----|--------|\n`;
172
+ // Sort by playoff seed
173
+ const sorted = entries.slice().sort((a, b) => {
174
+ const seedA = a.stats?.find((s) => s.name === 'playoffSeed')?.value ?? 99;
175
+ const seedB = b.stats?.find((s) => s.name === 'playoffSeed')?.value ?? 99;
176
+ return seedA - seedB;
177
+ });
178
+ for (const entry of sorted) {
179
+ const team = entry.team?.displayName || '?';
180
+ const stats = entry.stats || [];
181
+ const getDisplay = (name) => stats.find((s) => s.name === name)?.displayValue || '?';
182
+ const getStat = (name) => stats.find((s) => s.name === name)?.value ?? '?';
183
+ const seed = getStat('playoffSeed');
184
+ const wins = getDisplay('wins');
185
+ const losses = getDisplay('losses');
186
+ const pct = getDisplay('winPercent');
187
+ const streak = getDisplay('streak');
188
+ output += `| ${seed} | ${team} | ${wins} | ${losses} | ${pct} | ${streak} |\n`;
189
+ }
190
+ output += '\n';
191
+ }
192
+ return output.trim();
193
+ }
194
+ catch (e) {
195
+ if (process.env.DEBUG)
196
+ console.debug('[webpeel]', 'ESPN standings fetch failed:', e instanceof Error ? e.message : e);
197
+ return null;
198
+ }
199
+ }
200
+ export async function espnExtractor(_html, url) {
201
+ const match = matchESPN(url);
202
+ if (!match)
203
+ return null;
204
+ const { sport, league, type } = match;
205
+ const domain = 'espn.com';
206
+ if (type === 'standings') {
207
+ const content = await fetchEspnStandings(sport, league);
208
+ if (!content)
209
+ return null;
210
+ return {
211
+ domain,
212
+ type: 'standings',
213
+ structured: { sport, league, dataType: 'standings' },
214
+ cleanContent: content,
215
+ };
216
+ }
217
+ if (type === 'team') {
218
+ // Try to get team info from the teams API
219
+ try {
220
+ const teamsUrl = `https://site.api.espn.com/apis/site/v2/sports/${sport}/${league}/teams`;
221
+ const teamsData = await fetchJson(teamsUrl);
222
+ const teams = teamsData?.sports?.[0]?.leagues?.[0]?.teams || [];
223
+ const param = match.param?.toLowerCase();
224
+ const teamEntry = param
225
+ ? teams.find((t) => {
226
+ const td = t.team || t;
227
+ return td.abbreviation?.toLowerCase() === param ||
228
+ td.slug?.toLowerCase() === param ||
229
+ td.displayName?.toLowerCase().includes(param);
230
+ })
231
+ : teams[0];
232
+ if (teamEntry) {
233
+ const td = teamEntry.team || teamEntry;
234
+ const emoji = espnSportEmoji(sport, league);
235
+ const content = `# ${emoji} ${td.displayName}\n\n**League:** ${league.toUpperCase()}\n\n*For live scores and standings, use:*\n- \`webpeel "https://espn.com/${league}/scoreboard"\`\n- \`webpeel "https://espn.com/${league}/standings"\``;
236
+ return {
237
+ domain,
238
+ type: 'team',
239
+ structured: { sport, league, teamName: td.displayName, abbreviation: td.abbreviation },
240
+ cleanContent: content,
241
+ };
242
+ }
243
+ }
244
+ catch (e) {
245
+ if (process.env.DEBUG)
246
+ console.debug('[webpeel]', 'ESPN team fetch failed:', e instanceof Error ? e.message : e);
247
+ }
248
+ // Fallback to scoreboard
249
+ }
250
+ // Default: scoreboard
251
+ const content = await fetchEspnScoreboard(sport, league);
252
+ if (!content)
253
+ return null;
254
+ return {
255
+ domain,
256
+ type: 'scoreboard',
257
+ structured: { sport, league, dataType: 'scoreboard' },
258
+ cleanContent: content,
259
+ };
260
+ }
@@ -0,0 +1,2 @@
1
+ import type { DomainExtractResult } from './types.js';
2
+ export declare function etsyExtractor(_html: string, url: string): Promise<DomainExtractResult | null>;
@@ -0,0 +1,52 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Etsy extractor (bot-block fallback with Google site-search suggestion)
3
+ // ---------------------------------------------------------------------------
4
+ export async function etsyExtractor(_html, url) {
5
+ const u = new URL(url);
6
+ // Extract search query from various URL patterns
7
+ // /search?q=handmade+jewelry OR /search/handmade-jewelry
8
+ let query = u.searchParams.get('q') || '';
9
+ if (!query) {
10
+ const pathMatch = u.pathname.match(/\/search\/([^?#]+)/);
11
+ if (pathMatch)
12
+ query = decodeURIComponent(pathMatch[1].replace(/-/g, ' '));
13
+ }
14
+ // Shop page: /shop/ShopName
15
+ const shopMatch = u.pathname.match(/^\/shop\/([^/?#]+)/);
16
+ const shopName = shopMatch?.[1] || '';
17
+ if (!query && !shopName)
18
+ return null;
19
+ const googleUrl = query
20
+ ? `https://www.google.com/search?q=site:etsy.com+${encodeURIComponent(query)}`
21
+ : `https://www.google.com/search?q=site:etsy.com+${encodeURIComponent(shopName)}`;
22
+ const etsySearchUrl = query ? `https://www.etsy.com/search?q=${encodeURIComponent(query)}` : url;
23
+ const displayTitle = query ? `"${query}"` : `Shop: ${shopName}`;
24
+ const cleanContent = [
25
+ `# ๐ŸŽจ Etsy โ€” ${displayTitle}`,
26
+ '',
27
+ '> โš ๏ธ Etsy blocks automated access. WebPeel cannot scrape listings directly.',
28
+ '',
29
+ '**Alternatives that work:**',
30
+ `- \`webpeel "${googleUrl}"\` โ€” Google site:etsy.com results`,
31
+ `- Direct link: [etsy.com/search?q=${encodeURIComponent(query || shopName)}](${etsySearchUrl})`,
32
+ '',
33
+ ...(query ? [
34
+ '**Similar items on open marketplaces:**',
35
+ `- \`webpeel "https://www.ebay.com/sch/i.html?_nkw=${encodeURIComponent(query)}&LH_BIN=1"\` โ€” eBay`,
36
+ `- \`webpeel "https://newyork.craigslist.org/search/sss?query=${encodeURIComponent(query)}"\` โ€” Craigslist`,
37
+ ] : []),
38
+ '',
39
+ '*Etsy Open API v3 (free key at etsy.com/developers) can unlock direct access.*',
40
+ ].join('\n');
41
+ return {
42
+ domain: 'etsy.com',
43
+ type: 'blocked',
44
+ structured: {
45
+ query,
46
+ shopName,
47
+ reason: 'bot-block',
48
+ googleFallback: googleUrl,
49
+ },
50
+ cleanContent,
51
+ };
52
+ }
@@ -0,0 +1,2 @@
1
+ import type { DomainExtractResult } from './types.js';
2
+ export declare function facebookMarketplaceExtractor(_html: string, url: string): Promise<DomainExtractResult | null>;
@@ -0,0 +1,46 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Facebook Marketplace extractor (login-wall fallback)
3
+ // ---------------------------------------------------------------------------
4
+ export async function facebookMarketplaceExtractor(_html, url) {
5
+ const u = new URL(url);
6
+ if (!u.pathname.includes('/marketplace'))
7
+ return null;
8
+ const query = u.searchParams.get('query') || '';
9
+ const maxPrice = u.searchParams.get('maxPrice') || '';
10
+ const minPrice = u.searchParams.get('minPrice') || '';
11
+ // Extract location segment: /marketplace/nyc/search → "nyc"
12
+ const locationMatch = u.pathname.match(/\/marketplace\/([^/]+)(?:\/|$)/);
13
+ const location = (locationMatch?.[1] && locationMatch[1] !== 'search' && locationMatch[1] !== 'category') ? locationMatch[1] : '';
14
+ const priceRange = [minPrice && `$${minPrice}`, maxPrice && `$${maxPrice}`].filter(Boolean).join(' โ€“ ');
15
+ const lines = [
16
+ `# ๐Ÿ›’ Facebook Marketplace`,
17
+ '',
18
+ `**Search:** ${query || 'Browse all'}`,
19
+ ...(location ? [`**Location:** ${location}`] : []),
20
+ ...(priceRange ? [`**Price range:** ${priceRange}`] : []),
21
+ '',
22
+ '> โš ๏ธ Facebook Marketplace requires authentication. WebPeel cannot access listings directly.',
23
+ '',
24
+ '**Alternative searches that work:**',
25
+ ];
26
+ if (query) {
27
+ const clUrl = `https://newyork.craigslist.org/search/sss?query=${encodeURIComponent(query)}${maxPrice ? '&max_price=' + maxPrice : ''}`;
28
+ const carsUrl = `https://www.cars.com/shopping/results/?keyword=${encodeURIComponent(query)}&list_price_max=${maxPrice || ''}&zip=10001&stock_type=used`;
29
+ const ebayUrl = `https://www.ebay.com/sch/i.html?_nkw=${encodeURIComponent(query)}${maxPrice ? '&_udhi=' + maxPrice : ''}&LH_BIN=1`;
30
+ lines.push(`- \`webpeel "${clUrl}"\` โ€” Craigslist`, `- \`webpeel "${carsUrl}"\` โ€” Cars.com`, `- \`webpeel "${ebayUrl}"\` โ€” eBay`);
31
+ }
32
+ lines.push('', '*Tip: Craigslist and Cars.com return full structured results with WebPeel.*');
33
+ return {
34
+ domain: 'facebook.com',
35
+ type: 'blocked',
36
+ structured: {
37
+ query,
38
+ location,
39
+ minPrice,
40
+ maxPrice,
41
+ reason: 'authentication required',
42
+ alternatives: ['craigslist', 'cars.com', 'ebay'],
43
+ },
44
+ cleanContent: lines.join('\n'),
45
+ };
46
+ }
@@ -0,0 +1,2 @@
1
+ import type { DomainExtractResult } from './types.js';
2
+ export declare function githubExtractor(_html: string, url: string): Promise<DomainExtractResult | null>;
@@ -0,0 +1,196 @@
1
+ import { fetchJson, fetchJsonWithRetry } from './shared.js';
2
+ // ---------------------------------------------------------------------------
3
+ // 3. GitHub extractor
4
+ // ---------------------------------------------------------------------------
5
+ export async function githubExtractor(_html, url) {
6
+ const urlObj = new URL(url);
7
+ const pathParts = urlObj.pathname.split('/').filter(Boolean);
8
+ const domain = 'github.com';
9
+ if (pathParts.length === 0)
10
+ return null;
11
+ const ghHeaders = { Accept: 'application/vnd.github.v3+json' };
12
+ // Use GITHUB_TOKEN if available for higher rate limits (5000/hr vs 60/hr)
13
+ const ghToken = process.env.GITHUB_TOKEN || process.env.GH_TOKEN;
14
+ if (ghToken)
15
+ ghHeaders.Authorization = `token ${ghToken}`;
16
+ // User profile: /username (single segment)
17
+ if (pathParts.length === 1) {
18
+ const username = pathParts[0];
19
+ const userData = await fetchJson(`https://api.github.com/users/${username}`, ghHeaders);
20
+ if (!userData || userData.message === 'Not Found')
21
+ return null;
22
+ const structured = {
23
+ login: userData.login,
24
+ name: userData.name || userData.login,
25
+ bio: userData.bio || '',
26
+ company: userData.company || null,
27
+ location: userData.location || null,
28
+ blog: userData.blog || null,
29
+ followers: userData.followers ?? 0,
30
+ following: userData.following ?? 0,
31
+ publicRepos: userData.public_repos ?? 0,
32
+ created: userData.created_at,
33
+ avatarUrl: userData.avatar_url,
34
+ };
35
+ const cleanContent = `## ๐Ÿ‘ค GitHub: ${structured.name} (@${structured.login})
36
+
37
+ ${structured.bio ? structured.bio + '\n\n' : ''}๐Ÿ“ ${structured.location || 'N/A'} | ๐Ÿ’ผ ${structured.company || 'N/A'} | ๐ŸŒ ${structured.blog || 'N/A'}
38
+ ๐Ÿ‘ฅ ${structured.followers} followers | Following: ${structured.following} | ๐Ÿ“ฆ ${structured.publicRepos} public repos`;
39
+ return { domain, type: 'user', structured, cleanContent };
40
+ }
41
+ const owner = pathParts[0];
42
+ const repo = pathParts[1];
43
+ // Issue: /owner/repo/issues/123
44
+ if (pathParts[2] === 'issues' && pathParts[3]) {
45
+ const issueNumber = pathParts[3];
46
+ const [issueData, commentsData] = await Promise.all([
47
+ fetchJson(`https://api.github.com/repos/${owner}/${repo}/issues/${issueNumber}`, ghHeaders),
48
+ fetchJson(`https://api.github.com/repos/${owner}/${repo}/issues/${issueNumber}/comments?per_page=20`, ghHeaders),
49
+ ]);
50
+ if (!issueData || issueData.message === 'Not Found')
51
+ return null;
52
+ const comments = Array.isArray(commentsData)
53
+ ? commentsData.map((c) => ({
54
+ author: c.user?.login || 'ghost',
55
+ text: c.body || '',
56
+ created: c.created_at,
57
+ }))
58
+ : [];
59
+ const structured = {
60
+ repo: `${owner}/${repo}`,
61
+ number: issueData.number,
62
+ title: issueData.title || '',
63
+ author: issueData.user?.login || 'ghost',
64
+ state: issueData.state,
65
+ body: issueData.body || '',
66
+ labels: (issueData.labels || []).map((l) => l.name),
67
+ created: issueData.created_at,
68
+ updated: issueData.updated_at,
69
+ commentCount: issueData.comments ?? 0,
70
+ comments,
71
+ };
72
+ const labelStr = structured.labels.length ? structured.labels.join(', ') : 'none';
73
+ const commentsMd = comments.slice(0, 10).map((c) => `**@${c.author}** (${c.created}):\n${c.text.slice(0, 300)}`).join('\n\n---\n\n');
74
+ const cleanContent = `## ๐Ÿ› Issue #${structured.number}: ${structured.title}
75
+
76
+ **Repo:** ${structured.repo} | **State:** ${structured.state} | **Author:** @${structured.author}
77
+ **Labels:** ${labelStr} | **Created:** ${structured.created}
78
+
79
+ ${structured.body.slice(0, 800)}
80
+
81
+ ---
82
+
83
+ ### Comments (${structured.commentCount})
84
+
85
+ ${commentsMd || '*No comments.*'}`;
86
+ return { domain, type: 'issue', structured, cleanContent };
87
+ }
88
+ // Pull request: /owner/repo/pull/123
89
+ if (pathParts[2] === 'pull' && pathParts[3]) {
90
+ const prNumber = pathParts[3];
91
+ const [prData, commentsData] = await Promise.all([
92
+ fetchJson(`https://api.github.com/repos/${owner}/${repo}/pulls/${prNumber}`, ghHeaders),
93
+ fetchJson(`https://api.github.com/repos/${owner}/${repo}/issues/${prNumber}/comments?per_page=20`, ghHeaders),
94
+ ]);
95
+ if (!prData || prData.message === 'Not Found')
96
+ return null;
97
+ const comments = Array.isArray(commentsData)
98
+ ? commentsData.map((c) => ({
99
+ author: c.user?.login || 'ghost',
100
+ text: c.body || '',
101
+ created: c.created_at,
102
+ }))
103
+ : [];
104
+ const structured = {
105
+ repo: `${owner}/${repo}`,
106
+ number: prData.number,
107
+ title: prData.title || '',
108
+ author: prData.user?.login || 'ghost',
109
+ state: prData.state,
110
+ merged: prData.merged ?? false,
111
+ body: prData.body || '',
112
+ labels: (prData.labels || []).map((l) => l.name),
113
+ created: prData.created_at,
114
+ updated: prData.updated_at,
115
+ commentCount: prData.comments ?? 0,
116
+ additions: prData.additions ?? 0,
117
+ deletions: prData.deletions ?? 0,
118
+ changedFiles: prData.changed_files ?? 0,
119
+ headBranch: prData.head?.label || '',
120
+ baseBranch: prData.base?.label || '',
121
+ comments,
122
+ };
123
+ const labelStr = structured.labels.length ? structured.labels.join(', ') : 'none';
124
+ const commentsMd = comments.slice(0, 8).map((c) => `**@${c.author}** (${c.created}):\n${c.text.slice(0, 300)}`).join('\n\n---\n\n');
125
+ const cleanContent = `## ๐Ÿ”€ PR #${structured.number}: ${structured.title}
126
+
127
+ **Repo:** ${structured.repo} | **State:** ${structured.state}${structured.merged ? ' (merged)' : ''} | **Author:** @${structured.author}
128
+ **Labels:** ${labelStr} | **${structured.headBranch} โ†’ ${structured.baseBranch}**
129
+ **Changes:** +${structured.additions} / -${structured.deletions} across ${structured.changedFiles} files
130
+
131
+ ${structured.body.slice(0, 800)}
132
+
133
+ ---
134
+
135
+ ### Comments (${structured.commentCount})
136
+
137
+ ${commentsMd || '*No comments.*'}`;
138
+ return { domain, type: 'pull_request', structured, cleanContent };
139
+ }
140
+ // Repository page: /owner/repo (and no deeper path we handle above)
141
+ if (pathParts.length >= 2) {
142
+ // Sequential fetches to avoid secondary rate limits on popular repos
143
+ const repoData = await fetchJsonWithRetry(`https://api.github.com/repos/${owner}/${repo}`, ghHeaders, 2, 1000);
144
+ if (!repoData) {
145
+ console.warn(`[webpeel:github] repo API returned null for ${owner}/${repo}`);
146
+ return null;
147
+ }
148
+ if (repoData.message) {
149
+ console.warn(`[webpeel:github] repo API error for ${owner}/${repo}: ${repoData.message}`);
150
+ if (repoData.message === 'Not Found')
151
+ return null;
152
+ if (repoData.message.includes('secondary rate limit') || repoData.message.includes('abuse'))
153
+ return null;
154
+ }
155
+ const structured = {
156
+ title: `${owner}/${repo}`,
157
+ name: `${owner}/${repo}`,
158
+ description: repoData.description || '',
159
+ stars: repoData.stargazers_count ?? 0,
160
+ forks: repoData.forks_count ?? 0,
161
+ watchers: repoData.watchers_count ?? 0,
162
+ language: repoData.language || null,
163
+ topics: repoData.topics || [],
164
+ license: repoData.license?.spdx_id || null,
165
+ openIssues: repoData.open_issues_count ?? 0,
166
+ lastPush: repoData.pushed_at,
167
+ createdAt: repoData.created_at,
168
+ defaultBranch: repoData.default_branch || 'main',
169
+ homepage: repoData.homepage || null,
170
+ archived: repoData.archived || false,
171
+ fork: repoData.fork || false,
172
+ url: repoData.html_url || `https://github.com/${owner}/${repo}`,
173
+ };
174
+ const topicsStr = structured.topics.length ? structured.topics.slice(0, 8).join(', ') : '';
175
+ const updatedDate = structured.lastPush ? structured.lastPush.slice(0, 10) : 'N/A';
176
+ const lines = [
177
+ `# ๐Ÿ’ป ${structured.name}`,
178
+ '',
179
+ structured.description ? `**${structured.description}**` : '*No description.*',
180
+ '',
181
+ `- โญ Stars: ${structured.stars.toLocaleString()} | ๐Ÿด Forks: ${structured.forks.toLocaleString()} | ๐Ÿ“ Language: ${structured.language || 'N/A'}`,
182
+ `- ๐Ÿ“ฆ License: ${structured.license || 'None'} | ๐Ÿ”„ Updated: ${updatedDate}`,
183
+ `- ๐Ÿ“Š Open Issues: ${structured.openIssues}${structured.archived ? ' | โš ๏ธ ARCHIVED' : ''}`,
184
+ ];
185
+ if (topicsStr)
186
+ lines.push(`- ๐Ÿท๏ธ Topics: ${topicsStr}`);
187
+ lines.push('');
188
+ const links = [`[Repository](${structured.url})`];
189
+ if (structured.homepage)
190
+ links.push(`[Homepage](${structured.homepage})`);
191
+ lines.push(`**Links:** ${links.join(' ยท ')}`);
192
+ const cleanContent = lines.join('\n');
193
+ return { domain, type: 'repository', structured, cleanContent };
194
+ }
195
+ return null;
196
+ }
@@ -0,0 +1,2 @@
1
+ import type { DomainExtractResult } from './types.js';
2
+ export declare function googleFlightsExtractor(_html: string, url: string): Promise<DomainExtractResult | null>;