@iflow-mcp/jakeliume-webpeel 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (547) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +313 -0
  3. package/dist/cache.d.ts +30 -0
  4. package/dist/cache.js +139 -0
  5. package/dist/cli/commands/auth.d.ts +5 -0
  6. package/dist/cli/commands/auth.js +411 -0
  7. package/dist/cli/commands/doctor.d.ts +37 -0
  8. package/dist/cli/commands/doctor.js +371 -0
  9. package/dist/cli/commands/fetch.d.ts +6 -0
  10. package/dist/cli/commands/fetch.js +1345 -0
  11. package/dist/cli/commands/guide.d.ts +2 -0
  12. package/dist/cli/commands/guide.js +183 -0
  13. package/dist/cli/commands/interact.d.ts +5 -0
  14. package/dist/cli/commands/interact.js +840 -0
  15. package/dist/cli/commands/jobs.d.ts +5 -0
  16. package/dist/cli/commands/jobs.js +997 -0
  17. package/dist/cli/commands/monitor.d.ts +12 -0
  18. package/dist/cli/commands/monitor.js +197 -0
  19. package/dist/cli/commands/observe.d.ts +12 -0
  20. package/dist/cli/commands/observe.js +158 -0
  21. package/dist/cli/commands/screenshot.d.ts +5 -0
  22. package/dist/cli/commands/screenshot.js +282 -0
  23. package/dist/cli/commands/search.d.ts +5 -0
  24. package/dist/cli/commands/search.js +1021 -0
  25. package/dist/cli/commands/setup.d.ts +13 -0
  26. package/dist/cli/commands/setup.js +244 -0
  27. package/dist/cli/commands/skill.d.ts +15 -0
  28. package/dist/cli/commands/skill.js +195 -0
  29. package/dist/cli/utils.d.ts +84 -0
  30. package/dist/cli/utils.js +806 -0
  31. package/dist/cli-auth.d.ts +75 -0
  32. package/dist/cli-auth.js +369 -0
  33. package/dist/cli.d.ts +17 -0
  34. package/dist/cli.js +99 -0
  35. package/dist/core/actions.d.ts +69 -0
  36. package/dist/core/actions.js +495 -0
  37. package/dist/core/agent.d.ts +98 -0
  38. package/dist/core/agent.js +558 -0
  39. package/dist/core/answer.d.ts +42 -0
  40. package/dist/core/answer.js +395 -0
  41. package/dist/core/application-tracker.d.ts +84 -0
  42. package/dist/core/application-tracker.js +184 -0
  43. package/dist/core/apply.d.ts +162 -0
  44. package/dist/core/apply.js +816 -0
  45. package/dist/core/auth-detection.d.ts +35 -0
  46. package/dist/core/auth-detection.js +358 -0
  47. package/dist/core/auto-extract.d.ts +82 -0
  48. package/dist/core/auto-extract.js +604 -0
  49. package/dist/core/auto-interact.d.ts +23 -0
  50. package/dist/core/auto-interact.js +246 -0
  51. package/dist/core/bm25-filter.d.ts +66 -0
  52. package/dist/core/bm25-filter.js +288 -0
  53. package/dist/core/branding.d.ts +54 -0
  54. package/dist/core/branding.js +234 -0
  55. package/dist/core/browser-fetch.d.ts +323 -0
  56. package/dist/core/browser-fetch.js +1600 -0
  57. package/dist/core/browser-pool.d.ts +91 -0
  58. package/dist/core/browser-pool.js +550 -0
  59. package/dist/core/budget.d.ts +42 -0
  60. package/dist/core/budget.js +324 -0
  61. package/dist/core/business-intel.d.ts +47 -0
  62. package/dist/core/business-intel.js +279 -0
  63. package/dist/core/cache.d.ts +13 -0
  64. package/dist/core/cache.js +121 -0
  65. package/dist/core/cf-worker-proxy.d.ts +32 -0
  66. package/dist/core/cf-worker-proxy.js +87 -0
  67. package/dist/core/challenge-detection.d.ts +26 -0
  68. package/dist/core/challenge-detection.js +468 -0
  69. package/dist/core/change-tracking.d.ts +75 -0
  70. package/dist/core/change-tracking.js +276 -0
  71. package/dist/core/chunker.d.ts +46 -0
  72. package/dist/core/chunker.js +249 -0
  73. package/dist/core/chunking.d.ts +42 -0
  74. package/dist/core/chunking.js +181 -0
  75. package/dist/core/circuit-breaker.d.ts +44 -0
  76. package/dist/core/circuit-breaker.js +85 -0
  77. package/dist/core/content-pruner.d.ts +47 -0
  78. package/dist/core/content-pruner.js +425 -0
  79. package/dist/core/cookie-cache.d.ts +60 -0
  80. package/dist/core/cookie-cache.js +163 -0
  81. package/dist/core/crawl-checkpoint.d.ts +54 -0
  82. package/dist/core/crawl-checkpoint.js +104 -0
  83. package/dist/core/crawler.d.ts +84 -0
  84. package/dist/core/crawler.js +349 -0
  85. package/dist/core/cross-verify.d.ts +27 -0
  86. package/dist/core/cross-verify.js +93 -0
  87. package/dist/core/deep-fetch.d.ts +74 -0
  88. package/dist/core/deep-fetch.js +405 -0
  89. package/dist/core/deep-research.d.ts +141 -0
  90. package/dist/core/deep-research.js +972 -0
  91. package/dist/core/design-analysis.d.ts +70 -0
  92. package/dist/core/design-analysis.js +490 -0
  93. package/dist/core/design-compare.d.ts +38 -0
  94. package/dist/core/design-compare.js +264 -0
  95. package/dist/core/diff.d.ts +61 -0
  96. package/dist/core/diff.js +289 -0
  97. package/dist/core/dns-cache.d.ts +20 -0
  98. package/dist/core/dns-cache.js +198 -0
  99. package/dist/core/documents.d.ts +23 -0
  100. package/dist/core/documents.js +123 -0
  101. package/dist/core/domain-memory.d.ts +66 -0
  102. package/dist/core/domain-memory.js +163 -0
  103. package/dist/core/domain-verify.d.ts +40 -0
  104. package/dist/core/domain-verify.js +379 -0
  105. package/dist/core/engine-ranker.d.ts +112 -0
  106. package/dist/core/engine-ranker.js +395 -0
  107. package/dist/core/extract-inline.d.ts +38 -0
  108. package/dist/core/extract-inline.js +215 -0
  109. package/dist/core/extract-listings.d.ts +38 -0
  110. package/dist/core/extract-listings.js +461 -0
  111. package/dist/core/extract.d.ts +9 -0
  112. package/dist/core/extract.js +139 -0
  113. package/dist/core/fetch-cache.d.ts +57 -0
  114. package/dist/core/fetch-cache.js +95 -0
  115. package/dist/core/fetcher.d.ts +13 -0
  116. package/dist/core/fetcher.js +12 -0
  117. package/dist/core/google-cache.d.ts +29 -0
  118. package/dist/core/google-cache.js +180 -0
  119. package/dist/core/google-serp-parser.d.ts +82 -0
  120. package/dist/core/google-serp-parser.js +287 -0
  121. package/dist/core/hotel-search.d.ts +122 -0
  122. package/dist/core/hotel-search.js +382 -0
  123. package/dist/core/http-fetch.d.ts +72 -0
  124. package/dist/core/http-fetch.js +820 -0
  125. package/dist/core/human.d.ts +175 -0
  126. package/dist/core/human.js +680 -0
  127. package/dist/core/image-caption.d.ts +44 -0
  128. package/dist/core/image-caption.js +271 -0
  129. package/dist/core/jobs.d.ts +75 -0
  130. package/dist/core/jobs.js +634 -0
  131. package/dist/core/json-ld.d.ts +15 -0
  132. package/dist/core/json-ld.js +617 -0
  133. package/dist/core/language-detect.d.ts +18 -0
  134. package/dist/core/language-detect.js +135 -0
  135. package/dist/core/links.d.ts +10 -0
  136. package/dist/core/links.js +44 -0
  137. package/dist/core/llm-extract.d.ts +71 -0
  138. package/dist/core/llm-extract.js +507 -0
  139. package/dist/core/llm-provider.d.ts +100 -0
  140. package/dist/core/llm-provider.js +702 -0
  141. package/dist/core/local-search.d.ts +60 -0
  142. package/dist/core/local-search.js +308 -0
  143. package/dist/core/logger.d.ts +28 -0
  144. package/dist/core/logger.js +104 -0
  145. package/dist/core/map.d.ts +33 -0
  146. package/dist/core/map.js +127 -0
  147. package/dist/core/markdown.d.ts +92 -0
  148. package/dist/core/markdown.js +809 -0
  149. package/dist/core/metadata.d.ts +34 -0
  150. package/dist/core/metadata.js +422 -0
  151. package/dist/core/observe.d.ts +113 -0
  152. package/dist/core/observe.js +395 -0
  153. package/dist/core/ocr.d.ts +12 -0
  154. package/dist/core/ocr.js +33 -0
  155. package/dist/core/paginate.d.ts +31 -0
  156. package/dist/core/paginate.js +106 -0
  157. package/dist/core/pdf.d.ts +8 -0
  158. package/dist/core/pdf.js +25 -0
  159. package/dist/core/peel-tls.d.ts +25 -0
  160. package/dist/core/peel-tls.js +220 -0
  161. package/dist/core/pipeline.d.ts +132 -0
  162. package/dist/core/pipeline.js +1666 -0
  163. package/dist/core/profiles.d.ts +61 -0
  164. package/dist/core/profiles.js +350 -0
  165. package/dist/core/prompt-guard.d.ts +30 -0
  166. package/dist/core/prompt-guard.js +119 -0
  167. package/dist/core/proxy-config.d.ts +90 -0
  168. package/dist/core/proxy-config.js +172 -0
  169. package/dist/core/quick-answer.d.ts +53 -0
  170. package/dist/core/quick-answer.js +833 -0
  171. package/dist/core/rate-governor.d.ts +80 -0
  172. package/dist/core/rate-governor.js +238 -0
  173. package/dist/core/readability.d.ts +57 -0
  174. package/dist/core/readability.js +533 -0
  175. package/dist/core/research.d.ts +66 -0
  176. package/dist/core/research.js +270 -0
  177. package/dist/core/retry.d.ts +60 -0
  178. package/dist/core/retry.js +119 -0
  179. package/dist/core/safe-browsing.d.ts +30 -0
  180. package/dist/core/safe-browsing.js +206 -0
  181. package/dist/core/schema-extraction.d.ts +66 -0
  182. package/dist/core/schema-extraction.js +352 -0
  183. package/dist/core/schema-postprocess.d.ts +32 -0
  184. package/dist/core/schema-postprocess.js +469 -0
  185. package/dist/core/schema-templates.d.ts +19 -0
  186. package/dist/core/schema-templates.js +143 -0
  187. package/dist/core/screenshot.d.ts +224 -0
  188. package/dist/core/screenshot.js +207 -0
  189. package/dist/core/search-engines.d.ts +25 -0
  190. package/dist/core/search-engines.js +182 -0
  191. package/dist/core/search-provider.d.ts +243 -0
  192. package/dist/core/search-provider.js +1629 -0
  193. package/dist/core/searxng-provider.d.ts +35 -0
  194. package/dist/core/searxng-provider.js +105 -0
  195. package/dist/core/selective-evidence.d.ts +151 -0
  196. package/dist/core/selective-evidence.js +389 -0
  197. package/dist/core/site-search.d.ts +44 -0
  198. package/dist/core/site-search.js +252 -0
  199. package/dist/core/sitemap.d.ts +23 -0
  200. package/dist/core/sitemap.js +105 -0
  201. package/dist/core/source-credibility.d.ts +29 -0
  202. package/dist/core/source-credibility.js +584 -0
  203. package/dist/core/source-scoring.d.ts +166 -0
  204. package/dist/core/source-scoring.js +396 -0
  205. package/dist/core/stemmer.d.ts +38 -0
  206. package/dist/core/stemmer.js +509 -0
  207. package/dist/core/strategies.d.ts +104 -0
  208. package/dist/core/strategies.js +1044 -0
  209. package/dist/core/strategy-hooks.d.ts +145 -0
  210. package/dist/core/strategy-hooks.js +74 -0
  211. package/dist/core/structured-extract.d.ts +43 -0
  212. package/dist/core/structured-extract.js +550 -0
  213. package/dist/core/summarize.d.ts +17 -0
  214. package/dist/core/summarize.js +78 -0
  215. package/dist/core/synonyms.d.ts +42 -0
  216. package/dist/core/synonyms.js +184 -0
  217. package/dist/core/system-monitor.d.ts +61 -0
  218. package/dist/core/system-monitor.js +133 -0
  219. package/dist/core/table-format.d.ts +30 -0
  220. package/dist/core/table-format.js +146 -0
  221. package/dist/core/threat-feeds.d.ts +23 -0
  222. package/dist/core/threat-feeds.js +104 -0
  223. package/dist/core/timing.d.ts +21 -0
  224. package/dist/core/timing.js +33 -0
  225. package/dist/core/transcript-export.d.ts +47 -0
  226. package/dist/core/transcript-export.js +107 -0
  227. package/dist/core/user-agents.d.ts +82 -0
  228. package/dist/core/user-agents.js +239 -0
  229. package/dist/core/vertical-search.d.ts +54 -0
  230. package/dist/core/vertical-search.js +158 -0
  231. package/dist/core/watch-manager.d.ts +175 -0
  232. package/dist/core/watch-manager.js +416 -0
  233. package/dist/core/watch.d.ts +101 -0
  234. package/dist/core/watch.js +389 -0
  235. package/dist/core/youtube.d.ts +130 -0
  236. package/dist/core/youtube.js +1175 -0
  237. package/dist/ee/challenge-re-export.d.ts +1 -0
  238. package/dist/ee/challenge-re-export.js +1 -0
  239. package/dist/ee/challenge-solver.d.ts +72 -0
  240. package/dist/ee/challenge-solver.js +720 -0
  241. package/dist/ee/domain-extractors.d.ts +8 -0
  242. package/dist/ee/domain-extractors.js +8 -0
  243. package/dist/ee/domain-intel.d.ts +16 -0
  244. package/dist/ee/domain-intel.js +133 -0
  245. package/dist/ee/extractors/allrecipes.d.ts +2 -0
  246. package/dist/ee/extractors/allrecipes.js +120 -0
  247. package/dist/ee/extractors/amazon.d.ts +2 -0
  248. package/dist/ee/extractors/amazon.js +78 -0
  249. package/dist/ee/extractors/arxiv.d.ts +2 -0
  250. package/dist/ee/extractors/arxiv.js +137 -0
  251. package/dist/ee/extractors/bestbuy.d.ts +2 -0
  252. package/dist/ee/extractors/bestbuy.js +78 -0
  253. package/dist/ee/extractors/carscom.d.ts +2 -0
  254. package/dist/ee/extractors/carscom.js +121 -0
  255. package/dist/ee/extractors/coingecko.d.ts +2 -0
  256. package/dist/ee/extractors/coingecko.js +134 -0
  257. package/dist/ee/extractors/craigslist.d.ts +2 -0
  258. package/dist/ee/extractors/craigslist.js +92 -0
  259. package/dist/ee/extractors/devto.d.ts +2 -0
  260. package/dist/ee/extractors/devto.js +135 -0
  261. package/dist/ee/extractors/ebay.d.ts +2 -0
  262. package/dist/ee/extractors/ebay.js +90 -0
  263. package/dist/ee/extractors/espn.d.ts +2 -0
  264. package/dist/ee/extractors/espn.js +260 -0
  265. package/dist/ee/extractors/etsy.d.ts +2 -0
  266. package/dist/ee/extractors/etsy.js +52 -0
  267. package/dist/ee/extractors/facebook.d.ts +2 -0
  268. package/dist/ee/extractors/facebook.js +46 -0
  269. package/dist/ee/extractors/github.d.ts +2 -0
  270. package/dist/ee/extractors/github.js +196 -0
  271. package/dist/ee/extractors/google-flights.d.ts +2 -0
  272. package/dist/ee/extractors/google-flights.js +176 -0
  273. package/dist/ee/extractors/hackernews.d.ts +2 -0
  274. package/dist/ee/extractors/hackernews.js +147 -0
  275. package/dist/ee/extractors/imdb.d.ts +2 -0
  276. package/dist/ee/extractors/imdb.js +172 -0
  277. package/dist/ee/extractors/index.d.ts +26 -0
  278. package/dist/ee/extractors/index.js +247 -0
  279. package/dist/ee/extractors/instagram.d.ts +2 -0
  280. package/dist/ee/extractors/instagram.js +102 -0
  281. package/dist/ee/extractors/kalshi.d.ts +2 -0
  282. package/dist/ee/extractors/kalshi.js +121 -0
  283. package/dist/ee/extractors/kayak-cars.d.ts +2 -0
  284. package/dist/ee/extractors/kayak-cars.js +270 -0
  285. package/dist/ee/extractors/linkedin.d.ts +2 -0
  286. package/dist/ee/extractors/linkedin.js +113 -0
  287. package/dist/ee/extractors/medium.d.ts +2 -0
  288. package/dist/ee/extractors/medium.js +130 -0
  289. package/dist/ee/extractors/news.d.ts +4 -0
  290. package/dist/ee/extractors/news.js +173 -0
  291. package/dist/ee/extractors/npm.d.ts +2 -0
  292. package/dist/ee/extractors/npm.js +86 -0
  293. package/dist/ee/extractors/pdf.d.ts +2 -0
  294. package/dist/ee/extractors/pdf.js +108 -0
  295. package/dist/ee/extractors/pinterest.d.ts +2 -0
  296. package/dist/ee/extractors/pinterest.js +34 -0
  297. package/dist/ee/extractors/polymarket.d.ts +2 -0
  298. package/dist/ee/extractors/polymarket.js +358 -0
  299. package/dist/ee/extractors/producthunt.d.ts +2 -0
  300. package/dist/ee/extractors/producthunt.js +88 -0
  301. package/dist/ee/extractors/pubmed.d.ts +2 -0
  302. package/dist/ee/extractors/pubmed.js +162 -0
  303. package/dist/ee/extractors/pypi.d.ts +2 -0
  304. package/dist/ee/extractors/pypi.js +80 -0
  305. package/dist/ee/extractors/reddit.d.ts +2 -0
  306. package/dist/ee/extractors/reddit.js +438 -0
  307. package/dist/ee/extractors/redfin.d.ts +2 -0
  308. package/dist/ee/extractors/redfin.js +156 -0
  309. package/dist/ee/extractors/semanticscholar.d.ts +2 -0
  310. package/dist/ee/extractors/semanticscholar.js +131 -0
  311. package/dist/ee/extractors/shared.d.ts +12 -0
  312. package/dist/ee/extractors/shared.js +76 -0
  313. package/dist/ee/extractors/soundcloud.d.ts +2 -0
  314. package/dist/ee/extractors/soundcloud.js +34 -0
  315. package/dist/ee/extractors/sportsbetting.d.ts +2 -0
  316. package/dist/ee/extractors/sportsbetting.js +37 -0
  317. package/dist/ee/extractors/spotify.d.ts +2 -0
  318. package/dist/ee/extractors/spotify.js +34 -0
  319. package/dist/ee/extractors/stackoverflow.d.ts +2 -0
  320. package/dist/ee/extractors/stackoverflow.js +61 -0
  321. package/dist/ee/extractors/substack.d.ts +2 -0
  322. package/dist/ee/extractors/substack.js +115 -0
  323. package/dist/ee/extractors/substackroot.d.ts +2 -0
  324. package/dist/ee/extractors/substackroot.js +46 -0
  325. package/dist/ee/extractors/tiktok.d.ts +2 -0
  326. package/dist/ee/extractors/tiktok.js +29 -0
  327. package/dist/ee/extractors/tradingview.d.ts +2 -0
  328. package/dist/ee/extractors/tradingview.js +182 -0
  329. package/dist/ee/extractors/twitch.d.ts +2 -0
  330. package/dist/ee/extractors/twitch.js +36 -0
  331. package/dist/ee/extractors/twitter.d.ts +2 -0
  332. package/dist/ee/extractors/twitter.js +327 -0
  333. package/dist/ee/extractors/types.d.ts +14 -0
  334. package/dist/ee/extractors/types.js +1 -0
  335. package/dist/ee/extractors/walmart.d.ts +2 -0
  336. package/dist/ee/extractors/walmart.js +50 -0
  337. package/dist/ee/extractors/weather.d.ts +2 -0
  338. package/dist/ee/extractors/weather.js +133 -0
  339. package/dist/ee/extractors/wikipedia.d.ts +4 -0
  340. package/dist/ee/extractors/wikipedia.js +235 -0
  341. package/dist/ee/extractors/yelp.d.ts +2 -0
  342. package/dist/ee/extractors/yelp.js +216 -0
  343. package/dist/ee/extractors/youtube.d.ts +2 -0
  344. package/dist/ee/extractors/youtube.js +189 -0
  345. package/dist/ee/extractors/zillow.d.ts +54 -0
  346. package/dist/ee/extractors/zillow.js +247 -0
  347. package/dist/ee/extractors-re-export.d.ts +1 -0
  348. package/dist/ee/extractors-re-export.js +1 -0
  349. package/dist/ee/premium-hooks.d.ts +20 -0
  350. package/dist/ee/premium-hooks.js +50 -0
  351. package/dist/ee/spa-detection.d.ts +2 -0
  352. package/dist/ee/spa-detection.js +2 -0
  353. package/dist/ee/stability.d.ts +4 -0
  354. package/dist/ee/stability.js +29 -0
  355. package/dist/ee/swr-cache.d.ts +14 -0
  356. package/dist/ee/swr-cache.js +34 -0
  357. package/dist/index.d.ts +143 -0
  358. package/dist/index.js +291 -0
  359. package/dist/integrations/index.d.ts +2 -0
  360. package/dist/integrations/index.js +2 -0
  361. package/dist/integrations/langchain.d.ts +64 -0
  362. package/dist/integrations/langchain.js +115 -0
  363. package/dist/integrations/llamaindex.d.ts +50 -0
  364. package/dist/integrations/llamaindex.js +91 -0
  365. package/dist/mcp/handlers/act.d.ts +5 -0
  366. package/dist/mcp/handlers/act.js +34 -0
  367. package/dist/mcp/handlers/definitions.d.ts +6 -0
  368. package/dist/mcp/handlers/definitions.js +395 -0
  369. package/dist/mcp/handlers/extract.d.ts +7 -0
  370. package/dist/mcp/handlers/extract.js +135 -0
  371. package/dist/mcp/handlers/fetch.d.ts +6 -0
  372. package/dist/mcp/handlers/fetch.js +98 -0
  373. package/dist/mcp/handlers/find.d.ts +5 -0
  374. package/dist/mcp/handlers/find.js +137 -0
  375. package/dist/mcp/handlers/index.d.ts +13 -0
  376. package/dist/mcp/handlers/index.js +63 -0
  377. package/dist/mcp/handlers/legacy.d.ts +25 -0
  378. package/dist/mcp/handlers/legacy.js +450 -0
  379. package/dist/mcp/handlers/meta.d.ts +6 -0
  380. package/dist/mcp/handlers/meta.js +40 -0
  381. package/dist/mcp/handlers/monitor.d.ts +5 -0
  382. package/dist/mcp/handlers/monitor.js +41 -0
  383. package/dist/mcp/handlers/observe.d.ts +8 -0
  384. package/dist/mcp/handlers/observe.js +37 -0
  385. package/dist/mcp/handlers/read.d.ts +6 -0
  386. package/dist/mcp/handlers/read.js +78 -0
  387. package/dist/mcp/handlers/see.d.ts +5 -0
  388. package/dist/mcp/handlers/see.js +75 -0
  389. package/dist/mcp/handlers/types.d.ts +29 -0
  390. package/dist/mcp/handlers/types.js +28 -0
  391. package/dist/mcp/server.d.ts +7 -0
  392. package/dist/mcp/server.js +108 -0
  393. package/dist/mcp/smart-router.d.ts +23 -0
  394. package/dist/mcp/smart-router.js +178 -0
  395. package/dist/server/app.d.ts +14 -0
  396. package/dist/server/app.js +632 -0
  397. package/dist/server/auth-store.d.ts +28 -0
  398. package/dist/server/auth-store.js +88 -0
  399. package/dist/server/bull-queues.d.ts +60 -0
  400. package/dist/server/bull-queues.js +90 -0
  401. package/dist/server/email-service.d.ts +55 -0
  402. package/dist/server/email-service.js +291 -0
  403. package/dist/server/job-queue.d.ts +100 -0
  404. package/dist/server/job-queue.js +145 -0
  405. package/dist/server/logger.d.ts +10 -0
  406. package/dist/server/logger.js +37 -0
  407. package/dist/server/middleware/audit-log.d.ts +14 -0
  408. package/dist/server/middleware/audit-log.js +73 -0
  409. package/dist/server/middleware/auth.d.ts +35 -0
  410. package/dist/server/middleware/auth.js +225 -0
  411. package/dist/server/middleware/rate-limit.d.ts +50 -0
  412. package/dist/server/middleware/rate-limit.js +270 -0
  413. package/dist/server/middleware/scope-guard.d.ts +25 -0
  414. package/dist/server/middleware/scope-guard.js +45 -0
  415. package/dist/server/middleware/url-validator.d.ts +15 -0
  416. package/dist/server/middleware/url-validator.js +201 -0
  417. package/dist/server/openapi.yaml +6418 -0
  418. package/dist/server/pg-auth-store.d.ts +146 -0
  419. package/dist/server/pg-auth-store.js +576 -0
  420. package/dist/server/pg-job-queue.d.ts +59 -0
  421. package/dist/server/pg-job-queue.js +375 -0
  422. package/dist/server/routes/activity.d.ts +6 -0
  423. package/dist/server/routes/activity.js +79 -0
  424. package/dist/server/routes/admin-active.d.ts +7 -0
  425. package/dist/server/routes/admin-active.js +120 -0
  426. package/dist/server/routes/admin-stats.d.ts +7 -0
  427. package/dist/server/routes/admin-stats.js +176 -0
  428. package/dist/server/routes/agent.d.ts +24 -0
  429. package/dist/server/routes/agent.js +480 -0
  430. package/dist/server/routes/answer.d.ts +5 -0
  431. package/dist/server/routes/answer.js +125 -0
  432. package/dist/server/routes/ask.d.ts +28 -0
  433. package/dist/server/routes/ask.js +295 -0
  434. package/dist/server/routes/batch.d.ts +6 -0
  435. package/dist/server/routes/batch.js +493 -0
  436. package/dist/server/routes/cache-warm.d.ts +25 -0
  437. package/dist/server/routes/cache-warm.js +212 -0
  438. package/dist/server/routes/cli-usage.d.ts +6 -0
  439. package/dist/server/routes/cli-usage.js +127 -0
  440. package/dist/server/routes/compat.d.ts +23 -0
  441. package/dist/server/routes/compat.js +652 -0
  442. package/dist/server/routes/crawl.d.ts +13 -0
  443. package/dist/server/routes/crawl.js +287 -0
  444. package/dist/server/routes/deep-fetch.d.ts +8 -0
  445. package/dist/server/routes/deep-fetch.js +57 -0
  446. package/dist/server/routes/deep-research.d.ts +11 -0
  447. package/dist/server/routes/deep-research.js +232 -0
  448. package/dist/server/routes/demo.d.ts +24 -0
  449. package/dist/server/routes/demo.js +517 -0
  450. package/dist/server/routes/do.d.ts +8 -0
  451. package/dist/server/routes/do.js +72 -0
  452. package/dist/server/routes/extract.d.ts +14 -0
  453. package/dist/server/routes/extract.js +325 -0
  454. package/dist/server/routes/feed.d.ts +15 -0
  455. package/dist/server/routes/feed.js +311 -0
  456. package/dist/server/routes/fetch-queue.d.ts +13 -0
  457. package/dist/server/routes/fetch-queue.js +357 -0
  458. package/dist/server/routes/fetch.d.ts +7 -0
  459. package/dist/server/routes/fetch.js +1274 -0
  460. package/dist/server/routes/go.d.ts +14 -0
  461. package/dist/server/routes/go.js +81 -0
  462. package/dist/server/routes/health.d.ts +11 -0
  463. package/dist/server/routes/health.js +141 -0
  464. package/dist/server/routes/jobs.d.ts +7 -0
  465. package/dist/server/routes/jobs.js +574 -0
  466. package/dist/server/routes/map.d.ts +11 -0
  467. package/dist/server/routes/map.js +116 -0
  468. package/dist/server/routes/mcp.d.ts +14 -0
  469. package/dist/server/routes/mcp.js +197 -0
  470. package/dist/server/routes/metrics.d.ts +37 -0
  471. package/dist/server/routes/metrics.js +149 -0
  472. package/dist/server/routes/oauth.d.ts +9 -0
  473. package/dist/server/routes/oauth.js +396 -0
  474. package/dist/server/routes/playground.d.ts +17 -0
  475. package/dist/server/routes/playground.js +283 -0
  476. package/dist/server/routes/reader.d.ts +18 -0
  477. package/dist/server/routes/reader.js +192 -0
  478. package/dist/server/routes/research.d.ts +14 -0
  479. package/dist/server/routes/research.js +482 -0
  480. package/dist/server/routes/screenshot.d.ts +22 -0
  481. package/dist/server/routes/screenshot.js +820 -0
  482. package/dist/server/routes/search.d.ts +6 -0
  483. package/dist/server/routes/search.js +874 -0
  484. package/dist/server/routes/session.d.ts +17 -0
  485. package/dist/server/routes/session.js +548 -0
  486. package/dist/server/routes/share.d.ts +18 -0
  487. package/dist/server/routes/share.js +462 -0
  488. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  489. package/dist/server/routes/smart-search/handlers/cars.js +102 -0
  490. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  491. package/dist/server/routes/smart-search/handlers/flights.js +72 -0
  492. package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
  493. package/dist/server/routes/smart-search/handlers/general.js +717 -0
  494. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  495. package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
  496. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  497. package/dist/server/routes/smart-search/handlers/products.js +1309 -0
  498. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  499. package/dist/server/routes/smart-search/handlers/rental.js +154 -0
  500. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  501. package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
  502. package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
  503. package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
  504. package/dist/server/routes/smart-search/index.d.ts +19 -0
  505. package/dist/server/routes/smart-search/index.js +546 -0
  506. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  507. package/dist/server/routes/smart-search/intent.js +264 -0
  508. package/dist/server/routes/smart-search/llm.d.ts +16 -0
  509. package/dist/server/routes/smart-search/llm.js +70 -0
  510. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  511. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  512. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  513. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  514. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  515. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  516. package/dist/server/routes/smart-search/types.d.ts +81 -0
  517. package/dist/server/routes/smart-search/types.js +1 -0
  518. package/dist/server/routes/smart-search/utils.d.ts +20 -0
  519. package/dist/server/routes/smart-search/utils.js +146 -0
  520. package/dist/server/routes/stats.d.ts +6 -0
  521. package/dist/server/routes/stats.js +71 -0
  522. package/dist/server/routes/stripe.d.ts +15 -0
  523. package/dist/server/routes/stripe.js +296 -0
  524. package/dist/server/routes/transcript-export.d.ts +10 -0
  525. package/dist/server/routes/transcript-export.js +178 -0
  526. package/dist/server/routes/usage.d.ts +9 -0
  527. package/dist/server/routes/usage.js +279 -0
  528. package/dist/server/routes/users.d.ts +8 -0
  529. package/dist/server/routes/users.js +1867 -0
  530. package/dist/server/routes/watch.d.ts +15 -0
  531. package/dist/server/routes/watch.js +309 -0
  532. package/dist/server/routes/webhooks.d.ts +26 -0
  533. package/dist/server/routes/webhooks.js +170 -0
  534. package/dist/server/routes/youtube.d.ts +6 -0
  535. package/dist/server/routes/youtube.js +130 -0
  536. package/dist/server/sentry.d.ts +14 -0
  537. package/dist/server/sentry.js +104 -0
  538. package/dist/server/types.d.ts +15 -0
  539. package/dist/server/types.js +7 -0
  540. package/dist/server/utils/response.d.ts +44 -0
  541. package/dist/server/utils/response.js +69 -0
  542. package/dist/server/utils/sse.d.ts +22 -0
  543. package/dist/server/utils/sse.js +38 -0
  544. package/dist/types.d.ts +552 -0
  545. package/dist/types.js +39 -0
  546. package/llms.txt +105 -0
  547. package/package.json +189 -0
@@ -0,0 +1,80 @@
1
+ import { fetchJson } from './shared.js';
2
+ // ---------------------------------------------------------------------------
3
+ // 18. PyPI extractor
4
+ // ---------------------------------------------------------------------------
5
+ export async function pypiExtractor(_html, url) {
6
+ const urlObj = new URL(url);
7
+ const path = urlObj.pathname;
8
+ // Match /project/name or /project/name/version/
9
+ const packageMatch = path.match(/\/project\/([^/]+)/);
10
+ if (!packageMatch)
11
+ return null;
12
+ const packageName = packageMatch[1];
13
+ try {
14
+ const apiUrl = `https://pypi.org/pypi/${encodeURIComponent(packageName)}/json`;
15
+ const data = await fetchJson(apiUrl);
16
+ if (!data?.info)
17
+ return null;
18
+ const info = data.info;
19
+ const structured = {
20
+ title: `${info.name} ${info.version}`,
21
+ name: info.name,
22
+ version: info.version,
23
+ description: info.summary || '',
24
+ author: info.author || '',
25
+ authorEmail: info.author_email || '',
26
+ license: info.license || 'N/A',
27
+ homepage: info.home_page || info.project_url || null,
28
+ projectUrls: info.project_urls || {},
29
+ keywords: info.keywords ? info.keywords.split(/[,\s]+/).filter(Boolean) : [],
30
+ requiresPython: info.requires_python || '',
31
+ requiresDist: (info.requires_dist || []).slice(0, 20),
32
+ classifiers: (info.classifiers || []).slice(0, 10),
33
+ };
34
+ // Full description/README from PyPI (info.description is the full README in markdown)
35
+ const fullDescription = info.description && info.description.length > 100 &&
36
+ info.description !== 'UNKNOWN' && info.description !== info.summary
37
+ ? info.description.slice(0, 8000)
38
+ : null;
39
+ // Store full description in structured
40
+ structured.fullDescription = fullDescription;
41
+ const installCmd = `pip install ${info.name}`;
42
+ const keywordsLine = structured.keywords.length ? `\n**Keywords:** ${structured.keywords.join(', ')}` : '';
43
+ const pyVersionLine = structured.requiresPython ? `\n**Requires Python:** ${structured.requiresPython}` : '';
44
+ // Show all dependencies
45
+ const depsLine = structured.requiresDist.length
46
+ ? `\n\n## Dependencies\n\n${structured.requiresDist.map((d) => `- ${d}`).join('\n')}`
47
+ : '';
48
+ // Classifiers — extract useful ones (license, status, Python versions)
49
+ const usefulClassifiers = structured.classifiers.filter((c) => c.startsWith('Programming Language') || c.startsWith('License') || c.startsWith('Development Status'));
50
+ const classifiersSection = usefulClassifiers.length
51
+ ? `\n\n## Classifiers\n\n${usefulClassifiers.map((c) => `- ${c}`).join('\n')}`
52
+ : '';
53
+ // Find project URLs
54
+ const projectUrlLines = [];
55
+ for (const [label, u] of Object.entries(structured.projectUrls)) {
56
+ projectUrlLines.push(`- **${label}:** ${u}`);
57
+ }
58
+ // Full description section (package README from PyPI)
59
+ const descSection = fullDescription
60
+ ? `\n\n## Description\n\n${fullDescription}`
61
+ : '';
62
+ const cleanContent = `# 📦 ${info.name} ${info.version}
63
+
64
+ ${info.summary || ''}
65
+
66
+ \`\`\`
67
+ ${installCmd}
68
+ \`\`\`
69
+
70
+ **Author:** ${info.author || 'N/A'} | **License:** ${info.license || 'N/A'}${keywordsLine}${pyVersionLine}
71
+
72
+ ${projectUrlLines.length ? `## Links\n\n${projectUrlLines.join('\n')}\n` : ''}${depsLine}${classifiersSection}${descSection}`;
73
+ return { domain: 'pypi.org', type: 'package', structured, cleanContent };
74
+ }
75
+ catch (e) {
76
+ if (process.env.DEBUG)
77
+ console.debug('[webpeel]', 'PyPI API failed:', e instanceof Error ? e.message : e);
78
+ return null;
79
+ }
80
+ }
@@ -0,0 +1,2 @@
1
+ import type { DomainExtractResult } from './types.js';
2
+ export declare function redditExtractor(_html: string, url: string): Promise<DomainExtractResult | null>;
@@ -0,0 +1,438 @@
1
+ import { unixToIso, fetchJsonWithRetry } from './shared.js';
2
/**
 * Resolves a Reddit share link (a URL whose path contains `/s/`, e.g.
 * `/r/<subreddit>/s/CODE` or `/s/CODE`) to the URL it redirects to.
 *
 * A single GET request is issued and the `Location` target of a 3xx response is
 * returned; further redirect hops are not followed. The function is best-effort:
 * a malformed URL, an unsupported protocol, a network error, a timeout or a
 * non-redirect response all yield the original `url` unchanged.
 *
 * @param url Candidate share URL.
 * @returns The redirect target, or `url` itself when nothing could be resolved.
 */
async function resolveRedditShareUrl(url) {
    try {
        // Parse inside the try block so a malformed URL falls back instead of throwing.
        const urlObj = new URL(url);
        // Only /r/subreddit/s/CODE or /s/CODE style paths are share links.
        if (!urlObj.pathname.includes('/s/'))
            return url;
        const { default: https } = await import('https');
        const { default: http } = await import('http');
        // Use the parsed protocol (already lower-cased by URL) to choose the client.
        const client = urlObj.protocol === 'https:' ? https : http;
        // `await` keeps synchronous failures of client.get (raised inside the
        // executor, e.g. for an unsupported protocol) within reach of the catch below.
        return await new Promise((resolve) => {
            const req = client.get(url, {
                headers: { 'User-Agent': 'WebPeel/0.17.1 (web data platform; https://webpeel.dev) Node.js' },
                timeout: 10000,
            }, (res) => {
                const location = res.headers.location;
                const isRedirect = Boolean(res.statusCode) && res.statusCode >= 300 && res.statusCode < 400;
                let target = url; // No redirect: keep the original
                if (isRedirect && location) {
                    try {
                        // Handles both absolute and relative Location values (one hop only).
                        target = new URL(location, url).href;
                    }
                    catch {
                        target = url; // Unparseable Location header
                    }
                }
                resolve(target);
                res.resume(); // Drain the response so the socket is released
            });
            req.on('error', () => resolve(url));
            req.on('timeout', () => {
                req.destroy();
                resolve(url);
            });
        });
    }
    catch {
        return url; // On any error, return original URL
    }
}
39
+ function parseRedditComment(data, depth) {
40
+ if (!data || data.kind === 'more')
41
+ return null;
42
+ const d = data.kind === 't1' ? data.data : data;
43
+ if (!d || !d.body)
44
+ return null;
45
+ const replies = [];
46
+ if (depth > 0 && d.replies && d.replies.data?.children) {
47
+ for (const child of d.replies.data.children) {
48
+ const c = parseRedditComment(child, depth - 1);
49
+ if (c)
50
+ replies.push(c);
51
+ }
52
+ // Sort replies by score
53
+ replies.sort((a, b) => b.score - a.score);
54
+ replies.splice(3); // max 3 replies per level
55
+ }
56
+ return {
57
+ author: `u/${d.author || '[deleted]'}`,
58
+ text: d.body || '',
59
+ score: d.score || 0,
60
+ replies,
61
+ };
62
+ }
63
+ export async function redditExtractor(_html, url) {
64
+ // Resolve Reddit share URLs (/s/CODE) to actual post URLs before any processing
65
+ let workingUrl = url;
66
+ if (url.includes('/s/')) {
67
+ const resolved = await resolveRedditShareUrl(url);
68
+ if (resolved !== url) {
69
+ if (process.env.DEBUG)
70
+ console.debug('[webpeel]', `Reddit share URL resolved: ${url} → ${resolved}`);
71
+ workingUrl = resolved;
72
+ }
73
+ }
74
+ const urlObj = new URL(workingUrl);
75
+ const path = urlObj.pathname;
76
+ const domain = 'reddit.com';
77
+ // Normalize old.reddit.com → www.reddit.com for JSON API
78
+ const normalizedUrl = workingUrl.replace(/old\.reddit\.com/, 'www.reddit.com');
79
+ const REDDIT_UA = { 'User-Agent': 'WebPeel/0.17.1 (web data platform; https://webpeel.dev) Node.js' };
80
+ // Detect page type
81
+ const isPost = /\/r\/[^/]+\/comments\//.test(path) || /^\/comments\//.test(path);
82
+ const isGallery = /\/gallery\//.test(path);
83
+ // Subreddit with any sort/filter: /r/sub, /r/sub/, /r/sub/hot, /r/sub/top, /r/sub/new, /r/sub/rising
84
+ const isSubreddit = /^\/r\/[^/]+\/?$/.test(path) || /^\/r\/[^/]+\/(hot|new|top|rising|controversial|best)\/?$/.test(path);
85
+ const isUser = /^\/(u|user)\/[^/]+/.test(path);
86
+ const isSearch = /\/search\/?/.test(path);
87
+ // Home/popular/all pages
88
+ const isHomeListing = /^\/(hot|new|top|rising|controversial|best|popular|all)\/?$/.test(path) || path === '/' || path === '';
89
+ const type = isPost || isGallery ? 'post' : isSearch ? 'search' : isSubreddit ? 'subreddit' : isUser ? 'user' : isHomeListing ? 'listing' : 'listing';
90
+ if (isGallery) {
91
+ // Gallery posts: fetch the gallery JSON and extract the post data
92
+ const galleryJsonUrl = normalizedUrl.split('?')[0].replace(/\/?$/, '') + '.json?limit=25&sort=top';
93
+ const requestedGallerySub = path.match(/\/r\/([^/]+)/)?.[1] || 'unknown';
94
+ let galleryData;
95
+ try {
96
+ galleryData = await fetchJsonWithRetry(galleryJsonUrl, REDDIT_UA);
97
+ }
98
+ catch (e) {
99
+ return {
100
+ domain,
101
+ type: 'post',
102
+ structured: { error: 'Post not found or has been deleted', subreddit: `r/${requestedGallerySub}` },
103
+ cleanContent: `## ❌ Reddit Post Not Found\n\nThe post at r/${requestedGallerySub} could not be found. It may have been deleted or removed.`,
104
+ };
105
+ }
106
+ if (!Array.isArray(galleryData) || galleryData.length < 1) {
107
+ return {
108
+ domain,
109
+ type: 'post',
110
+ structured: { error: 'Post not found', subreddit: `r/${requestedGallerySub}` },
111
+ cleanContent: `## ❌ Reddit Post Not Found\n\nThe post at r/${requestedGallerySub} could not be found. It may have been deleted or removed.`,
112
+ };
113
+ }
114
+ const postData = galleryData[0]?.data?.children?.[0]?.data;
115
+ if (!postData) {
116
+ return {
117
+ domain,
118
+ type: 'post',
119
+ structured: { error: 'Post not found', subreddit: `r/${requestedGallerySub}` },
120
+ cleanContent: `## ❌ Reddit Post Not Found\n\nThe post at r/${requestedGallerySub} could not be found. It may have been deleted or removed.`,
121
+ };
122
+ }
123
+ // Validate subreddit matches the request
124
+ const actualGallerySub = postData.subreddit?.toLowerCase();
125
+ if (requestedGallerySub !== 'unknown' && actualGallerySub && requestedGallerySub.toLowerCase() !== actualGallerySub) {
126
+ return {
127
+ domain,
128
+ type: 'post',
129
+ structured: { error: 'Post not found in requested subreddit', requestedSubreddit: `r/${requestedGallerySub}`, actualSubreddit: `r/${actualGallerySub}` },
130
+ cleanContent: `## ❌ Reddit Post Not Found\n\nThe post was not found in r/${requestedGallerySub}. It may have been deleted or moved.`,
131
+ };
132
+ }
133
+ const structured = {
134
+ subreddit: `r/${postData.subreddit || ''}`,
135
+ title: postData.title || '',
136
+ author: `u/${postData.author || '[deleted]'}`,
137
+ score: postData.score ?? 0,
138
+ upvoteRatio: postData.upvote_ratio ?? 1,
139
+ url: postData.url || url,
140
+ selftext: postData.selftext || '',
141
+ commentCount: postData.num_comments ?? 0,
142
+ created: unixToIso(postData.created_utc),
143
+ flair: postData.link_flair_text || null,
144
+ comments: [],
145
+ isGallery: true,
146
+ };
147
+ const cleanContent = `## 📋 ${structured.subreddit}: ${structured.title}
148
+
149
+ **Posted by** ${structured.author} | Score: ${structured.score} | ${structured.commentCount} comments
150
+ *${structured.created}*
151
+
152
+ *(Gallery post)*`;
153
+ return { domain, type: 'post', structured, cleanContent };
154
+ }
155
+ if (isPost) {
156
+ // Fetch post data via Reddit JSON API
157
+ const jsonUrl = normalizedUrl.split('?')[0].replace(/\/?$/, '') + '.json?limit=25&sort=top';
158
+ const requestedPostSub = path.match(/\/r\/([^/]+)/)?.[1] || 'unknown';
159
+ let data;
160
+ try {
161
+ data = await fetchJsonWithRetry(jsonUrl, REDDIT_UA);
162
+ }
163
+ catch (e) {
164
+ // Post not found or API error — return a "not found" result
165
+ // instead of null (which would trigger browser fallback with wrong content)
166
+ return {
167
+ domain,
168
+ type: 'post',
169
+ structured: { error: 'Post not found or has been deleted', subreddit: `r/${requestedPostSub}` },
170
+ cleanContent: `## ❌ Reddit Post Not Found\n\nThe post at r/${requestedPostSub} could not be found. It may have been deleted or removed.`,
171
+ };
172
+ }
173
+ if (!Array.isArray(data) || data.length < 2) {
174
+ return {
175
+ domain,
176
+ type: 'post',
177
+ structured: { error: 'Post not found', subreddit: `r/${requestedPostSub}` },
178
+ cleanContent: `## ❌ Reddit Post Not Found\n\nThe post at r/${requestedPostSub} could not be found. It may have been deleted or removed.`,
179
+ };
180
+ }
181
+ const postData = data[0]?.data?.children?.[0]?.data;
182
+ if (!postData) {
183
+ return {
184
+ domain,
185
+ type: 'post',
186
+ structured: { error: 'Post not found', subreddit: `r/${requestedPostSub}` },
187
+ cleanContent: `## ❌ Reddit Post Not Found\n\nThe post at r/${requestedPostSub} could not be found. It may have been deleted or removed.`,
188
+ };
189
+ }
190
+ // CRITICAL: Validate subreddit matches the request (prevents cross-subreddit ID reuse exploits)
191
+ const actualPostSub = postData.subreddit?.toLowerCase();
192
+ if (requestedPostSub !== 'unknown' && actualPostSub && requestedPostSub.toLowerCase() !== actualPostSub) {
193
+ // Reddit reused the post ID in a different subreddit — return error instead of wrong content
194
+ return {
195
+ domain,
196
+ type: 'post',
197
+ structured: { error: 'Post not found in requested subreddit', requestedSubreddit: `r/${requestedPostSub}`, actualSubreddit: `r/${actualPostSub}` },
198
+ cleanContent: `## ❌ Reddit Post Not Found\n\nThe post was not found in r/${requestedPostSub}. It may have been deleted or moved.`,
199
+ };
200
+ }
201
+ // Parse top comments (max 20)
202
+ const commentChildren = data[1]?.data?.children || [];
203
+ const comments = [];
204
+ for (const child of commentChildren) {
205
+ const c = parseRedditComment(child, 3);
206
+ if (c)
207
+ comments.push(c);
208
+ if (comments.length >= 20)
209
+ break;
210
+ }
211
+ comments.sort((a, b) => b.score - a.score);
212
+ const structured = {
213
+ subreddit: `r/${postData.subreddit}`,
214
+ title: postData.title || '',
215
+ author: `u/${postData.author || '[deleted]'}`,
216
+ score: postData.score ?? 0,
217
+ upvoteRatio: postData.upvote_ratio ?? 1,
218
+ url: postData.url || url,
219
+ selftext: postData.selftext || '',
220
+ commentCount: postData.num_comments ?? 0,
221
+ created: unixToIso(postData.created_utc),
222
+ flair: postData.link_flair_text || null,
223
+ comments,
224
+ };
225
+ // Build clean markdown
226
+ const commentsMd = comments.slice(0, 10).map(c => {
227
+ const repliesMd = c.replies.slice(0, 2).map(r => ` > **${r.author}** (${r.score}): ${r.text.slice(0, 200)}`).join('\n');
228
+ return `**${c.author}** (score: ${c.score})\n${c.text.slice(0, 300)}${repliesMd ? '\n' + repliesMd : ''}`;
229
+ }).join('\n\n---\n\n');
230
+ const selftextSection = structured.selftext
231
+ ? `\n\n${structured.selftext.slice(0, 1000)}`
232
+ : '';
233
+ const cleanContent = `## 📋 ${structured.subreddit}: ${structured.title}
234
+
235
+ **Posted by** ${structured.author} | Score: ${structured.score} (${Math.round(structured.upvoteRatio * 100)}% upvoted) | ${structured.commentCount} comments
236
+ ${structured.flair ? `**Flair:** ${structured.flair}` : ''}
237
+ *${structured.created}*${selftextSection}
238
+
239
+ ---
240
+
241
+ ### Top Comments
242
+
243
+ ${commentsMd || '*No comments found.*'}`;
244
+ return { domain, type, structured, cleanContent };
245
+ }
246
+ if (isSearch) {
247
+ const subredditMatch = path.match(/\/r\/([^/]+)\/search/);
248
+ const subredditName = subredditMatch ? subredditMatch[1] : null;
249
+ // Extract search params from URL
250
+ const q = urlObj.searchParams.get('q') || '';
251
+ const sort = urlObj.searchParams.get('sort') || 'relevance';
252
+ const t = urlObj.searchParams.get('t') || 'all';
253
+ const after = urlObj.searchParams.get('after') || '';
254
+ const searchType = urlObj.searchParams.get('type') || '';
255
+ if (!q) {
256
+ return {
257
+ domain,
258
+ type: 'search',
259
+ structured: { error: 'No search query provided' },
260
+ cleanContent: '## ❌ No Search Query\n\nProvide a search query: /r/subreddit/search?q=your+query',
261
+ };
262
+ }
263
+ // Build JSON search URL
264
+ const searchParams = new URLSearchParams({
265
+ q,
266
+ sort,
267
+ t,
268
+ limit: '25',
269
+ raw_json: '1',
270
+ });
271
+ if (subredditName)
272
+ searchParams.set('restrict_sr', 'on');
273
+ if (after)
274
+ searchParams.set('after', after);
275
+ if (searchType)
276
+ searchParams.set('type', searchType);
277
+ const jsonUrl = subredditName
278
+ ? `https://www.reddit.com/r/${subredditName}/search.json?${searchParams}`
279
+ : `https://www.reddit.com/search.json?${searchParams}`;
280
+ let data;
281
+ try {
282
+ data = await fetchJsonWithRetry(jsonUrl, REDDIT_UA);
283
+ }
284
+ catch {
285
+ return {
286
+ domain,
287
+ type: 'search',
288
+ structured: { error: 'Search failed', query: q },
289
+ cleanContent: `## ❌ Reddit Search Failed\n\nCould not search for "${q}". Reddit may be rate-limiting.`,
290
+ };
291
+ }
292
+ // Handle comment search (type=comment returns t1 children)
293
+ if (searchType === 'comment') {
294
+ if (!data?.data?.children) {
295
+ return {
296
+ domain,
297
+ type: 'search',
298
+ structured: { query: q, comments: [], resultCount: 0 },
299
+ cleanContent: `## 🔍 Reddit Comment Search: "${q}"\n\nNo results found.`,
300
+ };
301
+ }
302
+ const comments = data.data.children
303
+ .filter((c) => c.kind === 't1')
304
+ .map((c) => parseRedditComment(c, 0))
305
+ .filter(Boolean);
306
+ const afterCursor = data.data.after || null;
307
+ const scope = subredditName ? `r/${subredditName}` : 'all of Reddit';
308
+ const structured = {
309
+ query: q,
310
+ scope,
311
+ sort,
312
+ timeFilter: t,
313
+ resultCount: comments.length,
314
+ comments,
315
+ after: afterCursor,
316
+ hasMore: !!afterCursor,
317
+ };
318
+ const commentsMd = comments.map((c, i) => `### ${i + 1}. **${c.author}** (score: ${c.score})\n${c.text.slice(0, 400)}${c.text.length > 400 ? '...' : ''}`).join('\n\n---\n\n');
319
+ const paginationNote = afterCursor
320
+ ? `\n\n*Page has more results. Add \`&after=${afterCursor}\` to get the next page.*`
321
+ : '';
322
+ const cleanContent = `## 🔍 Reddit Comment Search: "${q}" in ${scope}\n*Sorted by ${sort} | Time: ${t} | ${comments.length} results*\n\n${commentsMd}${paginationNote}`;
323
+ return { domain, type: 'search', structured, cleanContent };
324
+ }
325
+ if (!data?.data?.children) {
326
+ return {
327
+ domain,
328
+ type: 'search',
329
+ structured: { query: q, posts: [], resultCount: 0 },
330
+ cleanContent: `## 🔍 Reddit Search: "${q}"\n\nNo results found.`,
331
+ };
332
+ }
333
+ const posts = data.data.children
334
+ .filter((c) => c.kind === 't3')
335
+ .map((c) => {
336
+ const d = c.data;
337
+ return {
338
+ title: d.title || '',
339
+ author: `u/${d.author || '[deleted]'}`,
340
+ score: d.score ?? 0,
341
+ commentCount: d.num_comments ?? 0,
342
+ selftext: d.selftext || '',
343
+ subreddit: `r/${d.subreddit || ''}`,
344
+ url: `https://reddit.com${d.permalink}`,
345
+ created: unixToIso(d.created_utc),
346
+ flair: d.link_flair_text || null,
347
+ isNsfw: d.over_18 || false,
348
+ };
349
+ });
350
+ const afterCursor = data.data.after || null;
351
+ const scope = subredditName ? `r/${subredditName}` : 'all of Reddit';
352
+ const structured = {
353
+ query: q,
354
+ scope,
355
+ sort,
356
+ timeFilter: t,
357
+ resultCount: posts.length,
358
+ posts,
359
+ after: afterCursor, // pagination cursor
360
+ hasMore: !!afterCursor,
361
+ };
362
+ // Build clean markdown with full post text (not just snippets!)
363
+ const postsMd = posts.map((p, i) => {
364
+ const selftext = p.selftext
365
+ ? `\n${p.selftext.slice(0, 500)}${p.selftext.length > 500 ? '...' : ''}`
366
+ : '';
367
+ return `### ${i + 1}. ${p.title}\n**${p.author}** in ${p.subreddit} | ↑ ${p.score} | 💬 ${p.commentCount} comments${p.flair ? ` | ${p.flair}` : ''}\n*${p.created}*${selftext}\n[Read full thread →](${p.url})`;
368
+ }).join('\n\n---\n\n');
369
+ const paginationNote = afterCursor
370
+ ? `\n\n*Page has more results. Add \`&after=${afterCursor}\` to get the next page.*`
371
+ : '';
372
+ const cleanContent = `## 🔍 Reddit Search: "${q}" in ${scope}\n*Sorted by ${sort} | Time: ${t} | ${posts.length} results*\n\n${postsMd}${paginationNote}`;
373
+ return { domain, type: 'search', structured, cleanContent };
374
+ }
375
+ if (isSubreddit) {
376
+ // Fetch subreddit listing
377
+ // Preserve query params (especially t=day, t=week etc. for sorted views)
378
+ const queryString = urlObj.search || '';
379
+ const sortMatch = path.match(/\/r\/[^/]+\/(hot|new|top|rising|controversial|best)/);
380
+ const sortPath = sortMatch ? `/${sortMatch[1]}` : '';
381
+ const baseSubUrl = normalizedUrl.match(/\/r\/[^/]+/)?.[0] || normalizedUrl.split('?')[0];
382
+ const jsonUrl = `https://www.reddit.com${baseSubUrl}${sortPath}.json?limit=15${queryString ? '&' + queryString.slice(1) : ''}`;
383
+ const data = await fetchJsonWithRetry(jsonUrl, REDDIT_UA);
384
+ if (!data?.data?.children)
385
+ return null;
386
+ const posts = data.data.children
387
+ .filter((c) => c.kind === 't3')
388
+ .map((c) => {
389
+ const d = c.data;
390
+ return {
391
+ title: d.title || '',
392
+ author: `u/${d.author || '[deleted]'}`,
393
+ score: d.score ?? 0,
394
+ commentCount: d.num_comments ?? 0,
395
+ url: `https://reddit.com${d.permalink}`,
396
+ flair: d.link_flair_text || null,
397
+ };
398
+ });
399
+ const subredditName = posts[0]?.url?.match(/\/r\/([^/]+)\//)?.[1] || path.match(/\/r\/([^/]+)/)?.[1] || '';
400
+ const structured = { title: `r/${subredditName} — Top Posts`, subreddit: `r/${subredditName}`, posts };
401
+ const cleanContent = `## 📋 r/${subredditName} — Hot Posts
402
+
403
+ ${posts.map((p, i) => `${i + 1}. **${p.title}**\n ${p.author} | ↑ ${p.score} | 💬 ${p.commentCount}${p.flair ? ` | ${p.flair}` : ''}\n ${p.url}`).join('\n\n')}`;
404
+ return { domain, type, structured, cleanContent };
405
+ }
406
+ if (isHomeListing) {
407
+ const sortMatch = path.match(/\/(hot|new|top|rising|controversial|best|popular|all)/);
408
+ const sortType = sortMatch ? sortMatch[1] : 'hot';
409
+ const queryString = urlObj.search || '';
410
+ const jsonUrl = `https://www.reddit.com/${sortType}.json?limit=15${queryString ? '&' + queryString.slice(1) : ''}`;
411
+ const data = await fetchJsonWithRetry(jsonUrl, REDDIT_UA);
412
+ if (!data?.data?.children)
413
+ return null;
414
+ const posts = data.data.children
415
+ .filter((c) => c.kind === 't3')
416
+ .map((c) => {
417
+ const d = c.data;
418
+ return {
419
+ title: d.title || '',
420
+ author: `u/${d.author || '[deleted]'}`,
421
+ score: d.score ?? 0,
422
+ commentCount: d.num_comments ?? 0,
423
+ url: `https://reddit.com${d.permalink}`,
424
+ subreddit: `r/${d.subreddit}`,
425
+ flair: d.link_flair_text || null,
426
+ };
427
+ });
428
+ const structured = { title: `Reddit — ${sortType.charAt(0).toUpperCase() + sortType.slice(1)} Posts`, sortType, posts, postCount: posts.length };
429
+ const listMd = posts.map((p, i) => {
430
+ const flairTag = p.flair ? ` | ${p.flair}` : '';
431
+ return `${i + 1}. **${p.title}**\n ${p.author} in ${p.subreddit} | ↑ ${p.score} | 💬 ${p.commentCount}${flairTag}\n ${p.url}`;
432
+ }).join('\n\n');
433
+ const cleanContent = `## 📋 Reddit — ${sortType.charAt(0).toUpperCase() + sortType.slice(1)} Posts\n\n${listMd}`;
434
+ return { domain: 'reddit.com', type: 'listing', structured, cleanContent };
435
+ }
436
+ // User or other — fall back to null (let normal HTML extraction handle it)
437
+ return null;
438
+ }
@@ -0,0 +1,2 @@
1
+ import type { DomainExtractResult } from './types.js';
2
+ export declare function redfinExtractor(_html: string, url: string): Promise<DomainExtractResult | null>;