@iflow-mcp/jakeliume-webpeel 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (547) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +313 -0
  3. package/dist/cache.d.ts +30 -0
  4. package/dist/cache.js +139 -0
  5. package/dist/cli/commands/auth.d.ts +5 -0
  6. package/dist/cli/commands/auth.js +411 -0
  7. package/dist/cli/commands/doctor.d.ts +37 -0
  8. package/dist/cli/commands/doctor.js +371 -0
  9. package/dist/cli/commands/fetch.d.ts +6 -0
  10. package/dist/cli/commands/fetch.js +1345 -0
  11. package/dist/cli/commands/guide.d.ts +2 -0
  12. package/dist/cli/commands/guide.js +183 -0
  13. package/dist/cli/commands/interact.d.ts +5 -0
  14. package/dist/cli/commands/interact.js +840 -0
  15. package/dist/cli/commands/jobs.d.ts +5 -0
  16. package/dist/cli/commands/jobs.js +997 -0
  17. package/dist/cli/commands/monitor.d.ts +12 -0
  18. package/dist/cli/commands/monitor.js +197 -0
  19. package/dist/cli/commands/observe.d.ts +12 -0
  20. package/dist/cli/commands/observe.js +158 -0
  21. package/dist/cli/commands/screenshot.d.ts +5 -0
  22. package/dist/cli/commands/screenshot.js +282 -0
  23. package/dist/cli/commands/search.d.ts +5 -0
  24. package/dist/cli/commands/search.js +1021 -0
  25. package/dist/cli/commands/setup.d.ts +13 -0
  26. package/dist/cli/commands/setup.js +244 -0
  27. package/dist/cli/commands/skill.d.ts +15 -0
  28. package/dist/cli/commands/skill.js +195 -0
  29. package/dist/cli/utils.d.ts +84 -0
  30. package/dist/cli/utils.js +806 -0
  31. package/dist/cli-auth.d.ts +75 -0
  32. package/dist/cli-auth.js +369 -0
  33. package/dist/cli.d.ts +17 -0
  34. package/dist/cli.js +99 -0
  35. package/dist/core/actions.d.ts +69 -0
  36. package/dist/core/actions.js +495 -0
  37. package/dist/core/agent.d.ts +98 -0
  38. package/dist/core/agent.js +558 -0
  39. package/dist/core/answer.d.ts +42 -0
  40. package/dist/core/answer.js +395 -0
  41. package/dist/core/application-tracker.d.ts +84 -0
  42. package/dist/core/application-tracker.js +184 -0
  43. package/dist/core/apply.d.ts +162 -0
  44. package/dist/core/apply.js +816 -0
  45. package/dist/core/auth-detection.d.ts +35 -0
  46. package/dist/core/auth-detection.js +358 -0
  47. package/dist/core/auto-extract.d.ts +82 -0
  48. package/dist/core/auto-extract.js +604 -0
  49. package/dist/core/auto-interact.d.ts +23 -0
  50. package/dist/core/auto-interact.js +246 -0
  51. package/dist/core/bm25-filter.d.ts +66 -0
  52. package/dist/core/bm25-filter.js +288 -0
  53. package/dist/core/branding.d.ts +54 -0
  54. package/dist/core/branding.js +234 -0
  55. package/dist/core/browser-fetch.d.ts +323 -0
  56. package/dist/core/browser-fetch.js +1600 -0
  57. package/dist/core/browser-pool.d.ts +91 -0
  58. package/dist/core/browser-pool.js +550 -0
  59. package/dist/core/budget.d.ts +42 -0
  60. package/dist/core/budget.js +324 -0
  61. package/dist/core/business-intel.d.ts +47 -0
  62. package/dist/core/business-intel.js +279 -0
  63. package/dist/core/cache.d.ts +13 -0
  64. package/dist/core/cache.js +121 -0
  65. package/dist/core/cf-worker-proxy.d.ts +32 -0
  66. package/dist/core/cf-worker-proxy.js +87 -0
  67. package/dist/core/challenge-detection.d.ts +26 -0
  68. package/dist/core/challenge-detection.js +468 -0
  69. package/dist/core/change-tracking.d.ts +75 -0
  70. package/dist/core/change-tracking.js +276 -0
  71. package/dist/core/chunker.d.ts +46 -0
  72. package/dist/core/chunker.js +249 -0
  73. package/dist/core/chunking.d.ts +42 -0
  74. package/dist/core/chunking.js +181 -0
  75. package/dist/core/circuit-breaker.d.ts +44 -0
  76. package/dist/core/circuit-breaker.js +85 -0
  77. package/dist/core/content-pruner.d.ts +47 -0
  78. package/dist/core/content-pruner.js +425 -0
  79. package/dist/core/cookie-cache.d.ts +60 -0
  80. package/dist/core/cookie-cache.js +163 -0
  81. package/dist/core/crawl-checkpoint.d.ts +54 -0
  82. package/dist/core/crawl-checkpoint.js +104 -0
  83. package/dist/core/crawler.d.ts +84 -0
  84. package/dist/core/crawler.js +349 -0
  85. package/dist/core/cross-verify.d.ts +27 -0
  86. package/dist/core/cross-verify.js +93 -0
  87. package/dist/core/deep-fetch.d.ts +74 -0
  88. package/dist/core/deep-fetch.js +405 -0
  89. package/dist/core/deep-research.d.ts +141 -0
  90. package/dist/core/deep-research.js +972 -0
  91. package/dist/core/design-analysis.d.ts +70 -0
  92. package/dist/core/design-analysis.js +490 -0
  93. package/dist/core/design-compare.d.ts +38 -0
  94. package/dist/core/design-compare.js +264 -0
  95. package/dist/core/diff.d.ts +61 -0
  96. package/dist/core/diff.js +289 -0
  97. package/dist/core/dns-cache.d.ts +20 -0
  98. package/dist/core/dns-cache.js +198 -0
  99. package/dist/core/documents.d.ts +23 -0
  100. package/dist/core/documents.js +123 -0
  101. package/dist/core/domain-memory.d.ts +66 -0
  102. package/dist/core/domain-memory.js +163 -0
  103. package/dist/core/domain-verify.d.ts +40 -0
  104. package/dist/core/domain-verify.js +379 -0
  105. package/dist/core/engine-ranker.d.ts +112 -0
  106. package/dist/core/engine-ranker.js +395 -0
  107. package/dist/core/extract-inline.d.ts +38 -0
  108. package/dist/core/extract-inline.js +215 -0
  109. package/dist/core/extract-listings.d.ts +38 -0
  110. package/dist/core/extract-listings.js +461 -0
  111. package/dist/core/extract.d.ts +9 -0
  112. package/dist/core/extract.js +139 -0
  113. package/dist/core/fetch-cache.d.ts +57 -0
  114. package/dist/core/fetch-cache.js +95 -0
  115. package/dist/core/fetcher.d.ts +13 -0
  116. package/dist/core/fetcher.js +12 -0
  117. package/dist/core/google-cache.d.ts +29 -0
  118. package/dist/core/google-cache.js +180 -0
  119. package/dist/core/google-serp-parser.d.ts +82 -0
  120. package/dist/core/google-serp-parser.js +287 -0
  121. package/dist/core/hotel-search.d.ts +122 -0
  122. package/dist/core/hotel-search.js +382 -0
  123. package/dist/core/http-fetch.d.ts +72 -0
  124. package/dist/core/http-fetch.js +820 -0
  125. package/dist/core/human.d.ts +175 -0
  126. package/dist/core/human.js +680 -0
  127. package/dist/core/image-caption.d.ts +44 -0
  128. package/dist/core/image-caption.js +271 -0
  129. package/dist/core/jobs.d.ts +75 -0
  130. package/dist/core/jobs.js +634 -0
  131. package/dist/core/json-ld.d.ts +15 -0
  132. package/dist/core/json-ld.js +617 -0
  133. package/dist/core/language-detect.d.ts +18 -0
  134. package/dist/core/language-detect.js +135 -0
  135. package/dist/core/links.d.ts +10 -0
  136. package/dist/core/links.js +44 -0
  137. package/dist/core/llm-extract.d.ts +71 -0
  138. package/dist/core/llm-extract.js +507 -0
  139. package/dist/core/llm-provider.d.ts +100 -0
  140. package/dist/core/llm-provider.js +702 -0
  141. package/dist/core/local-search.d.ts +60 -0
  142. package/dist/core/local-search.js +308 -0
  143. package/dist/core/logger.d.ts +28 -0
  144. package/dist/core/logger.js +104 -0
  145. package/dist/core/map.d.ts +33 -0
  146. package/dist/core/map.js +127 -0
  147. package/dist/core/markdown.d.ts +92 -0
  148. package/dist/core/markdown.js +809 -0
  149. package/dist/core/metadata.d.ts +34 -0
  150. package/dist/core/metadata.js +422 -0
  151. package/dist/core/observe.d.ts +113 -0
  152. package/dist/core/observe.js +395 -0
  153. package/dist/core/ocr.d.ts +12 -0
  154. package/dist/core/ocr.js +33 -0
  155. package/dist/core/paginate.d.ts +31 -0
  156. package/dist/core/paginate.js +106 -0
  157. package/dist/core/pdf.d.ts +8 -0
  158. package/dist/core/pdf.js +25 -0
  159. package/dist/core/peel-tls.d.ts +25 -0
  160. package/dist/core/peel-tls.js +220 -0
  161. package/dist/core/pipeline.d.ts +132 -0
  162. package/dist/core/pipeline.js +1666 -0
  163. package/dist/core/profiles.d.ts +61 -0
  164. package/dist/core/profiles.js +350 -0
  165. package/dist/core/prompt-guard.d.ts +30 -0
  166. package/dist/core/prompt-guard.js +119 -0
  167. package/dist/core/proxy-config.d.ts +90 -0
  168. package/dist/core/proxy-config.js +172 -0
  169. package/dist/core/quick-answer.d.ts +53 -0
  170. package/dist/core/quick-answer.js +833 -0
  171. package/dist/core/rate-governor.d.ts +80 -0
  172. package/dist/core/rate-governor.js +238 -0
  173. package/dist/core/readability.d.ts +57 -0
  174. package/dist/core/readability.js +533 -0
  175. package/dist/core/research.d.ts +66 -0
  176. package/dist/core/research.js +270 -0
  177. package/dist/core/retry.d.ts +60 -0
  178. package/dist/core/retry.js +119 -0
  179. package/dist/core/safe-browsing.d.ts +30 -0
  180. package/dist/core/safe-browsing.js +206 -0
  181. package/dist/core/schema-extraction.d.ts +66 -0
  182. package/dist/core/schema-extraction.js +352 -0
  183. package/dist/core/schema-postprocess.d.ts +32 -0
  184. package/dist/core/schema-postprocess.js +469 -0
  185. package/dist/core/schema-templates.d.ts +19 -0
  186. package/dist/core/schema-templates.js +143 -0
  187. package/dist/core/screenshot.d.ts +224 -0
  188. package/dist/core/screenshot.js +207 -0
  189. package/dist/core/search-engines.d.ts +25 -0
  190. package/dist/core/search-engines.js +182 -0
  191. package/dist/core/search-provider.d.ts +243 -0
  192. package/dist/core/search-provider.js +1629 -0
  193. package/dist/core/searxng-provider.d.ts +35 -0
  194. package/dist/core/searxng-provider.js +105 -0
  195. package/dist/core/selective-evidence.d.ts +151 -0
  196. package/dist/core/selective-evidence.js +389 -0
  197. package/dist/core/site-search.d.ts +44 -0
  198. package/dist/core/site-search.js +252 -0
  199. package/dist/core/sitemap.d.ts +23 -0
  200. package/dist/core/sitemap.js +105 -0
  201. package/dist/core/source-credibility.d.ts +29 -0
  202. package/dist/core/source-credibility.js +584 -0
  203. package/dist/core/source-scoring.d.ts +166 -0
  204. package/dist/core/source-scoring.js +396 -0
  205. package/dist/core/stemmer.d.ts +38 -0
  206. package/dist/core/stemmer.js +509 -0
  207. package/dist/core/strategies.d.ts +104 -0
  208. package/dist/core/strategies.js +1044 -0
  209. package/dist/core/strategy-hooks.d.ts +145 -0
  210. package/dist/core/strategy-hooks.js +74 -0
  211. package/dist/core/structured-extract.d.ts +43 -0
  212. package/dist/core/structured-extract.js +550 -0
  213. package/dist/core/summarize.d.ts +17 -0
  214. package/dist/core/summarize.js +78 -0
  215. package/dist/core/synonyms.d.ts +42 -0
  216. package/dist/core/synonyms.js +184 -0
  217. package/dist/core/system-monitor.d.ts +61 -0
  218. package/dist/core/system-monitor.js +133 -0
  219. package/dist/core/table-format.d.ts +30 -0
  220. package/dist/core/table-format.js +146 -0
  221. package/dist/core/threat-feeds.d.ts +23 -0
  222. package/dist/core/threat-feeds.js +104 -0
  223. package/dist/core/timing.d.ts +21 -0
  224. package/dist/core/timing.js +33 -0
  225. package/dist/core/transcript-export.d.ts +47 -0
  226. package/dist/core/transcript-export.js +107 -0
  227. package/dist/core/user-agents.d.ts +82 -0
  228. package/dist/core/user-agents.js +239 -0
  229. package/dist/core/vertical-search.d.ts +54 -0
  230. package/dist/core/vertical-search.js +158 -0
  231. package/dist/core/watch-manager.d.ts +175 -0
  232. package/dist/core/watch-manager.js +416 -0
  233. package/dist/core/watch.d.ts +101 -0
  234. package/dist/core/watch.js +389 -0
  235. package/dist/core/youtube.d.ts +130 -0
  236. package/dist/core/youtube.js +1175 -0
  237. package/dist/ee/challenge-re-export.d.ts +1 -0
  238. package/dist/ee/challenge-re-export.js +1 -0
  239. package/dist/ee/challenge-solver.d.ts +72 -0
  240. package/dist/ee/challenge-solver.js +720 -0
  241. package/dist/ee/domain-extractors.d.ts +8 -0
  242. package/dist/ee/domain-extractors.js +8 -0
  243. package/dist/ee/domain-intel.d.ts +16 -0
  244. package/dist/ee/domain-intel.js +133 -0
  245. package/dist/ee/extractors/allrecipes.d.ts +2 -0
  246. package/dist/ee/extractors/allrecipes.js +120 -0
  247. package/dist/ee/extractors/amazon.d.ts +2 -0
  248. package/dist/ee/extractors/amazon.js +78 -0
  249. package/dist/ee/extractors/arxiv.d.ts +2 -0
  250. package/dist/ee/extractors/arxiv.js +137 -0
  251. package/dist/ee/extractors/bestbuy.d.ts +2 -0
  252. package/dist/ee/extractors/bestbuy.js +78 -0
  253. package/dist/ee/extractors/carscom.d.ts +2 -0
  254. package/dist/ee/extractors/carscom.js +121 -0
  255. package/dist/ee/extractors/coingecko.d.ts +2 -0
  256. package/dist/ee/extractors/coingecko.js +134 -0
  257. package/dist/ee/extractors/craigslist.d.ts +2 -0
  258. package/dist/ee/extractors/craigslist.js +92 -0
  259. package/dist/ee/extractors/devto.d.ts +2 -0
  260. package/dist/ee/extractors/devto.js +135 -0
  261. package/dist/ee/extractors/ebay.d.ts +2 -0
  262. package/dist/ee/extractors/ebay.js +90 -0
  263. package/dist/ee/extractors/espn.d.ts +2 -0
  264. package/dist/ee/extractors/espn.js +260 -0
  265. package/dist/ee/extractors/etsy.d.ts +2 -0
  266. package/dist/ee/extractors/etsy.js +52 -0
  267. package/dist/ee/extractors/facebook.d.ts +2 -0
  268. package/dist/ee/extractors/facebook.js +46 -0
  269. package/dist/ee/extractors/github.d.ts +2 -0
  270. package/dist/ee/extractors/github.js +196 -0
  271. package/dist/ee/extractors/google-flights.d.ts +2 -0
  272. package/dist/ee/extractors/google-flights.js +176 -0
  273. package/dist/ee/extractors/hackernews.d.ts +2 -0
  274. package/dist/ee/extractors/hackernews.js +147 -0
  275. package/dist/ee/extractors/imdb.d.ts +2 -0
  276. package/dist/ee/extractors/imdb.js +172 -0
  277. package/dist/ee/extractors/index.d.ts +26 -0
  278. package/dist/ee/extractors/index.js +247 -0
  279. package/dist/ee/extractors/instagram.d.ts +2 -0
  280. package/dist/ee/extractors/instagram.js +102 -0
  281. package/dist/ee/extractors/kalshi.d.ts +2 -0
  282. package/dist/ee/extractors/kalshi.js +121 -0
  283. package/dist/ee/extractors/kayak-cars.d.ts +2 -0
  284. package/dist/ee/extractors/kayak-cars.js +270 -0
  285. package/dist/ee/extractors/linkedin.d.ts +2 -0
  286. package/dist/ee/extractors/linkedin.js +113 -0
  287. package/dist/ee/extractors/medium.d.ts +2 -0
  288. package/dist/ee/extractors/medium.js +130 -0
  289. package/dist/ee/extractors/news.d.ts +4 -0
  290. package/dist/ee/extractors/news.js +173 -0
  291. package/dist/ee/extractors/npm.d.ts +2 -0
  292. package/dist/ee/extractors/npm.js +86 -0
  293. package/dist/ee/extractors/pdf.d.ts +2 -0
  294. package/dist/ee/extractors/pdf.js +108 -0
  295. package/dist/ee/extractors/pinterest.d.ts +2 -0
  296. package/dist/ee/extractors/pinterest.js +34 -0
  297. package/dist/ee/extractors/polymarket.d.ts +2 -0
  298. package/dist/ee/extractors/polymarket.js +358 -0
  299. package/dist/ee/extractors/producthunt.d.ts +2 -0
  300. package/dist/ee/extractors/producthunt.js +88 -0
  301. package/dist/ee/extractors/pubmed.d.ts +2 -0
  302. package/dist/ee/extractors/pubmed.js +162 -0
  303. package/dist/ee/extractors/pypi.d.ts +2 -0
  304. package/dist/ee/extractors/pypi.js +80 -0
  305. package/dist/ee/extractors/reddit.d.ts +2 -0
  306. package/dist/ee/extractors/reddit.js +438 -0
  307. package/dist/ee/extractors/redfin.d.ts +2 -0
  308. package/dist/ee/extractors/redfin.js +156 -0
  309. package/dist/ee/extractors/semanticscholar.d.ts +2 -0
  310. package/dist/ee/extractors/semanticscholar.js +131 -0
  311. package/dist/ee/extractors/shared.d.ts +12 -0
  312. package/dist/ee/extractors/shared.js +76 -0
  313. package/dist/ee/extractors/soundcloud.d.ts +2 -0
  314. package/dist/ee/extractors/soundcloud.js +34 -0
  315. package/dist/ee/extractors/sportsbetting.d.ts +2 -0
  316. package/dist/ee/extractors/sportsbetting.js +37 -0
  317. package/dist/ee/extractors/spotify.d.ts +2 -0
  318. package/dist/ee/extractors/spotify.js +34 -0
  319. package/dist/ee/extractors/stackoverflow.d.ts +2 -0
  320. package/dist/ee/extractors/stackoverflow.js +61 -0
  321. package/dist/ee/extractors/substack.d.ts +2 -0
  322. package/dist/ee/extractors/substack.js +115 -0
  323. package/dist/ee/extractors/substackroot.d.ts +2 -0
  324. package/dist/ee/extractors/substackroot.js +46 -0
  325. package/dist/ee/extractors/tiktok.d.ts +2 -0
  326. package/dist/ee/extractors/tiktok.js +29 -0
  327. package/dist/ee/extractors/tradingview.d.ts +2 -0
  328. package/dist/ee/extractors/tradingview.js +182 -0
  329. package/dist/ee/extractors/twitch.d.ts +2 -0
  330. package/dist/ee/extractors/twitch.js +36 -0
  331. package/dist/ee/extractors/twitter.d.ts +2 -0
  332. package/dist/ee/extractors/twitter.js +327 -0
  333. package/dist/ee/extractors/types.d.ts +14 -0
  334. package/dist/ee/extractors/types.js +1 -0
  335. package/dist/ee/extractors/walmart.d.ts +2 -0
  336. package/dist/ee/extractors/walmart.js +50 -0
  337. package/dist/ee/extractors/weather.d.ts +2 -0
  338. package/dist/ee/extractors/weather.js +133 -0
  339. package/dist/ee/extractors/wikipedia.d.ts +4 -0
  340. package/dist/ee/extractors/wikipedia.js +235 -0
  341. package/dist/ee/extractors/yelp.d.ts +2 -0
  342. package/dist/ee/extractors/yelp.js +216 -0
  343. package/dist/ee/extractors/youtube.d.ts +2 -0
  344. package/dist/ee/extractors/youtube.js +189 -0
  345. package/dist/ee/extractors/zillow.d.ts +54 -0
  346. package/dist/ee/extractors/zillow.js +247 -0
  347. package/dist/ee/extractors-re-export.d.ts +1 -0
  348. package/dist/ee/extractors-re-export.js +1 -0
  349. package/dist/ee/premium-hooks.d.ts +20 -0
  350. package/dist/ee/premium-hooks.js +50 -0
  351. package/dist/ee/spa-detection.d.ts +2 -0
  352. package/dist/ee/spa-detection.js +2 -0
  353. package/dist/ee/stability.d.ts +4 -0
  354. package/dist/ee/stability.js +29 -0
  355. package/dist/ee/swr-cache.d.ts +14 -0
  356. package/dist/ee/swr-cache.js +34 -0
  357. package/dist/index.d.ts +143 -0
  358. package/dist/index.js +291 -0
  359. package/dist/integrations/index.d.ts +2 -0
  360. package/dist/integrations/index.js +2 -0
  361. package/dist/integrations/langchain.d.ts +64 -0
  362. package/dist/integrations/langchain.js +115 -0
  363. package/dist/integrations/llamaindex.d.ts +50 -0
  364. package/dist/integrations/llamaindex.js +91 -0
  365. package/dist/mcp/handlers/act.d.ts +5 -0
  366. package/dist/mcp/handlers/act.js +34 -0
  367. package/dist/mcp/handlers/definitions.d.ts +6 -0
  368. package/dist/mcp/handlers/definitions.js +395 -0
  369. package/dist/mcp/handlers/extract.d.ts +7 -0
  370. package/dist/mcp/handlers/extract.js +135 -0
  371. package/dist/mcp/handlers/fetch.d.ts +6 -0
  372. package/dist/mcp/handlers/fetch.js +98 -0
  373. package/dist/mcp/handlers/find.d.ts +5 -0
  374. package/dist/mcp/handlers/find.js +137 -0
  375. package/dist/mcp/handlers/index.d.ts +13 -0
  376. package/dist/mcp/handlers/index.js +63 -0
  377. package/dist/mcp/handlers/legacy.d.ts +25 -0
  378. package/dist/mcp/handlers/legacy.js +450 -0
  379. package/dist/mcp/handlers/meta.d.ts +6 -0
  380. package/dist/mcp/handlers/meta.js +40 -0
  381. package/dist/mcp/handlers/monitor.d.ts +5 -0
  382. package/dist/mcp/handlers/monitor.js +41 -0
  383. package/dist/mcp/handlers/observe.d.ts +8 -0
  384. package/dist/mcp/handlers/observe.js +37 -0
  385. package/dist/mcp/handlers/read.d.ts +6 -0
  386. package/dist/mcp/handlers/read.js +78 -0
  387. package/dist/mcp/handlers/see.d.ts +5 -0
  388. package/dist/mcp/handlers/see.js +75 -0
  389. package/dist/mcp/handlers/types.d.ts +29 -0
  390. package/dist/mcp/handlers/types.js +28 -0
  391. package/dist/mcp/server.d.ts +7 -0
  392. package/dist/mcp/server.js +108 -0
  393. package/dist/mcp/smart-router.d.ts +23 -0
  394. package/dist/mcp/smart-router.js +178 -0
  395. package/dist/server/app.d.ts +14 -0
  396. package/dist/server/app.js +632 -0
  397. package/dist/server/auth-store.d.ts +28 -0
  398. package/dist/server/auth-store.js +88 -0
  399. package/dist/server/bull-queues.d.ts +60 -0
  400. package/dist/server/bull-queues.js +90 -0
  401. package/dist/server/email-service.d.ts +55 -0
  402. package/dist/server/email-service.js +291 -0
  403. package/dist/server/job-queue.d.ts +100 -0
  404. package/dist/server/job-queue.js +145 -0
  405. package/dist/server/logger.d.ts +10 -0
  406. package/dist/server/logger.js +37 -0
  407. package/dist/server/middleware/audit-log.d.ts +14 -0
  408. package/dist/server/middleware/audit-log.js +73 -0
  409. package/dist/server/middleware/auth.d.ts +35 -0
  410. package/dist/server/middleware/auth.js +225 -0
  411. package/dist/server/middleware/rate-limit.d.ts +50 -0
  412. package/dist/server/middleware/rate-limit.js +270 -0
  413. package/dist/server/middleware/scope-guard.d.ts +25 -0
  414. package/dist/server/middleware/scope-guard.js +45 -0
  415. package/dist/server/middleware/url-validator.d.ts +15 -0
  416. package/dist/server/middleware/url-validator.js +201 -0
  417. package/dist/server/openapi.yaml +6418 -0
  418. package/dist/server/pg-auth-store.d.ts +146 -0
  419. package/dist/server/pg-auth-store.js +576 -0
  420. package/dist/server/pg-job-queue.d.ts +59 -0
  421. package/dist/server/pg-job-queue.js +375 -0
  422. package/dist/server/routes/activity.d.ts +6 -0
  423. package/dist/server/routes/activity.js +79 -0
  424. package/dist/server/routes/admin-active.d.ts +7 -0
  425. package/dist/server/routes/admin-active.js +120 -0
  426. package/dist/server/routes/admin-stats.d.ts +7 -0
  427. package/dist/server/routes/admin-stats.js +176 -0
  428. package/dist/server/routes/agent.d.ts +24 -0
  429. package/dist/server/routes/agent.js +480 -0
  430. package/dist/server/routes/answer.d.ts +5 -0
  431. package/dist/server/routes/answer.js +125 -0
  432. package/dist/server/routes/ask.d.ts +28 -0
  433. package/dist/server/routes/ask.js +295 -0
  434. package/dist/server/routes/batch.d.ts +6 -0
  435. package/dist/server/routes/batch.js +493 -0
  436. package/dist/server/routes/cache-warm.d.ts +25 -0
  437. package/dist/server/routes/cache-warm.js +212 -0
  438. package/dist/server/routes/cli-usage.d.ts +6 -0
  439. package/dist/server/routes/cli-usage.js +127 -0
  440. package/dist/server/routes/compat.d.ts +23 -0
  441. package/dist/server/routes/compat.js +652 -0
  442. package/dist/server/routes/crawl.d.ts +13 -0
  443. package/dist/server/routes/crawl.js +287 -0
  444. package/dist/server/routes/deep-fetch.d.ts +8 -0
  445. package/dist/server/routes/deep-fetch.js +57 -0
  446. package/dist/server/routes/deep-research.d.ts +11 -0
  447. package/dist/server/routes/deep-research.js +232 -0
  448. package/dist/server/routes/demo.d.ts +24 -0
  449. package/dist/server/routes/demo.js +517 -0
  450. package/dist/server/routes/do.d.ts +8 -0
  451. package/dist/server/routes/do.js +72 -0
  452. package/dist/server/routes/extract.d.ts +14 -0
  453. package/dist/server/routes/extract.js +325 -0
  454. package/dist/server/routes/feed.d.ts +15 -0
  455. package/dist/server/routes/feed.js +311 -0
  456. package/dist/server/routes/fetch-queue.d.ts +13 -0
  457. package/dist/server/routes/fetch-queue.js +357 -0
  458. package/dist/server/routes/fetch.d.ts +7 -0
  459. package/dist/server/routes/fetch.js +1274 -0
  460. package/dist/server/routes/go.d.ts +14 -0
  461. package/dist/server/routes/go.js +81 -0
  462. package/dist/server/routes/health.d.ts +11 -0
  463. package/dist/server/routes/health.js +141 -0
  464. package/dist/server/routes/jobs.d.ts +7 -0
  465. package/dist/server/routes/jobs.js +574 -0
  466. package/dist/server/routes/map.d.ts +11 -0
  467. package/dist/server/routes/map.js +116 -0
  468. package/dist/server/routes/mcp.d.ts +14 -0
  469. package/dist/server/routes/mcp.js +197 -0
  470. package/dist/server/routes/metrics.d.ts +37 -0
  471. package/dist/server/routes/metrics.js +149 -0
  472. package/dist/server/routes/oauth.d.ts +9 -0
  473. package/dist/server/routes/oauth.js +396 -0
  474. package/dist/server/routes/playground.d.ts +17 -0
  475. package/dist/server/routes/playground.js +283 -0
  476. package/dist/server/routes/reader.d.ts +18 -0
  477. package/dist/server/routes/reader.js +192 -0
  478. package/dist/server/routes/research.d.ts +14 -0
  479. package/dist/server/routes/research.js +482 -0
  480. package/dist/server/routes/screenshot.d.ts +22 -0
  481. package/dist/server/routes/screenshot.js +820 -0
  482. package/dist/server/routes/search.d.ts +6 -0
  483. package/dist/server/routes/search.js +874 -0
  484. package/dist/server/routes/session.d.ts +17 -0
  485. package/dist/server/routes/session.js +548 -0
  486. package/dist/server/routes/share.d.ts +18 -0
  487. package/dist/server/routes/share.js +462 -0
  488. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  489. package/dist/server/routes/smart-search/handlers/cars.js +102 -0
  490. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  491. package/dist/server/routes/smart-search/handlers/flights.js +72 -0
  492. package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
  493. package/dist/server/routes/smart-search/handlers/general.js +717 -0
  494. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  495. package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
  496. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  497. package/dist/server/routes/smart-search/handlers/products.js +1309 -0
  498. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  499. package/dist/server/routes/smart-search/handlers/rental.js +154 -0
  500. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  501. package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
  502. package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
  503. package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
  504. package/dist/server/routes/smart-search/index.d.ts +19 -0
  505. package/dist/server/routes/smart-search/index.js +546 -0
  506. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  507. package/dist/server/routes/smart-search/intent.js +264 -0
  508. package/dist/server/routes/smart-search/llm.d.ts +16 -0
  509. package/dist/server/routes/smart-search/llm.js +70 -0
  510. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  511. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  512. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  513. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  514. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  515. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  516. package/dist/server/routes/smart-search/types.d.ts +81 -0
  517. package/dist/server/routes/smart-search/types.js +1 -0
  518. package/dist/server/routes/smart-search/utils.d.ts +20 -0
  519. package/dist/server/routes/smart-search/utils.js +146 -0
  520. package/dist/server/routes/stats.d.ts +6 -0
  521. package/dist/server/routes/stats.js +71 -0
  522. package/dist/server/routes/stripe.d.ts +15 -0
  523. package/dist/server/routes/stripe.js +296 -0
  524. package/dist/server/routes/transcript-export.d.ts +10 -0
  525. package/dist/server/routes/transcript-export.js +178 -0
  526. package/dist/server/routes/usage.d.ts +9 -0
  527. package/dist/server/routes/usage.js +279 -0
  528. package/dist/server/routes/users.d.ts +8 -0
  529. package/dist/server/routes/users.js +1867 -0
  530. package/dist/server/routes/watch.d.ts +15 -0
  531. package/dist/server/routes/watch.js +309 -0
  532. package/dist/server/routes/webhooks.d.ts +26 -0
  533. package/dist/server/routes/webhooks.js +170 -0
  534. package/dist/server/routes/youtube.d.ts +6 -0
  535. package/dist/server/routes/youtube.js +130 -0
  536. package/dist/server/sentry.d.ts +14 -0
  537. package/dist/server/sentry.js +104 -0
  538. package/dist/server/types.d.ts +15 -0
  539. package/dist/server/types.js +7 -0
  540. package/dist/server/utils/response.d.ts +44 -0
  541. package/dist/server/utils/response.js +69 -0
  542. package/dist/server/utils/sse.d.ts +22 -0
  543. package/dist/server/utils/sse.js +38 -0
  544. package/dist/types.d.ts +552 -0
  545. package/dist/types.js +39 -0
  546. package/llms.txt +105 -0
  547. package/package.json +189 -0
@@ -0,0 +1,325 @@
1
+ /**
2
+ * POST /v1/extract — Structured JSON Schema extraction endpoint.
3
+ *
4
+ * Firecrawl-compatible: pass a URL + JSON schema, get structured data back.
5
+ *
6
+ * Auth: API key required (full or read scope)
7
+ * Body: { url, schema, prompt?, llm?, render? }
8
+ *
9
+ * Also exposes:
10
+ * GET /v1/extract/auto — Auto-extract known structured types from a URL
11
+ * POST /v1/extract/auto — Same but via POST body
12
+ */
13
+ import { Router } from 'express';
14
+ import crypto from 'crypto';
15
+ import { peel } from '../../index.js';
16
+ import { extractStructured, } from '../../core/structured-extract.js';
17
+ import { getDefaultLLMConfig, isFreeTierLimitError, } from '../../core/llm-provider.js';
18
/**
 * LLM provider identifiers accepted in the request's `llm.provider` field
 * (and in the legacy top-level `llmProvider` field).
 */
const VALID_PROVIDERS = ['cloudflare', 'openai', 'anthropic', 'google', 'ollama', 'cerebras'];
26
/**
 * Return the request's id for inclusion in error responses.
 *
 * Uses `req.requestId` when it is set to a truthy value; otherwise a fresh
 * random UUID is generated for this call.
 */
function reqId(req) {
    const existing = req.requestId;
    if (existing) {
        return existing;
    }
    return crypto.randomUUID();
}
29
/**
 * Send the error envelope used by every extract endpoint:
 * `{ success: false, error, requestId }` with the given HTTP status.
 */
function sendExtractError(req, res, status, error) {
    res.status(status).json({
        success: false,
        error,
        requestId: reqId(req),
    });
}
/**
 * Build the Express router for the structured-extraction endpoints.
 *
 * Routes registered:
 *  - POST /v1/extract       — fetch a URL and extract data matching a schema/prompt
 *  - GET  /v1/extract/auto  — auto-extract known structured types (url in query string)
 *  - POST /v1/extract/auto  — same, with the url (and extra peel options) in the body
 *
 * @returns the configured Router
 */
export function createExtractRouter() {
    const router = Router();
    // ── POST /v1/extract ─────────────────────────────────────────────────────
    router.post('/v1/extract', async (req, res) => {
        try {
            // `?? {}` so that a request without a parsed body yields the 400 below
            // instead of a destructuring TypeError reported as a 500.
            const { url, schema: schemaRaw, prompt, llm: llmRaw, render,
            // Legacy fields for backward compat
            llmApiKey, llmProvider, model: legacyModel, } = req.body ?? {};
            // ── Validate URL ────────────────────────────────────────────────────
            if (!url || typeof url !== 'string') {
                sendExtractError(req, res, 400, {
                    type: 'invalid_request',
                    message: 'Missing or invalid "url" field in request body.',
                    hint: 'Pass a URL: { "url": "https://example.com", "schema": { ... } }',
                    docs: 'https://webpeel.dev/docs/errors#invalid-request',
                });
                return;
            }
            if (url.length > 2048) {
                sendExtractError(req, res, 400, {
                    type: 'invalid_url',
                    message: 'URL too long (max 2048 characters)',
                    docs: 'https://webpeel.dev/docs/errors#invalid-url',
                });
                return;
            }
            let parsedUrl;
            try {
                parsedUrl = new URL(url);
            }
            catch {
                sendExtractError(req, res, 400, {
                    type: 'invalid_url',
                    message: `Invalid URL format: ${url}`,
                    hint: 'Ensure the URL is well-formed: https://example.com',
                    docs: 'https://webpeel.dev/docs/errors#invalid-url',
                });
                return;
            }
            if (!['http:', 'https:'].includes(parsedUrl.protocol)) {
                sendExtractError(req, res, 400, {
                    type: 'invalid_url',
                    message: 'Only HTTP and HTTPS URLs are supported',
                    docs: 'https://webpeel.dev/docs/errors#invalid-url',
                });
                return;
            }
            // ── Validate schema ─────────────────────────────────────────────────
            if (!schemaRaw && !prompt) {
                sendExtractError(req, res, 400, {
                    type: 'invalid_request',
                    message: 'Either "schema" or "prompt" is required for structured extraction.',
                    hint: 'Include a JSON schema in the request body: { "schema": { "type": "object", "properties": { ... } } }',
                    docs: 'https://webpeel.dev/docs/errors#invalid-request',
                });
                return;
            }
            // Build or validate schema
            let schema;
            if (schemaRaw) {
                if (typeof schemaRaw !== 'object' || schemaRaw === null || Array.isArray(schemaRaw)) {
                    sendExtractError(req, res, 400, {
                        type: 'invalid_request',
                        message: '"schema" must be a JSON object',
                        hint: '{ "type": "object", "properties": { "field": { "type": "string" } } }',
                        docs: 'https://webpeel.dev/docs/errors#invalid-request',
                    });
                    return;
                }
                const schemaObj = schemaRaw;
                // Accept both full JSON Schema and shorthand { field: "type" }
                if (schemaObj.type === 'object' && schemaObj.properties) {
                    schema = schemaObj;
                }
                else {
                    // Shorthand: { "company_mission": "string", "is_open_source": "boolean" }
                    // Non-string values fall back to the "string" type.
                    const props = {};
                    for (const [k, v] of Object.entries(schemaObj)) {
                        props[k] = { type: typeof v === 'string' ? v : 'string' };
                    }
                    schema = { type: 'object', properties: props };
                }
            }
            else {
                // No schema provided but prompt is — create a minimal schema
                schema = { type: 'object', properties: { result: { type: 'string', description: prompt } } };
            }
            // ── Resolve LLM config ──────────────────────────────────────────────
            let llmConfig;
            if (llmRaw && typeof llmRaw === 'object' && !Array.isArray(llmRaw)) {
                // New format: { "provider": "openai", "apiKey": "sk-...", "model": "..." }
                const llmObj = llmRaw;
                const provider = typeof llmObj.provider === 'string' ? llmObj.provider : 'openai';
                if (!VALID_PROVIDERS.includes(provider)) {
                    sendExtractError(req, res, 400, {
                        type: 'invalid_request',
                        message: `Invalid "llm.provider". Must be one of: ${VALID_PROVIDERS.join(', ')}`,
                        docs: 'https://webpeel.dev/docs/errors#invalid-request',
                    });
                    return;
                }
                // NOTE(review): `endpoint` is caller-supplied and forwarded as-is —
                // confirm the LLM provider layer restricts where it may point.
                llmConfig = {
                    provider: provider,
                    apiKey: typeof llmObj.apiKey === 'string' ? llmObj.apiKey : undefined,
                    model: typeof llmObj.model === 'string' ? llmObj.model : undefined,
                    endpoint: typeof llmObj.endpoint === 'string' ? llmObj.endpoint : undefined,
                };
            }
            else if (typeof llmApiKey === 'string' && llmApiKey) {
                // Legacy format: llmApiKey + llmProvider at top level
                const provider = (typeof llmProvider === 'string' && VALID_PROVIDERS.includes(llmProvider))
                    ? llmProvider
                    : 'openai';
                llmConfig = {
                    provider,
                    apiKey: llmApiKey,
                    model: typeof legacyModel === 'string' ? legacyModel : undefined,
                };
            }
            else {
                // Try server-side default (env vars)
                const defaultCfg = getDefaultLLMConfig();
                // Only use server default if it has a real key (not bare cloudflare)
                if (defaultCfg.provider !== 'cloudflare' || (process.env.CLOUDFLARE_ACCOUNT_ID && process.env.CLOUDFLARE_API_TOKEN)) {
                    llmConfig = defaultCfg;
                }
                // If still no config, we'll use heuristic extraction
            }
            // ── Fetch page content ──────────────────────────────────────────────
            const useRender = render === true || render === 'true';
            const peelResult = await peel(url, {
                format: 'markdown',
                render: useRender,
                noEscalate: !useRender, // prevent OOM: only browser when render=true explicitly
                timeout: 30000,
                readable: true,
            });
            const content = peelResult.content || '';
            // ── Extract structured data ─────────────────────────────────────────
            // Seed hints from domain-api structured data (GitHub stars/language, etc.)
            // This lets heuristic extraction use pre-parsed structured fields as ground truth.
            const domainHints = {};
            const rawDomainData = peelResult.domainData?.structured;
            if (rawDomainData && typeof rawDomainData === 'object') {
                for (const [k, v] of Object.entries(rawDomainData)) {
                    if (v !== null && v !== undefined && v !== '') {
                        domainHints[k] = v;
                    }
                }
            }
            const extractResult = await extractStructured(content, schema, llmConfig, typeof prompt === 'string' ? prompt : undefined, Object.keys(domainHints).length > 0 ? domainHints : undefined);
            const method = llmConfig ? 'llm' : 'heuristic';
            res.json({
                success: true,
                data: {
                    url: peelResult.url || url,
                    extracted: extractResult.data,
                    confidence: extractResult.confidence,
                    tokensUsed: extractResult.tokensUsed,
                    method,
                },
            });
        }
        catch (error) {
            const msg = error instanceof Error ? error.message : String(error);
            console.error('[/v1/extract] Error:', msg);
            if (isFreeTierLimitError(error)) {
                sendExtractError(req, res, 429, {
                    type: 'free_tier_limit',
                    message: error.message,
                    hint: 'Provide your own API key in the "llm" config object for unlimited use.',
                    docs: 'https://webpeel.dev/docs/extract#free-tier',
                });
                return;
            }
            // Remaining failures are classified by substring of the error message.
            if (msg.includes('401') || msg.includes('Unauthorized') || msg.includes('authentication failed')) {
                sendExtractError(req, res, 401, { type: 'llm_auth_failed', message: msg });
                return;
            }
            if (msg.includes('429') || msg.includes('rate limit')) {
                sendExtractError(req, res, 429, {
                    type: 'llm_rate_limited',
                    message: msg,
                    hint: 'Try again in a moment or use a different LLM provider.',
                    docs: 'https://webpeel.dev/docs/errors#llm-rate-limited',
                });
                return;
            }
            sendExtractError(req, res, 500, {
                type: 'extraction_failed',
                message: msg,
                docs: 'https://webpeel.dev/docs/errors#extraction-failed',
            });
        }
    });
    // ── GET /v1/extract/auto ─────────────────────────────────────────────────
    router.get('/v1/extract/auto', async (req, res) => {
        const url = req.query.url;
        // Repeated/nested query params arrive as arrays/objects; only a plain
        // string is a usable URL.
        if (!url || typeof url !== 'string') {
            sendExtractError(req, res, 400, {
                type: 'missing_url',
                message: 'Missing url parameter',
                hint: 'Pass a URL: GET /v1/extract/auto?url=https://example.com',
                docs: 'https://webpeel.dev/docs/errors#missing-url',
            });
            return;
        }
        // The try/catch mirrors the POST variant: without it a rejected peel()
        // inside this async handler would never produce a response.
        try {
            const { autoExtract } = await import('../../core/auto-extract.js');
            const result = await peel(url, { format: 'html' });
            const extracted = autoExtract(result.content || '', url);
            res.json({ url, pageType: extracted.type, structured: extracted });
        }
        catch (error) {
            const msg = error instanceof Error ? error.message : 'Unknown error';
            console.error('[/v1/extract/auto GET] Error:', msg);
            sendExtractError(req, res, 500, {
                type: 'extraction_failed',
                message: msg,
                docs: 'https://webpeel.dev/docs/errors#extraction-failed',
            });
        }
    });
    // ── POST /v1/extract/auto ────────────────────────────────────────────────
    router.post('/v1/extract/auto', async (req, res) => {
        const { url, ...rest } = req.body ?? {};
        if (!url || typeof url !== 'string') {
            sendExtractError(req, res, 400, {
                type: 'missing_url',
                message: 'Missing or invalid url field in request body',
                hint: 'Pass a URL in the request body: { "url": "https://example.com" }',
                docs: 'https://webpeel.dev/docs/errors#missing-url',
            });
            return;
        }
        try {
            const { autoExtract } = await import('../../core/auto-extract.js');
            // `format` is applied after the caller's options so the body cannot
            // override it: the result is handed to autoExtract as HTML.
            // NOTE(review): every other body field is forwarded to peel()
            // unfiltered — consider an allow-list of supported options.
            const result = await peel(url, { ...rest, format: 'html' });
            const extracted = autoExtract(result.content || '', url);
            res.json({ url, pageType: extracted.type, structured: extracted });
        }
        catch (error) {
            const msg = error instanceof Error ? error.message : 'Unknown error';
            console.error('[/v1/extract/auto POST] Error:', msg);
            sendExtractError(req, res, 500, {
                type: 'extraction_failed',
                message: msg,
                docs: 'https://webpeel.dev/docs/errors#extraction-failed',
            });
        }
    });
    return router;
}
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Feed discovery and parsing endpoint — GET /v1/feed
3
+ *
4
+ * Discovers and fetches RSS/Atom feeds for any website URL.
5
+ * Supports direct feed URLs as well as HTML pages (auto-discovers via <link> tags
6
+ * or probes common feed paths like /feed, /rss.xml, etc.).
7
+ *
8
+ * Query params:
9
+ * - url (required) — website URL or direct feed URL
10
+ * - limit (optional) — max items to return (default 20, max 100)
11
+ * - format (optional) — "json" (default) or "markdown"
12
+ */
13
+ import { Router } from 'express';
14
+ import { AuthStore } from '../auth-store.js';
15
+ export declare function createFeedRouter(_authStore: AuthStore): Router;
@@ -0,0 +1,311 @@
1
+ /**
2
+ * Feed discovery and parsing endpoint — GET /v1/feed
3
+ *
4
+ * Discovers and fetches RSS/Atom feeds for any website URL.
5
+ * Supports direct feed URLs as well as HTML pages (auto-discovers via <link> tags
6
+ * or probes common feed paths like /feed, /rss.xml, etc.).
7
+ *
8
+ * Query params:
9
+ * - url (required) — website URL or direct feed URL
10
+ * - limit (optional) — max items to return (default 20, max 100)
11
+ * - format (optional) — "json" (default) or "markdown"
12
+ */
13
+ import { Router } from 'express';
14
+ import { validateUrlForSSRF, SSRFError } from '../middleware/url-validator.js';
15
+ // ── Helpers: XML text extraction ──────────────────────────────────────────────
16
/**
 * Extract the trimmed inner text of the first matching XML element.
 *
 * A CDATA-wrapped body is tried first (whitespace around the CDATA section is
 * tolerated) and is returned without the `<![CDATA[ ]]>` markers; otherwise
 * the raw inner markup is returned. Matching is case-insensitive.
 *
 * @param xml XML fragment to search
 * @param tag element name, optionally namespace-prefixed (e.g. `dc:creator`)
 * @returns the element's inner text, or '' when the element is not found
 */
function extractTag(xml, tag) {
    // The name is interpolated into a regex, so escape any metacharacters.
    const name = tag.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // Require whitespace or '>' right after the name so that e.g. `link`
    // does not match `<links>`.
    const open = `<${name}(?:\\s[^>]*)?>`;
    const close = `</${name}\\s*>`;
    const patterns = [
        new RegExp(`${open}\\s*<!\\[CDATA\\[([\\s\\S]*?)\\]\\]>\\s*${close}`, 'i'),
        new RegExp(`${open}([\\s\\S]*?)${close}`, 'i'),
    ];
    for (const re of patterns) {
        const m = xml.match(re);
        if (m)
            return m[1].trim();
    }
    return '';
}
30
/**
 * Extract a quoted attribute value from an XML/HTML tag's attribute text.
 *
 * The attribute name must start the string or follow whitespace, so `href`
 * does not match inside `data-href`. Double- and single-quoted values are
 * matched separately, so a value may contain the other quote character.
 * Matching is case-insensitive.
 *
 * @param tag attribute portion of a tag (e.g. ` rel="alternate" href="/feed"`)
 * @param attr attribute name to look up
 * @returns the trimmed value, or '' when the attribute is absent or empty
 */
function extractAttr(tag, attr) {
    // The name is interpolated into a regex, so escape any metacharacters.
    const name = attr.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const re = new RegExp(`(?:^|\\s)${name}\\s*=\\s*(?:"([^"]*)"|'([^']*)')`, 'i');
    const m = tag.match(re);
    if (!m)
        return '';
    return (m[1] ?? m[2] ?? '').trim();
}
36
/**
 * Remove HTML tags from a string and decode a small set of entities
 * (used for title/description cleanup).
 *
 * Tags are removed first; then `&lt;`, `&gt;`, `&amp;`, `&quot;` and `&#39;`
 * are decoded in that order, and the result is trimmed.
 */
function stripHtml(html) {
    const entityRules = [
        { pattern: /&lt;/g, replacement: '<' },
        { pattern: /&gt;/g, replacement: '>' },
        { pattern: /&amp;/g, replacement: '&' },
        { pattern: /&quot;/g, replacement: '"' },
        { pattern: /&#39;/g, replacement: "'" },
    ];
    const withoutTags = html.replace(/<[^>]+>/g, '');
    const decoded = entityRules.reduce((text, rule) => text.replace(rule.pattern, rule.replacement), withoutTags);
    return decoded.trim();
}
40
+ // ── RSS/Atom parser ───────────────────────────────────────────────────────────
41
/**
 * Parse an RSS 2.0 or Atom feed document into a flat list of items.
 *
 * The document is treated as Atom when it contains a `<feed` element,
 * otherwise as RSS 2.0. Parsing is regex-based (no XML library). Each item
 * has the shape `{ title, link, description, pubDate, author, guid }`, with
 * the description stripped of tags and capped at 500 characters.
 */
function parseRSSFeed(xml) {
    // Value of the first listed tag that has non-empty text, else ''.
    const firstTag = (block, ...tags) => {
        for (const tag of tags) {
            const value = extractTag(block, tag);
            if (value)
                return value;
        }
        return '';
    };
    // Atom link: the first <link> with an href whose rel is "alternate"
    // (a missing rel counts as alternate); otherwise the first href seen.
    const pickAtomLink = (entry) => {
        let fallback = '';
        for (const [, attrs] of entry.matchAll(/<link([^>]*)\/?>/gi)) {
            const href = extractAttr(attrs, 'href');
            if (!href)
                continue;
            const rel = extractAttr(attrs, 'rel') || 'alternate';
            if (rel === 'alternate')
                return href;
            if (!fallback)
                fallback = href;
        }
        return fallback;
    };
    if (/<feed[\s>]/i.test(xml)) {
        // ── Atom ───────────────────────────────────────────────────────────────
        return Array.from(xml.matchAll(/<entry[\s>]([\s\S]*?)<\/entry>/gi), ([, entry]) => {
            const title = stripHtml(extractTag(entry, 'title'));
            const link = pickAtomLink(entry);
            // Body text: <content> preferred, <summary> as fallback.
            const description = stripHtml(firstTag(entry, 'content', 'summary')).substring(0, 500);
            // Date: <published> preferred, <updated> as fallback.
            const pubDate = firstTag(entry, 'published', 'updated');
            const authorMatch = entry.match(/<author[\s>]([\s\S]*?)<\/author>/i);
            const author = authorMatch ? extractTag(authorMatch[1], 'name') : '';
            const guid = extractTag(entry, 'id') || link;
            return { title, link, description, pubDate, author, guid };
        });
    }
    // ── RSS 2.0 ────────────────────────────────────────────────────────────────
    return Array.from(xml.matchAll(/<item[\s>]([\s\S]*?)<\/item>/gi), ([, item]) => {
        const title = stripHtml(extractTag(item, 'title'));
        const link = firstTag(item, 'link', 'feedburner:origLink');
        const description = stripHtml(firstTag(item, 'description', 'content:encoded')).substring(0, 500);
        const pubDate = firstTag(item, 'pubDate', 'dc:date');
        const author = firstTag(item, 'author', 'dc:creator');
        const guid = extractTag(item, 'guid') || link;
        return { title, link, description, pubDate, author, guid };
    });
}
103
+ // ── HTML feed discovery ───────────────────────────────────────────────────────
104
/**
 * Find feed links advertised by an HTML document.
 *
 * Looks at every `<link>` tag with `rel="alternate"`, an `href`, and a `type`
 * mentioning rss, atom, application/xml or text/xml. Hrefs are resolved
 * against `pageUrl`; unparseable ones are skipped and duplicates are dropped.
 *
 * @returns `{ url, type, title }` entries in document order (title defaults to 'Feed')
 */
function discoverFeeds(html, pageUrl) {
    const feedTypeMarkers = ['rss', 'atom', 'application/xml', 'text/xml'];
    // Keyed by resolved URL; a Map keeps first-seen order and de-duplicates.
    const found = new Map();
    for (const [, attrs] of html.matchAll(/<link([^>]+)>/gi)) {
        const href = extractAttr(attrs, 'href');
        if (!href || extractAttr(attrs, 'rel').toLowerCase() !== 'alternate') {
            continue;
        }
        const type = extractAttr(attrs, 'type').toLowerCase();
        if (!feedTypeMarkers.some((marker) => type.includes(marker))) {
            continue;
        }
        let absolute;
        try {
            // Resolve relative URLs
            absolute = new URL(href, pageUrl).toString();
        }
        catch {
            continue;
        }
        if (found.has(absolute)) {
            continue;
        }
        found.set(absolute, {
            url: absolute,
            type,
            title: extractAttr(attrs, 'title') || 'Feed',
        });
    }
    return [...found.values()];
}
144
+ // ── Router factory ────────────────────────────────────────────────────────────
145
/**
 * Fetch `startUrl` while re-validating every redirect hop.
 *
 * Redirects are followed manually so each target is passed to
 * validateUrlForSSRF before it is requested; with automatic following only
 * the first URL would be checked. One timeout covers the whole chain.
 *
 * @throws SSRFError (from validateUrlForSSRF) when any hop is rejected
 * @throws Error when more than `maxRedirects` redirects are encountered
 */
async function fetchWithSsrfGuard(startUrl, headers, timeoutMs, maxRedirects = 10) {
    const signal = AbortSignal.timeout(timeoutMs);
    let currentUrl = startUrl;
    for (let hop = 0; hop <= maxRedirects; hop++) {
        validateUrlForSSRF(currentUrl);
        const res = await fetch(currentUrl, { headers, signal, redirect: 'manual' });
        const isRedirect = res.status >= 300 && res.status < 400;
        const location = isRedirect ? res.headers.get('location') : null;
        if (!location)
            return res;
        // Discard the redirect body before moving on to the next hop.
        await res.body?.cancel();
        currentUrl = new URL(location, currentUrl).toString();
    }
    throw new Error(`Too many redirects while fetching ${startUrl}`);
}
/**
 * Build the Express router exposing GET /v1/feed.
 *
 * Query params: `url` (required), `limit` (default 20, capped at 100) and
 * `format` ("json" by default, or "markdown"). The handler parses `url`
 * directly when it looks like a feed; otherwise it tries the first feed
 * advertised by the HTML page and then a list of common feed paths.
 *
 * @param _authStore unused by this router
 * @returns the configured Router
 */
export function createFeedRouter(_authStore) {
    const router = Router();
    const userAgent = 'WebPeel/0.21 (+https://webpeel.dev/bot)';
    router.get('/v1/feed', async (req, res) => {
        try {
            const url = req.query.url;
            const limitRaw = parseInt(req.query.limit || '20', 10);
            // A negative limit would reach slice(0, -n) and silently drop the
            // last n items, so it falls back to the default like NaN does.
            const limit = Number.isNaN(limitRaw) || limitRaw < 0 ? 20 : Math.min(limitRaw, 100);
            const format = req.query.format || 'json';
            // ── Validate required param ──────────────────────────────────────────
            if (!url) {
                res.status(400).json({
                    success: false,
                    error: { type: 'invalid_request', message: 'Missing required parameter: "url"' },
                });
                return;
            }
            // Repeated/nested query params arrive as arrays/objects.
            if (typeof url !== 'string') {
                res.status(400).json({
                    success: false,
                    error: { type: 'invalid_request', message: 'Parameter "url" must be a single string' },
                });
                return;
            }
            let feedUrl = url;
            let feedItems = [];
            // ── Fetch the URL (SSRF guard applied to it and to every redirect) ───
            let response;
            try {
                response = await fetchWithSsrfGuard(url, {
                    'User-Agent': userAgent,
                    'Accept': 'application/rss+xml, application/atom+xml, application/xml, text/xml, text/html, */*',
                }, 10000);
            }
            catch (e) {
                if (e instanceof SSRFError) {
                    res.status(400).json({
                        success: false,
                        error: { type: 'invalid_request', message: e.message },
                    });
                    return;
                }
                throw e;
            }
            if (!response.ok) {
                res.status(502).json({
                    success: false,
                    error: { type: 'fetch_error', message: `Failed to fetch URL: HTTP ${response.status}` },
                });
                return;
            }
            const contentType = response.headers.get('content-type') || '';
            const text = await response.text();
            const trimmed = text.trimStart();
            const looksLikeFeed = contentType.includes('xml') ||
                contentType.includes('rss') ||
                contentType.includes('atom') ||
                trimmed.startsWith('<?xml') ||
                trimmed.startsWith('<rss') ||
                trimmed.startsWith('<feed');
            if (looksLikeFeed) {
                // ── Direct feed URL ────────────────────────────────────────────────
                feedItems = parseRSSFeed(text);
            }
            else {
                // ── HTML page — discover feeds ─────────────────────────────────────
                const feedLinks = discoverFeeds(text, url);
                if (feedLinks.length > 0) {
                    // Fetch the first (highest-priority) discovered feed
                    feedUrl = feedLinks[0].url;
                    if (feedUrl !== url) {
                        try {
                            const feedRes = await fetchWithSsrfGuard(feedUrl, { 'User-Agent': userAgent }, 10000);
                            if (feedRes.ok) {
                                feedItems = parseRSSFeed(await feedRes.text());
                            }
                        }
                        catch (e) {
                            if (!(e instanceof SSRFError))
                                throw e;
                            // If discovered feed URL is blocked, fall through to probe paths
                            feedUrl = url;
                        }
                    }
                }
                // If still no items, probe common feed paths
                if (feedItems.length === 0) {
                    const baseUrl = new URL(url).origin;
                    const commonPaths = [
                        '/feed',
                        '/rss',
                        '/rss.xml',
                        '/feed.xml',
                        '/atom.xml',
                        '/feed/rss',
                        '/blog/feed',
                        '/blog/rss',
                        '/index.xml',
                    ];
                    for (const path of commonPaths) {
                        const candidateUrl = baseUrl + path;
                        try {
                            const probeRes = await fetchWithSsrfGuard(candidateUrl, { 'User-Agent': userAgent }, 3000);
                            if (!probeRes.ok)
                                continue;
                            const probeText = await probeRes.text();
                            const probeTrimmed = probeText.trimStart();
                            if (probeTrimmed.startsWith('<?xml') ||
                                probeTrimmed.startsWith('<rss') ||
                                probeTrimmed.startsWith('<feed')) {
                                feedItems = parseRSSFeed(probeText);
                                feedUrl = candidateUrl;
                                break;
                            }
                        }
                        catch {
                            // Probing is best-effort: blocked, failed, or timed-out
                            // candidates are skipped in favour of the next one.
                        }
                    }
                }
            }
            // ── Trim to limit ────────────────────────────────────────────────────
            feedItems = feedItems.slice(0, limit);
            // ── Format response ──────────────────────────────────────────────────
            if (format === 'markdown') {
                const md = feedItems
                    .map((item, i) => `${i + 1}. **${item.title || '(no title)'}**\n ${item.link || ''}\n ${item.pubDate || ''}\n ${item.description?.substring(0, 200) || ''}`)
                    .join('\n\n');
                res.json({
                    success: true,
                    data: {
                        feedUrl,
                        format: 'markdown',
                        content: md,
                        itemCount: feedItems.length,
                    },
                });
            }
            else {
                res.json({
                    success: true,
                    data: {
                        feedUrl,
                        items: feedItems,
                        itemCount: feedItems.length,
                    },
                });
            }
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            res.status(500).json({
                success: false,
                error: { type: 'internal', message },
            });
        }
    });
    return router;
}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * Queue-backed /v1/fetch and /v1/render endpoints.
3
+ *
4
+ * Used when API_MODE=queue (microservices mode).
5
+ * Instead of calling peel() directly, jobs are enqueued in Bull
6
+ * and results are polled from Redis via GET /v1/jobs/:id.
7
+ *
8
+ * POST /v1/fetch → enqueue in webpeel:fetch queue → return { jobId, status }
9
+ * POST /v1/render → enqueue in webpeel:render queue → return { jobId, status }
10
+ * GET /v1/jobs/:id → return job status + result from Redis
11
+ */
12
+ import { Router } from 'express';
13
+ export declare function createQueueFetchRouter(): Router;