@iflow-mcp/jakeliume-webpeel 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (547) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +313 -0
  3. package/dist/cache.d.ts +30 -0
  4. package/dist/cache.js +139 -0
  5. package/dist/cli/commands/auth.d.ts +5 -0
  6. package/dist/cli/commands/auth.js +411 -0
  7. package/dist/cli/commands/doctor.d.ts +37 -0
  8. package/dist/cli/commands/doctor.js +371 -0
  9. package/dist/cli/commands/fetch.d.ts +6 -0
  10. package/dist/cli/commands/fetch.js +1345 -0
  11. package/dist/cli/commands/guide.d.ts +2 -0
  12. package/dist/cli/commands/guide.js +183 -0
  13. package/dist/cli/commands/interact.d.ts +5 -0
  14. package/dist/cli/commands/interact.js +840 -0
  15. package/dist/cli/commands/jobs.d.ts +5 -0
  16. package/dist/cli/commands/jobs.js +997 -0
  17. package/dist/cli/commands/monitor.d.ts +12 -0
  18. package/dist/cli/commands/monitor.js +197 -0
  19. package/dist/cli/commands/observe.d.ts +12 -0
  20. package/dist/cli/commands/observe.js +158 -0
  21. package/dist/cli/commands/screenshot.d.ts +5 -0
  22. package/dist/cli/commands/screenshot.js +282 -0
  23. package/dist/cli/commands/search.d.ts +5 -0
  24. package/dist/cli/commands/search.js +1021 -0
  25. package/dist/cli/commands/setup.d.ts +13 -0
  26. package/dist/cli/commands/setup.js +244 -0
  27. package/dist/cli/commands/skill.d.ts +15 -0
  28. package/dist/cli/commands/skill.js +195 -0
  29. package/dist/cli/utils.d.ts +84 -0
  30. package/dist/cli/utils.js +806 -0
  31. package/dist/cli-auth.d.ts +75 -0
  32. package/dist/cli-auth.js +369 -0
  33. package/dist/cli.d.ts +17 -0
  34. package/dist/cli.js +99 -0
  35. package/dist/core/actions.d.ts +69 -0
  36. package/dist/core/actions.js +495 -0
  37. package/dist/core/agent.d.ts +98 -0
  38. package/dist/core/agent.js +558 -0
  39. package/dist/core/answer.d.ts +42 -0
  40. package/dist/core/answer.js +395 -0
  41. package/dist/core/application-tracker.d.ts +84 -0
  42. package/dist/core/application-tracker.js +184 -0
  43. package/dist/core/apply.d.ts +162 -0
  44. package/dist/core/apply.js +816 -0
  45. package/dist/core/auth-detection.d.ts +35 -0
  46. package/dist/core/auth-detection.js +358 -0
  47. package/dist/core/auto-extract.d.ts +82 -0
  48. package/dist/core/auto-extract.js +604 -0
  49. package/dist/core/auto-interact.d.ts +23 -0
  50. package/dist/core/auto-interact.js +246 -0
  51. package/dist/core/bm25-filter.d.ts +66 -0
  52. package/dist/core/bm25-filter.js +288 -0
  53. package/dist/core/branding.d.ts +54 -0
  54. package/dist/core/branding.js +234 -0
  55. package/dist/core/browser-fetch.d.ts +323 -0
  56. package/dist/core/browser-fetch.js +1600 -0
  57. package/dist/core/browser-pool.d.ts +91 -0
  58. package/dist/core/browser-pool.js +550 -0
  59. package/dist/core/budget.d.ts +42 -0
  60. package/dist/core/budget.js +324 -0
  61. package/dist/core/business-intel.d.ts +47 -0
  62. package/dist/core/business-intel.js +279 -0
  63. package/dist/core/cache.d.ts +13 -0
  64. package/dist/core/cache.js +121 -0
  65. package/dist/core/cf-worker-proxy.d.ts +32 -0
  66. package/dist/core/cf-worker-proxy.js +87 -0
  67. package/dist/core/challenge-detection.d.ts +26 -0
  68. package/dist/core/challenge-detection.js +468 -0
  69. package/dist/core/change-tracking.d.ts +75 -0
  70. package/dist/core/change-tracking.js +276 -0
  71. package/dist/core/chunker.d.ts +46 -0
  72. package/dist/core/chunker.js +249 -0
  73. package/dist/core/chunking.d.ts +42 -0
  74. package/dist/core/chunking.js +181 -0
  75. package/dist/core/circuit-breaker.d.ts +44 -0
  76. package/dist/core/circuit-breaker.js +85 -0
  77. package/dist/core/content-pruner.d.ts +47 -0
  78. package/dist/core/content-pruner.js +425 -0
  79. package/dist/core/cookie-cache.d.ts +60 -0
  80. package/dist/core/cookie-cache.js +163 -0
  81. package/dist/core/crawl-checkpoint.d.ts +54 -0
  82. package/dist/core/crawl-checkpoint.js +104 -0
  83. package/dist/core/crawler.d.ts +84 -0
  84. package/dist/core/crawler.js +349 -0
  85. package/dist/core/cross-verify.d.ts +27 -0
  86. package/dist/core/cross-verify.js +93 -0
  87. package/dist/core/deep-fetch.d.ts +74 -0
  88. package/dist/core/deep-fetch.js +405 -0
  89. package/dist/core/deep-research.d.ts +141 -0
  90. package/dist/core/deep-research.js +972 -0
  91. package/dist/core/design-analysis.d.ts +70 -0
  92. package/dist/core/design-analysis.js +490 -0
  93. package/dist/core/design-compare.d.ts +38 -0
  94. package/dist/core/design-compare.js +264 -0
  95. package/dist/core/diff.d.ts +61 -0
  96. package/dist/core/diff.js +289 -0
  97. package/dist/core/dns-cache.d.ts +20 -0
  98. package/dist/core/dns-cache.js +198 -0
  99. package/dist/core/documents.d.ts +23 -0
  100. package/dist/core/documents.js +123 -0
  101. package/dist/core/domain-memory.d.ts +66 -0
  102. package/dist/core/domain-memory.js +163 -0
  103. package/dist/core/domain-verify.d.ts +40 -0
  104. package/dist/core/domain-verify.js +379 -0
  105. package/dist/core/engine-ranker.d.ts +112 -0
  106. package/dist/core/engine-ranker.js +395 -0
  107. package/dist/core/extract-inline.d.ts +38 -0
  108. package/dist/core/extract-inline.js +215 -0
  109. package/dist/core/extract-listings.d.ts +38 -0
  110. package/dist/core/extract-listings.js +461 -0
  111. package/dist/core/extract.d.ts +9 -0
  112. package/dist/core/extract.js +139 -0
  113. package/dist/core/fetch-cache.d.ts +57 -0
  114. package/dist/core/fetch-cache.js +95 -0
  115. package/dist/core/fetcher.d.ts +13 -0
  116. package/dist/core/fetcher.js +12 -0
  117. package/dist/core/google-cache.d.ts +29 -0
  118. package/dist/core/google-cache.js +180 -0
  119. package/dist/core/google-serp-parser.d.ts +82 -0
  120. package/dist/core/google-serp-parser.js +287 -0
  121. package/dist/core/hotel-search.d.ts +122 -0
  122. package/dist/core/hotel-search.js +382 -0
  123. package/dist/core/http-fetch.d.ts +72 -0
  124. package/dist/core/http-fetch.js +820 -0
  125. package/dist/core/human.d.ts +175 -0
  126. package/dist/core/human.js +680 -0
  127. package/dist/core/image-caption.d.ts +44 -0
  128. package/dist/core/image-caption.js +271 -0
  129. package/dist/core/jobs.d.ts +75 -0
  130. package/dist/core/jobs.js +634 -0
  131. package/dist/core/json-ld.d.ts +15 -0
  132. package/dist/core/json-ld.js +617 -0
  133. package/dist/core/language-detect.d.ts +18 -0
  134. package/dist/core/language-detect.js +135 -0
  135. package/dist/core/links.d.ts +10 -0
  136. package/dist/core/links.js +44 -0
  137. package/dist/core/llm-extract.d.ts +71 -0
  138. package/dist/core/llm-extract.js +507 -0
  139. package/dist/core/llm-provider.d.ts +100 -0
  140. package/dist/core/llm-provider.js +702 -0
  141. package/dist/core/local-search.d.ts +60 -0
  142. package/dist/core/local-search.js +308 -0
  143. package/dist/core/logger.d.ts +28 -0
  144. package/dist/core/logger.js +104 -0
  145. package/dist/core/map.d.ts +33 -0
  146. package/dist/core/map.js +127 -0
  147. package/dist/core/markdown.d.ts +92 -0
  148. package/dist/core/markdown.js +809 -0
  149. package/dist/core/metadata.d.ts +34 -0
  150. package/dist/core/metadata.js +422 -0
  151. package/dist/core/observe.d.ts +113 -0
  152. package/dist/core/observe.js +395 -0
  153. package/dist/core/ocr.d.ts +12 -0
  154. package/dist/core/ocr.js +33 -0
  155. package/dist/core/paginate.d.ts +31 -0
  156. package/dist/core/paginate.js +106 -0
  157. package/dist/core/pdf.d.ts +8 -0
  158. package/dist/core/pdf.js +25 -0
  159. package/dist/core/peel-tls.d.ts +25 -0
  160. package/dist/core/peel-tls.js +220 -0
  161. package/dist/core/pipeline.d.ts +132 -0
  162. package/dist/core/pipeline.js +1666 -0
  163. package/dist/core/profiles.d.ts +61 -0
  164. package/dist/core/profiles.js +350 -0
  165. package/dist/core/prompt-guard.d.ts +30 -0
  166. package/dist/core/prompt-guard.js +119 -0
  167. package/dist/core/proxy-config.d.ts +90 -0
  168. package/dist/core/proxy-config.js +172 -0
  169. package/dist/core/quick-answer.d.ts +53 -0
  170. package/dist/core/quick-answer.js +833 -0
  171. package/dist/core/rate-governor.d.ts +80 -0
  172. package/dist/core/rate-governor.js +238 -0
  173. package/dist/core/readability.d.ts +57 -0
  174. package/dist/core/readability.js +533 -0
  175. package/dist/core/research.d.ts +66 -0
  176. package/dist/core/research.js +270 -0
  177. package/dist/core/retry.d.ts +60 -0
  178. package/dist/core/retry.js +119 -0
  179. package/dist/core/safe-browsing.d.ts +30 -0
  180. package/dist/core/safe-browsing.js +206 -0
  181. package/dist/core/schema-extraction.d.ts +66 -0
  182. package/dist/core/schema-extraction.js +352 -0
  183. package/dist/core/schema-postprocess.d.ts +32 -0
  184. package/dist/core/schema-postprocess.js +469 -0
  185. package/dist/core/schema-templates.d.ts +19 -0
  186. package/dist/core/schema-templates.js +143 -0
  187. package/dist/core/screenshot.d.ts +224 -0
  188. package/dist/core/screenshot.js +207 -0
  189. package/dist/core/search-engines.d.ts +25 -0
  190. package/dist/core/search-engines.js +182 -0
  191. package/dist/core/search-provider.d.ts +243 -0
  192. package/dist/core/search-provider.js +1629 -0
  193. package/dist/core/searxng-provider.d.ts +35 -0
  194. package/dist/core/searxng-provider.js +105 -0
  195. package/dist/core/selective-evidence.d.ts +151 -0
  196. package/dist/core/selective-evidence.js +389 -0
  197. package/dist/core/site-search.d.ts +44 -0
  198. package/dist/core/site-search.js +252 -0
  199. package/dist/core/sitemap.d.ts +23 -0
  200. package/dist/core/sitemap.js +105 -0
  201. package/dist/core/source-credibility.d.ts +29 -0
  202. package/dist/core/source-credibility.js +584 -0
  203. package/dist/core/source-scoring.d.ts +166 -0
  204. package/dist/core/source-scoring.js +396 -0
  205. package/dist/core/stemmer.d.ts +38 -0
  206. package/dist/core/stemmer.js +509 -0
  207. package/dist/core/strategies.d.ts +104 -0
  208. package/dist/core/strategies.js +1044 -0
  209. package/dist/core/strategy-hooks.d.ts +145 -0
  210. package/dist/core/strategy-hooks.js +74 -0
  211. package/dist/core/structured-extract.d.ts +43 -0
  212. package/dist/core/structured-extract.js +550 -0
  213. package/dist/core/summarize.d.ts +17 -0
  214. package/dist/core/summarize.js +78 -0
  215. package/dist/core/synonyms.d.ts +42 -0
  216. package/dist/core/synonyms.js +184 -0
  217. package/dist/core/system-monitor.d.ts +61 -0
  218. package/dist/core/system-monitor.js +133 -0
  219. package/dist/core/table-format.d.ts +30 -0
  220. package/dist/core/table-format.js +146 -0
  221. package/dist/core/threat-feeds.d.ts +23 -0
  222. package/dist/core/threat-feeds.js +104 -0
  223. package/dist/core/timing.d.ts +21 -0
  224. package/dist/core/timing.js +33 -0
  225. package/dist/core/transcript-export.d.ts +47 -0
  226. package/dist/core/transcript-export.js +107 -0
  227. package/dist/core/user-agents.d.ts +82 -0
  228. package/dist/core/user-agents.js +239 -0
  229. package/dist/core/vertical-search.d.ts +54 -0
  230. package/dist/core/vertical-search.js +158 -0
  231. package/dist/core/watch-manager.d.ts +175 -0
  232. package/dist/core/watch-manager.js +416 -0
  233. package/dist/core/watch.d.ts +101 -0
  234. package/dist/core/watch.js +389 -0
  235. package/dist/core/youtube.d.ts +130 -0
  236. package/dist/core/youtube.js +1175 -0
  237. package/dist/ee/challenge-re-export.d.ts +1 -0
  238. package/dist/ee/challenge-re-export.js +1 -0
  239. package/dist/ee/challenge-solver.d.ts +72 -0
  240. package/dist/ee/challenge-solver.js +720 -0
  241. package/dist/ee/domain-extractors.d.ts +8 -0
  242. package/dist/ee/domain-extractors.js +8 -0
  243. package/dist/ee/domain-intel.d.ts +16 -0
  244. package/dist/ee/domain-intel.js +133 -0
  245. package/dist/ee/extractors/allrecipes.d.ts +2 -0
  246. package/dist/ee/extractors/allrecipes.js +120 -0
  247. package/dist/ee/extractors/amazon.d.ts +2 -0
  248. package/dist/ee/extractors/amazon.js +78 -0
  249. package/dist/ee/extractors/arxiv.d.ts +2 -0
  250. package/dist/ee/extractors/arxiv.js +137 -0
  251. package/dist/ee/extractors/bestbuy.d.ts +2 -0
  252. package/dist/ee/extractors/bestbuy.js +78 -0
  253. package/dist/ee/extractors/carscom.d.ts +2 -0
  254. package/dist/ee/extractors/carscom.js +121 -0
  255. package/dist/ee/extractors/coingecko.d.ts +2 -0
  256. package/dist/ee/extractors/coingecko.js +134 -0
  257. package/dist/ee/extractors/craigslist.d.ts +2 -0
  258. package/dist/ee/extractors/craigslist.js +92 -0
  259. package/dist/ee/extractors/devto.d.ts +2 -0
  260. package/dist/ee/extractors/devto.js +135 -0
  261. package/dist/ee/extractors/ebay.d.ts +2 -0
  262. package/dist/ee/extractors/ebay.js +90 -0
  263. package/dist/ee/extractors/espn.d.ts +2 -0
  264. package/dist/ee/extractors/espn.js +260 -0
  265. package/dist/ee/extractors/etsy.d.ts +2 -0
  266. package/dist/ee/extractors/etsy.js +52 -0
  267. package/dist/ee/extractors/facebook.d.ts +2 -0
  268. package/dist/ee/extractors/facebook.js +46 -0
  269. package/dist/ee/extractors/github.d.ts +2 -0
  270. package/dist/ee/extractors/github.js +196 -0
  271. package/dist/ee/extractors/google-flights.d.ts +2 -0
  272. package/dist/ee/extractors/google-flights.js +176 -0
  273. package/dist/ee/extractors/hackernews.d.ts +2 -0
  274. package/dist/ee/extractors/hackernews.js +147 -0
  275. package/dist/ee/extractors/imdb.d.ts +2 -0
  276. package/dist/ee/extractors/imdb.js +172 -0
  277. package/dist/ee/extractors/index.d.ts +26 -0
  278. package/dist/ee/extractors/index.js +247 -0
  279. package/dist/ee/extractors/instagram.d.ts +2 -0
  280. package/dist/ee/extractors/instagram.js +102 -0
  281. package/dist/ee/extractors/kalshi.d.ts +2 -0
  282. package/dist/ee/extractors/kalshi.js +121 -0
  283. package/dist/ee/extractors/kayak-cars.d.ts +2 -0
  284. package/dist/ee/extractors/kayak-cars.js +270 -0
  285. package/dist/ee/extractors/linkedin.d.ts +2 -0
  286. package/dist/ee/extractors/linkedin.js +113 -0
  287. package/dist/ee/extractors/medium.d.ts +2 -0
  288. package/dist/ee/extractors/medium.js +130 -0
  289. package/dist/ee/extractors/news.d.ts +4 -0
  290. package/dist/ee/extractors/news.js +173 -0
  291. package/dist/ee/extractors/npm.d.ts +2 -0
  292. package/dist/ee/extractors/npm.js +86 -0
  293. package/dist/ee/extractors/pdf.d.ts +2 -0
  294. package/dist/ee/extractors/pdf.js +108 -0
  295. package/dist/ee/extractors/pinterest.d.ts +2 -0
  296. package/dist/ee/extractors/pinterest.js +34 -0
  297. package/dist/ee/extractors/polymarket.d.ts +2 -0
  298. package/dist/ee/extractors/polymarket.js +358 -0
  299. package/dist/ee/extractors/producthunt.d.ts +2 -0
  300. package/dist/ee/extractors/producthunt.js +88 -0
  301. package/dist/ee/extractors/pubmed.d.ts +2 -0
  302. package/dist/ee/extractors/pubmed.js +162 -0
  303. package/dist/ee/extractors/pypi.d.ts +2 -0
  304. package/dist/ee/extractors/pypi.js +80 -0
  305. package/dist/ee/extractors/reddit.d.ts +2 -0
  306. package/dist/ee/extractors/reddit.js +438 -0
  307. package/dist/ee/extractors/redfin.d.ts +2 -0
  308. package/dist/ee/extractors/redfin.js +156 -0
  309. package/dist/ee/extractors/semanticscholar.d.ts +2 -0
  310. package/dist/ee/extractors/semanticscholar.js +131 -0
  311. package/dist/ee/extractors/shared.d.ts +12 -0
  312. package/dist/ee/extractors/shared.js +76 -0
  313. package/dist/ee/extractors/soundcloud.d.ts +2 -0
  314. package/dist/ee/extractors/soundcloud.js +34 -0
  315. package/dist/ee/extractors/sportsbetting.d.ts +2 -0
  316. package/dist/ee/extractors/sportsbetting.js +37 -0
  317. package/dist/ee/extractors/spotify.d.ts +2 -0
  318. package/dist/ee/extractors/spotify.js +34 -0
  319. package/dist/ee/extractors/stackoverflow.d.ts +2 -0
  320. package/dist/ee/extractors/stackoverflow.js +61 -0
  321. package/dist/ee/extractors/substack.d.ts +2 -0
  322. package/dist/ee/extractors/substack.js +115 -0
  323. package/dist/ee/extractors/substackroot.d.ts +2 -0
  324. package/dist/ee/extractors/substackroot.js +46 -0
  325. package/dist/ee/extractors/tiktok.d.ts +2 -0
  326. package/dist/ee/extractors/tiktok.js +29 -0
  327. package/dist/ee/extractors/tradingview.d.ts +2 -0
  328. package/dist/ee/extractors/tradingview.js +182 -0
  329. package/dist/ee/extractors/twitch.d.ts +2 -0
  330. package/dist/ee/extractors/twitch.js +36 -0
  331. package/dist/ee/extractors/twitter.d.ts +2 -0
  332. package/dist/ee/extractors/twitter.js +327 -0
  333. package/dist/ee/extractors/types.d.ts +14 -0
  334. package/dist/ee/extractors/types.js +1 -0
  335. package/dist/ee/extractors/walmart.d.ts +2 -0
  336. package/dist/ee/extractors/walmart.js +50 -0
  337. package/dist/ee/extractors/weather.d.ts +2 -0
  338. package/dist/ee/extractors/weather.js +133 -0
  339. package/dist/ee/extractors/wikipedia.d.ts +4 -0
  340. package/dist/ee/extractors/wikipedia.js +235 -0
  341. package/dist/ee/extractors/yelp.d.ts +2 -0
  342. package/dist/ee/extractors/yelp.js +216 -0
  343. package/dist/ee/extractors/youtube.d.ts +2 -0
  344. package/dist/ee/extractors/youtube.js +189 -0
  345. package/dist/ee/extractors/zillow.d.ts +54 -0
  346. package/dist/ee/extractors/zillow.js +247 -0
  347. package/dist/ee/extractors-re-export.d.ts +1 -0
  348. package/dist/ee/extractors-re-export.js +1 -0
  349. package/dist/ee/premium-hooks.d.ts +20 -0
  350. package/dist/ee/premium-hooks.js +50 -0
  351. package/dist/ee/spa-detection.d.ts +2 -0
  352. package/dist/ee/spa-detection.js +2 -0
  353. package/dist/ee/stability.d.ts +4 -0
  354. package/dist/ee/stability.js +29 -0
  355. package/dist/ee/swr-cache.d.ts +14 -0
  356. package/dist/ee/swr-cache.js +34 -0
  357. package/dist/index.d.ts +143 -0
  358. package/dist/index.js +291 -0
  359. package/dist/integrations/index.d.ts +2 -0
  360. package/dist/integrations/index.js +2 -0
  361. package/dist/integrations/langchain.d.ts +64 -0
  362. package/dist/integrations/langchain.js +115 -0
  363. package/dist/integrations/llamaindex.d.ts +50 -0
  364. package/dist/integrations/llamaindex.js +91 -0
  365. package/dist/mcp/handlers/act.d.ts +5 -0
  366. package/dist/mcp/handlers/act.js +34 -0
  367. package/dist/mcp/handlers/definitions.d.ts +6 -0
  368. package/dist/mcp/handlers/definitions.js +395 -0
  369. package/dist/mcp/handlers/extract.d.ts +7 -0
  370. package/dist/mcp/handlers/extract.js +135 -0
  371. package/dist/mcp/handlers/fetch.d.ts +6 -0
  372. package/dist/mcp/handlers/fetch.js +98 -0
  373. package/dist/mcp/handlers/find.d.ts +5 -0
  374. package/dist/mcp/handlers/find.js +137 -0
  375. package/dist/mcp/handlers/index.d.ts +13 -0
  376. package/dist/mcp/handlers/index.js +63 -0
  377. package/dist/mcp/handlers/legacy.d.ts +25 -0
  378. package/dist/mcp/handlers/legacy.js +450 -0
  379. package/dist/mcp/handlers/meta.d.ts +6 -0
  380. package/dist/mcp/handlers/meta.js +40 -0
  381. package/dist/mcp/handlers/monitor.d.ts +5 -0
  382. package/dist/mcp/handlers/monitor.js +41 -0
  383. package/dist/mcp/handlers/observe.d.ts +8 -0
  384. package/dist/mcp/handlers/observe.js +37 -0
  385. package/dist/mcp/handlers/read.d.ts +6 -0
  386. package/dist/mcp/handlers/read.js +78 -0
  387. package/dist/mcp/handlers/see.d.ts +5 -0
  388. package/dist/mcp/handlers/see.js +75 -0
  389. package/dist/mcp/handlers/types.d.ts +29 -0
  390. package/dist/mcp/handlers/types.js +28 -0
  391. package/dist/mcp/server.d.ts +7 -0
  392. package/dist/mcp/server.js +108 -0
  393. package/dist/mcp/smart-router.d.ts +23 -0
  394. package/dist/mcp/smart-router.js +178 -0
  395. package/dist/server/app.d.ts +14 -0
  396. package/dist/server/app.js +632 -0
  397. package/dist/server/auth-store.d.ts +28 -0
  398. package/dist/server/auth-store.js +88 -0
  399. package/dist/server/bull-queues.d.ts +60 -0
  400. package/dist/server/bull-queues.js +90 -0
  401. package/dist/server/email-service.d.ts +55 -0
  402. package/dist/server/email-service.js +291 -0
  403. package/dist/server/job-queue.d.ts +100 -0
  404. package/dist/server/job-queue.js +145 -0
  405. package/dist/server/logger.d.ts +10 -0
  406. package/dist/server/logger.js +37 -0
  407. package/dist/server/middleware/audit-log.d.ts +14 -0
  408. package/dist/server/middleware/audit-log.js +73 -0
  409. package/dist/server/middleware/auth.d.ts +35 -0
  410. package/dist/server/middleware/auth.js +225 -0
  411. package/dist/server/middleware/rate-limit.d.ts +50 -0
  412. package/dist/server/middleware/rate-limit.js +270 -0
  413. package/dist/server/middleware/scope-guard.d.ts +25 -0
  414. package/dist/server/middleware/scope-guard.js +45 -0
  415. package/dist/server/middleware/url-validator.d.ts +15 -0
  416. package/dist/server/middleware/url-validator.js +201 -0
  417. package/dist/server/openapi.yaml +6418 -0
  418. package/dist/server/pg-auth-store.d.ts +146 -0
  419. package/dist/server/pg-auth-store.js +576 -0
  420. package/dist/server/pg-job-queue.d.ts +59 -0
  421. package/dist/server/pg-job-queue.js +375 -0
  422. package/dist/server/routes/activity.d.ts +6 -0
  423. package/dist/server/routes/activity.js +79 -0
  424. package/dist/server/routes/admin-active.d.ts +7 -0
  425. package/dist/server/routes/admin-active.js +120 -0
  426. package/dist/server/routes/admin-stats.d.ts +7 -0
  427. package/dist/server/routes/admin-stats.js +176 -0
  428. package/dist/server/routes/agent.d.ts +24 -0
  429. package/dist/server/routes/agent.js +480 -0
  430. package/dist/server/routes/answer.d.ts +5 -0
  431. package/dist/server/routes/answer.js +125 -0
  432. package/dist/server/routes/ask.d.ts +28 -0
  433. package/dist/server/routes/ask.js +295 -0
  434. package/dist/server/routes/batch.d.ts +6 -0
  435. package/dist/server/routes/batch.js +493 -0
  436. package/dist/server/routes/cache-warm.d.ts +25 -0
  437. package/dist/server/routes/cache-warm.js +212 -0
  438. package/dist/server/routes/cli-usage.d.ts +6 -0
  439. package/dist/server/routes/cli-usage.js +127 -0
  440. package/dist/server/routes/compat.d.ts +23 -0
  441. package/dist/server/routes/compat.js +652 -0
  442. package/dist/server/routes/crawl.d.ts +13 -0
  443. package/dist/server/routes/crawl.js +287 -0
  444. package/dist/server/routes/deep-fetch.d.ts +8 -0
  445. package/dist/server/routes/deep-fetch.js +57 -0
  446. package/dist/server/routes/deep-research.d.ts +11 -0
  447. package/dist/server/routes/deep-research.js +232 -0
  448. package/dist/server/routes/demo.d.ts +24 -0
  449. package/dist/server/routes/demo.js +517 -0
  450. package/dist/server/routes/do.d.ts +8 -0
  451. package/dist/server/routes/do.js +72 -0
  452. package/dist/server/routes/extract.d.ts +14 -0
  453. package/dist/server/routes/extract.js +325 -0
  454. package/dist/server/routes/feed.d.ts +15 -0
  455. package/dist/server/routes/feed.js +311 -0
  456. package/dist/server/routes/fetch-queue.d.ts +13 -0
  457. package/dist/server/routes/fetch-queue.js +357 -0
  458. package/dist/server/routes/fetch.d.ts +7 -0
  459. package/dist/server/routes/fetch.js +1274 -0
  460. package/dist/server/routes/go.d.ts +14 -0
  461. package/dist/server/routes/go.js +81 -0
  462. package/dist/server/routes/health.d.ts +11 -0
  463. package/dist/server/routes/health.js +141 -0
  464. package/dist/server/routes/jobs.d.ts +7 -0
  465. package/dist/server/routes/jobs.js +574 -0
  466. package/dist/server/routes/map.d.ts +11 -0
  467. package/dist/server/routes/map.js +116 -0
  468. package/dist/server/routes/mcp.d.ts +14 -0
  469. package/dist/server/routes/mcp.js +197 -0
  470. package/dist/server/routes/metrics.d.ts +37 -0
  471. package/dist/server/routes/metrics.js +149 -0
  472. package/dist/server/routes/oauth.d.ts +9 -0
  473. package/dist/server/routes/oauth.js +396 -0
  474. package/dist/server/routes/playground.d.ts +17 -0
  475. package/dist/server/routes/playground.js +283 -0
  476. package/dist/server/routes/reader.d.ts +18 -0
  477. package/dist/server/routes/reader.js +192 -0
  478. package/dist/server/routes/research.d.ts +14 -0
  479. package/dist/server/routes/research.js +482 -0
  480. package/dist/server/routes/screenshot.d.ts +22 -0
  481. package/dist/server/routes/screenshot.js +820 -0
  482. package/dist/server/routes/search.d.ts +6 -0
  483. package/dist/server/routes/search.js +874 -0
  484. package/dist/server/routes/session.d.ts +17 -0
  485. package/dist/server/routes/session.js +548 -0
  486. package/dist/server/routes/share.d.ts +18 -0
  487. package/dist/server/routes/share.js +462 -0
  488. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  489. package/dist/server/routes/smart-search/handlers/cars.js +102 -0
  490. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  491. package/dist/server/routes/smart-search/handlers/flights.js +72 -0
  492. package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
  493. package/dist/server/routes/smart-search/handlers/general.js +717 -0
  494. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  495. package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
  496. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  497. package/dist/server/routes/smart-search/handlers/products.js +1309 -0
  498. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  499. package/dist/server/routes/smart-search/handlers/rental.js +154 -0
  500. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  501. package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
  502. package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
  503. package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
  504. package/dist/server/routes/smart-search/index.d.ts +19 -0
  505. package/dist/server/routes/smart-search/index.js +546 -0
  506. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  507. package/dist/server/routes/smart-search/intent.js +264 -0
  508. package/dist/server/routes/smart-search/llm.d.ts +16 -0
  509. package/dist/server/routes/smart-search/llm.js +70 -0
  510. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  511. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  512. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  513. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  514. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  515. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  516. package/dist/server/routes/smart-search/types.d.ts +81 -0
  517. package/dist/server/routes/smart-search/types.js +1 -0
  518. package/dist/server/routes/smart-search/utils.d.ts +20 -0
  519. package/dist/server/routes/smart-search/utils.js +146 -0
  520. package/dist/server/routes/stats.d.ts +6 -0
  521. package/dist/server/routes/stats.js +71 -0
  522. package/dist/server/routes/stripe.d.ts +15 -0
  523. package/dist/server/routes/stripe.js +296 -0
  524. package/dist/server/routes/transcript-export.d.ts +10 -0
  525. package/dist/server/routes/transcript-export.js +178 -0
  526. package/dist/server/routes/usage.d.ts +9 -0
  527. package/dist/server/routes/usage.js +279 -0
  528. package/dist/server/routes/users.d.ts +8 -0
  529. package/dist/server/routes/users.js +1867 -0
  530. package/dist/server/routes/watch.d.ts +15 -0
  531. package/dist/server/routes/watch.js +309 -0
  532. package/dist/server/routes/webhooks.d.ts +26 -0
  533. package/dist/server/routes/webhooks.js +170 -0
  534. package/dist/server/routes/youtube.d.ts +6 -0
  535. package/dist/server/routes/youtube.js +130 -0
  536. package/dist/server/sentry.d.ts +14 -0
  537. package/dist/server/sentry.js +104 -0
  538. package/dist/server/types.d.ts +15 -0
  539. package/dist/server/types.js +7 -0
  540. package/dist/server/utils/response.d.ts +44 -0
  541. package/dist/server/utils/response.js +69 -0
  542. package/dist/server/utils/sse.d.ts +22 -0
  543. package/dist/server/utils/sse.js +38 -0
  544. package/dist/types.d.ts +552 -0
  545. package/dist/types.js +39 -0
  546. package/llms.txt +105 -0
  547. package/package.json +189 -0
@@ -0,0 +1,632 @@
1
+ /**
2
+ * WebPeel API Server
3
+ * Express-based REST API for hosted deployments
4
+ */
5
+ // Force IPv4-first DNS resolution to prevent IPv6 failures in containers
6
+ // (Render's Docker containers can't do IPv6 outbound, causing IANA/Cloudflare sites to fail)
7
+ import dns from 'dns';
8
+ dns.setDefaultResultOrder('ipv4first');
9
+ import express from 'express';
10
+ import './types.js'; // Augments Express.Request with requestId
11
+ import cors from 'cors';
12
+ import helmet from 'helmet';
13
+ import { createLogger } from './logger.js';
14
+ const log = createLogger('server');
15
+ import { InMemoryAuthStore } from './auth-store.js';
16
+ import { PostgresAuthStore } from './pg-auth-store.js';
17
+ import { createAuthMiddleware } from './middleware/auth.js';
18
+ import { createRateLimitMiddleware, RateLimiter } from './middleware/rate-limit.js';
19
+ import { createHealthRouter } from './routes/health.js';
20
+ import { createFetchRouter } from './routes/fetch.js';
21
+ import { createSearchRouter } from './routes/search.js';
22
+ import { createSmartSearchRouter } from './routes/smart-search/index.js';
23
+ import { createUserRouter } from './routes/users.js';
24
+ import { createStripeRouter, createBillingPortalRouter } from './routes/stripe.js';
25
+ import { createOAuthRouter } from './routes/oauth.js';
26
+ import { createStatsRouter } from './routes/stats.js';
27
+ import { createActivityRouter } from './routes/activity.js';
28
+ import { createCLIUsageRouter } from './routes/cli-usage.js';
29
+ import { createUsageRouter } from './routes/usage.js';
30
+ import { createAdminStatsRouter } from './routes/admin-stats.js';
31
+ import { createAdminActiveRouter } from './routes/admin-active.js';
32
+ import { createJobsRouter } from './routes/jobs.js';
33
+ import { createBatchRouter } from './routes/batch.js';
34
+ import { createAnswerRouter } from './routes/answer.js';
35
+ import { createDeepResearchRouter } from './routes/deep-research.js';
36
+ import { createResearchRouter } from './routes/research.js';
37
+ import { createAskRouter } from './routes/ask.js';
38
+ import { createMcpRouter } from './routes/mcp.js';
39
+ import { createDoRouter } from './routes/do.js';
40
+ import { createYouTubeRouter } from './routes/youtube.js';
41
+ import { createTranscriptExportRouter } from './routes/transcript-export.js';
42
+ import { createDeepFetchRouter } from './routes/deep-fetch.js';
43
+ import { createFeedRouter } from './routes/feed.js';
44
+ import { createGoRouter } from './routes/go.js';
45
+ import { createWatchRouter } from './routes/watch.js';
46
+ import pg from 'pg';
47
+ import { createScreenshotRouter } from './routes/screenshot.js';
48
+ import { createDemoRouter } from './routes/demo.js';
49
+ import { createPlaygroundRouter } from './routes/playground.js';
50
+ import { createReaderRouter } from './routes/reader.js';
51
+ import { createSharePublicRouter, createShareRouter } from './routes/share.js';
52
+ import { createJobQueue } from './job-queue.js';
53
+ import { createQueueFetchRouter } from './routes/fetch-queue.js';
54
+ import { createCompatRouter } from './routes/compat.js';
55
+ import { createCrawlRouter } from './routes/crawl.js';
56
+ import { createMapRouter } from './routes/map.js';
57
+ import { createExtractRouter } from './routes/extract.js';
58
+ import { createAgentRouter } from './routes/agent.js';
59
+ import { createSessionRouter } from './routes/session.js';
60
+ import { createSentryHooks } from './sentry.js';
61
+ import { createMetricsRouter, metricsMiddleware } from './routes/metrics.js';
62
+ import { requireScope } from './middleware/scope-guard.js';
63
+ import { auditMiddleware } from './middleware/audit-log.js';
64
+ import { createCacheWarmRouter, startCacheWarmer } from './routes/cache-warm.js';
65
+ import { warmup, cleanup as cleanupFetcher } from '../core/fetcher.js';
66
// Proprietary (ee) modules are optional: each is imported dynamically so this
// module still loads when the ee/ files are absent. When an import fails the
// corresponding hook simply stays undefined and callers must use `?.()`.
let setExtractorRedis;
let registerPremiumHooks;
try {
    ({ setExtractorRedis } = await import('../ee/domain-extractors.js'));
}
catch {
    /* ee module not available */
}
try {
    ({ registerPremiumHooks } = await import('../ee/premium-hooks.js'));
}
catch {
    /* ee module not available */
}
79
+ import { readFileSync } from 'fs';
80
+ import { join, dirname } from 'path';
81
+ import { fileURLToPath } from 'url';
82
// Resolve path to the OpenAPI spec (works from both src/ and dist/)
const __dirname_app = dirname(fileURLToPath(import.meta.url));
// Memoized spec text; null means "not looked up yet".
let _openApiYaml = null;
/**
 * Return the OpenAPI spec as YAML text, reading it from disk at most once.
 *
 * Looks for `openapi.yaml` next to this module first, then under
 * `src/server/` two directories up. If neither file can be read, a one-line
 * placeholder comment is returned instead of throwing.
 *
 * The result (including the placeholder) is cached for the lifetime of the
 * process, so a missing spec does not cause synchronous disk reads on every
 * request.
 *
 * @returns {string} The spec contents, or `'# openapi.yaml not found\n'`.
 */
function getOpenApiYaml() {
    if (_openApiYaml !== null)
        return _openApiYaml;
    const candidates = [
        join(__dirname_app, 'openapi.yaml'),
        join(__dirname_app, '..', '..', 'src', 'server', 'openapi.yaml'),
    ];
    for (const candidate of candidates) {
        try {
            _openApiYaml = readFileSync(candidate, 'utf-8');
            return _openApiYaml;
        }
        catch {
            // Unreadable or missing — try the next candidate.
        }
    }
    // Cache the fallback too, so later calls short-circuit at the top.
    _openApiYaml = '# openapi.yaml not found\n';
    return _openApiYaml;
}
109
// ─── Graceful shutdown state ─────────────────────────────────────────────────
// Kept at module level so the request-tracking middleware registered in
// createApp and the shutdown routine in startServer observe the same values.
/** Request IDs of requests that have started but not yet finished or closed. */
const activeRequests = new Set();
/** Set to true once shutdown begins; new requests are then answered with 503. */
let isShuttingDown = false;
113
/**
 * Build the fully wired Express application (middleware, routers, error handlers).
 *
 * Registration order matters throughout: request ID → shutdown tracking →
 * metrics → timeouts → Stripe raw-body webhook → JSON body parsing → security
 * headers / CORS → public routers → auth + rate limiting → authenticated
 * routers → 404 → error handlers.
 *
 * @param {object} [config]
 * @param {string[]} [config.corsOrigins] Trusted origins; replaces the built-in defaults plus CORS_ORIGINS.
 * @param {boolean} [config.usePostgres] Force the auth store type; defaults to whether DATABASE_URL is set.
 * @param {number} [config.rateLimitWindowMs] Rate-limit window in milliseconds (default: 1 hour).
 * @returns The configured Express app (not yet listening).
 */
export function createApp(config = {}) {
    const app = express();
    // SECURITY: Trust proxy for Render/production (HTTPS only)
    app.set('trust proxy', 1);
    // ─── Request ID ─────────────────────────────────────────────────────────────
    // Generate a UUID v4 for every request so errors and logs are traceable.
    // Must run before all other middleware so req.requestId is always set.
    app.use((req, res, next) => {
        req.requestId = crypto.randomUUID();
        res.setHeader('X-Request-Id', req.requestId);
        next();
    });
    // ─── Shutdown-aware request tracking ────────────────────────────────────────
    // Must be AFTER request ID (needs req.requestId) and BEFORE timeouts/auth.
    // During shutdown: returns 503 to new requests; tracks active requests to drain.
    app.use((req, res, next) => {
        if (isShuttingDown) {
            res.setHeader('Retry-After', '10');
            res.status(503).json({
                success: false,
                error: {
                    type: 'service_unavailable',
                    message: 'Server is shutting down. Retry shortly.',
                    docs: 'https://webpeel.dev/docs/errors#service-unavailable',
                },
            });
            return;
        }
        const id = req.requestId;
        activeRequests.add(id);
        // Both events may fire for one response; Set.delete is idempotent.
        const cleanup = () => activeRequests.delete(id);
        res.on('finish', cleanup);
        res.on('close', cleanup);
        next();
    });
    // ─── Prometheus metrics middleware ──────────────────────────────────────────
    // Records request counts, durations, and active request gauge.
    app.use(metricsMiddleware());
    // Hard server-side timeouts — no request runs longer than this
    app.use((req, res, next) => {
        const path = req.path;
        let timeoutMs = 30000; // 30s default
        // The query parser can yield arrays or nested objects for `url`
        // (e.g. ?url[a]=b); only a plain string is safe to substring-match.
        const rawUrl = req.query?.url;
        const urlParam = typeof rawUrl === 'string' ? rawUrl : '';
        if (path.includes('/crawl') || path.includes('/map'))
            timeoutMs = 300000; // 5min for crawls
        else if (path.includes('/batch'))
            timeoutMs = 120000; // 2min for batch
        else if (path.includes('/screenshot'))
            timeoutMs = 60000; // 1min for screenshots
        else if (path.includes('/search/smart'))
            timeoutMs = 45000; // 45s for smart search (Yelp+Reddit+Ollama chain)
        else if (req.query?.render === 'true' || req.query?.stealth === 'true')
            timeoutMs = 60000; // 1min for browser/stealth fetches
        else if (urlParam.includes('youtube.com') || urlParam.includes('youtu.be'))
            timeoutMs = 90000; // 90s for YouTube (yt-dlp needs time after simpleFetch fails)
        req.setTimeout(timeoutMs);
        res.setTimeout(timeoutMs, () => {
            if (!res.headersSent) {
                res.status(504).json({
                    success: false,
                    error: {
                        type: 'timeout',
                        message: `Request timed out after ${timeoutMs / 1000}s`,
                        hint: 'Try reducing the scope of your request or upgrading your plan for higher limits.',
                        docs: 'https://webpeel.dev/docs/errors#timeout',
                    },
                    metadata: { requestId: req.requestId },
                });
            }
        });
        next();
    });
    // Optional error tracking (enabled only when SENTRY_DSN is set)
    const sentry = createSentryHooks();
    if (sentry.requestHandler) {
        app.use(sentry.requestHandler);
    }
    // Stripe webhook route MUST come before express.json() to get raw body
    const stripeRouter = createStripeRouter();
    app.use('/v1/webhooks/stripe', express.raw({ type: 'application/json' }), stripeRouter);
    // Middleware
    // SECURITY: Limit request body size to prevent DoS
    app.use(express.json({ limit: '1mb' }));
    // Security headers via Helmet
    // NOTE: the explicit header middleware further down runs later and overwrites
    // several of these (e.g. Strict-Transport-Security, Content-Security-Policy).
    app.use(helmet({
        contentSecurityPolicy: false, // Disabled — API serves JSON, not HTML pages
        crossOriginEmbedderPolicy: false, // Allow embedding for widget/docs
        hsts: { maxAge: 31536000, includeSubDomains: true, preload: true },
        referrerPolicy: { policy: 'strict-origin-when-cross-origin' },
        frameguard: { action: 'deny' }, // Prevent clickjacking
        noSniff: true, // X-Content-Type-Options: nosniff
        xssFilter: true, // X-XSS-Protection
    }));
    // Audit logging — records who accessed which /v1/ endpoints and the outcome.
    // Privacy-safe: logs userId, method, path, status, duration, IP — no bodies or secrets.
    app.use(auditMiddleware);
    // GDPR / Privacy: Data retention policy header — indicates we only store metadata, not fetched content.
    // Registered once, before CORS, so preflight responses carry it as well.
    app.use((_req, res, next) => {
        res.setHeader('X-Data-Retention', 'metadata-only');
        next();
    });
    // CORS configuration
    // Always allow our own domains + any env-configured origins
    const envOrigins = process.env.CORS_ORIGINS ? process.env.CORS_ORIGINS.split(',').map(s => s.trim()) : [];
    const defaultOrigins = [
        'https://app.webpeel.dev',
        'https://webpeel.dev',
        // Only allow localhost in development (security: prevents credentialed cross-origin from local pages)
        ...(process.env.NODE_ENV !== 'production' ? ['http://localhost:3000', 'http://localhost:3001'] : []),
    ];
    const corsOrigins = config.corsOrigins || [...new Set([...defaultOrigins, ...envOrigins])];
    app.use(cors({
        origin: (origin, callback) => {
            // Allow requests with no origin (e.g. curl, server-to-server)
            if (!origin)
                return callback(null, true);
            if (corsOrigins.includes(origin))
                return callback(null, origin);
            // Unknown origins: allow (API key clients need cross-origin access) but no credentials.
            // SECURITY: Return '*' instead of reflecting the origin — wildcard is incompatible with
            // credentials (browsers reject Allow-Credentials + *), prevents origin-specific CORS caching,
            // and avoids security-scanner false positives from reflected origins.
            return callback(null, '*');
        },
        // credentials: set conditionally via post-cors middleware below
        credentials: false,
    }));
    // Set Access-Control-Allow-Credentials only for trusted origins
    app.use((req, res, next) => {
        const origin = req.headers.origin;
        if (origin && corsOrigins.includes(origin)) {
            res.setHeader('Access-Control-Allow-Credentials', 'true');
        }
        next();
    });
    // SECURITY: Security headers
    app.disable('x-powered-by');
    app.use((_req, res, next) => {
        res.setHeader('X-Content-Type-Options', 'nosniff');
        res.setHeader('X-Frame-Options', 'DENY');
        res.setHeader('Strict-Transport-Security', 'max-age=63072000; includeSubDomains; preload');
        res.setHeader('X-XSS-Protection', '1; mode=block');
        res.setHeader('Referrer-Policy', 'strict-origin-when-cross-origin');
        res.setHeader('Permissions-Policy', 'geolocation=(), microphone=(), camera=()');
        // API-safe CSP: JSON-only API does not need scripts/styles/fonts.
        // Keep this strict to reduce attack surface without affecting API clients.
        res.setHeader('Content-Security-Policy', "default-src 'none'; frame-ancestors 'none'; base-uri 'none'; form-action 'none'");
        // Best-effort removal of Render's origin header (may be re-added by proxy)
        res.removeHeader('x-render-origin-server');
        next();
    });
    // SECURITY: JSON parse error handler
    // (4-arg signature = Express error middleware; catches body-parser SyntaxErrors raised above.)
    app.use((err, req, res, next) => {
        if (err instanceof SyntaxError && 'body' in err) {
            res.status(400).json({
                success: false,
                error: {
                    type: 'invalid_request',
                    message: 'Malformed JSON in request body',
                    hint: 'Ensure the request body is valid JSON',
                    docs: 'https://webpeel.dev/docs/api-reference#errors',
                },
                requestId: req.requestId,
            });
            return;
        }
        next(err);
    });
    // Auth store - Use PostgreSQL if DATABASE_URL is set, otherwise in-memory
    const usePostgres = config.usePostgres ?? !!process.env.DATABASE_URL;
    const authStore = usePostgres
        ? new PostgresAuthStore()
        : new InMemoryAuthStore();
    log.info(`Using ${usePostgres ? 'PostgreSQL' : 'in-memory'} auth store`);
    // PostgreSQL pool for features that need direct DB access (watch, etc.)
    const pool = process.env.DATABASE_URL
        ? new pg.Pool({
            connectionString: process.env.DATABASE_URL,
            ssl: process.env.NODE_ENV === 'production' ? { rejectUnauthorized: true } : false,
        })
        : null;
    // Job queue - Use PostgreSQL if DATABASE_URL is set, otherwise in-memory
    const jobQueue = createJobQueue();
    // Rate limiter
    const rateLimiter = new RateLimiter(config.rateLimitWindowMs || 3_600_000); // 1 hour
    // Clean up rate limiter every 5 minutes.
    // unref() so this housekeeping timer never keeps the process alive by itself
    // (e.g. when createApp is used without listen(), or after the server closes).
    setInterval(() => {
        rateLimiter.cleanup();
    }, 5 * 60 * 1000).unref();
    // Health check MUST be before auth/rate-limit middleware
    // Render hits /health every ~30s; rate-limiting it causes 429 → service marked as failed
    // Pass pool so /ready can check DB connectivity
    app.use(createHealthRouter(pool));
    // Prometheus metrics — admin or internal IPs (K8s Prometheus scraper) only
    app.use(createMetricsRouter());
    // Affiliate redirect — /go/:store/*path — public, no auth required
    app.use(createGoRouter());
    // OpenAPI spec — public, no auth required
    app.get('/openapi.yaml', (_req, res) => {
        res.setHeader('Content-Type', 'application/yaml; charset=utf-8');
        res.setHeader('Cache-Control', 'public, max-age=3600');
        res.send(getOpenApiYaml());
    });
    // Redirect /openapi.json to YAML spec (no extra dependency needed)
    app.get('/openapi.json', (_req, res) => {
        res.redirect(301, '/openapi.yaml');
    });
    // Developer-friendly redirect
    app.get('/docs/api', (_req, res) => {
        res.redirect('/openapi.yaml');
    });
    // Internal cache-warming endpoints — unauthenticated (self-auth via bearer token)
    // Must be BEFORE auth middleware so the CF Worker can call without an API key
    app.use(createCacheWarmRouter(pool));
    // Demo endpoint — unauthenticated, must be before auth middleware
    app.use(createDemoRouter());
    // Playground endpoint — unauthenticated, CORS-locked to webpeel.dev/localhost
    app.use('/v1/playground', createPlaygroundRouter());
    // Public share endpoint — GET /s/:id (no auth required, must be before reader router)
    // Registered first so valid share IDs are served before falling through to reader's /s/* search
    app.use(createSharePublicRouter(pool));
    // Zero-auth reader API — Jina-style URL prefix (/r/URL) and search (/s/query)
    // Must be BEFORE auth middleware so no API key is required
    app.use(createReaderRouter());
    // Apply auth middleware globally
    app.use(createAuthMiddleware(authStore));
    // Apply rate limiting middleware globally
    app.use(createRateLimitMiddleware(rateLimiter));
    // Share links — POST /v1/share (auth required, after auth middleware)
    app.use(createShareRouter(pool));
    // First-class native routes (registered before compat so they take precedence)
    //
    // Scope guards enforce API key permission scopes; JWT sessions bypass them.
    // For routers with relative paths: app.use(path, guard, router)  ← prefix stripped, relative paths match
    // For routers with absolute paths: app.use(path, guard) then app.use(router)  ← guard at path, router sees full path
    // /v1/crawl — full or read only (router uses relative paths)
    app.use('/v1/crawl', requireScope('full', 'read'), createCrawlRouter(jobQueue));
    // /v1/map — full or read only (router uses relative paths)
    app.use('/v1/map', requireScope('full', 'read'), createMapRouter());
    // Compat routes (/v1/scrape, /v1/search) — all scopes allowed, no guard needed
    app.use(createCompatRouter(jobQueue));
    app.use(createSessionRouter());
    // /v1/extract — full or read only (router uses absolute paths, guard before router)
    app.use('/v1/extract', requireScope('full', 'read'));
    app.use(createExtractRouter());
    // /v1/deep-fetch — full or read only (router uses absolute paths, guard before router)
    app.use('/v1/deep-fetch', requireScope('full', 'read'));
    app.use(createDeepFetchRouter());
    // /v1/watch — full or read only (router uses absolute paths, guard before router)
    // Only mounted when a database pool exists.
    if (pool) {
        app.use('/v1/watch', requireScope('full', 'read'));
        app.use(createWatchRouter(pool));
    }
    // /v1/fetch, /v1/search — all scopes allowed, no guard needed
    // In queue mode (API_MODE=queue), /v1/fetch and /v1/render are replaced by
    // queue-backed endpoints that enqueue Bull jobs and return { jobId, status }.
    // GET /v1/jobs/:id is also provided by the queue router for result polling.
    if (process.env.API_MODE === 'queue') {
        app.use(createQueueFetchRouter());
    }
    else {
        app.use(createFetchRouter(authStore));
    }
    // /v1/screenshot — full or read only (router uses absolute paths, guard before router)
    app.use('/v1/screenshot', requireScope('full', 'read'));
    app.use(createScreenshotRouter(authStore));
    // /v1/feed — feed discovery and parsing (all scopes allowed, no scope guard needed)
    app.use(createFeedRouter(authStore));
    app.use(createSearchRouter(authStore));
    // /v1/search/smart — intent detection + travel/commerce routing (POST)
    app.use(createSmartSearchRouter(authStore));
    // /v1/research — lightweight research (search → fetch → compile), BYOK LLM optional
    app.use('/v1/research', requireScope('full', 'read'));
    app.use(createResearchRouter());
    app.use(createBillingPortalRouter(pool));
    app.use(createUserRouter());
    app.use(createOAuthRouter());
    app.use(createStatsRouter(authStore));
    app.use(createActivityRouter(authStore));
    app.use(createCLIUsageRouter());
    // Usage API — authenticated users (professional usage tracking)
    app.use(createUsageRouter());
    // Admin endpoints — admin tier only (routers check internally)
    app.use(createAdminStatsRouter());
    app.use(createAdminActiveRouter());
    app.use(createJobsRouter(jobQueue, authStore));
    // /v1/batch — full or read only (router uses absolute paths, guard before router)
    app.use('/v1/batch', requireScope('full', 'read'));
    app.use(createBatchRouter(jobQueue));
    // Deprecation headers for declining endpoints
    app.use('/v1/answer', (_req, res, next) => {
        res.set('Deprecation', 'true');
        res.set('Sunset', '2026-06-01');
        res.set('Link', '</v1/ask>; rel="successor-version"');
        next();
    });
    // /v1/answer, /v1/ask — all scopes allowed, no guard needed
    app.use(createAnswerRouter());
    // /v1/deep-research — full or read only
    app.use('/v1/deep-research', requireScope('full', 'read'));
    app.use(createDeepResearchRouter());
    app.use(createAskRouter());
    // /v1/agent — full or read only (router uses relative paths)
    app.use('/v1/agent', requireScope('full', 'read'), createAgentRouter());
    // /v1/do — full only (router uses relative paths; admin-level operation)
    app.use('/v1/do', requireScope('full'), createDoRouter());
    app.use(createYouTubeRouter());
    app.use(createTranscriptExportRouter());
    app.use(createMcpRouter(authStore, pool));
    // 404 handler
    app.use((req, res) => {
        res.status(404).json({
            success: false,
            error: {
                type: 'not_found',
                message: 'Not found',
                docs: 'https://webpeel.dev/docs/api-reference',
            },
            requestId: req.requestId,
        });
    });
    // Sentry error middleware should run before the generic error handler.
    if (sentry.errorHandler) {
        app.use(sentry.errorHandler);
    }
    // Global error response normalizer — ensures ALL errors use the same structured shape.
    // Catches errors thrown via next(err) that may have a flat format {error: string, message: string}.
    // Must run before the generic error handler below.
    app.use((err, req, res, next) => {
        // Skip if error is already in structured format (has error.type or error.message as object)
        if (err && typeof err.error === 'object' && err.error !== null) {
            return next(err);
        }
        // Skip standard Error objects (handled by the generic error handler with Playwright sanitization)
        if (err instanceof Error && !err.hasOwnProperty('statusCode') && !err.hasOwnProperty('status')) {
            return next(err);
        }
        const statusCode = (err && (err.statusCode || err.status)) || 500;
        if (res.headersSent)
            return next(err);
        const requestId = req.requestId || req.headers['x-request-id'] || crypto.randomUUID();
        res.status(statusCode).json({
            success: false,
            error: {
                type: (err && (err.type || err.error)) || 'server_error',
                message: (err && err.message) || 'An unexpected error occurred',
                ...((err && err.hint) ? { hint: err.hint } : {}),
                ...((err && err.docs) ? { docs: err.docs } : {}),
            },
            requestId,
        });
    });
    // Error handler - SECURITY: sanitize errors in production to prevent leaking
    // Playwright stack traces, internal paths, or other sensitive details.
    // eslint-disable-next-line @typescript-eslint/no-unused-vars
    app.use((err, req, res, _next) => {
        log.error('Unhandled error', { message: err.message, stack: err.stack }); // Log full error server-side
        if (res.headersSent)
            return; // Avoid double-send crash
        if (process.env.NODE_ENV === 'production') {
            // Strip Playwright/browser launch errors and stack traces from responses
            const sanitized = (err.message || 'An unexpected error occurred')
                .replace(/browserType\.launch:.*$/s, 'Browser rendering unavailable on this server. Use the CLI with --render for browser-rendered content.')
                .replace(/at\s+\S.*\n?/g, '') // strip "at <location>" stack lines
                .trim() || 'An unexpected error occurred';
            res.status(500).json({
                success: false,
                error: {
                    type: 'internal_error',
                    message: sanitized,
                    docs: 'https://webpeel.dev/docs/api-reference#errors',
                },
                requestId: req.requestId,
            });
        }
        else {
            // Non-production only: include the raw message and stack to aid debugging.
            res.status(500).json({
                success: false,
                error: {
                    type: 'internal_error',
                    message: err.message || 'An unexpected error occurred',
                    docs: 'https://webpeel.dev/docs/api-reference#errors',
                },
                requestId: req.requestId,
                stack: err.stack,
            });
        }
    });
    return app;
}
508
/**
 * Create the app, start listening, and install process-level lifecycle handlers.
 *
 * Side effects: activates optional premium hooks, optionally wires a Redis
 * client into the extractor cache (REDIS_URL), kicks off browser warmup,
 * optionally starts the cache warmer (ENABLE_CACHE_WARM=true), and registers
 * SIGTERM / SIGINT / unhandledRejection / uncaughtException handlers.
 *
 * @param {object} [config] Forwarded to createApp; `config.port` overrides PORT (default 3000).
 * @returns {void}
 */
export function startServer(config = {}) {
    const app = createApp(config);
    const port = config.port || parseInt(process.env.PORT || '3000', 10);
    // Activate premium strategy hooks (SWR cache, domain intelligence, race).
    registerPremiumHooks?.();
    // Inject Redis into the domain extractor cache for cross-pod cache sharing.
    // When REDIS_URL is set (multi-pod k8s deployments), all pods share one cache
    // so the first pod to fetch a URL populates it for all others.
    if (process.env.REDIS_URL) {
        // @ts-ignore — ioredis CJS/ESM interop
        import('ioredis').then((IoRedisModule) => {
            const IoRedis = IoRedisModule.default ?? IoRedisModule;
            const url = process.env.REDIS_URL;
            const parsed = new URL(url);
            const redis = new IoRedis({
                host: parsed.hostname,
                port: parseInt(parsed.port || '6379', 10),
                db: parseInt(parsed.pathname?.slice(1) || '0', 10) || 0,
                // Carry over credentials and TLS from the URL; without these,
                // authenticated or rediss:// deployments cannot connect.
                // URL components are percent-encoded, so decode before use.
                ...(parsed.username ? { username: decodeURIComponent(parsed.username) } : {}),
                ...(parsed.password ? { password: decodeURIComponent(parsed.password) } : {}),
                ...(parsed.protocol === 'rediss:' ? { tls: {} } : {}),
                lazyConnect: true,
                maxRetriesPerRequest: 3,
                enableOfflineQueue: false,
            });
            setExtractorRedis?.(redis);
            log.info('Redis extractor cache initialized (shared cross-pod cache active)');
        }).catch((err) => {
            log.warn('Failed to init Redis extractor cache (in-memory only)', { error: err.message });
        });
    }
    // Pre-warm browser resources in the background to reduce first-request latency.
    void warmup().catch((error) => {
        log.warn('Browser warmup failed', { error: error instanceof Error ? error.message : String(error) });
    });
    // Build a dedicated pool for the cache warmer (separate from the app pool inside createApp)
    const warmerPool = process.env.DATABASE_URL
        ? new pg.Pool({
            connectionString: process.env.DATABASE_URL,
            ssl: process.env.NODE_ENV === 'production' ? { rejectUnauthorized: true } : false,
            max: 2, // small pool — warmer only needs occasional queries
        })
        : null;
    const server = app.listen(port, () => {
        log.info(`WebPeel API server listening on port ${port}`);
        log.info(`Health: http://localhost:${port}/health  Fetch: /v1/fetch  Search: /v1/search`);
        // Start cache warmer only when opted-in
        if (process.env.ENABLE_CACHE_WARM === 'true') {
            log.info('Cache warming enabled (ENABLE_CACHE_WARM=true)');
            startCacheWarmer(warmerPool);
        }
    });
    // Graceful shutdown: stop accepting connections, drain in-flight requests,
    // release resources, then exit 0. `signal` is only used for logging.
    const shutdown = async (signal = 'SIGTERM') => {
        if (isShuttingDown)
            return; // prevent double-shutdown
        isShuttingDown = true;
        log.info(`${signal} received — draining active requests...`);
        // Stop accepting new connections
        server.close(() => {
            log.info('HTTP server closed (no new connections)');
        });
        // Wait for active requests (max 25s, leaving 5s buffer for cleanup)
        const drainDeadline = Date.now() + 25_000;
        while (activeRequests.size > 0 && Date.now() < drainDeadline) {
            log.info(`Waiting for ${activeRequests.size} active requests to complete...`);
            await new Promise(r => setTimeout(r, 500));
        }
        if (activeRequests.size > 0) {
            log.warn(`Force-closing ${activeRequests.size} active requests`);
        }
        // Cleanup resources
        try {
            await cleanupFetcher();
            // Close the warmer DB pool (app pool is internal to createApp)
            if (warmerPool)
                await warmerPool.end().catch(() => { });
        }
        catch (err) {
            log.error('Error during cleanup', { error: err });
        }
        log.info('Clean shutdown complete');
        process.exit(0);
    };
    // Force shutdown after 30s (K8s terminationGracePeriodSeconds should be 35s)
    const forceShutdown = () => {
        // unref() so the watchdog itself never delays a clean exit.
        setTimeout(() => {
            log.error('Forced shutdown after 30s timeout');
            process.exit(1);
        }, 30_000).unref();
    };
    process.on('SIGTERM', () => { forceShutdown(); void shutdown('SIGTERM'); });
    process.on('SIGINT', () => { forceShutdown(); void shutdown('SIGINT'); });
    // Message fragments that identify an expected Playwright/CDP "session closed" failure.
    const sessionClosedMarkers = [
        'cdpSession.send',
        'Target page, context or browser has been closed',
        'Protocol error',
        'session closed',
    ];
    const matchesAny = (msg, markers) => markers.some((marker) => msg.includes(marker));
    // Catch unhandled promise rejections (e.g. Playwright/rebrowser-patches
    // throwing when a browser session closes mid-operation).
    // Without this handler Node.js crashes the entire process.
    process.on('unhandledRejection', (reason, promise) => {
        const msg = reason instanceof Error ? reason.message : String(reason);
        // Playwright/CDP session-closed errors are expected during high load — log but don't crash.
        // ('Target closed' is only treated as benign for rejections, not for uncaught exceptions.)
        if (matchesAny(msg, [...sessionClosedMarkers, 'Target closed'])) {
            console.warn('[warn] Suppressed Playwright session-closed rejection:', msg.slice(0, 120));
            return;
        }
        // Unknown rejections — log but still don't crash (pods are expensive to restart)
        console.error('[error] Unhandled promise rejection:', reason, 'promise:', promise);
    });
    process.on('uncaughtException', (err) => {
        // `throw null` / `throw undefined` reach this handler too; don't dereference blindly.
        const msg = err?.message || String(err);
        if (matchesAny(msg, sessionClosedMarkers)) {
            console.warn('[warn] Suppressed Playwright uncaughtException:', msg.slice(0, 120));
            return;
        }
        console.error('[error] Uncaught exception:', err);
        // For genuinely unknown errors, exit so K8s can restart cleanly
        process.exit(1);
    });
}
629
// Start server if run directly (i.e. this module is the script passed to node).
// Compare filesystem paths rather than building a file:// URL by string
// concatenation: import.meta.url is percent-encoded and uses file:///C:/... on
// Windows, so `file://${process.argv[1]}` fails to match for Windows paths
// and for paths containing spaces or other encoded characters.
if (process.argv[1] && fileURLToPath(import.meta.url) === process.argv[1]) {
    startServer();
}
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Auth store abstraction for API key validation and usage tracking
3
+ * Designed to easily swap from in-memory to PostgreSQL
4
+ */
5
/**
 * Record describing a single API key, as returned by {@link AuthStore.validateKey}.
 */
export interface ApiKeyInfo {
    /** The API key string itself. */
    key: string;
    /** Plan tier attached to the key. */
    tier: 'free' | 'starter' | 'pro' | 'enterprise' | 'max' | 'admin';
    /** Rate limit for this key. NOTE(review): unit/window is not visible in this declaration — confirm against the rate limiter. */
    rateLimit: number;
    /** Owning account identifier, when the key is tied to an account. */
    accountId?: string;
    /** When the key was created. */
    createdAt: Date;
    /** Optional permission scope of the key. */
    scope?: 'full' | 'read' | 'restricted';
}
13
/**
 * Storage-agnostic contract for API key validation and usage accounting.
 */
export interface AuthStore {
    /**
     * Look up an API key.
     * @param key The API key to validate.
     * @returns The key's info, or `null` when no info is available for it.
     */
    validateKey(key: string): Promise<ApiKeyInfo | null>;
    /**
     * Record usage against an API key.
     * @param key The API key being charged.
     * @param creditsOrType Either a numeric amount or a named usage type.
     */
    trackUsage(key: string, creditsOrType: number | 'basic' | 'stealth' | 'captcha' | 'search'): Promise<void>;
}
17
/**
 * In-memory {@link AuthStore} implementation, intended for development and
 * self-hosted deployments.
 */
export declare class InMemoryAuthStore implements AuthStore {
    private keys;
    private usage;
    constructor();
    /** Look up a key; resolves to its info or `null`. */
    validateKey(key: string): Promise<ApiKeyInfo | null>;
    /** Record usage for a key, given a numeric amount or a named usage type. */
    trackUsage(key: string, creditsOrType: number | 'basic' | 'stealth' | 'captcha' | 'search'): Promise<void>;
    /** Register a key with the store. */
    addKey(keyInfo: ApiKeyInfo): void;
    /** Return the recorded usage figure for a key. */
    getUsage(key: string): number;
}