@iflow-mcp/jakeliume-webpeel 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (547) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +313 -0
  3. package/dist/cache.d.ts +30 -0
  4. package/dist/cache.js +139 -0
  5. package/dist/cli/commands/auth.d.ts +5 -0
  6. package/dist/cli/commands/auth.js +411 -0
  7. package/dist/cli/commands/doctor.d.ts +37 -0
  8. package/dist/cli/commands/doctor.js +371 -0
  9. package/dist/cli/commands/fetch.d.ts +6 -0
  10. package/dist/cli/commands/fetch.js +1345 -0
  11. package/dist/cli/commands/guide.d.ts +2 -0
  12. package/dist/cli/commands/guide.js +183 -0
  13. package/dist/cli/commands/interact.d.ts +5 -0
  14. package/dist/cli/commands/interact.js +840 -0
  15. package/dist/cli/commands/jobs.d.ts +5 -0
  16. package/dist/cli/commands/jobs.js +997 -0
  17. package/dist/cli/commands/monitor.d.ts +12 -0
  18. package/dist/cli/commands/monitor.js +197 -0
  19. package/dist/cli/commands/observe.d.ts +12 -0
  20. package/dist/cli/commands/observe.js +158 -0
  21. package/dist/cli/commands/screenshot.d.ts +5 -0
  22. package/dist/cli/commands/screenshot.js +282 -0
  23. package/dist/cli/commands/search.d.ts +5 -0
  24. package/dist/cli/commands/search.js +1021 -0
  25. package/dist/cli/commands/setup.d.ts +13 -0
  26. package/dist/cli/commands/setup.js +244 -0
  27. package/dist/cli/commands/skill.d.ts +15 -0
  28. package/dist/cli/commands/skill.js +195 -0
  29. package/dist/cli/utils.d.ts +84 -0
  30. package/dist/cli/utils.js +806 -0
  31. package/dist/cli-auth.d.ts +75 -0
  32. package/dist/cli-auth.js +369 -0
  33. package/dist/cli.d.ts +17 -0
  34. package/dist/cli.js +99 -0
  35. package/dist/core/actions.d.ts +69 -0
  36. package/dist/core/actions.js +495 -0
  37. package/dist/core/agent.d.ts +98 -0
  38. package/dist/core/agent.js +558 -0
  39. package/dist/core/answer.d.ts +42 -0
  40. package/dist/core/answer.js +395 -0
  41. package/dist/core/application-tracker.d.ts +84 -0
  42. package/dist/core/application-tracker.js +184 -0
  43. package/dist/core/apply.d.ts +162 -0
  44. package/dist/core/apply.js +816 -0
  45. package/dist/core/auth-detection.d.ts +35 -0
  46. package/dist/core/auth-detection.js +358 -0
  47. package/dist/core/auto-extract.d.ts +82 -0
  48. package/dist/core/auto-extract.js +604 -0
  49. package/dist/core/auto-interact.d.ts +23 -0
  50. package/dist/core/auto-interact.js +246 -0
  51. package/dist/core/bm25-filter.d.ts +66 -0
  52. package/dist/core/bm25-filter.js +288 -0
  53. package/dist/core/branding.d.ts +54 -0
  54. package/dist/core/branding.js +234 -0
  55. package/dist/core/browser-fetch.d.ts +323 -0
  56. package/dist/core/browser-fetch.js +1600 -0
  57. package/dist/core/browser-pool.d.ts +91 -0
  58. package/dist/core/browser-pool.js +550 -0
  59. package/dist/core/budget.d.ts +42 -0
  60. package/dist/core/budget.js +324 -0
  61. package/dist/core/business-intel.d.ts +47 -0
  62. package/dist/core/business-intel.js +279 -0
  63. package/dist/core/cache.d.ts +13 -0
  64. package/dist/core/cache.js +121 -0
  65. package/dist/core/cf-worker-proxy.d.ts +32 -0
  66. package/dist/core/cf-worker-proxy.js +87 -0
  67. package/dist/core/challenge-detection.d.ts +26 -0
  68. package/dist/core/challenge-detection.js +468 -0
  69. package/dist/core/change-tracking.d.ts +75 -0
  70. package/dist/core/change-tracking.js +276 -0
  71. package/dist/core/chunker.d.ts +46 -0
  72. package/dist/core/chunker.js +249 -0
  73. package/dist/core/chunking.d.ts +42 -0
  74. package/dist/core/chunking.js +181 -0
  75. package/dist/core/circuit-breaker.d.ts +44 -0
  76. package/dist/core/circuit-breaker.js +85 -0
  77. package/dist/core/content-pruner.d.ts +47 -0
  78. package/dist/core/content-pruner.js +425 -0
  79. package/dist/core/cookie-cache.d.ts +60 -0
  80. package/dist/core/cookie-cache.js +163 -0
  81. package/dist/core/crawl-checkpoint.d.ts +54 -0
  82. package/dist/core/crawl-checkpoint.js +104 -0
  83. package/dist/core/crawler.d.ts +84 -0
  84. package/dist/core/crawler.js +349 -0
  85. package/dist/core/cross-verify.d.ts +27 -0
  86. package/dist/core/cross-verify.js +93 -0
  87. package/dist/core/deep-fetch.d.ts +74 -0
  88. package/dist/core/deep-fetch.js +405 -0
  89. package/dist/core/deep-research.d.ts +141 -0
  90. package/dist/core/deep-research.js +972 -0
  91. package/dist/core/design-analysis.d.ts +70 -0
  92. package/dist/core/design-analysis.js +490 -0
  93. package/dist/core/design-compare.d.ts +38 -0
  94. package/dist/core/design-compare.js +264 -0
  95. package/dist/core/diff.d.ts +61 -0
  96. package/dist/core/diff.js +289 -0
  97. package/dist/core/dns-cache.d.ts +20 -0
  98. package/dist/core/dns-cache.js +198 -0
  99. package/dist/core/documents.d.ts +23 -0
  100. package/dist/core/documents.js +123 -0
  101. package/dist/core/domain-memory.d.ts +66 -0
  102. package/dist/core/domain-memory.js +163 -0
  103. package/dist/core/domain-verify.d.ts +40 -0
  104. package/dist/core/domain-verify.js +379 -0
  105. package/dist/core/engine-ranker.d.ts +112 -0
  106. package/dist/core/engine-ranker.js +395 -0
  107. package/dist/core/extract-inline.d.ts +38 -0
  108. package/dist/core/extract-inline.js +215 -0
  109. package/dist/core/extract-listings.d.ts +38 -0
  110. package/dist/core/extract-listings.js +461 -0
  111. package/dist/core/extract.d.ts +9 -0
  112. package/dist/core/extract.js +139 -0
  113. package/dist/core/fetch-cache.d.ts +57 -0
  114. package/dist/core/fetch-cache.js +95 -0
  115. package/dist/core/fetcher.d.ts +13 -0
  116. package/dist/core/fetcher.js +12 -0
  117. package/dist/core/google-cache.d.ts +29 -0
  118. package/dist/core/google-cache.js +180 -0
  119. package/dist/core/google-serp-parser.d.ts +82 -0
  120. package/dist/core/google-serp-parser.js +287 -0
  121. package/dist/core/hotel-search.d.ts +122 -0
  122. package/dist/core/hotel-search.js +382 -0
  123. package/dist/core/http-fetch.d.ts +72 -0
  124. package/dist/core/http-fetch.js +820 -0
  125. package/dist/core/human.d.ts +175 -0
  126. package/dist/core/human.js +680 -0
  127. package/dist/core/image-caption.d.ts +44 -0
  128. package/dist/core/image-caption.js +271 -0
  129. package/dist/core/jobs.d.ts +75 -0
  130. package/dist/core/jobs.js +634 -0
  131. package/dist/core/json-ld.d.ts +15 -0
  132. package/dist/core/json-ld.js +617 -0
  133. package/dist/core/language-detect.d.ts +18 -0
  134. package/dist/core/language-detect.js +135 -0
  135. package/dist/core/links.d.ts +10 -0
  136. package/dist/core/links.js +44 -0
  137. package/dist/core/llm-extract.d.ts +71 -0
  138. package/dist/core/llm-extract.js +507 -0
  139. package/dist/core/llm-provider.d.ts +100 -0
  140. package/dist/core/llm-provider.js +702 -0
  141. package/dist/core/local-search.d.ts +60 -0
  142. package/dist/core/local-search.js +308 -0
  143. package/dist/core/logger.d.ts +28 -0
  144. package/dist/core/logger.js +104 -0
  145. package/dist/core/map.d.ts +33 -0
  146. package/dist/core/map.js +127 -0
  147. package/dist/core/markdown.d.ts +92 -0
  148. package/dist/core/markdown.js +809 -0
  149. package/dist/core/metadata.d.ts +34 -0
  150. package/dist/core/metadata.js +422 -0
  151. package/dist/core/observe.d.ts +113 -0
  152. package/dist/core/observe.js +395 -0
  153. package/dist/core/ocr.d.ts +12 -0
  154. package/dist/core/ocr.js +33 -0
  155. package/dist/core/paginate.d.ts +31 -0
  156. package/dist/core/paginate.js +106 -0
  157. package/dist/core/pdf.d.ts +8 -0
  158. package/dist/core/pdf.js +25 -0
  159. package/dist/core/peel-tls.d.ts +25 -0
  160. package/dist/core/peel-tls.js +220 -0
  161. package/dist/core/pipeline.d.ts +132 -0
  162. package/dist/core/pipeline.js +1666 -0
  163. package/dist/core/profiles.d.ts +61 -0
  164. package/dist/core/profiles.js +350 -0
  165. package/dist/core/prompt-guard.d.ts +30 -0
  166. package/dist/core/prompt-guard.js +119 -0
  167. package/dist/core/proxy-config.d.ts +90 -0
  168. package/dist/core/proxy-config.js +172 -0
  169. package/dist/core/quick-answer.d.ts +53 -0
  170. package/dist/core/quick-answer.js +833 -0
  171. package/dist/core/rate-governor.d.ts +80 -0
  172. package/dist/core/rate-governor.js +238 -0
  173. package/dist/core/readability.d.ts +57 -0
  174. package/dist/core/readability.js +533 -0
  175. package/dist/core/research.d.ts +66 -0
  176. package/dist/core/research.js +270 -0
  177. package/dist/core/retry.d.ts +60 -0
  178. package/dist/core/retry.js +119 -0
  179. package/dist/core/safe-browsing.d.ts +30 -0
  180. package/dist/core/safe-browsing.js +206 -0
  181. package/dist/core/schema-extraction.d.ts +66 -0
  182. package/dist/core/schema-extraction.js +352 -0
  183. package/dist/core/schema-postprocess.d.ts +32 -0
  184. package/dist/core/schema-postprocess.js +469 -0
  185. package/dist/core/schema-templates.d.ts +19 -0
  186. package/dist/core/schema-templates.js +143 -0
  187. package/dist/core/screenshot.d.ts +224 -0
  188. package/dist/core/screenshot.js +207 -0
  189. package/dist/core/search-engines.d.ts +25 -0
  190. package/dist/core/search-engines.js +182 -0
  191. package/dist/core/search-provider.d.ts +243 -0
  192. package/dist/core/search-provider.js +1629 -0
  193. package/dist/core/searxng-provider.d.ts +35 -0
  194. package/dist/core/searxng-provider.js +105 -0
  195. package/dist/core/selective-evidence.d.ts +151 -0
  196. package/dist/core/selective-evidence.js +389 -0
  197. package/dist/core/site-search.d.ts +44 -0
  198. package/dist/core/site-search.js +252 -0
  199. package/dist/core/sitemap.d.ts +23 -0
  200. package/dist/core/sitemap.js +105 -0
  201. package/dist/core/source-credibility.d.ts +29 -0
  202. package/dist/core/source-credibility.js +584 -0
  203. package/dist/core/source-scoring.d.ts +166 -0
  204. package/dist/core/source-scoring.js +396 -0
  205. package/dist/core/stemmer.d.ts +38 -0
  206. package/dist/core/stemmer.js +509 -0
  207. package/dist/core/strategies.d.ts +104 -0
  208. package/dist/core/strategies.js +1044 -0
  209. package/dist/core/strategy-hooks.d.ts +145 -0
  210. package/dist/core/strategy-hooks.js +74 -0
  211. package/dist/core/structured-extract.d.ts +43 -0
  212. package/dist/core/structured-extract.js +550 -0
  213. package/dist/core/summarize.d.ts +17 -0
  214. package/dist/core/summarize.js +78 -0
  215. package/dist/core/synonyms.d.ts +42 -0
  216. package/dist/core/synonyms.js +184 -0
  217. package/dist/core/system-monitor.d.ts +61 -0
  218. package/dist/core/system-monitor.js +133 -0
  219. package/dist/core/table-format.d.ts +30 -0
  220. package/dist/core/table-format.js +146 -0
  221. package/dist/core/threat-feeds.d.ts +23 -0
  222. package/dist/core/threat-feeds.js +104 -0
  223. package/dist/core/timing.d.ts +21 -0
  224. package/dist/core/timing.js +33 -0
  225. package/dist/core/transcript-export.d.ts +47 -0
  226. package/dist/core/transcript-export.js +107 -0
  227. package/dist/core/user-agents.d.ts +82 -0
  228. package/dist/core/user-agents.js +239 -0
  229. package/dist/core/vertical-search.d.ts +54 -0
  230. package/dist/core/vertical-search.js +158 -0
  231. package/dist/core/watch-manager.d.ts +175 -0
  232. package/dist/core/watch-manager.js +416 -0
  233. package/dist/core/watch.d.ts +101 -0
  234. package/dist/core/watch.js +389 -0
  235. package/dist/core/youtube.d.ts +130 -0
  236. package/dist/core/youtube.js +1175 -0
  237. package/dist/ee/challenge-re-export.d.ts +1 -0
  238. package/dist/ee/challenge-re-export.js +1 -0
  239. package/dist/ee/challenge-solver.d.ts +72 -0
  240. package/dist/ee/challenge-solver.js +720 -0
  241. package/dist/ee/domain-extractors.d.ts +8 -0
  242. package/dist/ee/domain-extractors.js +8 -0
  243. package/dist/ee/domain-intel.d.ts +16 -0
  244. package/dist/ee/domain-intel.js +133 -0
  245. package/dist/ee/extractors/allrecipes.d.ts +2 -0
  246. package/dist/ee/extractors/allrecipes.js +120 -0
  247. package/dist/ee/extractors/amazon.d.ts +2 -0
  248. package/dist/ee/extractors/amazon.js +78 -0
  249. package/dist/ee/extractors/arxiv.d.ts +2 -0
  250. package/dist/ee/extractors/arxiv.js +137 -0
  251. package/dist/ee/extractors/bestbuy.d.ts +2 -0
  252. package/dist/ee/extractors/bestbuy.js +78 -0
  253. package/dist/ee/extractors/carscom.d.ts +2 -0
  254. package/dist/ee/extractors/carscom.js +121 -0
  255. package/dist/ee/extractors/coingecko.d.ts +2 -0
  256. package/dist/ee/extractors/coingecko.js +134 -0
  257. package/dist/ee/extractors/craigslist.d.ts +2 -0
  258. package/dist/ee/extractors/craigslist.js +92 -0
  259. package/dist/ee/extractors/devto.d.ts +2 -0
  260. package/dist/ee/extractors/devto.js +135 -0
  261. package/dist/ee/extractors/ebay.d.ts +2 -0
  262. package/dist/ee/extractors/ebay.js +90 -0
  263. package/dist/ee/extractors/espn.d.ts +2 -0
  264. package/dist/ee/extractors/espn.js +260 -0
  265. package/dist/ee/extractors/etsy.d.ts +2 -0
  266. package/dist/ee/extractors/etsy.js +52 -0
  267. package/dist/ee/extractors/facebook.d.ts +2 -0
  268. package/dist/ee/extractors/facebook.js +46 -0
  269. package/dist/ee/extractors/github.d.ts +2 -0
  270. package/dist/ee/extractors/github.js +196 -0
  271. package/dist/ee/extractors/google-flights.d.ts +2 -0
  272. package/dist/ee/extractors/google-flights.js +176 -0
  273. package/dist/ee/extractors/hackernews.d.ts +2 -0
  274. package/dist/ee/extractors/hackernews.js +147 -0
  275. package/dist/ee/extractors/imdb.d.ts +2 -0
  276. package/dist/ee/extractors/imdb.js +172 -0
  277. package/dist/ee/extractors/index.d.ts +26 -0
  278. package/dist/ee/extractors/index.js +247 -0
  279. package/dist/ee/extractors/instagram.d.ts +2 -0
  280. package/dist/ee/extractors/instagram.js +102 -0
  281. package/dist/ee/extractors/kalshi.d.ts +2 -0
  282. package/dist/ee/extractors/kalshi.js +121 -0
  283. package/dist/ee/extractors/kayak-cars.d.ts +2 -0
  284. package/dist/ee/extractors/kayak-cars.js +270 -0
  285. package/dist/ee/extractors/linkedin.d.ts +2 -0
  286. package/dist/ee/extractors/linkedin.js +113 -0
  287. package/dist/ee/extractors/medium.d.ts +2 -0
  288. package/dist/ee/extractors/medium.js +130 -0
  289. package/dist/ee/extractors/news.d.ts +4 -0
  290. package/dist/ee/extractors/news.js +173 -0
  291. package/dist/ee/extractors/npm.d.ts +2 -0
  292. package/dist/ee/extractors/npm.js +86 -0
  293. package/dist/ee/extractors/pdf.d.ts +2 -0
  294. package/dist/ee/extractors/pdf.js +108 -0
  295. package/dist/ee/extractors/pinterest.d.ts +2 -0
  296. package/dist/ee/extractors/pinterest.js +34 -0
  297. package/dist/ee/extractors/polymarket.d.ts +2 -0
  298. package/dist/ee/extractors/polymarket.js +358 -0
  299. package/dist/ee/extractors/producthunt.d.ts +2 -0
  300. package/dist/ee/extractors/producthunt.js +88 -0
  301. package/dist/ee/extractors/pubmed.d.ts +2 -0
  302. package/dist/ee/extractors/pubmed.js +162 -0
  303. package/dist/ee/extractors/pypi.d.ts +2 -0
  304. package/dist/ee/extractors/pypi.js +80 -0
  305. package/dist/ee/extractors/reddit.d.ts +2 -0
  306. package/dist/ee/extractors/reddit.js +438 -0
  307. package/dist/ee/extractors/redfin.d.ts +2 -0
  308. package/dist/ee/extractors/redfin.js +156 -0
  309. package/dist/ee/extractors/semanticscholar.d.ts +2 -0
  310. package/dist/ee/extractors/semanticscholar.js +131 -0
  311. package/dist/ee/extractors/shared.d.ts +12 -0
  312. package/dist/ee/extractors/shared.js +76 -0
  313. package/dist/ee/extractors/soundcloud.d.ts +2 -0
  314. package/dist/ee/extractors/soundcloud.js +34 -0
  315. package/dist/ee/extractors/sportsbetting.d.ts +2 -0
  316. package/dist/ee/extractors/sportsbetting.js +37 -0
  317. package/dist/ee/extractors/spotify.d.ts +2 -0
  318. package/dist/ee/extractors/spotify.js +34 -0
  319. package/dist/ee/extractors/stackoverflow.d.ts +2 -0
  320. package/dist/ee/extractors/stackoverflow.js +61 -0
  321. package/dist/ee/extractors/substack.d.ts +2 -0
  322. package/dist/ee/extractors/substack.js +115 -0
  323. package/dist/ee/extractors/substackroot.d.ts +2 -0
  324. package/dist/ee/extractors/substackroot.js +46 -0
  325. package/dist/ee/extractors/tiktok.d.ts +2 -0
  326. package/dist/ee/extractors/tiktok.js +29 -0
  327. package/dist/ee/extractors/tradingview.d.ts +2 -0
  328. package/dist/ee/extractors/tradingview.js +182 -0
  329. package/dist/ee/extractors/twitch.d.ts +2 -0
  330. package/dist/ee/extractors/twitch.js +36 -0
  331. package/dist/ee/extractors/twitter.d.ts +2 -0
  332. package/dist/ee/extractors/twitter.js +327 -0
  333. package/dist/ee/extractors/types.d.ts +14 -0
  334. package/dist/ee/extractors/types.js +1 -0
  335. package/dist/ee/extractors/walmart.d.ts +2 -0
  336. package/dist/ee/extractors/walmart.js +50 -0
  337. package/dist/ee/extractors/weather.d.ts +2 -0
  338. package/dist/ee/extractors/weather.js +133 -0
  339. package/dist/ee/extractors/wikipedia.d.ts +4 -0
  340. package/dist/ee/extractors/wikipedia.js +235 -0
  341. package/dist/ee/extractors/yelp.d.ts +2 -0
  342. package/dist/ee/extractors/yelp.js +216 -0
  343. package/dist/ee/extractors/youtube.d.ts +2 -0
  344. package/dist/ee/extractors/youtube.js +189 -0
  345. package/dist/ee/extractors/zillow.d.ts +54 -0
  346. package/dist/ee/extractors/zillow.js +247 -0
  347. package/dist/ee/extractors-re-export.d.ts +1 -0
  348. package/dist/ee/extractors-re-export.js +1 -0
  349. package/dist/ee/premium-hooks.d.ts +20 -0
  350. package/dist/ee/premium-hooks.js +50 -0
  351. package/dist/ee/spa-detection.d.ts +2 -0
  352. package/dist/ee/spa-detection.js +2 -0
  353. package/dist/ee/stability.d.ts +4 -0
  354. package/dist/ee/stability.js +29 -0
  355. package/dist/ee/swr-cache.d.ts +14 -0
  356. package/dist/ee/swr-cache.js +34 -0
  357. package/dist/index.d.ts +143 -0
  358. package/dist/index.js +291 -0
  359. package/dist/integrations/index.d.ts +2 -0
  360. package/dist/integrations/index.js +2 -0
  361. package/dist/integrations/langchain.d.ts +64 -0
  362. package/dist/integrations/langchain.js +115 -0
  363. package/dist/integrations/llamaindex.d.ts +50 -0
  364. package/dist/integrations/llamaindex.js +91 -0
  365. package/dist/mcp/handlers/act.d.ts +5 -0
  366. package/dist/mcp/handlers/act.js +34 -0
  367. package/dist/mcp/handlers/definitions.d.ts +6 -0
  368. package/dist/mcp/handlers/definitions.js +395 -0
  369. package/dist/mcp/handlers/extract.d.ts +7 -0
  370. package/dist/mcp/handlers/extract.js +135 -0
  371. package/dist/mcp/handlers/fetch.d.ts +6 -0
  372. package/dist/mcp/handlers/fetch.js +98 -0
  373. package/dist/mcp/handlers/find.d.ts +5 -0
  374. package/dist/mcp/handlers/find.js +137 -0
  375. package/dist/mcp/handlers/index.d.ts +13 -0
  376. package/dist/mcp/handlers/index.js +63 -0
  377. package/dist/mcp/handlers/legacy.d.ts +25 -0
  378. package/dist/mcp/handlers/legacy.js +450 -0
  379. package/dist/mcp/handlers/meta.d.ts +6 -0
  380. package/dist/mcp/handlers/meta.js +40 -0
  381. package/dist/mcp/handlers/monitor.d.ts +5 -0
  382. package/dist/mcp/handlers/monitor.js +41 -0
  383. package/dist/mcp/handlers/observe.d.ts +8 -0
  384. package/dist/mcp/handlers/observe.js +37 -0
  385. package/dist/mcp/handlers/read.d.ts +6 -0
  386. package/dist/mcp/handlers/read.js +78 -0
  387. package/dist/mcp/handlers/see.d.ts +5 -0
  388. package/dist/mcp/handlers/see.js +75 -0
  389. package/dist/mcp/handlers/types.d.ts +29 -0
  390. package/dist/mcp/handlers/types.js +28 -0
  391. package/dist/mcp/server.d.ts +7 -0
  392. package/dist/mcp/server.js +108 -0
  393. package/dist/mcp/smart-router.d.ts +23 -0
  394. package/dist/mcp/smart-router.js +178 -0
  395. package/dist/server/app.d.ts +14 -0
  396. package/dist/server/app.js +632 -0
  397. package/dist/server/auth-store.d.ts +28 -0
  398. package/dist/server/auth-store.js +88 -0
  399. package/dist/server/bull-queues.d.ts +60 -0
  400. package/dist/server/bull-queues.js +90 -0
  401. package/dist/server/email-service.d.ts +55 -0
  402. package/dist/server/email-service.js +291 -0
  403. package/dist/server/job-queue.d.ts +100 -0
  404. package/dist/server/job-queue.js +145 -0
  405. package/dist/server/logger.d.ts +10 -0
  406. package/dist/server/logger.js +37 -0
  407. package/dist/server/middleware/audit-log.d.ts +14 -0
  408. package/dist/server/middleware/audit-log.js +73 -0
  409. package/dist/server/middleware/auth.d.ts +35 -0
  410. package/dist/server/middleware/auth.js +225 -0
  411. package/dist/server/middleware/rate-limit.d.ts +50 -0
  412. package/dist/server/middleware/rate-limit.js +270 -0
  413. package/dist/server/middleware/scope-guard.d.ts +25 -0
  414. package/dist/server/middleware/scope-guard.js +45 -0
  415. package/dist/server/middleware/url-validator.d.ts +15 -0
  416. package/dist/server/middleware/url-validator.js +201 -0
  417. package/dist/server/openapi.yaml +6418 -0
  418. package/dist/server/pg-auth-store.d.ts +146 -0
  419. package/dist/server/pg-auth-store.js +576 -0
  420. package/dist/server/pg-job-queue.d.ts +59 -0
  421. package/dist/server/pg-job-queue.js +375 -0
  422. package/dist/server/routes/activity.d.ts +6 -0
  423. package/dist/server/routes/activity.js +79 -0
  424. package/dist/server/routes/admin-active.d.ts +7 -0
  425. package/dist/server/routes/admin-active.js +120 -0
  426. package/dist/server/routes/admin-stats.d.ts +7 -0
  427. package/dist/server/routes/admin-stats.js +176 -0
  428. package/dist/server/routes/agent.d.ts +24 -0
  429. package/dist/server/routes/agent.js +480 -0
  430. package/dist/server/routes/answer.d.ts +5 -0
  431. package/dist/server/routes/answer.js +125 -0
  432. package/dist/server/routes/ask.d.ts +28 -0
  433. package/dist/server/routes/ask.js +295 -0
  434. package/dist/server/routes/batch.d.ts +6 -0
  435. package/dist/server/routes/batch.js +493 -0
  436. package/dist/server/routes/cache-warm.d.ts +25 -0
  437. package/dist/server/routes/cache-warm.js +212 -0
  438. package/dist/server/routes/cli-usage.d.ts +6 -0
  439. package/dist/server/routes/cli-usage.js +127 -0
  440. package/dist/server/routes/compat.d.ts +23 -0
  441. package/dist/server/routes/compat.js +652 -0
  442. package/dist/server/routes/crawl.d.ts +13 -0
  443. package/dist/server/routes/crawl.js +287 -0
  444. package/dist/server/routes/deep-fetch.d.ts +8 -0
  445. package/dist/server/routes/deep-fetch.js +57 -0
  446. package/dist/server/routes/deep-research.d.ts +11 -0
  447. package/dist/server/routes/deep-research.js +232 -0
  448. package/dist/server/routes/demo.d.ts +24 -0
  449. package/dist/server/routes/demo.js +517 -0
  450. package/dist/server/routes/do.d.ts +8 -0
  451. package/dist/server/routes/do.js +72 -0
  452. package/dist/server/routes/extract.d.ts +14 -0
  453. package/dist/server/routes/extract.js +325 -0
  454. package/dist/server/routes/feed.d.ts +15 -0
  455. package/dist/server/routes/feed.js +311 -0
  456. package/dist/server/routes/fetch-queue.d.ts +13 -0
  457. package/dist/server/routes/fetch-queue.js +357 -0
  458. package/dist/server/routes/fetch.d.ts +7 -0
  459. package/dist/server/routes/fetch.js +1274 -0
  460. package/dist/server/routes/go.d.ts +14 -0
  461. package/dist/server/routes/go.js +81 -0
  462. package/dist/server/routes/health.d.ts +11 -0
  463. package/dist/server/routes/health.js +141 -0
  464. package/dist/server/routes/jobs.d.ts +7 -0
  465. package/dist/server/routes/jobs.js +574 -0
  466. package/dist/server/routes/map.d.ts +11 -0
  467. package/dist/server/routes/map.js +116 -0
  468. package/dist/server/routes/mcp.d.ts +14 -0
  469. package/dist/server/routes/mcp.js +197 -0
  470. package/dist/server/routes/metrics.d.ts +37 -0
  471. package/dist/server/routes/metrics.js +149 -0
  472. package/dist/server/routes/oauth.d.ts +9 -0
  473. package/dist/server/routes/oauth.js +396 -0
  474. package/dist/server/routes/playground.d.ts +17 -0
  475. package/dist/server/routes/playground.js +283 -0
  476. package/dist/server/routes/reader.d.ts +18 -0
  477. package/dist/server/routes/reader.js +192 -0
  478. package/dist/server/routes/research.d.ts +14 -0
  479. package/dist/server/routes/research.js +482 -0
  480. package/dist/server/routes/screenshot.d.ts +22 -0
  481. package/dist/server/routes/screenshot.js +820 -0
  482. package/dist/server/routes/search.d.ts +6 -0
  483. package/dist/server/routes/search.js +874 -0
  484. package/dist/server/routes/session.d.ts +17 -0
  485. package/dist/server/routes/session.js +548 -0
  486. package/dist/server/routes/share.d.ts +18 -0
  487. package/dist/server/routes/share.js +462 -0
  488. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  489. package/dist/server/routes/smart-search/handlers/cars.js +102 -0
  490. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  491. package/dist/server/routes/smart-search/handlers/flights.js +72 -0
  492. package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
  493. package/dist/server/routes/smart-search/handlers/general.js +717 -0
  494. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  495. package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
  496. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  497. package/dist/server/routes/smart-search/handlers/products.js +1309 -0
  498. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  499. package/dist/server/routes/smart-search/handlers/rental.js +154 -0
  500. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  501. package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
  502. package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
  503. package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
  504. package/dist/server/routes/smart-search/index.d.ts +19 -0
  505. package/dist/server/routes/smart-search/index.js +546 -0
  506. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  507. package/dist/server/routes/smart-search/intent.js +264 -0
  508. package/dist/server/routes/smart-search/llm.d.ts +16 -0
  509. package/dist/server/routes/smart-search/llm.js +70 -0
  510. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  511. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  512. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  513. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  514. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  515. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  516. package/dist/server/routes/smart-search/types.d.ts +81 -0
  517. package/dist/server/routes/smart-search/types.js +1 -0
  518. package/dist/server/routes/smart-search/utils.d.ts +20 -0
  519. package/dist/server/routes/smart-search/utils.js +146 -0
  520. package/dist/server/routes/stats.d.ts +6 -0
  521. package/dist/server/routes/stats.js +71 -0
  522. package/dist/server/routes/stripe.d.ts +15 -0
  523. package/dist/server/routes/stripe.js +296 -0
  524. package/dist/server/routes/transcript-export.d.ts +10 -0
  525. package/dist/server/routes/transcript-export.js +178 -0
  526. package/dist/server/routes/usage.d.ts +9 -0
  527. package/dist/server/routes/usage.js +279 -0
  528. package/dist/server/routes/users.d.ts +8 -0
  529. package/dist/server/routes/users.js +1867 -0
  530. package/dist/server/routes/watch.d.ts +15 -0
  531. package/dist/server/routes/watch.js +309 -0
  532. package/dist/server/routes/webhooks.d.ts +26 -0
  533. package/dist/server/routes/webhooks.js +170 -0
  534. package/dist/server/routes/youtube.d.ts +6 -0
  535. package/dist/server/routes/youtube.js +130 -0
  536. package/dist/server/sentry.d.ts +14 -0
  537. package/dist/server/sentry.js +104 -0
  538. package/dist/server/types.d.ts +15 -0
  539. package/dist/server/types.js +7 -0
  540. package/dist/server/utils/response.d.ts +44 -0
  541. package/dist/server/utils/response.js +69 -0
  542. package/dist/server/utils/sse.d.ts +22 -0
  543. package/dist/server/utils/sse.js +38 -0
  544. package/dist/types.d.ts +552 -0
  545. package/dist/types.js +39 -0
  546. package/llms.txt +105 -0
  547. package/package.json +189 -0
@@ -0,0 +1,1274 @@
1
+ /**
2
+ * Fetch endpoint with caching
3
+ */
4
+ import { Router } from 'express';
5
+ import '../types.js'; // Augments Express.Request with requestId
6
+ import { peel } from '../../index.js';
7
+ import { proxyContextStorage, getProxyUsage } from '../../core/proxy-config.js';
8
+ import { normalizeActions } from '../../core/actions.js';
9
+ import { extractInlineJson } from '../../core/extract-inline.js';
10
+ import { LRUCache } from 'lru-cache';
11
+ import { validateUrlForSSRF, SSRFError } from '../middleware/url-validator.js';
12
+ import { wantsEnvelope, successResponse } from '../utils/response.js';
13
+ import { getSchemaTemplate } from '../../core/schema-templates.js';
14
+ import { quickAnswer } from '../../core/quick-answer.js';
15
+ import { sendUsageAlertEmail, checkAndSendDualAlert } from '../email-service.js';
16
+ import { extractLinks } from '../../core/links.js';
17
+ // ── Method cost table — exposed in response headers + body ────────────────────
18
/**
 * Per-page price in USD for each fetch method, kept as decimal strings so the
 * values can be written straight into response headers and the response body.
 *
 *   simple / domain-api : basic HTTP request (domain-api uses the site's own API)
 *   stealth / browser   : headless browser
 *   captcha             : anti-bot bypass
 */
const methodCosts = {
    simple: '0.0002',
    'domain-api': '0.0002',
    stealth: '0.005',
    browser: '0.005',
    captcha: '0.02',
};
25
+ // ── Helper: classify an error thrown by peel() into a FetchErrorType ─────────
26
/**
 * Map an error thrown by peel() onto a coarse failure category.
 *
 * The error is inspected through its `code`, its `name` and a lower-cased copy
 * of its `message`. Checks run in a fixed priority order
 * (timeout, blocked, not_found, server_error, network); the first match wins.
 *
 * Besides the library codes ('TIMEOUT', 'BLOCKED', 'NETWORK') this also
 * recognises the error class names the route handler treats as safe to expose
 * (TimeoutError, BlockedError, NetworkError) and the Node system error codes
 * whose messages do not contain the plain-English keywords
 * (e.g. "connect ETIMEDOUT ..." does not contain "timeout").
 *
 * Never throws: a null/undefined error or a non-string message yields 'unknown'.
 *
 * @param {*} err - The caught value (normally an Error).
 * @returns {'timeout'|'blocked'|'not_found'|'server_error'|'network'|'unknown'}
 */
function classifyFetchError(err) {
    // Both identifiers are checked independently so that an error carrying a
    // system `code` and a descriptive `name` is matched on either.
    const tags = [err?.code, err?.name].filter(Boolean).map(String);
    const msg = String(err?.message || '').toLowerCase();
    const tagged = (...candidates) => candidates.some((candidate) => tags.includes(candidate));
    const mentions = (...needles) => needles.some((needle) => msg.includes(needle));
    if (tagged('TIMEOUT', 'TimeoutError', 'ETIMEDOUT') || mentions('timeout', 'timed out', 'etimedout')) {
        return 'timeout';
    }
    if (tagged('BLOCKED', 'BlockedError') || mentions('blocked', 'cloudflare challenge', 'captcha', 'bot detection')) {
        return 'blocked';
    }
    if (tagged('ENOTFOUND') || mentions('http 404', 'not found', 'dns resolution failed', 'enotfound', 'getaddrinfo')) {
        return 'not_found';
    }
    if (/http\s+5\d{2}/.test(msg) || mentions('server error', 'internal server')) {
        return 'server_error';
    }
    if (tagged('NETWORK', 'NetworkError', 'ECONNREFUSED', 'ECONNRESET')
        || mentions('network', 'econnrefused', 'econnreset', 'connection refused', 'connection reset')) {
        return 'network';
    }
    return 'unknown';
}
46
+ // ── Helper: build a clean, user-facing error message from a peel() error ─────
47
/**
 * Build the user-facing description of a peel() failure.
 *
 * The error is classified with classifyFetchError(); every known category gets
 * a fixed, pre-written message, hint and docs link. Only the 'unknown'
 * category echoes the original error text, and that text is sanitized first:
 * the characters < > " ' are removed, surrounding whitespace is trimmed, and
 * the result is capped in length so an oversized upstream message cannot be
 * reflected verbatim to the client. If nothing is left after sanitizing, the
 * generic fallback sentence is used instead of an empty string.
 *
 * @param {*} err - The caught value (normally an Error).
 * @returns {{ type: string, message: string, hint: (string|undefined), docs: string }}
 */
function buildFetchErrorMessage(err) {
    const fallbackMessage = 'An unexpected error occurred while fetching the URL';
    const maxDetailLength = 500;
    const type = classifyFetchError(err);
    const hints = {
        timeout: 'Try increasing timeout with ?timeout=20000, or use render=true for JS-heavy sites.',
        blocked: 'Site blocked the request. Try adding render=true or stealth mode.',
        not_found: 'Verify the URL is correct and the site is accessible.',
        server_error: 'The target site returned a server error. Try again later.',
        network: 'Could not connect to the target URL. Verify the URL is correct and the site is online.',
        unknown: undefined,
    };
    const docsLinks = {
        timeout: 'https://webpeel.dev/docs/errors#timeout',
        blocked: 'https://webpeel.dev/docs/errors#blocked',
        not_found: 'https://webpeel.dev/docs/errors#not-found',
        server_error: 'https://webpeel.dev/docs/errors#server-error',
        network: 'https://webpeel.dev/docs/errors#network',
        unknown: 'https://webpeel.dev/docs/errors',
    };
    // Sanitize message: strip HTML-significant characters, trim, then truncate.
    const stripped = String(err?.message || fallbackMessage)
        .replace(/[<>"']/g, '')
        .trim();
    const bounded = stripped.length > maxDetailLength
        ? `${stripped.slice(0, maxDetailLength)}...`
        : stripped;
    // A message made only of stripped characters would otherwise become ''.
    const safeMsg = bounded || fallbackMessage;
    const messages = {
        timeout: 'Request timed out. Try increasing the timeout parameter or use render=true for JavaScript-heavy sites.',
        blocked: 'Site blocked the request. Try adding render=true or stealth mode to bypass bot protection.',
        not_found: 'The URL could not be reached — the domain may not exist or the page was not found.',
        server_error: 'The target website returned a server error while processing the request.',
        network: 'Could not reach this website. The server may be down or the URL may be incorrect.',
        unknown: safeMsg,
    };
    return { type, message: messages[type] || safeMsg, hint: hints[type], docs: docsLinks[type] };
}
79
+ // ── Helper: extractive summarizer (TF-IDF-like sentence scoring) ─────────────
80
/**
 * Extractive summarizer: picks the highest-scoring sentences of `content`
 * and returns them in their original order.
 *
 * Scoring: each sentence gets the average relative frequency of its words
 * (words longer than 3 characters, frequency relative to the most frequent
 * word in the whole text), plus a position bonus of 0.3 for the first
 * sentence and 0.1 for the last one.
 *
 * Selection: sentences are taken best-first until adding the next one would
 * exceed `maxWords * 1.3` words or five sentences have been chosen.
 *
 * Special cases:
 *  - empty content returns ''.
 *  - no usable sentence (40 < length < 600) returns the first `maxWords`
 *    words, with an ellipsis when truncated.
 *  - three or fewer usable sentences are returned as they are.
 *  - if even the best sentence is over budget, that sentence is truncated to
 *    `maxWords` words instead of returning an empty summary.
 *
 * @param {string} content - Text to summarize.
 * @param {number} [maxWords=150] - Approximate word budget.
 * @returns {string} The summary text.
 */
function extractSummary(content, maxWords = 150) {
    if (!content)
        return '';
    const sentences = content
        .split(/(?<=[.!?])\s+/)
        .map((s) => s.trim())
        .filter((s) => s.length > 40 && s.length < 600);
    if (sentences.length === 0) {
        const words = content.split(/\s+/);
        return words.slice(0, maxWords).join(' ') + (words.length > maxWords ? '\u2026' : '');
    }
    if (sentences.length <= 3)
        return sentences.join(' ');
    const tokenize = (text) => text.toLowerCase().split(/\W+/).filter((w) => w.length > 3);
    // A Map (not a plain object) so that page words such as "constructor" or
    // "__proto__" cannot collide with inherited Object.prototype members.
    const wordFreq = new Map();
    // Tracked while counting; avoids spreading the whole vocabulary into Math.max.
    let maxFreq = 1;
    for (const w of tokenize(content)) {
        const count = (wordFreq.get(w) ?? 0) + 1;
        wordFreq.set(w, count);
        if (count > maxFreq)
            maxFreq = count;
    }
    const scored = sentences.map((sentence, idx) => {
        const words = tokenize(sentence);
        const total = words.reduce((sum, w) => sum + (wordFreq.get(w) ?? 0) / maxFreq, 0);
        const score = total / Math.max(1, words.length);
        const posBonus = idx === 0 ? 0.3 : idx === sentences.length - 1 ? 0.1 : 0;
        return { sentence, score: score + posBonus, idx };
    });
    scored.sort((a, b) => b.score - a.score);
    const selected = [];
    let wc = 0;
    for (const item of scored) {
        const itemWc = item.sentence.split(/\s+/).length;
        if (wc + itemWc > maxWords * 1.3)
            break;
        selected.push(item);
        wc += itemWc;
        if (selected.length >= 5)
            break;
    }
    if (selected.length === 0) {
        // The top-ranked sentence alone is over budget: truncate it rather
        // than returning an empty summary for non-empty content.
        const words = scored[0].sentence.split(/\s+/);
        return words.slice(0, maxWords).join(' ') + (words.length > maxWords ? '\u2026' : '');
    }
    selected.sort((a, b) => a.idx - b.idx);
    return selected.map((s) => s.sentence).join(' ');
}
119
+ // ── Helper: check usage and determine if alert email should be sent ───────────
120
/**
 * Look up a user's usage for the current week and decide whether the
 * user-configured usage alert e-mail should be sent.
 *
 * Runs one query through `pgStore.pool.query`, joining the user to their API
 * keys and to the `weekly_usage` rows for the current week key ("YYYY-Www").
 * Sends nothing itself; it only reports the decision and the figures.
 *
 * @param {object} pgStore - Store exposing `pool.query(sql, params)`.
 * @param {*} userId - Value bound to `users.id`.
 * @returns {Promise<object>} `{ shouldSendAlert: false }` when the user is not
 *   found or has no alert threshold; otherwise `shouldSendAlert`,
 *   `usagePercent`, `used`, `total`, `userEmail`, `userName`, `userTier`
 *   and `alertEmail`.
 */
async function checkAndTriggerAlert(pgStore, userId) {
    const MS_PER_DAY = 86400000;
    // Week key for "now", anchored on January 4th (UTC) of the current year.
    const today = new Date();
    const utcYear = today.getUTCFullYear();
    const fourthOfJanuary = new Date(Date.UTC(utcYear, 0, 4));
    const daysSinceAnchor = (today.getTime() - fourthOfJanuary.getTime()) / MS_PER_DAY;
    const weekIndex = Math.ceil((daysSinceAnchor + fourthOfJanuary.getUTCDay() + 1) / 7);
    const weekKey = `${utcYear}-W${String(weekIndex).padStart(2, '0')}`;
    const { rows } = await pgStore.pool.query(`SELECT u.email, u.name, u.tier, u.alert_threshold, u.alert_email, u.alert_sent_at,
            u.weekly_limit,
            COALESCE(SUM(wu.total_count), 0) AS total_used,
            u.weekly_limit + COALESCE(MAX(wu.rollover_credits), 0) AS total_available
     FROM users u
     LEFT JOIN api_keys ak ON ak.user_id = u.id
     LEFT JOIN weekly_usage wu ON wu.api_key_id = ak.id AND wu.week = $2
     WHERE u.id = $1
     GROUP BY u.id, u.email, u.name, u.tier, u.alert_threshold, u.alert_email, u.alert_sent_at, u.weekly_limit`, [userId, weekKey]);
    const account = rows[0];
    // Unknown user, or alerts not configured: nothing to do.
    if (!account || !account.alert_threshold) {
        return { shouldSendAlert: false };
    }
    const used = parseInt(account.total_used, 10) || 0;
    const total = parseInt(account.total_available, 10) || account.weekly_limit || 999;
    let usagePercent = 0;
    if (total > 0) {
        usagePercent = Math.round((used / total) * 100);
    }
    // Only alert when the threshold is reached AND no alert went out in the
    // last seven days.
    const sevenDaysAgoMs = Date.now() - 7 * 24 * 60 * 60 * 1000;
    let alreadySentThisWeek = false;
    if (account.alert_sent_at) {
        alreadySentThisWeek = new Date(account.alert_sent_at).getTime() > sevenDaysAgoMs;
    }
    const thresholdReached = usagePercent >= account.alert_threshold;
    return {
        shouldSendAlert: thresholdReached && !alreadySentThisWeek,
        usagePercent,
        used,
        total,
        userEmail: account.email,
        userName: account.name || undefined,
        userTier: account.tier,
        alertEmail: account.alert_email || undefined,
    };
}
159
+ const VALID_LLM_PROVIDERS = ['openai', 'anthropic', 'google'];
160
+ export function createFetchRouter(authStore) {
161
+ const router = Router();
162
+ // LRU cache: 5 minute TTL, max 500 entries, 100MB total size
163
+ const cache = new LRUCache({
164
+ max: 500,
165
+ ttl: 5 * 60 * 1000, // 5 minutes default
166
+ maxSize: 100 * 1024 * 1024, // 100MB
167
+ sizeCalculation: (entry) => {
168
+ return JSON.stringify(entry).length;
169
+ },
170
+ });
171
+ router.get('/v1/fetch', async (req, res) => {
172
+ try {
173
+ // Require authentication — API key or JWT session
174
+ const userId = req.auth?.keyInfo?.accountId || req.user?.userId;
175
+ if (!userId) {
176
+ res.status(401).json({
177
+ success: false,
178
+ error: {
179
+ type: 'unauthorized',
180
+ message: 'API key required. Get one free at https://app.webpeel.dev/keys',
181
+ hint: 'Get a free API key at https://app.webpeel.dev/keys',
182
+ docs: 'https://webpeel.dev/docs/errors#unauthorized',
183
+ },
184
+ requestId: req.requestId,
185
+ });
186
+ return;
187
+ }
188
+ const { url, render, wait, format, includeTags, excludeTags, images, location, languages, onlyMainContent, actions, maxAge, storeInCache, stream, noCache, cacheTtl, budget, question, summary, readable, stealth, screenshot, maxTokens, selector, exclude, fullPage, raw, noDomainApi, lite, timeout, schema, detail, captionImages, highlightQuery, highlightMaxChars, } = req.query;
189
+ const detailMode = detail || 'standard';
190
+ // Validate URL parameter
191
+ if (!url || typeof url !== 'string') {
192
+ res.status(400).json({
193
+ success: false,
194
+ error: {
195
+ type: 'invalid_request',
196
+ message: 'Missing or invalid "url" parameter.',
197
+ hint: 'Pass a URL as a query parameter: GET /v1/fetch?url=https://example.com',
198
+ docs: 'https://webpeel.dev/docs/api-reference#fetch',
199
+ },
200
+ requestId: req.requestId,
201
+ });
202
+ return;
203
+ }
204
+ // SECURITY: Validate URL format and length
205
+ if (url.length > 2048) {
206
+ res.status(400).json({
207
+ success: false,
208
+ error: {
209
+ type: 'invalid_url',
210
+ message: 'URL too long (max 2048 characters)',
211
+ docs: 'https://webpeel.dev/docs/api-reference#fetch',
212
+ },
213
+ requestId: req.requestId,
214
+ });
215
+ return;
216
+ }
217
+ try {
218
+ const parsed = new URL(url);
219
+ // Normalize URL for consistent caching
220
+ const normalizedUrl = parsed.href;
221
+ // Use normalized URL for cache key
222
+ if (normalizedUrl !== url) {
223
+ // URL was normalized, update for caching
224
+ }
225
+ }
226
+ catch {
227
+ res.status(400).json({
228
+ success: false,
229
+ error: {
230
+ type: 'invalid_url',
231
+ message: 'Invalid URL format',
232
+ hint: 'Ensure the URL includes a scheme (https://) and a valid hostname',
233
+ docs: 'https://webpeel.dev/docs/api-reference#fetch',
234
+ },
235
+ requestId: req.requestId,
236
+ });
237
+ return;
238
+ }
239
+ // SECURITY: Validate URL to prevent SSRF attacks
240
+ try {
241
+ validateUrlForSSRF(url);
242
+ }
243
+ catch (error) {
244
+ if (error instanceof SSRFError) {
245
+ res.status(400).json({
246
+ success: false,
247
+ error: {
248
+ type: 'forbidden_url',
249
+ message: 'This URL is blocked for security. Localhost, private networks, and non-HTTP URLs are not allowed.',
250
+ hint: 'See docs for allowed URL formats.',
251
+ docs: 'https://webpeel.dev/docs/errors#forbidden-url',
252
+ },
253
+ requestId: req.requestId,
254
+ });
255
+ return;
256
+ }
257
+ throw error;
258
+ }
259
+ // Parse actions query param (JSON-encoded array)
260
+ let parsedActions;
261
+ if (actions && typeof actions === 'string') {
262
+ try {
263
+ const raw = JSON.parse(actions);
264
+ parsedActions = normalizeActions(raw);
265
+ }
266
+ catch (e) {
267
+ res.status(400).json({
268
+ success: false,
269
+ error: {
270
+ type: 'invalid_request',
271
+ message: 'Invalid "actions" parameter: must be a valid JSON array',
272
+ docs: 'https://webpeel.dev/docs/api-reference#fetch',
273
+ },
274
+ requestId: req.requestId,
275
+ });
276
+ return;
277
+ }
278
+ }
279
+ // Build cache key (include new parameters)
280
+ const actionsKey = parsedActions ? JSON.stringify(parsedActions) : '';
281
+ const cacheKey = `fetch:${url}:${render}:${wait}:${format}:${includeTags}:${excludeTags}:${images}:${location}:${languages}:${onlyMainContent}:${stream}:${actionsKey}:${budget}:${question}:${summary}:${readable}:${stealth}:${screenshot}:${maxTokens}:${selector}:${exclude}:${fullPage}:${raw}:${highlightQuery || ''}:${highlightMaxChars || ''}`;
282
+ // Cache bypass: ?noCache=true or Cache-Control: no-cache header
283
+ const bypassCache = noCache === 'true' || req.headers['cache-control'] === 'no-cache';
284
+ // Per-request TTL (cacheTtl in seconds, default 300s = 5 min)
285
+ const cacheTtlMs = cacheTtl !== undefined
286
+ ? parseInt(cacheTtl, 10) * 1000
287
+ : 5 * 60 * 1000;
288
+ // Check cache (with maxAge support)
289
+ const maxAgeMs = maxAge !== undefined ? parseInt(maxAge, 10) : 172800000; // Default 2 days
290
+ if (!bypassCache) {
291
+ const cached = cache.get(cacheKey);
292
+ if (cached && maxAgeMs > 0) {
293
+ const cacheAge = Date.now() - cached.timestamp;
294
+ if (cacheAge < maxAgeMs && cacheAge < cacheTtlMs) {
295
+ res.setHeader('X-Cache', 'HIT');
296
+ res.setHeader('X-Cache-Status', 'HIT');
297
+ res.setHeader('X-Cache-Age', Math.floor(cacheAge / 1000).toString());
298
+ // Cache-Control: allow Cloudflare edge to cache successful GET responses
299
+ res.setHeader('Cache-Control', 'public, s-maxage=60, stale-while-revalidate=300');
300
+ if (wantsEnvelope(req)) {
301
+ successResponse(res, cached.result, {
302
+ requestId: req.requestId,
303
+ cached: true,
304
+ });
305
+ }
306
+ else {
307
+ res.json(cached.result);
308
+ }
309
+ return;
310
+ }
311
+ }
312
+ }
313
+ // Parse options
314
+ const isSoftLimited = req.auth?.softLimited === true;
315
+ const hasExtraUsage = req.auth?.extraUsageAvailable === true;
316
+ // Parse tag arrays from comma-separated strings
317
+ const includeTagsArray = includeTags
318
+ ? includeTags.split(',').map(t => t.trim()).filter(Boolean)
319
+ : undefined;
320
+ const excludeTagsArray = excludeTags
321
+ ? excludeTags.split(',').map(t => t.trim()).filter(Boolean)
322
+ : undefined;
323
+ const languagesArray = languages
324
+ ? languages.split(',').map(l => l.trim()).filter(Boolean)
325
+ : undefined;
326
+ // onlyMainContent is a shortcut for common include tags
327
+ const finalIncludeTags = onlyMainContent === 'true'
328
+ ? ['main', 'article', '.content', '#content']
329
+ : includeTagsArray;
330
+ // When actions are present, force browser mode (skip HTTP fast path)
331
+ const hasActions = parsedActions && parsedActions.length > 0;
332
+ const shouldRender = hasActions || render === 'true';
333
+ const options = {
334
+ // SOFT LIMIT: When over quota AND no extra usage, force HTTP-only
335
+ // If extra usage is available, allow full functionality
336
+ // Exception: actions always require render
337
+ render: (isSoftLimited && !hasExtraUsage && !hasActions) ? false : shouldRender,
338
+ wait: (isSoftLimited && !hasExtraUsage) ? 0 : (wait ? parseInt(wait, 10) : undefined),
339
+ format: format || 'markdown',
340
+ stream: stream === 'true',
341
+ includeTags: finalIncludeTags,
342
+ excludeTags: excludeTagsArray,
343
+ images: images === 'true',
344
+ actions: parsedActions,
345
+ location: location || languagesArray ? {
346
+ country: location,
347
+ languages: languagesArray,
348
+ } : undefined,
349
+ budget: budget ? parseInt(budget, 10) : undefined,
350
+ question: question,
351
+ readable: readable === 'true',
352
+ stealth: (isSoftLimited && !hasExtraUsage) ? false : stealth === 'true',
353
+ screenshot: (isSoftLimited && !hasExtraUsage) ? false : screenshot === 'true',
354
+ maxTokens: maxTokens ? parseInt(maxTokens, 10) : undefined,
355
+ selector: selector,
356
+ exclude: exclude ? exclude.split(',').map(s => s.trim()).filter(Boolean) : undefined,
357
+ fullPage: fullPage === 'true',
358
+ raw: raw === 'true',
359
+ noDomainApi: noDomainApi === 'true',
360
+ lite: lite === 'true',
361
+ timeout: timeout ? parseInt(timeout, 10) : undefined,
362
+ captionImages: captionImages === 'true',
363
+ highlightQuery: highlightQuery,
364
+ highlightMaxChars: highlightMaxChars ? parseInt(highlightMaxChars, 10) : undefined,
365
+ // Prevent auto-escalation to browser unless render=true is explicitly requested.
366
+ // On 512MB containers, surprise browser launches cause OOM kills.
367
+ // Domain extractors (GitHub, Wikipedia, npm etc.) use HTTP APIs, not the browser.
368
+ noEscalate: !shouldRender,
369
+ };
370
+ // Auto-budget: default to 4000 tokens for API requests when no budget specified
371
+ // Opt-out: budget=0 explicitly disables. Lite mode disables auto-budget.
372
+ if (options.budget === undefined && !options.lite) {
373
+ options.budget = 4000;
374
+ res.setHeader('X-Auto-Budget', '4000');
375
+ }
376
+ // Inform the user if their request was degraded
377
+ if (isSoftLimited && !hasExtraUsage && render === 'true' && !hasActions) {
378
+ res.setHeader('X-Degraded', 'render=true downgraded to HTTP-only (quota exceeded)');
379
+ }
380
+ if (isSoftLimited && !hasExtraUsage && stealth === 'true') {
381
+ res.setHeader('X-Degraded', 'stealth=true downgraded (quota exceeded)');
382
+ }
383
+ if (isSoftLimited && !hasExtraUsage && screenshot === 'true') {
384
+ res.setHeader('X-Degraded', 'screenshot=true downgraded (quota exceeded)');
385
+ }
386
+ // Validate wait parameter
387
+ if (options.wait !== undefined && (isNaN(options.wait) || options.wait < 0 || options.wait > 60000)) {
388
+ res.status(400).json({
389
+ success: false,
390
+ error: {
391
+ type: 'invalid_request',
392
+ message: 'Invalid "wait" parameter: must be between 0 and 60000ms',
393
+ docs: 'https://webpeel.dev/docs/api-reference#fetch',
394
+ },
395
+ requestId: req.requestId,
396
+ });
397
+ return;
398
+ }
399
+ // Validate format parameter
400
+ if (!['markdown', 'text', 'html', 'clean'].includes(options.format || '')) {
401
+ res.status(400).json({
402
+ success: false,
403
+ error: {
404
+ type: 'invalid_request',
405
+ message: 'Invalid "format" parameter: must be "markdown", "text", "html", or "clean"',
406
+ docs: 'https://webpeel.dev/docs/api-reference#fetch',
407
+ },
408
+ requestId: req.requestId,
409
+ });
410
+ return;
411
+ }
412
+ const shouldStream = options.stream === true;
413
+ if (shouldStream) {
414
+ res.setHeader('X-Stream', 'true');
415
+ if (typeof res.flushHeaders === 'function') {
416
+ res.flushHeaders();
417
+ }
418
+ }
419
+ // Fetch content — wrap in proxy context so tier limits are enforced automatically
420
+ const startTime = Date.now();
421
+ const userTier = req.auth?.tier || req.auth?.keyInfo?.tier || 'free';
422
+ const result = await proxyContextStorage.run({ userId: userId ?? undefined, tier: userTier }, () => peel(url, options));
423
+ const elapsed = Date.now() - startTime;
424
+ // --- BM25 Schema Template Extraction (GET, no LLM needed) ---
425
+ if (schema && typeof schema === 'string' && result.content) {
426
+ const template = getSchemaTemplate(schema);
427
+ if (template) {
428
+ const { quickAnswer } = await import('../../core/quick-answer.js');
429
+ const { smartExtractSchemaFields } = await import('../../core/schema-postprocess.js');
430
+ const extracted = smartExtractSchemaFields(result.content, template.fields, quickAnswer, {
431
+ pageTitle: result.title,
432
+ pageUrl: result.url,
433
+ metadata: result.metadata,
434
+ });
435
+ result.extracted = extracted;
436
+ }
437
+ else {
438
+ // --- Custom JSON Schema → BM25 extraction (no LLM needed) ---
439
+ // Supports: {"price": "number", "title": "string"} or {"price": "What is the price?"}
440
+ let customFields = null;
441
+ try {
442
+ const parsed = JSON.parse(schema);
443
+ if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
444
+ customFields = {};
445
+ for (const [key, typeOrQuestion] of Object.entries(parsed)) {
446
+ const val = String(typeOrQuestion);
447
+ // If the value looks like a question (has "?" or is >20 chars), use it directly
448
+ if (val.includes('?') || val.length > 20) {
449
+ customFields[key] = val;
450
+ }
451
+ else {
452
+ // Convert type name to a natural question
453
+ const typeStr = val.toLowerCase();
454
+ const humanKey = key.replace(/([A-Z])/g, ' $1').toLowerCase().replace(/_/g, ' ');
455
+ if (typeStr === 'number' || typeStr === 'float' || typeStr === 'integer') {
456
+ customFields[key] = `What is the ${humanKey}? (as a number)`;
457
+ }
458
+ else if (typeStr === 'boolean') {
459
+ customFields[key] = `Is the ${humanKey} true or false?`;
460
+ }
461
+ else if (typeStr.includes('[]') || typeStr === 'array') {
462
+ customFields[key] = `What are the ${humanKey}? List all of them.`;
463
+ }
464
+ else {
465
+ customFields[key] = `What is the ${humanKey}?`;
466
+ }
467
+ }
468
+ }
469
+ }
470
+ }
471
+ catch { /* not valid JSON, ignore */ }
472
+ if (customFields && Object.keys(customFields).length > 0) {
473
+ const { quickAnswer } = await import('../../core/quick-answer.js');
474
+ const { smartExtractSchemaFields } = await import('../../core/schema-postprocess.js');
475
+ const extracted = smartExtractSchemaFields(result.content, customFields, quickAnswer, {
476
+ pageTitle: result.title,
477
+ pageUrl: result.url,
478
+ metadata: result.metadata,
479
+ });
480
+ result.extracted = extracted;
481
+ }
482
+ }
483
+ }
484
+ // Determine fetch type from the result method
485
+ const fetchType = result.method === 'stealth' ? 'stealth' :
486
+ result.method === 'browser' ? 'stealth' : 'basic';
487
+ // Log request to database (PostgreSQL only)
488
+ const pgStore = authStore;
489
+ // Log usage for BOTH API key auth AND JWT session auth
490
+ const logUserId = req.auth?.keyInfo?.accountId || req.user?.userId;
491
+ if (logUserId && typeof pgStore.pool !== 'undefined') {
492
+ pgStore.pool.query(`INSERT INTO usage_logs
493
+ (user_id, endpoint, url, method, processing_time_ms, status_code, ip_address, user_agent, tokens_used)
494
+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)`, [
495
+ logUserId,
496
+ 'fetch',
497
+ url,
498
+ fetchType,
499
+ elapsed,
500
+ 200,
501
+ req.ip || req.socket.remoteAddress,
502
+ req.get('user-agent'),
503
+ result?.tokens || null,
504
+ ]).catch((err) => {
505
+ console.error('Failed to log request to usage_logs:', err);
506
+ });
507
+ }
508
+ // Track usage (check for trackBurstUsage method to detect PostgresAuthStore)
509
+ if (req.auth?.keyInfo?.key && typeof pgStore.trackBurstUsage === 'function') {
510
+ // Track burst usage (always)
511
+ await pgStore.trackBurstUsage(req.auth.keyInfo.key);
512
+ // If soft-limited with extra usage available, charge to extra usage
513
+ if (isSoftLimited && hasExtraUsage) {
514
+ const extraResult = await pgStore.trackExtraUsage(req.auth.keyInfo.key, fetchType, url, elapsed, 200 // PeelResult doesn't include statusCode, assume success
515
+ );
516
+ if (extraResult.success) {
517
+ res.setHeader('X-Extra-Usage-Charged', `$${extraResult.cost.toFixed(4)}`);
518
+ res.setHeader('X-Extra-Usage-New-Balance', extraResult.newBalance.toFixed(2));
519
+ }
520
+ else {
521
+ // Extra usage failed - fall back to soft limit
522
+ res.setHeader('X-Degraded', 'Extra usage insufficient, degraded to soft limit');
523
+ }
524
+ }
525
+ else if (!isSoftLimited) {
526
+ // Normal weekly usage tracking
527
+ await pgStore.trackUsage(req.auth.keyInfo.key, fetchType);
528
+ }
529
+ // If soft-limited WITHOUT extra usage, don't track (already over quota)
530
+ }
531
+ // Check usage alert (fire-and-forget, never block the response)
532
+ if (req.auth?.keyInfo?.accountId && typeof pgStore.pool !== 'undefined') {
533
+ // Automatic dual-threshold alerts (80% and 90%)
534
+ checkAndSendDualAlert(pgStore.pool, req.auth.keyInfo.accountId).catch(() => { });
535
+ // Legacy: user-configured single-threshold alert
536
+ try {
537
+ const alertResult = await checkAndTriggerAlert(pgStore, req.auth.keyInfo.accountId);
538
+ if (alertResult.shouldSendAlert && alertResult.usagePercent !== undefined) {
539
+ await sendUsageAlertEmail({
540
+ toEmail: alertResult.alertEmail || alertResult.userEmail,
541
+ userName: alertResult.userName,
542
+ usagePercent: alertResult.usagePercent,
543
+ used: alertResult.used,
544
+ total: alertResult.total,
545
+ tier: alertResult.userTier,
546
+ });
547
+ // Mark alert as sent so we don't spam (rate-limited to once/week)
548
+ await pgStore.pool.query('UPDATE users SET alert_sent_at = NOW() WHERE id = $1', [req.auth.keyInfo.accountId]);
549
+ }
550
+ }
551
+ catch (alertErr) {
552
+ // Never let alert errors affect the main response
553
+ console.warn('[alert] Failed to check/send alert:', alertErr);
554
+ }
555
+ }
556
+ // Cache result (unless storeInCache is explicitly false or cache bypass requested)
557
+ if (storeInCache !== 'false' && !bypassCache) {
558
+ cache.set(cacheKey, {
559
+ result,
560
+ timestamp: Date.now(),
561
+ }, { ttl: cacheTtlMs });
562
+ }
563
+ // Apply ?detail=brief mode: truncate content and prepend TL;DR
564
+ if (detailMode === 'brief' && result.content) {
565
+ const words = result.content.split(/\s+/);
566
+ const truncatedWords = words.slice(0, 500);
567
+ const truncated = truncatedWords.join(' ');
568
+ // Extract TL;DR from first non-empty paragraph
569
+ const firstPara = result.content
570
+ .split(/\n{2,}/)
571
+ .map((p) => p.replace(/^#+\s*/, '').trim())
572
+ .find((p) => p.length > 40 && !p.startsWith('!') && !p.startsWith('['));
573
+ const tldr = firstPara
574
+ ? firstPara.replace(/\s+/g, ' ').slice(0, 300) + (firstPara.length > 300 ? '...' : '')
575
+ : truncated.slice(0, 200) + '...';
576
+ result.content = `**TL;DR:** ${tldr}\n\n---\n\n${truncated}${words.length > 500 ? '\n\n*[Content truncated — use ?detail=full for complete output]*' : ''}`;
577
+ const tokenEstimate = Math.round(truncatedWords.length * 0.75);
578
+ res.setHeader('X-Detail-Mode', 'brief');
579
+ res.setHeader('X-Token-Estimate', tokenEstimate.toString());
580
+ }
581
+ // --- question → answer field (GET) ---
582
+ // When ?question= is provided, run quickAnswer() on the fetched content
583
+ // and expose the result as an `answer` field in the response.
584
+ const getAnswerResult = (question && typeof question === 'string' && result.content)
585
+ ? quickAnswer({ question, content: result.content, url: result.url })
586
+ : undefined;
587
+ // --- summary field (GET) ---
588
+ // When ?summary=true, return a truncated 500-word summary in a `summary` field.
589
+ const getSummaryText = (summary === 'true' && result.content)
590
+ ? extractSummary(result.content)
591
+ : undefined;
592
+ // Add usage headers (kept for backward compat; also surfaced in envelope metadata)
593
+ res.setHeader('X-Cache', 'MISS');
594
+ res.setHeader('X-Cache-Status', 'MISS');
595
+ res.setHeader('X-Credits-Used', '1');
596
+ res.setHeader('X-Processing-Time', elapsed.toString());
597
+ res.setHeader('X-Fetch-Type', fetchType);
598
+ // Proxy bandwidth usage headers — let users track their proxy consumption
599
+ if (userId) {
600
+ const proxyStats = getProxyUsage(userId, userTier);
601
+ res.setHeader('X-Proxy-Bytes-Used', proxyStats.used.toString());
602
+ res.setHeader('X-Proxy-Bytes-Remaining', proxyStats.remaining === -1 ? 'unlimited' : proxyStats.remaining.toString());
603
+ res.setHeader('X-Proxy-Limit', proxyStats.limit === -1 ? 'unlimited' : proxyStats.limit.toString());
604
+ }
605
+ // Method + cost headers — customers can see what tier they're using
606
+ res.setHeader('X-Method', result.method || 'simple');
607
+ res.setHeader('X-Method-Cost', methodCosts[result.method || 'simple'] ?? '0.005');
608
+ res.setHeader('X-Tokens', String(result.tokens || 0));
609
+ // Cache-Control: allow Cloudflare edge to cache successful GET responses for 60s
610
+ res.setHeader('Cache-Control', 'public, s-maxage=60, stale-while-revalidate=300');
611
+ // Response timing headers — let customers see exactly where time is spent
612
+ const timingFetch = result.timing?.fetch ?? 0;
613
+ const timingParse = (result.timing?.convert ?? 0) + (result.timing?.metadata ?? 0) + (result.timing?.prune ?? 0);
614
+ res.setHeader('X-Response-Time', `${elapsed}ms`);
615
+ res.setHeader('X-Fetch-Time', `${timingFetch}ms`);
616
+ res.setHeader('X-Parse-Time', `${timingParse}ms`);
617
+ res.setHeader('Server-Timing', `fetch;dur=${timingFetch}, parse;dur=${timingParse}, total;dur=${elapsed}`);
618
+ // Build response — extend result with optional answer/summary fields
619
+ const getResponseBody = { ...result };
620
+ // Ensure method + cost are always present in body
621
+ if (!getResponseBody.method)
622
+ getResponseBody.method = result.method || 'simple';
623
+ getResponseBody.cost = methodCosts[result.method || 'simple'] ?? '0.005';
624
+ if (getAnswerResult !== undefined)
625
+ getResponseBody.answer = getAnswerResult.answer;
626
+ if (getSummaryText !== undefined)
627
+ getResponseBody.summary = getSummaryText;
628
+ if (wantsEnvelope(req)) {
629
+ successResponse(res, getResponseBody, {
630
+ requestId: req.requestId,
631
+ processingTimeMs: elapsed,
632
+ creditsUsed: 1,
633
+ cached: false,
634
+ fetchType,
635
+ });
636
+ }
637
+ else {
638
+ res.json(getResponseBody);
639
+ }
640
+ }
641
+ catch (error) {
642
+ const err = error;
643
+ // Log error to database (PostgreSQL only)
644
+ const pgStore = authStore;
645
+ if (req.auth?.keyInfo?.accountId && typeof pgStore.pool !== 'undefined') {
646
+ const url = req.query.url;
647
+ const render = req.query.render === 'true';
648
+ const fetchType = render ? 'stealth' : 'basic';
649
+ pgStore.pool.query(`INSERT INTO usage_logs
650
+ (user_id, endpoint, url, method, status_code, error, ip_address, user_agent, tokens_used)
651
+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)`, [
652
+ req.auth.keyInfo.accountId,
653
+ 'fetch',
654
+ url,
655
+ fetchType,
656
+ 500,
657
+ err.message || 'Unknown error',
658
+ req.ip || req.socket.remoteAddress,
659
+ req.get('user-agent'),
660
+ null,
661
+ ]).catch((logErr) => {
662
+ console.error('Failed to log error to usage_logs:', logErr);
663
+ });
664
+ }
665
+ // SECURITY: Sanitize error messages to prevent information disclosure
666
+ if (res.headersSent)
667
+ return; // Timeout middleware already responded
668
+ const requestUrl = req.query.url;
669
+ if (err.code || err.name === 'TimeoutError' || err.name === 'BlockedError' || err.name === 'NetworkError' || err.name === 'WebPeelError') {
670
+ // WebPeelError from core library - safe to expose with helpful context
671
+ const { type, message, hint, docs } = buildFetchErrorMessage(err);
672
+ const statusCode = type === 'timeout' ? 504
673
+ : type === 'blocked' ? 403
674
+ : type === 'not_found' ? 404
675
+ : type === 'network' || type === 'server_error' ? 502
676
+ : 500;
677
+ res.status(statusCode).json({
678
+ success: false,
679
+ error: {
680
+ type,
681
+ message,
682
+ url: requestUrl,
683
+ ...(hint ? { hint } : {}),
684
+ docs: docs || 'https://webpeel.dev/docs/errors',
685
+ },
686
+ requestId: req.requestId,
687
+ });
688
+ }
689
+ else {
690
+ // Unexpected error - generic message only
691
+ console.error('Fetch error:', err); // Log full error server-side
692
+ res.status(500).json({
693
+ success: false,
694
+ error: {
695
+ type: 'unknown',
696
+ message: 'An unexpected error occurred while fetching the URL. If this persists, check https://webpeel.dev/status',
697
+ url: requestUrl,
698
+ docs: 'https://webpeel.dev/docs/errors',
699
+ },
700
+ requestId: req.requestId,
701
+ });
702
+ }
703
+ }
704
+ });
705
+ // -----------------------------------------------------------------------
706
+ // POST /v1/fetch — same as GET but accepts JSON body with extract param
707
+ // POST /v2/scrape — alias with identical behaviour
708
+ // -----------------------------------------------------------------------
709
/**
 * Convert a caller-supplied JSON schema string into extraction questions.
 *
 * Accepts a JSON object such as {"price": "number", "title": "string"} or
 * {"price": "What is the price?"}. Values containing "?" or longer than 20
 * characters are used verbatim as the question; short values are treated as
 * type names and turned into a natural-language question about the key.
 *
 * @param {string} schemaJson - Raw JSON text from the request body.
 * @returns {Object<string, string>|null} Map of field name to question, or
 *   null when the text is not valid JSON or is not a plain JSON object.
 */
function buildCustomSchemaQuestions(schemaJson) {
    let parsed;
    try {
        parsed = JSON.parse(schemaJson);
    }
    catch {
        return null; // not valid JSON, nothing to extract
    }
    if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
        return null;
    }
    const questions = {};
    for (const [key, typeOrQuestion] of Object.entries(parsed)) {
        const val = String(typeOrQuestion);
        // If the value looks like a question (has "?" or is >20 chars), use it directly
        if (val.includes('?') || val.length > 20) {
            questions[key] = val;
            continue;
        }
        // Otherwise treat the value as a type name and phrase a question around the key:
        // camelCase and snake_case keys become space-separated lowercase words.
        const typeStr = val.toLowerCase();
        const humanKey = key.replace(/([A-Z])/g, ' $1').toLowerCase().replace(/_/g, ' ');
        if (typeStr === 'number' || typeStr === 'float' || typeStr === 'integer') {
            questions[key] = `What is the ${humanKey}? (as a number)`;
        }
        else if (typeStr === 'boolean') {
            questions[key] = `Is the ${humanKey} true or false?`;
        }
        else if (typeStr.includes('[]') || typeStr === 'array') {
            questions[key] = `What are the ${humanKey}? List all of them.`;
        }
        else {
            questions[key] = `What is the ${humanKey}?`;
        }
    }
    return questions;
}

/**
 * Handler for POST /v1/fetch and POST /v2/scrape.
 *
 * Flow: authenticate -> validate body (url, actions, extraction params, format,
 * wait) -> cache lookup -> peel() -> optional schema / LLM extraction -> usage
 * tracking -> response headers and body -> cache store.
 *
 * Responds with JSON in every branch; never rethrows. Errors carrying a `code`
 * or one of the known fetch error names are mapped through
 * buildFetchErrorMessage(); anything else yields a generic 500.
 *
 * @param {object} req - Express request; reads req.auth, req.user, req.body,
 *   req.headers and req.requestId.
 * @param {object} res - Express response.
 * @returns {Promise<void>}
 */
async function handlePostFetch(req, res) {
    // Every early rejection shares the same envelope: { success, error, requestId }.
    const reject = (status, error) => {
        res.status(status).json({ success: false, error, requestId: req.requestId });
    };
    try {
        // Require authentication — API key or JWT session
        const postUserId = req.auth?.keyInfo?.accountId || req.user?.userId;
        if (!postUserId) {
            reject(401, {
                type: 'unauthorized',
                message: 'API key required. Get one free at https://app.webpeel.dev/keys',
                hint: 'Get a free API key at https://app.webpeel.dev/keys',
                docs: 'https://webpeel.dev/docs/errors#unauthorized',
            });
            return;
        }
        // A missing or unparsed body must fall through to the 400 below rather than
        // throwing a destructuring TypeError that would surface as a 500.
        const body = (req.body !== null && typeof req.body === 'object') ? req.body : {};
        const { url, render, wait, format, includeTags, excludeTags, images, location, languages, onlyMainContent, actions: rawActions, storeInCache: storeFlag,
        // Cache control
        noCache: noCacheBody, cacheTtl: cacheTtlBody,
        // Inline extraction (BYOK)
        extract, llmProvider, llmApiKey, llmModel,
        // Firecrawl-compatible formats array
        formats, stream,
        // Extended peel options
        budget, question, summary: summaryParam, readable, stealth, screenshot, maxTokens, selector, exclude, fullPage, raw, noDomainApi, lite, timeout, proxies, chunk, device, viewportWidth, viewportHeight, deviceScaleFactor, waitUntil, waitSelector, blockResources, cloaked, schema: bodySchema, highlightQuery: bodyHighlightQuery, highlightMaxChars: bodyHighlightMaxChars, } = body;
        // --- Validate URL -------------------------------------------------------
        if (!url || typeof url !== 'string') {
            reject(400, {
                type: 'invalid_request',
                message: 'Missing or invalid "url" in request body.',
                hint: 'Send JSON: { "url": "https://example.com" }',
                docs: 'https://webpeel.dev/docs/api-reference#fetch',
            });
            return;
        }
        if (url.length > 2048) {
            reject(400, {
                type: 'invalid_url',
                message: 'URL too long (max 2048 characters)',
                docs: 'https://webpeel.dev/docs/api-reference#fetch',
            });
            return;
        }
        try {
            new URL(url);
        }
        catch {
            reject(400, {
                type: 'invalid_url',
                message: 'Invalid URL format',
                hint: 'Ensure the URL includes a scheme (https://) and a valid hostname',
                docs: 'https://webpeel.dev/docs/api-reference#fetch',
            });
            return;
        }
        try {
            validateUrlForSSRF(url);
        }
        catch (error) {
            if (error instanceof SSRFError) {
                reject(400, {
                    type: 'forbidden_url',
                    message: 'This URL is blocked for security. Localhost, private networks, and non-HTTP URLs are not allowed.',
                    hint: 'See docs for allowed URL formats.',
                    docs: 'https://webpeel.dev/docs/errors#forbidden-url',
                });
                return;
            }
            throw error;
        }
        // --- Parse and normalize actions -----------------------------------------
        let postActions;
        if (rawActions !== undefined) {
            try {
                postActions = normalizeActions(rawActions);
            }
            catch (e) {
                reject(400, {
                    type: 'invalid_request',
                    message: `Invalid "actions" parameter: ${e.message}`,
                    docs: 'https://webpeel.dev/docs/api-reference#fetch',
                });
                return;
            }
        }
        // --- Resolve inline extract from body or Firecrawl-compatible formats ---
        // Done BEFORE the cache lookup so a formats-based JSON extraction request is
        // never answered with a cached body that has no `json` field.
        let resolvedExtract = extract;
        if (!resolvedExtract && Array.isArray(formats)) {
            const jsonFormat = formats.find((f) => (typeof f === 'object' && f !== null && f.type === 'json') ||
                (typeof f === 'string' && f === 'json'));
            if (jsonFormat && typeof jsonFormat === 'object' && (jsonFormat.schema || jsonFormat.prompt)) {
                resolvedExtract = {
                    schema: jsonFormat.schema,
                    prompt: jsonFormat.prompt,
                };
            }
        }
        // Resolve schema template names (e.g. "product", "article") to field objects
        if (resolvedExtract && typeof resolvedExtract.schema === 'string') {
            const tmpl = getSchemaTemplate(resolvedExtract.schema);
            if (tmpl) {
                resolvedExtract = { ...resolvedExtract, schema: tmpl.fields };
            }
            else {
                // Try parsing as JSON string
                try {
                    resolvedExtract = { ...resolvedExtract, schema: JSON.parse(resolvedExtract.schema) };
                }
                catch { /* leave as-is */ }
            }
        }
        // Validate LLM params if extraction is requested
        if (resolvedExtract && (resolvedExtract.schema || resolvedExtract.prompt)) {
            if (!llmProvider || !VALID_LLM_PROVIDERS.includes(llmProvider)) {
                reject(400, {
                    type: 'invalid_request',
                    message: `"llmProvider" is required for inline extraction and must be one of: ${VALID_LLM_PROVIDERS.join(', ')}`,
                    docs: 'https://webpeel.dev/docs/api-reference#fetch',
                });
                return;
            }
            if (!llmApiKey || typeof llmApiKey !== 'string' || llmApiKey.trim().length === 0) {
                reject(400, {
                    type: 'invalid_request',
                    message: 'Missing or invalid "llmApiKey" (BYOK required for inline extraction)',
                    hint: 'Pass your LLM provider API key in the "llmApiKey" field',
                    docs: 'https://webpeel.dev/docs/api-reference#fetch',
                });
                return;
            }
        }
        // --- Quota state (needed for both the cache key and the peel options) ----
        const isSoftLimited = req.auth?.softLimited === true;
        const hasExtraUsage = req.auth?.extraUsageAvailable === true;
        // True when the request will be served with render/stealth/screenshot/wait stripped.
        const isDegradedTier = isSoftLimited && !hasExtraUsage;
        // --- Cache bypass and lookup -------------------------------------------
        const postBypassCache = noCacheBody === true || req.headers['cache-control'] === 'no-cache';
        const postCacheTtlMs = typeof cacheTtlBody === 'number' ? cacheTtlBody * 1000 : 5 * 60 * 1000;
        const postActionsKey = postActions ? JSON.stringify(postActions) : '';
        // Every input that can change the response body must take part in the key;
        // otherwise requests differing only in e.g. `schema` or `formats` would share
        // an entry. The degraded flag keeps quota-degraded results away from full ones.
        const postCacheKeyExtras = JSON.stringify([
            bodySchema, formats, noDomainApi, lite, proxies, chunk, device,
            viewportWidth, viewportHeight, deviceScaleFactor, waitUntil, waitSelector,
            blockResources, cloaked, bodyHighlightQuery, bodyHighlightMaxChars, isDegradedTier,
        ]);
        const postCacheKey = `fetch:${url}:${render}:${wait}:${format}:${JSON.stringify(includeTags)}:${JSON.stringify(excludeTags)}:${images}:${location}:${JSON.stringify(languages)}:${onlyMainContent}:${stream}:${postActionsKey}:${budget}:${question}:${summaryParam}:${readable}:${stealth}:${screenshot}:${maxTokens}:${selector}:${JSON.stringify(exclude)}:${fullPage}:${raw}:${postCacheKeyExtras}`;
        if (!postBypassCache && !resolvedExtract) {
            const cached = cache.get(postCacheKey);
            if (cached) {
                const cacheAge = Date.now() - cached.timestamp;
                if (cacheAge < postCacheTtlMs) {
                    res.setHeader('X-Cache', 'HIT');
                    res.setHeader('X-Cache-Status', 'HIT');
                    res.setHeader('X-Cache-Age', Math.floor(cacheAge / 1000).toString());
                    if (wantsEnvelope(req)) {
                        successResponse(res, cached.result, {
                            requestId: req.requestId,
                            cached: true,
                        });
                    }
                    else {
                        res.json(cached.result);
                    }
                    return;
                }
            }
        }
        // --- Build PeelOptions ---------------------------------------------------
        const includeTagsArray = Array.isArray(includeTags) ? includeTags : undefined;
        const excludeTagsArray = Array.isArray(excludeTags) ? excludeTags : undefined;
        const languagesArray = Array.isArray(languages) ? languages : undefined;
        const finalIncludeTags = onlyMainContent === true
            ? ['main', 'article', '.content', '#content']
            : includeTagsArray;
        const resolvedFormat = format || 'markdown';
        if (!['markdown', 'text', 'html', 'clean'].includes(resolvedFormat)) {
            reject(400, {
                type: 'invalid_request',
                message: 'Invalid "format" parameter: must be "markdown", "text", "html", or "clean"',
                docs: 'https://webpeel.dev/docs/api-reference#fetch',
            });
            return;
        }
        const resolvedWait = typeof wait === 'number' ? wait : undefined;
        if (resolvedWait !== undefined && (Number.isNaN(resolvedWait) || resolvedWait < 0 || resolvedWait > 60000)) {
            reject(400, {
                type: 'invalid_request',
                message: 'Invalid "wait" parameter: must be between 0 and 60000ms',
                docs: 'https://webpeel.dev/docs/api-reference#fetch',
            });
            return;
        }
        // When actions are present, force browser mode
        const postHasActions = postActions && postActions.length > 0;
        const postShouldRender = postHasActions || render === true;
        // Normalize exclude: accept string (comma-separated) or string array.
        // Any other type is ignored (like the other optional params) instead of
        // crashing on a missing .split().
        let excludeArray;
        if (Array.isArray(exclude)) {
            excludeArray = exclude;
        }
        else if (typeof exclude === 'string' && exclude) {
            excludeArray = exclude.split(',').map((s) => s.trim()).filter(Boolean);
        }
        const wantsScreenshot = screenshot === true ||
            (Array.isArray(formats) && formats.some((f) => (typeof f === 'string' ? f : f?.type) === 'screenshot'));
        const options = {
            // Actions need a browser, so they keep rendering on even when degraded.
            render: (isDegradedTier && !postHasActions) ? false : postShouldRender,
            wait: isDegradedTier ? 0 : resolvedWait,
            format: resolvedFormat,
            stream: stream === true,
            includeTags: finalIncludeTags,
            excludeTags: excludeTagsArray,
            images: images === true,
            actions: postActions,
            location: (location || languagesArray) ? {
                country: location,
                languages: languagesArray,
            } : undefined,
            budget: typeof budget === 'number' ? budget : undefined,
            question: question,
            readable: readable === true,
            stealth: isDegradedTier ? false : stealth === true,
            screenshot: isDegradedTier ? false : wantsScreenshot,
            maxTokens: typeof maxTokens === 'number' ? maxTokens : undefined,
            selector: selector,
            exclude: excludeArray,
            fullPage: fullPage === true,
            raw: raw === true,
            noDomainApi: noDomainApi === true,
            lite: lite === true,
            timeout: typeof timeout === 'number' ? timeout : undefined,
            proxies: Array.isArray(proxies) ? proxies : undefined,
            device: device,
            viewportWidth: typeof viewportWidth === 'number' ? viewportWidth : undefined,
            viewportHeight: typeof viewportHeight === 'number' ? viewportHeight : undefined,
            deviceScaleFactor: typeof deviceScaleFactor === 'number' ? deviceScaleFactor : undefined,
            waitUntil: waitUntil,
            waitSelector: waitSelector,
            blockResources: Array.isArray(blockResources) ? blockResources : undefined,
        };
        if (cloaked) {
            options.cloaked = cloaked;
        }
        if (bodyHighlightQuery) {
            options.highlightQuery = bodyHighlightQuery;
        }
        if (bodyHighlightMaxChars) {
            options.highlightMaxChars = bodyHighlightMaxChars;
        }
        if (chunk) {
            options.chunk = chunk;
        }
        // Auto-budget: default to 4000 tokens for API requests when no budget specified
        // Opt-out: budget=0 explicitly disables. Lite mode disables auto-budget.
        if (options.budget === undefined && !options.lite) {
            options.budget = 4000;
            res.setHeader('X-Auto-Budget', '4000');
        }
        // Report every downgrade in one header; separate setHeader calls would
        // overwrite each other and only the last reason would reach the client.
        const degradedNotes = [];
        if (isDegradedTier) {
            if (render === true && !postHasActions) {
                degradedNotes.push('render=true downgraded to HTTP-only (quota exceeded)');
            }
            if (stealth === true) {
                degradedNotes.push('stealth=true downgraded (quota exceeded)');
            }
            if (screenshot === true) {
                degradedNotes.push('screenshot=true downgraded (quota exceeded)');
            }
        }
        if (degradedNotes.length > 0) {
            res.setHeader('X-Degraded', degradedNotes.join('; '));
        }
        const shouldStream = options.stream === true;
        if (shouldStream) {
            res.setHeader('X-Stream', 'true');
            if (typeof res.flushHeaders === 'function') {
                res.flushHeaders();
            }
        }
        // --- Fetch content — wrap in proxy context so tier limits are enforced automatically
        const startTime = Date.now();
        const postUserTier = req.auth?.tier || req.auth?.keyInfo?.tier || 'free';
        const result = await proxyContextStorage.run({ userId: postUserId ?? undefined, tier: postUserTier }, () => peel(url, options));
        const elapsed = Date.now() - startTime;
        // --- BM25 Schema Extraction (POST, no LLM needed) ---
        // `schema` is either a known template name or a custom JSON object such as
        // {"price": "number", "title": "string"} / {"price": "What is the price?"}.
        if (bodySchema && typeof bodySchema === 'string' && result.content) {
            const template = getSchemaTemplate(bodySchema);
            const schemaFields = template ? template.fields : buildCustomSchemaQuestions(bodySchema);
            // Template fields are used as-is; custom schemas must yield at least one field.
            const hasSchemaFields = template
                ? true
                : Boolean(schemaFields && Object.keys(schemaFields).length > 0);
            if (hasSchemaFields) {
                const { quickAnswer: schemaQuickAnswer } = await import('../../core/quick-answer.js');
                const { smartExtractSchemaFields } = await import('../../core/schema-postprocess.js');
                result.extracted = smartExtractSchemaFields(result.content, schemaFields, schemaQuickAnswer, {
                    pageTitle: result.title,
                    pageUrl: result.url,
                    metadata: result.metadata,
                });
            }
        }
        // --- Inline extraction (post-fetch) -------------------------------------
        let jsonData;
        let extractTokensUsed;
        if (resolvedExtract && (resolvedExtract.schema || resolvedExtract.prompt) && llmApiKey) {
            const extractResult = await extractInlineJson(result.content, {
                schema: resolvedExtract.schema,
                prompt: resolvedExtract.prompt,
                llmProvider: llmProvider,
                llmApiKey: llmApiKey.trim(),
                llmModel,
            });
            jsonData = extractResult.data;
            extractTokensUsed = extractResult.tokensUsed;
        }
        // --- Usage tracking (same as GET) ----------------------------------------
        // Both 'stealth' and 'browser' fetches are recorded as 'stealth'.
        const fetchType = (result.method === 'stealth' || result.method === 'browser') ? 'stealth' : 'basic';
        const pgStore = authStore;
        if (req.auth?.keyInfo?.accountId && typeof pgStore.pool !== 'undefined') {
            // Fire-and-forget: a logging failure must not fail the request.
            pgStore.pool.query(`INSERT INTO usage_logs
          (user_id, endpoint, url, method, processing_time_ms, status_code, ip_address, user_agent, tokens_used)
          VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)`, [
                req.auth.keyInfo.accountId,
                'fetch',
                url,
                fetchType,
                elapsed,
                200,
                req.ip || req.socket.remoteAddress,
                req.get('user-agent'),
                result?.tokens || null,
            ]).catch((err) => {
                console.error('Failed to log request to usage_logs:', err);
            });
        }
        if (req.auth?.keyInfo?.key && typeof pgStore.trackBurstUsage === 'function') {
            await pgStore.trackBurstUsage(req.auth.keyInfo.key);
            if (isSoftLimited && hasExtraUsage) {
                const extraResult = await pgStore.trackExtraUsage(req.auth.keyInfo.key, fetchType, url, elapsed, 200);
                if (extraResult.success) {
                    res.setHeader('X-Extra-Usage-Charged', `$${extraResult.cost.toFixed(4)}`);
                    res.setHeader('X-Extra-Usage-New-Balance', extraResult.newBalance.toFixed(2));
                }
                else {
                    res.setHeader('X-Degraded', 'Extra usage insufficient, degraded to soft limit');
                }
            }
            else if (!isSoftLimited) {
                await pgStore.trackUsage(req.auth.keyInfo.key, fetchType);
            }
            // Automatic dual-threshold alerts (80% and 90%) — POST route
            if (req.auth?.keyInfo?.accountId) {
                checkAndSendDualAlert(pgStore.pool, req.auth.keyInfo.accountId).catch(() => { });
            }
        }
        // --- question → answer field (POST) ---
        // When question is provided, run quickAnswer() on the fetched content
        // and expose the result as an `answer` field in the response.
        const postAnswerResult = (question && typeof question === 'string' && result.content)
            ? quickAnswer({ question, content: result.content, url: result.url })
            : undefined;
        // --- summary field (POST) ---
        // When summary: true, expose extractSummary() of the content in a `summary` field.
        const postSummaryText = (summaryParam === true && result.content)
            ? extractSummary(result.content)
            : undefined;
        // --- Build response ------------------------------------------------------
        // Headers kept for backward compat; also surfaced in envelope metadata.
        res.setHeader('X-Cache', 'MISS');
        res.setHeader('X-Cache-Status', 'MISS');
        res.setHeader('X-Credits-Used', '1');
        res.setHeader('X-Processing-Time', elapsed.toString());
        res.setHeader('X-Fetch-Type', fetchType);
        // Proxy bandwidth usage headers — let users track their proxy consumption
        if (postUserId) {
            const postProxyStats = getProxyUsage(postUserId, postUserTier);
            res.setHeader('X-Proxy-Bytes-Used', postProxyStats.used.toString());
            res.setHeader('X-Proxy-Bytes-Remaining', postProxyStats.remaining === -1 ? 'unlimited' : postProxyStats.remaining.toString());
            res.setHeader('X-Proxy-Limit', postProxyStats.limit === -1 ? 'unlimited' : postProxyStats.limit.toString());
        }
        // Method + cost headers — customers can see what tier they're using
        const methodName = result.method || 'simple';
        const methodCost = methodCosts[methodName] ?? '0.005';
        res.setHeader('X-Method', methodName);
        res.setHeader('X-Method-Cost', methodCost);
        res.setHeader('X-Tokens', String(result.tokens || 0));
        // Response timing headers — let customers see exactly where time is spent
        const postTimingFetch = result.timing?.fetch ?? 0;
        const postTimingParse = (result.timing?.convert ?? 0) + (result.timing?.metadata ?? 0) + (result.timing?.prune ?? 0);
        res.setHeader('X-Response-Time', `${elapsed}ms`);
        res.setHeader('X-Fetch-Time', `${postTimingFetch}ms`);
        res.setHeader('X-Parse-Time', `${postTimingParse}ms`);
        res.setHeader('Server-Timing', `fetch;dur=${postTimingFetch}, parse;dur=${postTimingParse}, total;dur=${elapsed}`);
        const responseBody = { ...result };
        // Ensure method + cost are always present in body
        if (!responseBody.method) {
            responseBody.method = methodName;
        }
        responseBody.cost = methodCost;
        if (jsonData !== undefined) {
            responseBody.json = jsonData;
        }
        if (extractTokensUsed) {
            responseBody.extractTokensUsed = extractTokensUsed;
        }
        if (postAnswerResult !== undefined) {
            responseBody.answer = postAnswerResult.answer;
        }
        if (postSummaryText !== undefined) {
            responseBody.summary = postSummaryText;
        }
        // --- Multi-format response (formats array) --------------------------------
        // When 'formats' is provided as an array, populate each requested format
        // as a top-level field in the response. Backward-compatible: single-format
        // responses continue to work unchanged via the 'format' param.
        if (Array.isArray(formats) && formats.length > 0) {
            for (const fmt of formats) {
                const fmtStr = typeof fmt === 'string' ? fmt : fmt?.type;
                switch (fmtStr) {
                    case 'markdown':
                        responseBody.markdown = result.content;
                        break;
                    case 'html':
                        // If the user requested html format, result.content is already html.
                        // Otherwise fall back to empty string (html is not exposed by peel()).
                        responseBody.html = (resolvedFormat === 'html') ? result.content : '';
                        break;
                    case 'rawHtml':
                        // rawHtml is not surfaced by PeelResult — return empty string.
                        responseBody.rawHtml = '';
                        break;
                    case 'screenshot':
                        responseBody.screenshot = result.screenshot || null;
                        break;
                    case 'links': {
                        // Convert result.links entries to {url, text} objects; fall back
                        // to extractLinks() on the content when result.links is empty.
                        if (result.links && result.links.length > 0) {
                            responseBody.links = result.links.map((linkUrl) => ({ url: linkUrl, text: '' }));
                        }
                        else {
                            responseBody.links = extractLinks(result.content || '', result.url);
                        }
                        break;
                    }
                    case 'json':
                        // Already handled above via resolvedExtract / jsonData
                        break;
                }
            }
        }
        // Cache the fully decorated body (skip extraction results — they depend on the
        // user's LLM keys). Storing the body rather than the raw peel result keeps
        // cache hits identical to fresh responses (answer, summary, cost, formats).
        if (storeFlag !== false && !postBypassCache && !resolvedExtract) {
            cache.set(postCacheKey, { result: responseBody, timestamp: Date.now() }, { ttl: postCacheTtlMs });
        }
        if (wantsEnvelope(req)) {
            successResponse(res, responseBody, {
                requestId: req.requestId,
                processingTimeMs: elapsed,
                creditsUsed: 1,
                cached: false,
                fetchType,
            });
        }
        else {
            res.json(responseBody);
        }
    }
    catch (error) {
        const err = error;
        console.error('POST fetch/scrape error:', err);
        if (res.headersSent) {
            return; // Timeout middleware already responded
        }
        const postUrl = req.body?.url;
        if (err.code || err.name === 'TimeoutError' || err.name === 'BlockedError' || err.name === 'NetworkError' || err.name === 'WebPeelError') {
            // Known fetch failure: expose the message built by buildFetchErrorMessage().
            const { type, message, hint, docs } = buildFetchErrorMessage(err);
            const statusCode = type === 'timeout' ? 504
                : type === 'blocked' ? 403
                    : type === 'not_found' ? 404
                        : type === 'network' || type === 'server_error' ? 502
                            : 500;
            res.status(statusCode).json({
                success: false,
                error: {
                    type,
                    message,
                    url: postUrl,
                    ...(hint ? { hint } : {}),
                    docs: docs || 'https://webpeel.dev/docs/errors',
                },
                requestId: req.requestId,
            });
        }
        else {
            // Unexpected error: generic message only; details stay in the server log above.
            res.status(500).json({
                success: false,
                error: {
                    type: 'unknown',
                    message: 'An unexpected error occurred. If this persists, check https://webpeel.dev/status',
                    url: postUrl,
                    docs: 'https://webpeel.dev/docs/errors',
                },
                requestId: req.requestId,
            });
        }
    }
}
1271
+ router.post('/v1/fetch', handlePostFetch);
1272
+ router.post('/v2/scrape', handlePostFetch);
1273
+ return router;
1274
+ }