@iflow-mcp/jakeliume-webpeel 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (547)
  1. package/LICENSE +15 -0
  2. package/README.md +313 -0
  3. package/dist/cache.d.ts +30 -0
  4. package/dist/cache.js +139 -0
  5. package/dist/cli/commands/auth.d.ts +5 -0
  6. package/dist/cli/commands/auth.js +411 -0
  7. package/dist/cli/commands/doctor.d.ts +37 -0
  8. package/dist/cli/commands/doctor.js +371 -0
  9. package/dist/cli/commands/fetch.d.ts +6 -0
  10. package/dist/cli/commands/fetch.js +1345 -0
  11. package/dist/cli/commands/guide.d.ts +2 -0
  12. package/dist/cli/commands/guide.js +183 -0
  13. package/dist/cli/commands/interact.d.ts +5 -0
  14. package/dist/cli/commands/interact.js +840 -0
  15. package/dist/cli/commands/jobs.d.ts +5 -0
  16. package/dist/cli/commands/jobs.js +997 -0
  17. package/dist/cli/commands/monitor.d.ts +12 -0
  18. package/dist/cli/commands/monitor.js +197 -0
  19. package/dist/cli/commands/observe.d.ts +12 -0
  20. package/dist/cli/commands/observe.js +158 -0
  21. package/dist/cli/commands/screenshot.d.ts +5 -0
  22. package/dist/cli/commands/screenshot.js +282 -0
  23. package/dist/cli/commands/search.d.ts +5 -0
  24. package/dist/cli/commands/search.js +1021 -0
  25. package/dist/cli/commands/setup.d.ts +13 -0
  26. package/dist/cli/commands/setup.js +244 -0
  27. package/dist/cli/commands/skill.d.ts +15 -0
  28. package/dist/cli/commands/skill.js +195 -0
  29. package/dist/cli/utils.d.ts +84 -0
  30. package/dist/cli/utils.js +806 -0
  31. package/dist/cli-auth.d.ts +75 -0
  32. package/dist/cli-auth.js +369 -0
  33. package/dist/cli.d.ts +17 -0
  34. package/dist/cli.js +99 -0
  35. package/dist/core/actions.d.ts +69 -0
  36. package/dist/core/actions.js +495 -0
  37. package/dist/core/agent.d.ts +98 -0
  38. package/dist/core/agent.js +558 -0
  39. package/dist/core/answer.d.ts +42 -0
  40. package/dist/core/answer.js +395 -0
  41. package/dist/core/application-tracker.d.ts +84 -0
  42. package/dist/core/application-tracker.js +184 -0
  43. package/dist/core/apply.d.ts +162 -0
  44. package/dist/core/apply.js +816 -0
  45. package/dist/core/auth-detection.d.ts +35 -0
  46. package/dist/core/auth-detection.js +358 -0
  47. package/dist/core/auto-extract.d.ts +82 -0
  48. package/dist/core/auto-extract.js +604 -0
  49. package/dist/core/auto-interact.d.ts +23 -0
  50. package/dist/core/auto-interact.js +246 -0
  51. package/dist/core/bm25-filter.d.ts +66 -0
  52. package/dist/core/bm25-filter.js +288 -0
  53. package/dist/core/branding.d.ts +54 -0
  54. package/dist/core/branding.js +234 -0
  55. package/dist/core/browser-fetch.d.ts +323 -0
  56. package/dist/core/browser-fetch.js +1600 -0
  57. package/dist/core/browser-pool.d.ts +91 -0
  58. package/dist/core/browser-pool.js +550 -0
  59. package/dist/core/budget.d.ts +42 -0
  60. package/dist/core/budget.js +324 -0
  61. package/dist/core/business-intel.d.ts +47 -0
  62. package/dist/core/business-intel.js +279 -0
  63. package/dist/core/cache.d.ts +13 -0
  64. package/dist/core/cache.js +121 -0
  65. package/dist/core/cf-worker-proxy.d.ts +32 -0
  66. package/dist/core/cf-worker-proxy.js +87 -0
  67. package/dist/core/challenge-detection.d.ts +26 -0
  68. package/dist/core/challenge-detection.js +468 -0
  69. package/dist/core/change-tracking.d.ts +75 -0
  70. package/dist/core/change-tracking.js +276 -0
  71. package/dist/core/chunker.d.ts +46 -0
  72. package/dist/core/chunker.js +249 -0
  73. package/dist/core/chunking.d.ts +42 -0
  74. package/dist/core/chunking.js +181 -0
  75. package/dist/core/circuit-breaker.d.ts +44 -0
  76. package/dist/core/circuit-breaker.js +85 -0
  77. package/dist/core/content-pruner.d.ts +47 -0
  78. package/dist/core/content-pruner.js +425 -0
  79. package/dist/core/cookie-cache.d.ts +60 -0
  80. package/dist/core/cookie-cache.js +163 -0
  81. package/dist/core/crawl-checkpoint.d.ts +54 -0
  82. package/dist/core/crawl-checkpoint.js +104 -0
  83. package/dist/core/crawler.d.ts +84 -0
  84. package/dist/core/crawler.js +349 -0
  85. package/dist/core/cross-verify.d.ts +27 -0
  86. package/dist/core/cross-verify.js +93 -0
  87. package/dist/core/deep-fetch.d.ts +74 -0
  88. package/dist/core/deep-fetch.js +405 -0
  89. package/dist/core/deep-research.d.ts +141 -0
  90. package/dist/core/deep-research.js +972 -0
  91. package/dist/core/design-analysis.d.ts +70 -0
  92. package/dist/core/design-analysis.js +490 -0
  93. package/dist/core/design-compare.d.ts +38 -0
  94. package/dist/core/design-compare.js +264 -0
  95. package/dist/core/diff.d.ts +61 -0
  96. package/dist/core/diff.js +289 -0
  97. package/dist/core/dns-cache.d.ts +20 -0
  98. package/dist/core/dns-cache.js +198 -0
  99. package/dist/core/documents.d.ts +23 -0
  100. package/dist/core/documents.js +123 -0
  101. package/dist/core/domain-memory.d.ts +66 -0
  102. package/dist/core/domain-memory.js +163 -0
  103. package/dist/core/domain-verify.d.ts +40 -0
  104. package/dist/core/domain-verify.js +379 -0
  105. package/dist/core/engine-ranker.d.ts +112 -0
  106. package/dist/core/engine-ranker.js +395 -0
  107. package/dist/core/extract-inline.d.ts +38 -0
  108. package/dist/core/extract-inline.js +215 -0
  109. package/dist/core/extract-listings.d.ts +38 -0
  110. package/dist/core/extract-listings.js +461 -0
  111. package/dist/core/extract.d.ts +9 -0
  112. package/dist/core/extract.js +139 -0
  113. package/dist/core/fetch-cache.d.ts +57 -0
  114. package/dist/core/fetch-cache.js +95 -0
  115. package/dist/core/fetcher.d.ts +13 -0
  116. package/dist/core/fetcher.js +12 -0
  117. package/dist/core/google-cache.d.ts +29 -0
  118. package/dist/core/google-cache.js +180 -0
  119. package/dist/core/google-serp-parser.d.ts +82 -0
  120. package/dist/core/google-serp-parser.js +287 -0
  121. package/dist/core/hotel-search.d.ts +122 -0
  122. package/dist/core/hotel-search.js +382 -0
  123. package/dist/core/http-fetch.d.ts +72 -0
  124. package/dist/core/http-fetch.js +820 -0
  125. package/dist/core/human.d.ts +175 -0
  126. package/dist/core/human.js +680 -0
  127. package/dist/core/image-caption.d.ts +44 -0
  128. package/dist/core/image-caption.js +271 -0
  129. package/dist/core/jobs.d.ts +75 -0
  130. package/dist/core/jobs.js +634 -0
  131. package/dist/core/json-ld.d.ts +15 -0
  132. package/dist/core/json-ld.js +617 -0
  133. package/dist/core/language-detect.d.ts +18 -0
  134. package/dist/core/language-detect.js +135 -0
  135. package/dist/core/links.d.ts +10 -0
  136. package/dist/core/links.js +44 -0
  137. package/dist/core/llm-extract.d.ts +71 -0
  138. package/dist/core/llm-extract.js +507 -0
  139. package/dist/core/llm-provider.d.ts +100 -0
  140. package/dist/core/llm-provider.js +702 -0
  141. package/dist/core/local-search.d.ts +60 -0
  142. package/dist/core/local-search.js +308 -0
  143. package/dist/core/logger.d.ts +28 -0
  144. package/dist/core/logger.js +104 -0
  145. package/dist/core/map.d.ts +33 -0
  146. package/dist/core/map.js +127 -0
  147. package/dist/core/markdown.d.ts +92 -0
  148. package/dist/core/markdown.js +809 -0
  149. package/dist/core/metadata.d.ts +34 -0
  150. package/dist/core/metadata.js +422 -0
  151. package/dist/core/observe.d.ts +113 -0
  152. package/dist/core/observe.js +395 -0
  153. package/dist/core/ocr.d.ts +12 -0
  154. package/dist/core/ocr.js +33 -0
  155. package/dist/core/paginate.d.ts +31 -0
  156. package/dist/core/paginate.js +106 -0
  157. package/dist/core/pdf.d.ts +8 -0
  158. package/dist/core/pdf.js +25 -0
  159. package/dist/core/peel-tls.d.ts +25 -0
  160. package/dist/core/peel-tls.js +220 -0
  161. package/dist/core/pipeline.d.ts +132 -0
  162. package/dist/core/pipeline.js +1666 -0
  163. package/dist/core/profiles.d.ts +61 -0
  164. package/dist/core/profiles.js +350 -0
  165. package/dist/core/prompt-guard.d.ts +30 -0
  166. package/dist/core/prompt-guard.js +119 -0
  167. package/dist/core/proxy-config.d.ts +90 -0
  168. package/dist/core/proxy-config.js +172 -0
  169. package/dist/core/quick-answer.d.ts +53 -0
  170. package/dist/core/quick-answer.js +833 -0
  171. package/dist/core/rate-governor.d.ts +80 -0
  172. package/dist/core/rate-governor.js +238 -0
  173. package/dist/core/readability.d.ts +57 -0
  174. package/dist/core/readability.js +533 -0
  175. package/dist/core/research.d.ts +66 -0
  176. package/dist/core/research.js +270 -0
  177. package/dist/core/retry.d.ts +60 -0
  178. package/dist/core/retry.js +119 -0
  179. package/dist/core/safe-browsing.d.ts +30 -0
  180. package/dist/core/safe-browsing.js +206 -0
  181. package/dist/core/schema-extraction.d.ts +66 -0
  182. package/dist/core/schema-extraction.js +352 -0
  183. package/dist/core/schema-postprocess.d.ts +32 -0
  184. package/dist/core/schema-postprocess.js +469 -0
  185. package/dist/core/schema-templates.d.ts +19 -0
  186. package/dist/core/schema-templates.js +143 -0
  187. package/dist/core/screenshot.d.ts +224 -0
  188. package/dist/core/screenshot.js +207 -0
  189. package/dist/core/search-engines.d.ts +25 -0
  190. package/dist/core/search-engines.js +182 -0
  191. package/dist/core/search-provider.d.ts +243 -0
  192. package/dist/core/search-provider.js +1629 -0
  193. package/dist/core/searxng-provider.d.ts +35 -0
  194. package/dist/core/searxng-provider.js +105 -0
  195. package/dist/core/selective-evidence.d.ts +151 -0
  196. package/dist/core/selective-evidence.js +389 -0
  197. package/dist/core/site-search.d.ts +44 -0
  198. package/dist/core/site-search.js +252 -0
  199. package/dist/core/sitemap.d.ts +23 -0
  200. package/dist/core/sitemap.js +105 -0
  201. package/dist/core/source-credibility.d.ts +29 -0
  202. package/dist/core/source-credibility.js +584 -0
  203. package/dist/core/source-scoring.d.ts +166 -0
  204. package/dist/core/source-scoring.js +396 -0
  205. package/dist/core/stemmer.d.ts +38 -0
  206. package/dist/core/stemmer.js +509 -0
  207. package/dist/core/strategies.d.ts +104 -0
  208. package/dist/core/strategies.js +1044 -0
  209. package/dist/core/strategy-hooks.d.ts +145 -0
  210. package/dist/core/strategy-hooks.js +74 -0
  211. package/dist/core/structured-extract.d.ts +43 -0
  212. package/dist/core/structured-extract.js +550 -0
  213. package/dist/core/summarize.d.ts +17 -0
  214. package/dist/core/summarize.js +78 -0
  215. package/dist/core/synonyms.d.ts +42 -0
  216. package/dist/core/synonyms.js +184 -0
  217. package/dist/core/system-monitor.d.ts +61 -0
  218. package/dist/core/system-monitor.js +133 -0
  219. package/dist/core/table-format.d.ts +30 -0
  220. package/dist/core/table-format.js +146 -0
  221. package/dist/core/threat-feeds.d.ts +23 -0
  222. package/dist/core/threat-feeds.js +104 -0
  223. package/dist/core/timing.d.ts +21 -0
  224. package/dist/core/timing.js +33 -0
  225. package/dist/core/transcript-export.d.ts +47 -0
  226. package/dist/core/transcript-export.js +107 -0
  227. package/dist/core/user-agents.d.ts +82 -0
  228. package/dist/core/user-agents.js +239 -0
  229. package/dist/core/vertical-search.d.ts +54 -0
  230. package/dist/core/vertical-search.js +158 -0
  231. package/dist/core/watch-manager.d.ts +175 -0
  232. package/dist/core/watch-manager.js +416 -0
  233. package/dist/core/watch.d.ts +101 -0
  234. package/dist/core/watch.js +389 -0
  235. package/dist/core/youtube.d.ts +130 -0
  236. package/dist/core/youtube.js +1175 -0
  237. package/dist/ee/challenge-re-export.d.ts +1 -0
  238. package/dist/ee/challenge-re-export.js +1 -0
  239. package/dist/ee/challenge-solver.d.ts +72 -0
  240. package/dist/ee/challenge-solver.js +720 -0
  241. package/dist/ee/domain-extractors.d.ts +8 -0
  242. package/dist/ee/domain-extractors.js +8 -0
  243. package/dist/ee/domain-intel.d.ts +16 -0
  244. package/dist/ee/domain-intel.js +133 -0
  245. package/dist/ee/extractors/allrecipes.d.ts +2 -0
  246. package/dist/ee/extractors/allrecipes.js +120 -0
  247. package/dist/ee/extractors/amazon.d.ts +2 -0
  248. package/dist/ee/extractors/amazon.js +78 -0
  249. package/dist/ee/extractors/arxiv.d.ts +2 -0
  250. package/dist/ee/extractors/arxiv.js +137 -0
  251. package/dist/ee/extractors/bestbuy.d.ts +2 -0
  252. package/dist/ee/extractors/bestbuy.js +78 -0
  253. package/dist/ee/extractors/carscom.d.ts +2 -0
  254. package/dist/ee/extractors/carscom.js +121 -0
  255. package/dist/ee/extractors/coingecko.d.ts +2 -0
  256. package/dist/ee/extractors/coingecko.js +134 -0
  257. package/dist/ee/extractors/craigslist.d.ts +2 -0
  258. package/dist/ee/extractors/craigslist.js +92 -0
  259. package/dist/ee/extractors/devto.d.ts +2 -0
  260. package/dist/ee/extractors/devto.js +135 -0
  261. package/dist/ee/extractors/ebay.d.ts +2 -0
  262. package/dist/ee/extractors/ebay.js +90 -0
  263. package/dist/ee/extractors/espn.d.ts +2 -0
  264. package/dist/ee/extractors/espn.js +260 -0
  265. package/dist/ee/extractors/etsy.d.ts +2 -0
  266. package/dist/ee/extractors/etsy.js +52 -0
  267. package/dist/ee/extractors/facebook.d.ts +2 -0
  268. package/dist/ee/extractors/facebook.js +46 -0
  269. package/dist/ee/extractors/github.d.ts +2 -0
  270. package/dist/ee/extractors/github.js +196 -0
  271. package/dist/ee/extractors/google-flights.d.ts +2 -0
  272. package/dist/ee/extractors/google-flights.js +176 -0
  273. package/dist/ee/extractors/hackernews.d.ts +2 -0
  274. package/dist/ee/extractors/hackernews.js +147 -0
  275. package/dist/ee/extractors/imdb.d.ts +2 -0
  276. package/dist/ee/extractors/imdb.js +172 -0
  277. package/dist/ee/extractors/index.d.ts +26 -0
  278. package/dist/ee/extractors/index.js +247 -0
  279. package/dist/ee/extractors/instagram.d.ts +2 -0
  280. package/dist/ee/extractors/instagram.js +102 -0
  281. package/dist/ee/extractors/kalshi.d.ts +2 -0
  282. package/dist/ee/extractors/kalshi.js +121 -0
  283. package/dist/ee/extractors/kayak-cars.d.ts +2 -0
  284. package/dist/ee/extractors/kayak-cars.js +270 -0
  285. package/dist/ee/extractors/linkedin.d.ts +2 -0
  286. package/dist/ee/extractors/linkedin.js +113 -0
  287. package/dist/ee/extractors/medium.d.ts +2 -0
  288. package/dist/ee/extractors/medium.js +130 -0
  289. package/dist/ee/extractors/news.d.ts +4 -0
  290. package/dist/ee/extractors/news.js +173 -0
  291. package/dist/ee/extractors/npm.d.ts +2 -0
  292. package/dist/ee/extractors/npm.js +86 -0
  293. package/dist/ee/extractors/pdf.d.ts +2 -0
  294. package/dist/ee/extractors/pdf.js +108 -0
  295. package/dist/ee/extractors/pinterest.d.ts +2 -0
  296. package/dist/ee/extractors/pinterest.js +34 -0
  297. package/dist/ee/extractors/polymarket.d.ts +2 -0
  298. package/dist/ee/extractors/polymarket.js +358 -0
  299. package/dist/ee/extractors/producthunt.d.ts +2 -0
  300. package/dist/ee/extractors/producthunt.js +88 -0
  301. package/dist/ee/extractors/pubmed.d.ts +2 -0
  302. package/dist/ee/extractors/pubmed.js +162 -0
  303. package/dist/ee/extractors/pypi.d.ts +2 -0
  304. package/dist/ee/extractors/pypi.js +80 -0
  305. package/dist/ee/extractors/reddit.d.ts +2 -0
  306. package/dist/ee/extractors/reddit.js +438 -0
  307. package/dist/ee/extractors/redfin.d.ts +2 -0
  308. package/dist/ee/extractors/redfin.js +156 -0
  309. package/dist/ee/extractors/semanticscholar.d.ts +2 -0
  310. package/dist/ee/extractors/semanticscholar.js +131 -0
  311. package/dist/ee/extractors/shared.d.ts +12 -0
  312. package/dist/ee/extractors/shared.js +76 -0
  313. package/dist/ee/extractors/soundcloud.d.ts +2 -0
  314. package/dist/ee/extractors/soundcloud.js +34 -0
  315. package/dist/ee/extractors/sportsbetting.d.ts +2 -0
  316. package/dist/ee/extractors/sportsbetting.js +37 -0
  317. package/dist/ee/extractors/spotify.d.ts +2 -0
  318. package/dist/ee/extractors/spotify.js +34 -0
  319. package/dist/ee/extractors/stackoverflow.d.ts +2 -0
  320. package/dist/ee/extractors/stackoverflow.js +61 -0
  321. package/dist/ee/extractors/substack.d.ts +2 -0
  322. package/dist/ee/extractors/substack.js +115 -0
  323. package/dist/ee/extractors/substackroot.d.ts +2 -0
  324. package/dist/ee/extractors/substackroot.js +46 -0
  325. package/dist/ee/extractors/tiktok.d.ts +2 -0
  326. package/dist/ee/extractors/tiktok.js +29 -0
  327. package/dist/ee/extractors/tradingview.d.ts +2 -0
  328. package/dist/ee/extractors/tradingview.js +182 -0
  329. package/dist/ee/extractors/twitch.d.ts +2 -0
  330. package/dist/ee/extractors/twitch.js +36 -0
  331. package/dist/ee/extractors/twitter.d.ts +2 -0
  332. package/dist/ee/extractors/twitter.js +327 -0
  333. package/dist/ee/extractors/types.d.ts +14 -0
  334. package/dist/ee/extractors/types.js +1 -0
  335. package/dist/ee/extractors/walmart.d.ts +2 -0
  336. package/dist/ee/extractors/walmart.js +50 -0
  337. package/dist/ee/extractors/weather.d.ts +2 -0
  338. package/dist/ee/extractors/weather.js +133 -0
  339. package/dist/ee/extractors/wikipedia.d.ts +4 -0
  340. package/dist/ee/extractors/wikipedia.js +235 -0
  341. package/dist/ee/extractors/yelp.d.ts +2 -0
  342. package/dist/ee/extractors/yelp.js +216 -0
  343. package/dist/ee/extractors/youtube.d.ts +2 -0
  344. package/dist/ee/extractors/youtube.js +189 -0
  345. package/dist/ee/extractors/zillow.d.ts +54 -0
  346. package/dist/ee/extractors/zillow.js +247 -0
  347. package/dist/ee/extractors-re-export.d.ts +1 -0
  348. package/dist/ee/extractors-re-export.js +1 -0
  349. package/dist/ee/premium-hooks.d.ts +20 -0
  350. package/dist/ee/premium-hooks.js +50 -0
  351. package/dist/ee/spa-detection.d.ts +2 -0
  352. package/dist/ee/spa-detection.js +2 -0
  353. package/dist/ee/stability.d.ts +4 -0
  354. package/dist/ee/stability.js +29 -0
  355. package/dist/ee/swr-cache.d.ts +14 -0
  356. package/dist/ee/swr-cache.js +34 -0
  357. package/dist/index.d.ts +143 -0
  358. package/dist/index.js +291 -0
  359. package/dist/integrations/index.d.ts +2 -0
  360. package/dist/integrations/index.js +2 -0
  361. package/dist/integrations/langchain.d.ts +64 -0
  362. package/dist/integrations/langchain.js +115 -0
  363. package/dist/integrations/llamaindex.d.ts +50 -0
  364. package/dist/integrations/llamaindex.js +91 -0
  365. package/dist/mcp/handlers/act.d.ts +5 -0
  366. package/dist/mcp/handlers/act.js +34 -0
  367. package/dist/mcp/handlers/definitions.d.ts +6 -0
  368. package/dist/mcp/handlers/definitions.js +395 -0
  369. package/dist/mcp/handlers/extract.d.ts +7 -0
  370. package/dist/mcp/handlers/extract.js +135 -0
  371. package/dist/mcp/handlers/fetch.d.ts +6 -0
  372. package/dist/mcp/handlers/fetch.js +98 -0
  373. package/dist/mcp/handlers/find.d.ts +5 -0
  374. package/dist/mcp/handlers/find.js +137 -0
  375. package/dist/mcp/handlers/index.d.ts +13 -0
  376. package/dist/mcp/handlers/index.js +63 -0
  377. package/dist/mcp/handlers/legacy.d.ts +25 -0
  378. package/dist/mcp/handlers/legacy.js +450 -0
  379. package/dist/mcp/handlers/meta.d.ts +6 -0
  380. package/dist/mcp/handlers/meta.js +40 -0
  381. package/dist/mcp/handlers/monitor.d.ts +5 -0
  382. package/dist/mcp/handlers/monitor.js +41 -0
  383. package/dist/mcp/handlers/observe.d.ts +8 -0
  384. package/dist/mcp/handlers/observe.js +37 -0
  385. package/dist/mcp/handlers/read.d.ts +6 -0
  386. package/dist/mcp/handlers/read.js +78 -0
  387. package/dist/mcp/handlers/see.d.ts +5 -0
  388. package/dist/mcp/handlers/see.js +75 -0
  389. package/dist/mcp/handlers/types.d.ts +29 -0
  390. package/dist/mcp/handlers/types.js +28 -0
  391. package/dist/mcp/server.d.ts +7 -0
  392. package/dist/mcp/server.js +108 -0
  393. package/dist/mcp/smart-router.d.ts +23 -0
  394. package/dist/mcp/smart-router.js +178 -0
  395. package/dist/server/app.d.ts +14 -0
  396. package/dist/server/app.js +632 -0
  397. package/dist/server/auth-store.d.ts +28 -0
  398. package/dist/server/auth-store.js +88 -0
  399. package/dist/server/bull-queues.d.ts +60 -0
  400. package/dist/server/bull-queues.js +90 -0
  401. package/dist/server/email-service.d.ts +55 -0
  402. package/dist/server/email-service.js +291 -0
  403. package/dist/server/job-queue.d.ts +100 -0
  404. package/dist/server/job-queue.js +145 -0
  405. package/dist/server/logger.d.ts +10 -0
  406. package/dist/server/logger.js +37 -0
  407. package/dist/server/middleware/audit-log.d.ts +14 -0
  408. package/dist/server/middleware/audit-log.js +73 -0
  409. package/dist/server/middleware/auth.d.ts +35 -0
  410. package/dist/server/middleware/auth.js +225 -0
  411. package/dist/server/middleware/rate-limit.d.ts +50 -0
  412. package/dist/server/middleware/rate-limit.js +270 -0
  413. package/dist/server/middleware/scope-guard.d.ts +25 -0
  414. package/dist/server/middleware/scope-guard.js +45 -0
  415. package/dist/server/middleware/url-validator.d.ts +15 -0
  416. package/dist/server/middleware/url-validator.js +201 -0
  417. package/dist/server/openapi.yaml +6418 -0
  418. package/dist/server/pg-auth-store.d.ts +146 -0
  419. package/dist/server/pg-auth-store.js +576 -0
  420. package/dist/server/pg-job-queue.d.ts +59 -0
  421. package/dist/server/pg-job-queue.js +375 -0
  422. package/dist/server/routes/activity.d.ts +6 -0
  423. package/dist/server/routes/activity.js +79 -0
  424. package/dist/server/routes/admin-active.d.ts +7 -0
  425. package/dist/server/routes/admin-active.js +120 -0
  426. package/dist/server/routes/admin-stats.d.ts +7 -0
  427. package/dist/server/routes/admin-stats.js +176 -0
  428. package/dist/server/routes/agent.d.ts +24 -0
  429. package/dist/server/routes/agent.js +480 -0
  430. package/dist/server/routes/answer.d.ts +5 -0
  431. package/dist/server/routes/answer.js +125 -0
  432. package/dist/server/routes/ask.d.ts +28 -0
  433. package/dist/server/routes/ask.js +295 -0
  434. package/dist/server/routes/batch.d.ts +6 -0
  435. package/dist/server/routes/batch.js +493 -0
  436. package/dist/server/routes/cache-warm.d.ts +25 -0
  437. package/dist/server/routes/cache-warm.js +212 -0
  438. package/dist/server/routes/cli-usage.d.ts +6 -0
  439. package/dist/server/routes/cli-usage.js +127 -0
  440. package/dist/server/routes/compat.d.ts +23 -0
  441. package/dist/server/routes/compat.js +652 -0
  442. package/dist/server/routes/crawl.d.ts +13 -0
  443. package/dist/server/routes/crawl.js +287 -0
  444. package/dist/server/routes/deep-fetch.d.ts +8 -0
  445. package/dist/server/routes/deep-fetch.js +57 -0
  446. package/dist/server/routes/deep-research.d.ts +11 -0
  447. package/dist/server/routes/deep-research.js +232 -0
  448. package/dist/server/routes/demo.d.ts +24 -0
  449. package/dist/server/routes/demo.js +517 -0
  450. package/dist/server/routes/do.d.ts +8 -0
  451. package/dist/server/routes/do.js +72 -0
  452. package/dist/server/routes/extract.d.ts +14 -0
  453. package/dist/server/routes/extract.js +325 -0
  454. package/dist/server/routes/feed.d.ts +15 -0
  455. package/dist/server/routes/feed.js +311 -0
  456. package/dist/server/routes/fetch-queue.d.ts +13 -0
  457. package/dist/server/routes/fetch-queue.js +357 -0
  458. package/dist/server/routes/fetch.d.ts +7 -0
  459. package/dist/server/routes/fetch.js +1274 -0
  460. package/dist/server/routes/go.d.ts +14 -0
  461. package/dist/server/routes/go.js +81 -0
  462. package/dist/server/routes/health.d.ts +11 -0
  463. package/dist/server/routes/health.js +141 -0
  464. package/dist/server/routes/jobs.d.ts +7 -0
  465. package/dist/server/routes/jobs.js +574 -0
  466. package/dist/server/routes/map.d.ts +11 -0
  467. package/dist/server/routes/map.js +116 -0
  468. package/dist/server/routes/mcp.d.ts +14 -0
  469. package/dist/server/routes/mcp.js +197 -0
  470. package/dist/server/routes/metrics.d.ts +37 -0
  471. package/dist/server/routes/metrics.js +149 -0
  472. package/dist/server/routes/oauth.d.ts +9 -0
  473. package/dist/server/routes/oauth.js +396 -0
  474. package/dist/server/routes/playground.d.ts +17 -0
  475. package/dist/server/routes/playground.js +283 -0
  476. package/dist/server/routes/reader.d.ts +18 -0
  477. package/dist/server/routes/reader.js +192 -0
  478. package/dist/server/routes/research.d.ts +14 -0
  479. package/dist/server/routes/research.js +482 -0
  480. package/dist/server/routes/screenshot.d.ts +22 -0
  481. package/dist/server/routes/screenshot.js +820 -0
  482. package/dist/server/routes/search.d.ts +6 -0
  483. package/dist/server/routes/search.js +874 -0
  484. package/dist/server/routes/session.d.ts +17 -0
  485. package/dist/server/routes/session.js +548 -0
  486. package/dist/server/routes/share.d.ts +18 -0
  487. package/dist/server/routes/share.js +462 -0
  488. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  489. package/dist/server/routes/smart-search/handlers/cars.js +102 -0
  490. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  491. package/dist/server/routes/smart-search/handlers/flights.js +72 -0
  492. package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
  493. package/dist/server/routes/smart-search/handlers/general.js +717 -0
  494. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  495. package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
  496. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  497. package/dist/server/routes/smart-search/handlers/products.js +1309 -0
  498. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  499. package/dist/server/routes/smart-search/handlers/rental.js +154 -0
  500. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  501. package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
  502. package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
  503. package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
  504. package/dist/server/routes/smart-search/index.d.ts +19 -0
  505. package/dist/server/routes/smart-search/index.js +546 -0
  506. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  507. package/dist/server/routes/smart-search/intent.js +264 -0
  508. package/dist/server/routes/smart-search/llm.d.ts +16 -0
  509. package/dist/server/routes/smart-search/llm.js +70 -0
  510. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  511. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  512. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  513. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  514. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  515. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  516. package/dist/server/routes/smart-search/types.d.ts +81 -0
  517. package/dist/server/routes/smart-search/types.js +1 -0
  518. package/dist/server/routes/smart-search/utils.d.ts +20 -0
  519. package/dist/server/routes/smart-search/utils.js +146 -0
  520. package/dist/server/routes/stats.d.ts +6 -0
  521. package/dist/server/routes/stats.js +71 -0
  522. package/dist/server/routes/stripe.d.ts +15 -0
  523. package/dist/server/routes/stripe.js +296 -0
  524. package/dist/server/routes/transcript-export.d.ts +10 -0
  525. package/dist/server/routes/transcript-export.js +178 -0
  526. package/dist/server/routes/usage.d.ts +9 -0
  527. package/dist/server/routes/usage.js +279 -0
  528. package/dist/server/routes/users.d.ts +8 -0
  529. package/dist/server/routes/users.js +1867 -0
  530. package/dist/server/routes/watch.d.ts +15 -0
  531. package/dist/server/routes/watch.js +309 -0
  532. package/dist/server/routes/webhooks.d.ts +26 -0
  533. package/dist/server/routes/webhooks.js +170 -0
  534. package/dist/server/routes/youtube.d.ts +6 -0
  535. package/dist/server/routes/youtube.js +130 -0
  536. package/dist/server/sentry.d.ts +14 -0
  537. package/dist/server/sentry.js +104 -0
  538. package/dist/server/types.d.ts +15 -0
  539. package/dist/server/types.js +7 -0
  540. package/dist/server/utils/response.d.ts +44 -0
  541. package/dist/server/utils/response.js +69 -0
  542. package/dist/server/utils/sse.d.ts +22 -0
  543. package/dist/server/utils/sse.js +38 -0
  544. package/dist/types.d.ts +552 -0
  545. package/dist/types.js +39 -0
  546. package/llms.txt +105 -0
  547. package/package.json +189 -0
package/dist/index.js ADDED
@@ -0,0 +1,291 @@
1
+ /**
2
+ * WebPeel - Fast web fetcher for AI agents
3
+ *
4
+ * Main library export
5
+ */
6
+ import { cleanup, warmup, closePool, scrollAndWait, closeProfileBrowser } from './core/fetcher.js';
7
+ import { createContext, normalizeOptions, handleYouTube, fetchContent, detectContentType, parseContent, postProcess, finalize, buildResult, } from './core/pipeline.js';
8
+ import { checkUrlSafety } from './core/safe-browsing.js';
9
+ export * from './types.js';
10
+ export { WebPeelError as TypedWebPeelError, Errors, isRetryable, } from './errors.js';
11
+ export { withRetry, DomainRateLimiter, domainLimiter } from './core/retry.js';
12
+ // Domain extractors — compiled JS ships in npm, TypeScript source is .gitignore'd.
13
+ // Re-export types from the basic stub (always available), runtime functions via lazy wrapper.
14
+ export { getDomainExtractor, extractDomainData } from './ee/domain-extractors.js';
15
+ export { crawl } from './core/crawler.js';
16
+ export { discoverSitemap } from './core/sitemap.js';
17
+ export { mapDomain } from './core/map.js';
18
+ export { extractBranding } from './core/branding.js';
19
+ export { trackChange, getSnapshot, clearSnapshots } from './core/change-tracking.js';
20
+ export { extractWithLLM } from './core/extract.js';
21
+ export { extractDocumentToFormat, isPdfContentType, isDocxContentType } from './core/documents.js';
22
+ export { extractInlineJson } from './core/extract-inline.js';
23
+ export { runAgent } from './core/agent.js';
24
+ export { summarizeContent } from './core/summarize.js';
25
+ export { getSearchProvider, DuckDuckGoProvider, BraveSearchProvider, providerStats, } from './core/search-provider.js';
26
+ export { BaiduSearchProvider, YandexSearchProvider, NaverSearchProvider, YahooJapanSearchProvider } from './core/search-engines.js';
27
+ export { crossVerifySearch } from './core/cross-verify.js';
28
+ export { answerQuestion, } from './core/answer.js';
29
+ export { parseGoogleSerp } from './core/google-serp-parser.js';
30
+ export { searchJobs } from './core/jobs.js';
31
+ export { RateGovernor, formatDuration, } from './core/rate-governor.js';
32
+ export { ApplicationTracker, } from './core/application-tracker.js';
33
+ export { applyToJob, loadApplications, saveApplication, getApplicationsToday, updateApplicationStatus, } from './core/apply.js';
34
+ // Human behavior exports — see bottom of file for full export
35
+ export { extractListings } from './core/extract-listings.js';
36
+ export { parseYouTubeUrl, extractVideoInfo, extractPlayerResponse, parseCaptionXml, decodeHtmlEntities, getYouTubeTranscript, } from './core/youtube.js';
37
+ export { formatTable } from './core/table-format.js';
38
+ export { findNextPageUrl } from './core/paginate.js';
39
+ export { distillToBudget, budgetListings, TOKENS_PER_LISTING_ITEM } from './core/budget.js';
40
+ export { watch, parseDuration, parseAssertion, } from './core/watch.js';
41
+ export { observe, } from './core/observe.js';
42
+ export { diffUrl, } from './core/diff.js';
43
+ export { extractReadableContent } from './core/readability.js';
44
+ export { quickAnswer } from './core/quick-answer.js';
45
+ export { extractValueFromPassage, smartExtractSchemaFields } from './core/schema-postprocess.js';
46
+ export { Timer } from './core/timing.js';
47
+ export { chunkContent } from './core/chunker.js';
/**
 * Delegate to the optional `./core/search-fallback.js` module.
 *
 * The module is loaded lazily on each call. If it cannot be imported, or if
 * invoking its `searchFallback` export throws synchronously, `null` is
 * returned instead. The delegate's return value is handed back as-is (not
 * awaited here), so a promise it returns settles for the caller.
 *
 * @param _args - Arguments forwarded unchanged to the underlying implementation
 * @returns Whatever the underlying implementation returns, or `null`
 */
export async function searchFallback(..._args) {
    try {
        // @ts-ignore — proprietary module, gitignored
        const fallbackModule = await import('./core/search-fallback.js');
        const outcome = fallbackModule.searchFallback(..._args);
        return outcome;
    }
    catch {
        return null;
    }
}
58
+ export { peelTLSFetch, isPeelTLSAvailable, shutdownPeelTLS } from './core/peel-tls.js';
59
+ export { sanitizeForLLM } from './core/prompt-guard.js';
60
+ export { getSourceCredibility } from './core/source-credibility.js';
61
+ export { verifyDomain } from './core/domain-verify.js';
62
+ export { checkUrlSafety } from './core/safe-browsing.js';
63
+ export { checkThreatFeeds } from './core/threat-feeds.js';
64
+ export { detectLanguageFromUrl, buildAcceptLanguageHeader } from './core/language-detect.js';
65
+ export { localSearch } from './core/local-search.js';
66
+ export { getBusinessIntel } from './core/business-intel.js';
67
+ export { CircuitBreaker, browserCircuitBreaker } from './core/circuit-breaker.js';
68
+ export { checkMemoryPressure } from './core/browser-pool.js';
69
+ export { searchShopping, searchNews, searchImages, searchVideos } from './core/vertical-search.js';
/**
 * Fetch and extract content from a URL.
 *
 * Steps: build a pipeline context, run the Safe Browsing check, return early
 * when `handleYouTube` produces a result, otherwise run
 * fetch → content-type detection → parse → post-process → finalize.
 * When the extracted content is very sparse and no browser was involved, the
 * call is retried exactly once with `render: true`.
 *
 * @param url - URL to fetch
 * @param options - Fetch options
 * @returns Extracted content and metadata, with the Safe Browsing verdict
 *   attached as `safeBrowsing`
 * @throws Re-throws any pipeline error after calling `cleanup()`
 *
 * @example
 * ```typescript
 * import { peel } from 'webpeel';
 *
 * const result = await peel('https://example.com');
 * console.log(result.content); // Markdown content
 * console.log(result.metadata); // Structured metadata
 * ```
 */
export async function peel(url, options = {}) {
    const ctx = createContext(url, options);
    normalizeOptions(ctx);
    // Safe Browsing check — awaited before any page fetch. A flagged URL does
    // not abort the request; it only adds a warning to the result.
    const sbResult = await checkUrlSafety(url, process.env.SAFE_BROWSING_API_KEY);
    ctx.safeBrowsingResult = sbResult;
    if (!sbResult.safe) {
        const threatList = sbResult.threats.join(', ');
        ctx.warnings.push(`⚠️ URL flagged by Safe Browsing: ${threatList}`);
    }
    const ytResult = await handleYouTube(ctx);
    if (ytResult) {
        // Attach safe browsing to YouTube results too, merging context warnings
        // without duplicating ones the YouTube result already carries.
        return {
            ...ytResult,
            safeBrowsing: sbResult,
            ...(ytResult.warnings || ctx.warnings.length > 0
                ? { warnings: [...(ytResult.warnings ?? []), ...ctx.warnings.filter(w => !ytResult.warnings?.includes(w))] }
                : {}),
        };
    }
    try {
        await fetchContent(ctx);
        detectContentType(ctx);
        await parseContent(ctx);
        await postProcess(ctx);
        await finalize(ctx);
        const result = buildResult(ctx);
        // Attach safe browsing result
        result.safeBrowsing = sbResult;
        // ── Auto-render escalation (post-processing) ────────────────────────
        // If final content is thin and we did NOT use browser rendering,
        // retry with render=true. This catches SPAs that return enough SSR
        // HTML to pass the fetch-time thin-content check but produce sparse
        // extracted content (e.g. React shells with nav chrome only).
        // Only escalate when: (a) not already rendered, (b) not explicitly
        // opted out (noEscalate), (c) not a domain-extracted result, (d) HTML
        // content type, (e) not a retry already.
        const contentTokens = result.tokens ?? Math.ceil((result.content?.length ?? 0) / 4);
        const wasRendered = ctx.render || options.render;
        const hasDomainData = !!ctx.domainData;
        const isHtml = (ctx.fetchResult?.contentType || '').includes('html');
        const noEscalate = !!options.noEscalate;
        const isRetry = !!options._autoRenderRetry;
        // Don't escalate when user explicitly limited output size
        const hasTokenBudget = !!(options.budget || options.maxTokens);
        // Don't escalate if browser/stealth was already used — can't go higher
        const fetchMethod = ctx.fetchResult?.method ?? '';
        const alreadyBrowserOrStealth = fetchMethod === 'browser' || fetchMethod === 'stealth' ||
            fetchMethod === 'browser-with-wait' || wasRendered;
        if (contentTokens < 80 &&
            !alreadyBrowserOrStealth &&
            !hasDomainData &&
            isHtml &&
            !noEscalate &&
            !isRetry &&
            !hasTokenBudget &&
            result.content &&
            result.content.length < 400) {
            // Retry with render — this is a one-shot escalation, not a loop
            // (the `_autoRenderRetry` flag stops the nested call from escalating).
            let retryResult;
            try {
                retryResult = await peel(url, {
                    ...options,
                    render: true,
                    noCache: true,
                    _autoRenderRetry: true,
                });
            }
            catch {
                // The escalation is opportunistic: a failed browser retry must not
                // throw away the initial result we already have. The nested call
                // has already run cleanup() in its own catch handler.
                result.warnings = [
                    ...(result.warnings ?? []),
                    'Auto-render escalation failed; returning initial fetch result',
                ];
            }
            if (retryResult) {
                // Estimate retry tokens the same way as the initial result so the
                // comparison still works when `tokens` is not populated.
                const retryTokens = retryResult.tokens ?? Math.ceil((retryResult.content?.length ?? 0) / 4);
                // Only use the retry if it produced more content
                if (retryTokens > contentTokens) {
                    retryResult.warnings = [
                        ...(retryResult.warnings || []),
                        'Auto-escalated to browser rendering (initial fetch produced sparse content)',
                    ];
                    return retryResult;
                }
            }
        }
        return result;
    }
    catch (error) {
        // Clean up browser resources on error
        await cleanup();
        throw error;
    }
}
/**
 * Fetch multiple URLs in batch with concurrency control.
 *
 * A fixed pool of workers pulls URLs from a shared cursor, so a slow URL
 * only occupies one worker. Results are stored at the index of their URL.
 * A URL whose fetch throws is reported as `{ url, error }` instead of
 * rejecting the whole batch.
 *
 * @param urls - Array of URLs to fetch
 * @param options - Fetch options, plus `concurrency` (default 3; values
 *   below 1 or non-numeric values are treated as 1) and an optional
 *   `onProgress(completed, total)` callback invoked after each URL settles
 * @returns Array of results or errors, in the same order as `urls`
 *
 * @example
 * ```typescript
 * import { peelBatch } from 'webpeel';
 *
 * const urls = ['https://example.com', 'https://example.org'];
 * const results = await peelBatch(urls, { concurrency: 3 });
 * ```
 */
export async function peelBatch(urls, options = {}) {
    const { concurrency = 3, onProgress, ...peelOpts } = options;
    const results = new Array(urls.length);
    let nextIndex = 0;
    let completedCount = 0;
    async function worker() {
        while (nextIndex < urls.length) {
            // Claiming the index is synchronous, so two workers never take the same URL.
            const index = nextIndex++;
            const url = urls[index];
            try {
                results[index] = await peel(url, peelOpts);
            }
            catch (error) {
                results[index] = {
                    url,
                    error: error instanceof Error ? error.message : 'Unknown error',
                };
            }
            completedCount++;
            onProgress?.(completedCount, urls.length);
        }
    }
    // Clamp the pool size to at least one worker. Previously a concurrency of
    // 0, a negative number, null or NaN started no workers at all and the
    // function silently returned an array of empty slots.
    const requested = Math.floor(Number(concurrency));
    const poolSize = requested >= 1 ? requested : 1;
    // Launch concurrent workers (true worker-pool, not sequential batches)
    const workerCount = Math.min(poolSize, urls.length);
    if (workerCount > 0) {
        await Promise.all(Array.from({ length: workerCount }, () => worker()));
    }
    return results;
}
213
+ /**
214
+ * Clean up any browser resources
215
+ * Call this when you're done using WebPeel
216
+ */
217
+ export { cleanup, warmup, closePool, scrollAndWait, closeProfileBrowser };
218
+ export { getCached, setCached, clearCache, setCacheTTL } from './core/cache.js';
219
+ export { getRealisticUserAgent, getRandomUA, REALISTIC_USER_AGENTS, } from './core/user-agents.js';
220
+ export { humanDelay, humanMouseMove, humanRead, warmupBrowse, humanType, humanClearAndType, humanClick, humanScroll, humanScrollToElement, warmupSession, humanSelect, humanUploadFile, humanToggle, } from './core/human.js';
221
+ export { SCHEMA_TEMPLATES, getSchemaTemplate, listSchemaTemplates } from './core/schema-templates.js';
222
+ // Framework integrations
223
+ export { WebPeelLoader } from './integrations/langchain.js';
224
+ export { WebPeelReader } from './integrations/llamaindex.js';
225
+ // Advanced stealth utilities — for power users who want to apply extra evasions
226
+ // to their own Playwright pages.
227
+ // stealth-patches: proprietary module, loaded at runtime only
/**
 * Apply the optional stealth patches to a page.
 *
 * Loads `./core/stealth-patches.js` on demand and forwards `page` to its
 * `applyStealthPatches` export. Any failure — the module being absent or the
 * patch call itself failing — is swallowed, so this always resolves.
 *
 * @param page - Page object handed to the stealth module unchanged
 * @returns Resolves to `undefined` once the attempt has finished
 */
export async function applyStealthPatches(page) {
    // @ts-ignore — proprietary module, gitignored
    await import('./core/stealth-patches.js')
        .then((stealthModule) => stealthModule.applyStealthPatches(page))
        .catch(() => {
            /* not available */
        });
}
/**
 * Apply an Accept-Language header to a page via the optional stealth module.
 *
 * Loads `./core/stealth-patches.js` on demand and forwards `page` and `lang`
 * to its `applyAcceptLanguageHeader` export. Any failure — the module being
 * absent or the call itself failing — is swallowed, so this always resolves.
 *
 * @param page - Page object handed to the stealth module unchanged
 * @param lang - Language value handed to the stealth module unchanged
 * @returns Resolves to `undefined` once the attempt has finished
 */
export async function applyAcceptLanguageHeader(page, lang) {
    // @ts-ignore — proprietary module, gitignored
    await import('./core/stealth-patches.js')
        .then((stealthModule) => stealthModule.applyAcceptLanguageHeader(page, lang))
        .catch(() => {
            /* not available */
        });
}
244
+ // Google Cache fallback — fetch cached copies of blocked pages
245
+ export { fetchGoogleCache, isGoogleCacheAvailable } from './core/google-cache.js';
246
+ export { cfWorkerFetch, isCfWorkerAvailable } from './core/cf-worker-proxy.js';
/**
 * WebPeel client class — alternative OOP interface over the functional API.
 * Provides the same capabilities as the standalone functions but with
 * a configured client instance.
 *
 * @example
 * import { WebPeel } from 'webpeel';
 * const wp = new WebPeel({ apiKey: process.env.WEBPEEL_API_KEY });
 * const result = await wp.fetch('https://stripe.com');
 */
export class WebPeel {
    apiKey;
    /**
     * @param config - Client configuration; `apiKey` is mandatory
     * @throws Error when `config` is missing or has no `apiKey`
     */
    constructor(config) {
        // Optional chaining so a missing config object produces the intended
        // error message instead of an opaque TypeError.
        if (!config?.apiKey)
            throw new Error('WebPeel: apiKey is required');
        this.apiKey = config.apiKey;
        // apiUrl reserved for future use (remote API proxy mode)
        void config.apiUrl;
    }
    /** Fetch and extract content from a URL (delegates to `peel`). */
    async fetch(url, options = {}) {
        return peel(url, { ...options });
    }
    /** Search the web using the provider selected by `getSearchProvider`. */
    async search(query, options = {}) {
        const { getSearchProvider } = await import('./core/search-provider.js');
        const provider = getSearchProvider({ ...options });
        return provider.searchWeb(query, options);
    }
    /** Crawl a site; the client's `apiKey` is added to the crawl options. */
    async crawl(startUrl, options = {}) {
        const { crawl: crawlFn } = await import('./core/crawler.js');
        return crawlFn(startUrl, { ...options, apiKey: this.apiKey });
    }
    /** Map a domain's pages (delegates to `mapDomain`). */
    async map(url, options = {}) {
        const { mapDomain } = await import('./core/map.js');
        return mapDomain(url, { ...options });
    }
    /**
     * Extract structured data.
     *
     * NOTE(review): `_schema` is currently not used — this returns the plain
     * `peel` result for `url`. Confirm whether schema-driven extraction is
     * meant to be wired in here.
     */
    async extract(url, _schema, options = {}) {
        const result = await peel(url, { ...options });
        return result;
    }
}
@@ -0,0 +1,2 @@
1
+ export { WebPeelLoader, type WebPeelLoaderOptions, type Document } from './langchain.js';
2
+ export { WebPeelReader, type WebPeelReaderOptions, type LlamaDocument } from './llamaindex.js';
@@ -0,0 +1,2 @@
1
+ export { WebPeelLoader } from './langchain.js';
2
+ export { WebPeelReader } from './llamaindex.js';
@@ -0,0 +1,64 @@
1
+ /**
2
+ * WebPeel LangChain.js Document Loader
3
+ *
4
+ * Usage:
5
+ * import { WebPeelLoader } from 'webpeel/integrations/langchain';
6
+ * const loader = new WebPeelLoader({ url: 'https://example.com' });
7
+ * const docs = await loader.load();
8
+ */
9
+ import type { PeelOptions } from '../types.js';
10
+ /** Minimal LangChain-style Document shape: the text plus free-form metadata. Declared locally to avoid depending on LangChain. */
11
+ export interface Document {
12
+ pageContent: string;
13
+ metadata: Record<string, any>;
14
+ }
15
+ export interface WebPeelLoaderOptions {
16
+ /** URL to fetch; used only when `urls` is not provided */
17
+ url: string;
18
+ /** Multiple URLs to fetch; when set, these are loaded instead of `url` */
19
+ urls?: string[];
20
+ /** Scraping mode: 'scrape' for single page, 'crawl' for following links. NOTE(review): the loader implementation in langchain.js does not appear to read this option — confirm. */
21
+ mode?: 'scrape' | 'crawl';
22
+ /** Output format (default: 'markdown') */
23
+ format?: 'markdown' | 'text' | 'html' | 'clean';
24
+ /** Use headless browser */
25
+ render?: boolean;
26
+ /** Stealth mode for anti-bot */
27
+ stealth?: boolean;
28
+ /** Token budget per page */
29
+ budget?: number;
30
+ /** Proxy URL */
31
+ proxy?: string;
32
+ /** Multiple proxies for rotation */
33
+ proxies?: string[];
34
+ /** CSS selector to extract */
35
+ selector?: string;
36
+ /** Enable chunking for RAG: each chunk becomes its own Document */
37
+ chunk?: boolean;
38
+ /** Max tokens per chunk (default: 512) */
39
+ chunkSize?: number;
40
+ /** Chunk overlap tokens (default: 50) */
41
+ chunkOverlap?: number;
42
+ /** Additional PeelOptions; spread after the options above, so they override them */
43
+ peelOptions?: Partial<PeelOptions>;
44
+ }
45
+ /**
46
+ * WebPeel Document Loader for LangChain.js
47
+ *
48
+ * Compatible with LangChain's BaseDocumentLoader interface.
49
+ * Returns Document[] with pageContent and metadata.
50
+ */
51
+ export declare class WebPeelLoader {
52
+ private options;
53
+ constructor(options: WebPeelLoaderOptions);
54
+ /**
55
+ * Load documents from the configured URL(s), in order.
56
+ * If chunking is enabled, each chunk becomes a separate Document. A URL that fails to load is reported as an empty Document whose metadata carries the error message.
57
+ */
58
+ load(): Promise<Document[]>;
59
+ /**
60
+ * Lazy load documents one at a time (async generator).
61
+ * Useful for large URL lists to avoid memory pressure.
62
+ */
63
+ lazyLoad(): AsyncGenerator<Document>;
64
+ }
@@ -0,0 +1,115 @@
1
+ /**
2
+ * WebPeel LangChain.js Document Loader
3
+ *
4
+ * Usage:
5
+ * import { WebPeelLoader } from 'webpeel/integrations/langchain';
6
+ * const loader = new WebPeelLoader({ url: 'https://example.com' });
7
+ * const docs = await loader.load();
8
+ */
9
+ import { peel } from '../index.js';
10
+ import { chunkContent } from '../core/chunker.js';
/**
 * Fetch a single URL and convert the result into LangChain-style documents.
 *
 * Never throws: a failed fetch is reported as one empty document whose
 * metadata carries the error message.
 *
 * @param url - URL to fetch
 * @param loaderOptions - The loader's options object
 * @returns One document per chunk when chunking is enabled, otherwise a
 *   single document for the page
 */
async function loadLangchainDocuments(url, loaderOptions) {
    try {
        const peelOpts = {
            format: loaderOptions.format || 'markdown',
            render: loaderOptions.render,
            stealth: loaderOptions.stealth,
            budget: loaderOptions.budget,
            proxy: loaderOptions.proxy,
            proxies: loaderOptions.proxies,
            selector: loaderOptions.selector,
            // Spread last so explicit peelOptions override the shortcuts above.
            ...loaderOptions.peelOptions,
        };
        // Remove undefined values
        Object.keys(peelOpts).forEach(key => {
            if (peelOpts[key] === undefined)
                delete peelOpts[key];
        });
        const result = await peel(url, peelOpts);
        if (!loaderOptions.chunk) {
            // Single document per URL
            return [{
                    pageContent: result.content,
                    metadata: {
                        source: url,
                        title: result.metadata?.title || '',
                        description: result.metadata?.description || '',
                        wordCount: result.metadata?.wordCount || 0,
                        language: result.metadata?.language || '',
                        fetchedAt: result.metadata?.fetchedAt || new Date().toISOString(),
                        method: result.metadata?.method || 'unknown',
                        contentType: result.metadata?.contentType || '',
                        statusCode: result.metadata?.statusCode || 0,
                    },
                }];
        }
        // Split into chunks, each becomes a Document
        const chunkResult = chunkContent(result.content, {
            maxTokens: loaderOptions.chunkSize || 512,
            // `??` rather than `||` so an explicit overlap of 0 is honoured.
            overlap: loaderOptions.chunkOverlap ?? 50,
            strategy: 'section',
        });
        return chunkResult.chunks.map(chunk => ({
            pageContent: chunk.text,
            metadata: {
                source: url,
                title: result.metadata?.title || '',
                description: result.metadata?.description || '',
                chunkIndex: chunk.index,
                totalChunks: chunkResult.totalChunks,
                section: chunk.section,
                sectionDepth: chunk.sectionDepth,
                tokenCount: chunk.tokenCount,
                wordCount: chunk.wordCount,
                fetchedAt: result.metadata?.fetchedAt || new Date().toISOString(),
                method: result.metadata?.method || 'unknown',
            },
        }));
    }
    catch (error) {
        // Include failed URLs as empty documents with error metadata
        return [{
                pageContent: '',
                metadata: {
                    source: url,
                    error: error instanceof Error ? error.message : String(error),
                    fetchedAt: new Date().toISOString(),
                },
            }];
    }
}
/**
 * WebPeel Document Loader for LangChain.js
 *
 * Compatible with LangChain's BaseDocumentLoader interface.
 * Returns Document[] with pageContent and metadata.
 */
export class WebPeelLoader {
    options;
    /**
     * @param options - Loader options; `urls` is used when provided,
     *   otherwise the single `url`
     */
    constructor(options) {
        this.options = options;
    }
    /**
     * Load documents from the configured URL(s).
     * If chunking is enabled, each chunk becomes a separate Document.
     *
     * @returns All documents, in URL order
     */
    async load() {
        const documents = [];
        for await (const doc of this.lazyLoad()) {
            documents.push(doc);
        }
        return documents;
    }
    /**
     * Lazy load documents one at a time (async generator).
     * Each URL is fetched only when the consumer asks for its documents, so
     * stopping early skips the remaining URLs and at most one page's
     * documents are buffered at a time.
     */
    async *lazyLoad() {
        const urls = this.options.urls || [this.options.url];
        for (const url of urls) {
            yield* await loadLangchainDocuments(url, this.options);
        }
    }
}
@@ -0,0 +1,50 @@
1
+ /**
2
+ * WebPeel LlamaIndex Reader
3
+ *
4
+ * Usage:
5
+ * import { WebPeelReader } from 'webpeel/integrations/llamaindex';
6
+ * const reader = new WebPeelReader();
7
+ * const docs = await reader.loadData('https://example.com');
8
+ */
9
+ import type { PeelOptions } from '../types.js';
10
+ /** Minimal LlamaIndex-style Document shape: text, free-form metadata and an optional identifier. */
11
+ export interface LlamaDocument {
12
+ text: string;
13
+ metadata: Record<string, any>;
14
+ id_?: string;
15
+ }
16
+ export interface WebPeelReaderOptions {
17
+ /** Output format (default: 'markdown') */
18
+ format?: 'markdown' | 'text' | 'html' | 'clean';
19
+ /** Use headless browser */
20
+ render?: boolean;
21
+ /** Stealth mode */
22
+ stealth?: boolean;
23
+ /** Token budget */
24
+ budget?: number;
25
+ /** Enable chunking: each chunk becomes its own document */
26
+ chunk?: boolean;
27
+ /** Max tokens per chunk (default: 512) */
28
+ chunkSize?: number;
29
+ /** Chunk overlap (default: 50) */
30
+ chunkOverlap?: number;
31
+ /** Proxy URL */
32
+ proxy?: string;
33
+ /** Multiple proxies */
34
+ proxies?: string[];
35
+ /** Additional PeelOptions; spread after the options above, so they override them */
36
+ peelOptions?: Partial<PeelOptions>;
37
+ }
38
+ /**
39
+ * WebPeel Reader for LlamaIndex
40
+ *
41
+ * Compatible with LlamaIndex's BaseReader interface.
42
+ */
43
+ export declare class WebPeelReader {
44
+ private options;
45
+ constructor(options?: WebPeelReaderOptions);
46
+ /**
47
+ * Load data from one or more URLs. Accepts a single URL or an array; a URL that fails to load is reported as an empty document whose metadata carries the error message.
48
+ */
49
+ loadData(urlOrUrls: string | string[]): Promise<LlamaDocument[]>;
50
+ }
@@ -0,0 +1,91 @@
1
+ /**
2
+ * WebPeel LlamaIndex Reader
3
+ *
4
+ * Usage:
5
+ * import { WebPeelReader } from 'webpeel/integrations/llamaindex';
6
+ * const reader = new WebPeelReader();
7
+ * const docs = await reader.loadData('https://example.com');
8
+ */
9
+ import { peel } from '../index.js';
10
+ import { chunkContent } from '../core/chunker.js';
/**
 * WebPeel Reader for LlamaIndex
 *
 * Compatible with LlamaIndex's BaseReader interface.
 */
export class WebPeelReader {
    options;
    /**
     * @param options - Reader options applied to every `loadData` call
     */
    constructor(options = {}) {
        this.options = options;
    }
    /**
     * Load data from one or more URLs.
     *
     * URLs are fetched sequentially. With chunking enabled each chunk becomes
     * its own document with id `<url>#chunk-<index>`; otherwise one document
     * per URL is produced with the URL as its id. A URL that fails to load is
     * reported as an empty document whose metadata carries the error message.
     *
     * @param urlOrUrls - A single URL or an array of URLs
     * @returns Documents in URL order
     */
    async loadData(urlOrUrls) {
        const urls = Array.isArray(urlOrUrls) ? urlOrUrls : [urlOrUrls];
        const documents = [];
        for (const url of urls) {
            try {
                const peelOpts = {
                    format: this.options.format || 'markdown',
                    render: this.options.render,
                    stealth: this.options.stealth,
                    budget: this.options.budget,
                    proxy: this.options.proxy,
                    proxies: this.options.proxies,
                    // Spread last so explicit peelOptions override the shortcuts above.
                    ...this.options.peelOptions,
                };
                // Drop unset options so they do not reach peel() as explicit undefined.
                Object.keys(peelOpts).forEach(key => {
                    if (peelOpts[key] === undefined)
                        delete peelOpts[key];
                });
                const result = await peel(url, peelOpts);
                if (this.options.chunk) {
                    const chunkResult = chunkContent(result.content, {
                        maxTokens: this.options.chunkSize || 512,
                        // `??` rather than `||` so an explicit overlap of 0 is honoured.
                        overlap: this.options.chunkOverlap ?? 50,
                        strategy: 'section',
                    });
                    for (const chunk of chunkResult.chunks) {
                        documents.push({
                            text: chunk.text,
                            id_: `${url}#chunk-${chunk.index}`,
                            metadata: {
                                url,
                                title: result.metadata?.title || '',
                                chunkIndex: chunk.index,
                                totalChunks: chunkResult.totalChunks,
                                section: chunk.section,
                                tokenCount: chunk.tokenCount,
                            },
                        });
                    }
                }
                else {
                    documents.push({
                        text: result.content,
                        id_: url,
                        metadata: {
                            url,
                            title: result.metadata?.title || '',
                            description: result.metadata?.description || '',
                            wordCount: result.metadata?.wordCount || 0,
                            language: result.metadata?.language || '',
                        },
                    });
                }
            }
            catch (error) {
                // Keep going: a failed URL becomes an empty document with error metadata.
                documents.push({
                    text: '',
                    id_: url,
                    metadata: {
                        url,
                        error: error instanceof Error ? error.message : String(error),
                    },
                });
            }
        }
        return documents;
    }
}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * handleAct — perform browser actions on a page, then optionally extract content.
3
+ */
4
+ import { type McpHandler } from './types.js';
5
+ export declare const handleAct: McpHandler;
@@ -0,0 +1,34 @@
1
+ /**
2
+ * handleAct — perform browser actions on a page, then optionally extract content.
3
+ */
4
+ import { peel } from '../../index.js';
5
+ import { normalizeActions } from '../../core/actions.js';
6
+ import { textResult, safeStringify } from './types.js';
/**
 * MCP handler: perform browser actions on a page, then optionally return the
 * extracted content.
 *
 * Reads from `args`:
 * - `url` (required)
 * - `actions` (required, non-empty array; normalized via `normalizeActions`)
 * - `extract` (content is included unless this is exactly `false`)
 * - `screenshot` (coerced to boolean)
 *
 * Validation failures are returned as a text result containing
 * `{ error }` rather than thrown.
 *
 * @param args - Tool arguments
 * @param _ctx - Handler context (unused)
 * @returns Text result with url, title, optional content, screenshot,
 *   method and elapsed time
 */
export const handleAct = async (args, _ctx) => {
    const url = args.url;
    const rawActions = args.actions;
    const extract = args.extract !== false; // default true
    const screenshot = Boolean(args.screenshot);
    if (!url)
        return textResult(safeStringify({ error: 'url is required' }));
    // Require a real, non-empty array. Checking only `.length` let a non-empty
    // string slip through to normalizeActions.
    if (!Array.isArray(rawActions) || rawActions.length === 0)
        return textResult(safeStringify({ error: 'actions array is required' }));
    // Normalize actions (handles Firecrawl-style aliases)
    const actions = normalizeActions(rawActions) || [];
    const result = await peel(url, {
        render: true, // actions always require browser
        actions,
        screenshot,
        format: 'markdown',
        budget: 4000,
        timeout: 60000,
    });
    return textResult(safeStringify({
        url: result.url,
        title: result.title,
        content: extract ? result.content : undefined,
        screenshot: result.screenshot,
        method: result.method,
        elapsed: result.elapsed,
    }));
};
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Tool schema definitions — single source of truth for both transports.
3
+ * Imported by the standalone MCP server and the HTTP MCP route for tools/list.
4
+ */
5
+ import type { Tool } from '@modelcontextprotocol/sdk/types.js';
6
+ export declare const toolDefinitions: Tool[];