webpeel 0.19.4 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (544):
  1. package/README.md +2 -2
  2. package/dist/cache.d.ts +0 -1
  3. package/dist/cache.js +0 -1
  4. package/dist/cli/commands/auth.d.ts +5 -0
  5. package/dist/cli/commands/auth.js +476 -0
  6. package/dist/cli/commands/fetch.d.ts +6 -0
  7. package/dist/cli/commands/fetch.js +1015 -0
  8. package/dist/cli/commands/interact.d.ts +5 -0
  9. package/dist/cli/commands/interact.js +839 -0
  10. package/dist/cli/commands/jobs.d.ts +5 -0
  11. package/dist/cli/commands/jobs.js +997 -0
  12. package/dist/cli/commands/screenshot.d.ts +5 -0
  13. package/dist/cli/commands/screenshot.js +273 -0
  14. package/dist/cli/commands/search.d.ts +5 -0
  15. package/dist/cli/commands/search.js +524 -0
  16. package/dist/cli/utils.d.ts +84 -0
  17. package/dist/cli/utils.js +686 -0
  18. package/dist/cli-auth.d.ts +0 -1
  19. package/dist/cli-auth.js +0 -1
  20. package/dist/cli.d.ts +7 -6
  21. package/dist/cli.js +35 -4698
  22. package/dist/core/actions.d.ts +0 -1
  23. package/dist/core/actions.js +0 -1
  24. package/dist/core/agent.d.ts +0 -1
  25. package/dist/core/agent.js +9 -12
  26. package/dist/core/answer.d.ts +0 -1
  27. package/dist/core/answer.js +0 -1
  28. package/dist/core/application-tracker.d.ts +0 -1
  29. package/dist/core/application-tracker.js +0 -1
  30. package/dist/core/apply.d.ts +0 -1
  31. package/dist/core/apply.js +0 -1
  32. package/dist/core/auto-extract.d.ts +0 -1
  33. package/dist/core/auto-extract.js +0 -1
  34. package/dist/core/auto-interact.d.ts +0 -1
  35. package/dist/core/auto-interact.js +0 -1
  36. package/dist/core/bm25-filter.d.ts +0 -1
  37. package/dist/core/bm25-filter.js +0 -1
  38. package/dist/core/branding.d.ts +0 -1
  39. package/dist/core/branding.js +0 -1
  40. package/dist/core/browser-fetch.d.ts +0 -1
  41. package/dist/core/browser-fetch.js +17 -10
  42. package/dist/core/browser-pool.d.ts +0 -1
  43. package/dist/core/browser-pool.js +0 -1
  44. package/dist/core/budget.d.ts +0 -1
  45. package/dist/core/budget.js +0 -1
  46. package/dist/core/cache.d.ts +0 -1
  47. package/dist/core/cache.js +0 -1
  48. package/dist/core/cf-worker-proxy.d.ts +0 -1
  49. package/dist/core/cf-worker-proxy.js +0 -1
  50. package/dist/core/challenge-detection.d.ts +0 -1
  51. package/dist/core/challenge-detection.js +0 -1
  52. package/dist/core/change-tracking.d.ts +0 -1
  53. package/dist/core/change-tracking.js +0 -1
  54. package/dist/core/chunker.d.ts +0 -1
  55. package/dist/core/chunker.js +0 -1
  56. package/dist/core/chunking.d.ts +0 -1
  57. package/dist/core/chunking.js +0 -1
  58. package/dist/core/cloak-fetch.d.ts +0 -1
  59. package/dist/core/cloak-fetch.js +0 -1
  60. package/dist/core/content-pruner.d.ts +0 -1
  61. package/dist/core/content-pruner.js +0 -1
  62. package/dist/core/crawl-checkpoint.d.ts +0 -1
  63. package/dist/core/crawl-checkpoint.js +0 -1
  64. package/dist/core/crawler.d.ts +0 -1
  65. package/dist/core/crawler.js +6 -5
  66. package/dist/core/cycle-fetch.d.ts +0 -1
  67. package/dist/core/cycle-fetch.js +0 -1
  68. package/dist/core/deep-fetch.d.ts +0 -1
  69. package/dist/core/deep-fetch.js +0 -1
  70. package/dist/core/design-analysis.d.ts +0 -1
  71. package/dist/core/design-analysis.js +0 -1
  72. package/dist/core/design-compare.d.ts +0 -1
  73. package/dist/core/design-compare.js +0 -1
  74. package/dist/core/diff.d.ts +0 -1
  75. package/dist/core/diff.js +0 -1
  76. package/dist/core/dns-cache.d.ts +0 -1
  77. package/dist/core/dns-cache.js +0 -1
  78. package/dist/core/documents.d.ts +0 -1
  79. package/dist/core/documents.js +0 -1
  80. package/dist/core/domain-extractors.d.ts +0 -1
  81. package/dist/core/domain-extractors.js +0 -1
  82. package/dist/core/extract-inline.d.ts +0 -1
  83. package/dist/core/extract-inline.js +0 -1
  84. package/dist/core/extract-listings.d.ts +0 -1
  85. package/dist/core/extract-listings.js +0 -1
  86. package/dist/core/extract.d.ts +0 -1
  87. package/dist/core/extract.js +0 -1
  88. package/dist/core/fetcher.d.ts +0 -1
  89. package/dist/core/fetcher.js +0 -1
  90. package/dist/core/google-cache.d.ts +0 -1
  91. package/dist/core/google-cache.js +0 -1
  92. package/dist/core/hotel-search.d.ts +0 -1
  93. package/dist/core/hotel-search.js +0 -1
  94. package/dist/core/http-fetch.d.ts +0 -1
  95. package/dist/core/http-fetch.js +5 -7
  96. package/dist/core/human.d.ts +0 -1
  97. package/dist/core/human.js +0 -1
  98. package/dist/core/jobs.d.ts +0 -1
  99. package/dist/core/jobs.js +0 -1
  100. package/dist/core/json-ld.d.ts +0 -1
  101. package/dist/core/json-ld.js +0 -1
  102. package/dist/core/llm-extract.d.ts +0 -1
  103. package/dist/core/llm-extract.js +0 -1
  104. package/dist/core/logger.d.ts +17 -0
  105. package/dist/core/logger.js +44 -0
  106. package/dist/core/map.d.ts +0 -1
  107. package/dist/core/map.js +0 -1
  108. package/dist/core/markdown.d.ts +0 -1
  109. package/dist/core/markdown.js +0 -1
  110. package/dist/core/metadata.d.ts +0 -1
  111. package/dist/core/metadata.js +0 -1
  112. package/dist/core/paginate.d.ts +0 -1
  113. package/dist/core/paginate.js +0 -1
  114. package/dist/core/pdf.d.ts +0 -1
  115. package/dist/core/pdf.js +0 -1
  116. package/dist/core/peel-tls.d.ts +0 -1
  117. package/dist/core/peel-tls.js +0 -1
  118. package/dist/core/pipeline.d.ts +0 -1
  119. package/dist/core/pipeline.js +22 -25
  120. package/dist/core/profiles.d.ts +0 -1
  121. package/dist/core/profiles.js +0 -1
  122. package/dist/core/quick-answer.d.ts +0 -1
  123. package/dist/core/quick-answer.js +0 -1
  124. package/dist/core/rate-governor.d.ts +0 -1
  125. package/dist/core/rate-governor.js +0 -1
  126. package/dist/core/readability.d.ts +0 -1
  127. package/dist/core/readability.js +0 -1
  128. package/dist/core/research.d.ts +0 -1
  129. package/dist/core/research.js +0 -1
  130. package/dist/core/schema-extraction.d.ts +0 -1
  131. package/dist/core/schema-extraction.js +0 -1
  132. package/dist/core/schema-postprocess.d.ts +0 -1
  133. package/dist/core/schema-postprocess.js +0 -1
  134. package/dist/core/schema-templates.d.ts +0 -1
  135. package/dist/core/schema-templates.js +0 -1
  136. package/dist/core/screenshot.d.ts +0 -1
  137. package/dist/core/screenshot.js +0 -1
  138. package/dist/core/search-fallback.d.ts +0 -1
  139. package/dist/core/search-fallback.js +0 -1
  140. package/dist/core/search-provider.d.ts +0 -1
  141. package/dist/core/search-provider.js +18 -21
  142. package/dist/core/site-search.d.ts +0 -1
  143. package/dist/core/site-search.js +0 -1
  144. package/dist/core/sitemap.d.ts +0 -1
  145. package/dist/core/sitemap.js +0 -1
  146. package/dist/core/stealth-patches.d.ts +0 -1
  147. package/dist/core/stealth-patches.js +0 -1
  148. package/dist/core/stemmer.d.ts +0 -1
  149. package/dist/core/stemmer.js +0 -1
  150. package/dist/core/strategies.d.ts +6 -1
  151. package/dist/core/strategies.js +29 -41
  152. package/dist/core/strategy-hooks.d.ts +0 -1
  153. package/dist/core/strategy-hooks.js +0 -1
  154. package/dist/core/summarize.d.ts +0 -1
  155. package/dist/core/summarize.js +0 -1
  156. package/dist/core/synonyms.d.ts +0 -1
  157. package/dist/core/synonyms.js +0 -1
  158. package/dist/core/table-format.d.ts +0 -1
  159. package/dist/core/table-format.js +0 -1
  160. package/dist/core/timing.d.ts +0 -1
  161. package/dist/core/timing.js +0 -1
  162. package/dist/core/user-agents.d.ts +0 -1
  163. package/dist/core/user-agents.js +0 -1
  164. package/dist/core/watch-manager.d.ts +0 -1
  165. package/dist/core/watch-manager.js +0 -1
  166. package/dist/core/watch.d.ts +0 -1
  167. package/dist/core/watch.js +0 -1
  168. package/dist/core/youtube.d.ts +0 -1
  169. package/dist/core/youtube.js +0 -1
  170. package/dist/index.d.ts +8 -3
  171. package/dist/index.js +27 -3
  172. package/dist/integrations/index.d.ts +0 -1
  173. package/dist/integrations/index.js +0 -1
  174. package/dist/integrations/langchain.d.ts +0 -1
  175. package/dist/integrations/langchain.js +0 -1
  176. package/dist/integrations/llamaindex.d.ts +0 -1
  177. package/dist/integrations/llamaindex.js +0 -1
  178. package/dist/mcp/handlers/act.d.ts +5 -0
  179. package/dist/mcp/handlers/act.js +34 -0
  180. package/dist/mcp/handlers/definitions.d.ts +6 -0
  181. package/dist/mcp/handlers/definitions.js +266 -0
  182. package/dist/mcp/handlers/extract.d.ts +6 -0
  183. package/dist/mcp/handlers/extract.js +102 -0
  184. package/dist/mcp/handlers/fetch.d.ts +6 -0
  185. package/dist/mcp/handlers/fetch.js +98 -0
  186. package/dist/mcp/handlers/find.d.ts +5 -0
  187. package/dist/mcp/handlers/find.js +137 -0
  188. package/dist/mcp/handlers/index.d.ts +13 -0
  189. package/dist/mcp/handlers/index.js +61 -0
  190. package/dist/mcp/handlers/legacy.d.ts +25 -0
  191. package/dist/mcp/handlers/legacy.js +450 -0
  192. package/dist/mcp/handlers/meta.d.ts +6 -0
  193. package/dist/mcp/handlers/meta.js +31 -0
  194. package/dist/mcp/handlers/monitor.d.ts +5 -0
  195. package/dist/mcp/handlers/monitor.js +41 -0
  196. package/dist/mcp/handlers/read.d.ts +6 -0
  197. package/dist/mcp/handlers/read.js +63 -0
  198. package/dist/mcp/handlers/see.d.ts +5 -0
  199. package/dist/mcp/handlers/see.js +75 -0
  200. package/dist/mcp/handlers/types.d.ts +29 -0
  201. package/dist/mcp/handlers/types.js +28 -0
  202. package/dist/mcp/server.d.ts +3 -4
  203. package/dist/mcp/server.js +35 -1101
  204. package/dist/mcp/smart-router.d.ts +0 -1
  205. package/dist/mcp/smart-router.js +3 -1
  206. package/dist/types.d.ts +6 -1
  207. package/dist/types.js +0 -1
  208. package/package.json +3 -13
  209. package/dist/cache.d.ts.map +0 -1
  210. package/dist/cache.js.map +0 -1
  211. package/dist/cli-auth.d.ts.map +0 -1
  212. package/dist/cli-auth.js.map +0 -1
  213. package/dist/cli.bundle.cjs +0 -159248
  214. package/dist/cli.d.ts.map +0 -1
  215. package/dist/cli.js.map +0 -1
  216. package/dist/core/actions.d.ts.map +0 -1
  217. package/dist/core/actions.js.map +0 -1
  218. package/dist/core/agent.d.ts.map +0 -1
  219. package/dist/core/agent.js.map +0 -1
  220. package/dist/core/answer.d.ts.map +0 -1
  221. package/dist/core/answer.js.map +0 -1
  222. package/dist/core/application-tracker.d.ts.map +0 -1
  223. package/dist/core/application-tracker.js.map +0 -1
  224. package/dist/core/apply.d.ts.map +0 -1
  225. package/dist/core/apply.js.map +0 -1
  226. package/dist/core/auto-extract.d.ts.map +0 -1
  227. package/dist/core/auto-extract.js.map +0 -1
  228. package/dist/core/auto-interact.d.ts.map +0 -1
  229. package/dist/core/auto-interact.js.map +0 -1
  230. package/dist/core/bm25-filter.d.ts.map +0 -1
  231. package/dist/core/bm25-filter.js.map +0 -1
  232. package/dist/core/branding.d.ts.map +0 -1
  233. package/dist/core/branding.js.map +0 -1
  234. package/dist/core/browser-fetch.d.ts.map +0 -1
  235. package/dist/core/browser-fetch.js.map +0 -1
  236. package/dist/core/browser-pool.d.ts.map +0 -1
  237. package/dist/core/browser-pool.js.map +0 -1
  238. package/dist/core/budget.d.ts.map +0 -1
  239. package/dist/core/budget.js.map +0 -1
  240. package/dist/core/cache.d.ts.map +0 -1
  241. package/dist/core/cache.js.map +0 -1
  242. package/dist/core/cf-worker-proxy.d.ts.map +0 -1
  243. package/dist/core/cf-worker-proxy.js.map +0 -1
  244. package/dist/core/challenge-detection.d.ts.map +0 -1
  245. package/dist/core/challenge-detection.js.map +0 -1
  246. package/dist/core/change-tracking.d.ts.map +0 -1
  247. package/dist/core/change-tracking.js.map +0 -1
  248. package/dist/core/chunker.d.ts.map +0 -1
  249. package/dist/core/chunker.js.map +0 -1
  250. package/dist/core/chunking.d.ts.map +0 -1
  251. package/dist/core/chunking.js.map +0 -1
  252. package/dist/core/cloak-fetch.d.ts.map +0 -1
  253. package/dist/core/cloak-fetch.js.map +0 -1
  254. package/dist/core/content-pruner.d.ts.map +0 -1
  255. package/dist/core/content-pruner.js.map +0 -1
  256. package/dist/core/crawl-checkpoint.d.ts.map +0 -1
  257. package/dist/core/crawl-checkpoint.js.map +0 -1
  258. package/dist/core/crawler.d.ts.map +0 -1
  259. package/dist/core/crawler.js.map +0 -1
  260. package/dist/core/cycle-fetch.d.ts.map +0 -1
  261. package/dist/core/cycle-fetch.js.map +0 -1
  262. package/dist/core/deep-fetch.d.ts.map +0 -1
  263. package/dist/core/deep-fetch.js.map +0 -1
  264. package/dist/core/design-analysis.d.ts.map +0 -1
  265. package/dist/core/design-analysis.js.map +0 -1
  266. package/dist/core/design-compare.d.ts.map +0 -1
  267. package/dist/core/design-compare.js.map +0 -1
  268. package/dist/core/diff.d.ts.map +0 -1
  269. package/dist/core/diff.js.map +0 -1
  270. package/dist/core/dns-cache.d.ts.map +0 -1
  271. package/dist/core/dns-cache.js.map +0 -1
  272. package/dist/core/documents.d.ts.map +0 -1
  273. package/dist/core/documents.js.map +0 -1
  274. package/dist/core/domain-extractors.d.ts.map +0 -1
  275. package/dist/core/domain-extractors.js.map +0 -1
  276. package/dist/core/extract-inline.d.ts.map +0 -1
  277. package/dist/core/extract-inline.js.map +0 -1
  278. package/dist/core/extract-listings.d.ts.map +0 -1
  279. package/dist/core/extract-listings.js.map +0 -1
  280. package/dist/core/extract.d.ts.map +0 -1
  281. package/dist/core/extract.js.map +0 -1
  282. package/dist/core/fetcher.d.ts.map +0 -1
  283. package/dist/core/fetcher.js.map +0 -1
  284. package/dist/core/google-cache.d.ts.map +0 -1
  285. package/dist/core/google-cache.js.map +0 -1
  286. package/dist/core/hotel-search.d.ts.map +0 -1
  287. package/dist/core/hotel-search.js.map +0 -1
  288. package/dist/core/http-fetch.d.ts.map +0 -1
  289. package/dist/core/http-fetch.js.map +0 -1
  290. package/dist/core/human.d.ts.map +0 -1
  291. package/dist/core/human.js.map +0 -1
  292. package/dist/core/jobs.d.ts.map +0 -1
  293. package/dist/core/jobs.js.map +0 -1
  294. package/dist/core/json-ld.d.ts.map +0 -1
  295. package/dist/core/json-ld.js.map +0 -1
  296. package/dist/core/llm-extract.d.ts.map +0 -1
  297. package/dist/core/llm-extract.js.map +0 -1
  298. package/dist/core/map.d.ts.map +0 -1
  299. package/dist/core/map.js.map +0 -1
  300. package/dist/core/markdown.d.ts.map +0 -1
  301. package/dist/core/markdown.js.map +0 -1
  302. package/dist/core/metadata.d.ts.map +0 -1
  303. package/dist/core/metadata.js.map +0 -1
  304. package/dist/core/paginate.d.ts.map +0 -1
  305. package/dist/core/paginate.js.map +0 -1
  306. package/dist/core/pdf.d.ts.map +0 -1
  307. package/dist/core/pdf.js.map +0 -1
  308. package/dist/core/peel-tls.d.ts.map +0 -1
  309. package/dist/core/peel-tls.js.map +0 -1
  310. package/dist/core/pipeline.d.ts.map +0 -1
  311. package/dist/core/pipeline.js.map +0 -1
  312. package/dist/core/profiles.d.ts.map +0 -1
  313. package/dist/core/profiles.js.map +0 -1
  314. package/dist/core/quick-answer.d.ts.map +0 -1
  315. package/dist/core/quick-answer.js.map +0 -1
  316. package/dist/core/rate-governor.d.ts.map +0 -1
  317. package/dist/core/rate-governor.js.map +0 -1
  318. package/dist/core/readability.d.ts.map +0 -1
  319. package/dist/core/readability.js.map +0 -1
  320. package/dist/core/research.d.ts.map +0 -1
  321. package/dist/core/research.js.map +0 -1
  322. package/dist/core/schema-extraction.d.ts.map +0 -1
  323. package/dist/core/schema-extraction.js.map +0 -1
  324. package/dist/core/schema-postprocess.d.ts.map +0 -1
  325. package/dist/core/schema-postprocess.js.map +0 -1
  326. package/dist/core/schema-templates.d.ts.map +0 -1
  327. package/dist/core/schema-templates.js.map +0 -1
  328. package/dist/core/screenshot.d.ts.map +0 -1
  329. package/dist/core/screenshot.js.map +0 -1
  330. package/dist/core/search-fallback.d.ts.map +0 -1
  331. package/dist/core/search-fallback.js.map +0 -1
  332. package/dist/core/search-provider.d.ts.map +0 -1
  333. package/dist/core/search-provider.js.map +0 -1
  334. package/dist/core/site-search.d.ts.map +0 -1
  335. package/dist/core/site-search.js.map +0 -1
  336. package/dist/core/sitemap.d.ts.map +0 -1
  337. package/dist/core/sitemap.js.map +0 -1
  338. package/dist/core/stealth-patches.d.ts.map +0 -1
  339. package/dist/core/stealth-patches.js.map +0 -1
  340. package/dist/core/stemmer.d.ts.map +0 -1
  341. package/dist/core/stemmer.js.map +0 -1
  342. package/dist/core/strategies.d.ts.map +0 -1
  343. package/dist/core/strategies.js.map +0 -1
  344. package/dist/core/strategy-hooks.d.ts.map +0 -1
  345. package/dist/core/strategy-hooks.js.map +0 -1
  346. package/dist/core/summarize.d.ts.map +0 -1
  347. package/dist/core/summarize.js.map +0 -1
  348. package/dist/core/synonyms.d.ts.map +0 -1
  349. package/dist/core/synonyms.js.map +0 -1
  350. package/dist/core/table-format.d.ts.map +0 -1
  351. package/dist/core/table-format.js.map +0 -1
  352. package/dist/core/timing.d.ts.map +0 -1
  353. package/dist/core/timing.js.map +0 -1
  354. package/dist/core/user-agents.d.ts.map +0 -1
  355. package/dist/core/user-agents.js.map +0 -1
  356. package/dist/core/watch-manager.d.ts.map +0 -1
  357. package/dist/core/watch-manager.js.map +0 -1
  358. package/dist/core/watch.d.ts.map +0 -1
  359. package/dist/core/watch.js.map +0 -1
  360. package/dist/core/youtube.d.ts.map +0 -1
  361. package/dist/core/youtube.js.map +0 -1
  362. package/dist/index.d.ts.map +0 -1
  363. package/dist/index.js.map +0 -1
  364. package/dist/integrations/index.d.ts.map +0 -1
  365. package/dist/integrations/index.js.map +0 -1
  366. package/dist/integrations/langchain.d.ts.map +0 -1
  367. package/dist/integrations/langchain.js.map +0 -1
  368. package/dist/integrations/llamaindex.d.ts.map +0 -1
  369. package/dist/integrations/llamaindex.js.map +0 -1
  370. package/dist/mcp/server.d.ts.map +0 -1
  371. package/dist/mcp/server.js.map +0 -1
  372. package/dist/mcp/smart-router.d.ts.map +0 -1
  373. package/dist/mcp/smart-router.js.map +0 -1
  374. package/dist/server/app.d.ts +0 -15
  375. package/dist/server/app.d.ts.map +0 -1
  376. package/dist/server/app.js +0 -350
  377. package/dist/server/app.js.map +0 -1
  378. package/dist/server/auth-store.d.ts +0 -28
  379. package/dist/server/auth-store.d.ts.map +0 -1
  380. package/dist/server/auth-store.js +0 -89
  381. package/dist/server/auth-store.js.map +0 -1
  382. package/dist/server/email-service.d.ts +0 -22
  383. package/dist/server/email-service.d.ts.map +0 -1
  384. package/dist/server/email-service.js +0 -80
  385. package/dist/server/email-service.js.map +0 -1
  386. package/dist/server/job-queue.d.ts +0 -93
  387. package/dist/server/job-queue.d.ts.map +0 -1
  388. package/dist/server/job-queue.js +0 -146
  389. package/dist/server/job-queue.js.map +0 -1
  390. package/dist/server/logger.d.ts +0 -11
  391. package/dist/server/logger.d.ts.map +0 -1
  392. package/dist/server/logger.js +0 -38
  393. package/dist/server/logger.js.map +0 -1
  394. package/dist/server/middleware/auth.d.ts +0 -29
  395. package/dist/server/middleware/auth.d.ts.map +0 -1
  396. package/dist/server/middleware/auth.js +0 -222
  397. package/dist/server/middleware/auth.js.map +0 -1
  398. package/dist/server/middleware/rate-limit.d.ts +0 -25
  399. package/dist/server/middleware/rate-limit.d.ts.map +0 -1
  400. package/dist/server/middleware/rate-limit.js +0 -168
  401. package/dist/server/middleware/rate-limit.js.map +0 -1
  402. package/dist/server/middleware/url-validator.d.ts +0 -16
  403. package/dist/server/middleware/url-validator.d.ts.map +0 -1
  404. package/dist/server/middleware/url-validator.js +0 -187
  405. package/dist/server/middleware/url-validator.js.map +0 -1
  406. package/dist/server/openapi.yaml +0 -4944
  407. package/dist/server/pg-auth-store.d.ts +0 -133
  408. package/dist/server/pg-auth-store.d.ts.map +0 -1
  409. package/dist/server/pg-auth-store.js +0 -473
  410. package/dist/server/pg-auth-store.js.map +0 -1
  411. package/dist/server/pg-job-queue.d.ts +0 -60
  412. package/dist/server/pg-job-queue.d.ts.map +0 -1
  413. package/dist/server/pg-job-queue.js +0 -365
  414. package/dist/server/pg-job-queue.js.map +0 -1
  415. package/dist/server/premium/domain-intel.d.ts +0 -17
  416. package/dist/server/premium/domain-intel.d.ts.map +0 -1
  417. package/dist/server/premium/domain-intel.js +0 -134
  418. package/dist/server/premium/domain-intel.js.map +0 -1
  419. package/dist/server/premium/index.d.ts +0 -18
  420. package/dist/server/premium/index.d.ts.map +0 -1
  421. package/dist/server/premium/index.js +0 -36
  422. package/dist/server/premium/index.js.map +0 -1
  423. package/dist/server/premium/swr-cache.d.ts +0 -15
  424. package/dist/server/premium/swr-cache.d.ts.map +0 -1
  425. package/dist/server/premium/swr-cache.js +0 -35
  426. package/dist/server/premium/swr-cache.js.map +0 -1
  427. package/dist/server/routes/activity.d.ts +0 -7
  428. package/dist/server/routes/activity.d.ts.map +0 -1
  429. package/dist/server/routes/activity.js +0 -68
  430. package/dist/server/routes/activity.js.map +0 -1
  431. package/dist/server/routes/agent.d.ts +0 -16
  432. package/dist/server/routes/agent.d.ts.map +0 -1
  433. package/dist/server/routes/agent.js +0 -247
  434. package/dist/server/routes/agent.js.map +0 -1
  435. package/dist/server/routes/answer.d.ts +0 -6
  436. package/dist/server/routes/answer.d.ts.map +0 -1
  437. package/dist/server/routes/answer.js +0 -133
  438. package/dist/server/routes/answer.js.map +0 -1
  439. package/dist/server/routes/ask.d.ts +0 -23
  440. package/dist/server/routes/ask.d.ts.map +0 -1
  441. package/dist/server/routes/ask.js +0 -119
  442. package/dist/server/routes/ask.js.map +0 -1
  443. package/dist/server/routes/batch.d.ts +0 -7
  444. package/dist/server/routes/batch.d.ts.map +0 -1
  445. package/dist/server/routes/batch.js +0 -412
  446. package/dist/server/routes/batch.js.map +0 -1
  447. package/dist/server/routes/cli-usage.d.ts +0 -7
  448. package/dist/server/routes/cli-usage.d.ts.map +0 -1
  449. package/dist/server/routes/cli-usage.js +0 -121
  450. package/dist/server/routes/cli-usage.js.map +0 -1
  451. package/dist/server/routes/compat.d.ts +0 -24
  452. package/dist/server/routes/compat.d.ts.map +0 -1
  453. package/dist/server/routes/compat.js +0 -653
  454. package/dist/server/routes/compat.js.map +0 -1
  455. package/dist/server/routes/deep-fetch.d.ts +0 -9
  456. package/dist/server/routes/deep-fetch.d.ts.map +0 -1
  457. package/dist/server/routes/deep-fetch.js +0 -50
  458. package/dist/server/routes/deep-fetch.js.map +0 -1
  459. package/dist/server/routes/demo.d.ts +0 -25
  460. package/dist/server/routes/demo.d.ts.map +0 -1
  461. package/dist/server/routes/demo.js +0 -434
  462. package/dist/server/routes/demo.js.map +0 -1
  463. package/dist/server/routes/extract.d.ts +0 -9
  464. package/dist/server/routes/extract.d.ts.map +0 -1
  465. package/dist/server/routes/extract.js +0 -150
  466. package/dist/server/routes/extract.js.map +0 -1
  467. package/dist/server/routes/fetch.d.ts +0 -8
  468. package/dist/server/routes/fetch.d.ts.map +0 -1
  469. package/dist/server/routes/fetch.js +0 -988
  470. package/dist/server/routes/fetch.js.map +0 -1
  471. package/dist/server/routes/health.d.ts +0 -8
  472. package/dist/server/routes/health.d.ts.map +0 -1
  473. package/dist/server/routes/health.js +0 -20
  474. package/dist/server/routes/health.js.map +0 -1
  475. package/dist/server/routes/jobs.d.ts +0 -8
  476. package/dist/server/routes/jobs.d.ts.map +0 -1
  477. package/dist/server/routes/jobs.js +0 -487
  478. package/dist/server/routes/jobs.js.map +0 -1
  479. package/dist/server/routes/mcp.d.ts +0 -18
  480. package/dist/server/routes/mcp.d.ts.map +0 -1
  481. package/dist/server/routes/mcp.js +0 -1260
  482. package/dist/server/routes/mcp.js.map +0 -1
  483. package/dist/server/routes/oauth.d.ts +0 -10
  484. package/dist/server/routes/oauth.d.ts.map +0 -1
  485. package/dist/server/routes/oauth.js +0 -334
  486. package/dist/server/routes/oauth.js.map +0 -1
  487. package/dist/server/routes/quick-answer.d.ts +0 -9
  488. package/dist/server/routes/quick-answer.d.ts.map +0 -1
  489. package/dist/server/routes/quick-answer.js +0 -93
  490. package/dist/server/routes/quick-answer.js.map +0 -1
  491. package/dist/server/routes/screenshot.d.ts +0 -23
  492. package/dist/server/routes/screenshot.d.ts.map +0 -1
  493. package/dist/server/routes/screenshot.js +0 -819
  494. package/dist/server/routes/screenshot.js.map +0 -1
  495. package/dist/server/routes/search.d.ts +0 -7
  496. package/dist/server/routes/search.d.ts.map +0 -1
  497. package/dist/server/routes/search.js +0 -312
  498. package/dist/server/routes/search.js.map +0 -1
  499. package/dist/server/routes/session.d.ts +0 -16
  500. package/dist/server/routes/session.d.ts.map +0 -1
  501. package/dist/server/routes/session.js +0 -278
  502. package/dist/server/routes/session.js.map +0 -1
  503. package/dist/server/routes/stats.d.ts +0 -7
  504. package/dist/server/routes/stats.d.ts.map +0 -1
  505. package/dist/server/routes/stats.js +0 -65
  506. package/dist/server/routes/stats.js.map +0 -1
  507. package/dist/server/routes/stripe.d.ts +0 -16
  508. package/dist/server/routes/stripe.d.ts.map +0 -1
  509. package/dist/server/routes/stripe.js +0 -283
  510. package/dist/server/routes/stripe.js.map +0 -1
  511. package/dist/server/routes/users.d.ts +0 -9
  512. package/dist/server/routes/users.d.ts.map +0 -1
  513. package/dist/server/routes/users.js +0 -1211
  514. package/dist/server/routes/users.js.map +0 -1
  515. package/dist/server/routes/watch.d.ts +0 -16
  516. package/dist/server/routes/watch.d.ts.map +0 -1
  517. package/dist/server/routes/watch.js +0 -257
  518. package/dist/server/routes/watch.js.map +0 -1
  519. package/dist/server/routes/webhooks.d.ts +0 -16
  520. package/dist/server/routes/webhooks.d.ts.map +0 -1
  521. package/dist/server/routes/webhooks.js +0 -74
  522. package/dist/server/routes/webhooks.js.map +0 -1
  523. package/dist/server/routes/youtube.d.ts +0 -7
  524. package/dist/server/routes/youtube.d.ts.map +0 -1
  525. package/dist/server/routes/youtube.js +0 -93
  526. package/dist/server/routes/youtube.js.map +0 -1
  527. package/dist/server/sentry.d.ts +0 -14
  528. package/dist/server/sentry.d.ts.map +0 -1
  529. package/dist/server/sentry.js +0 -39
  530. package/dist/server/sentry.js.map +0 -1
  531. package/dist/server/types.d.ts +0 -16
  532. package/dist/server/types.d.ts.map +0 -1
  533. package/dist/server/types.js +0 -8
  534. package/dist/server/types.js.map +0 -1
  535. package/dist/server/utils/response.d.ts +0 -45
  536. package/dist/server/utils/response.d.ts.map +0 -1
  537. package/dist/server/utils/response.js +0 -70
  538. package/dist/server/utils/response.js.map +0 -1
  539. package/dist/server/utils/sse.d.ts +0 -23
  540. package/dist/server/utils/sse.d.ts.map +0 -1
  541. package/dist/server/utils/sse.js +0 -39
  542. package/dist/server/utils/sse.js.map +0 -1
  543. package/dist/types.d.ts.map +0 -1
  544. package/dist/types.js.map +0 -1
@@ -1,1260 +0,0 @@
1
- /**
2
- * Hosted MCP endpoint — POST /mcp, POST /v2/mcp, POST /:apiKey/v2/mcp
3
- *
4
- * Accepts MCP Streamable HTTP transport (JSON-RPC over HTTP).
5
- * Users connect with:
6
- * { "url": "https://api.webpeel.dev/mcp" }
7
- * { "url": "https://api.webpeel.dev/v2/mcp" }
8
- * { "url": "https://api.webpeel.dev/<API_KEY>/v2/mcp" } ← key in URL (Firecrawl-style)
9
- *
10
- * Each request creates a stateless MCP server, processes the JSON-RPC
11
- * message(s), and returns the response.
12
- */
13
- import { Router } from 'express';
14
- import '../types.js'; // Augments Express.Request with requestId
15
- import { Server } from '@modelcontextprotocol/sdk/server/index.js';
16
- import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
17
- import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
18
- import { LRUCache } from 'lru-cache';
19
- import { peel, peelBatch } from '../../index.js';
20
- import { normalizeActions } from '../../core/actions.js';
21
- import { runAgent } from '../../core/agent.js';
22
- import { quickAnswer } from '../../core/quick-answer.js';
23
- import { getBestSearchProvider } from '../../core/search-provider.js';
24
- import { extractInlineJson } from '../../core/extract-inline.js';
25
- import { answerQuestion } from '../../core/answer.js';
26
- import { readFileSync } from 'fs';
27
- import { join, dirname } from 'path';
28
- import { fileURLToPath } from 'url';
29
// Resolve the advertised version from the package.json located three
// directories above this module. The hard-coded value is kept whenever the
// manifest cannot be read or parsed, or carries no usable "version" string.
let pkgVersion = '0.7.0';
try {
    const moduleDir = dirname(fileURLToPath(import.meta.url));
    const manifestPath = join(moduleDir, '..', '..', '..', 'package.json');
    const { version } = JSON.parse(readFileSync(manifestPath, 'utf-8'));
    // Only override the fallback with a real version string; assigning
    // `pkg.version` blindly would leave pkgVersion undefined for a manifest
    // that has no "version" field.
    if (typeof version === 'string' && version.length > 0) {
        pkgVersion = version;
    }
}
catch { /* best-effort lookup: keep the fallback version */ }
37
// In-memory LRU cache for fetch results served through the MCP endpoint.
// Entries are bounded both by count and by the length of their JSON form.
const mcpFetchCache = new LRUCache({
    // At most 500 entries.
    max: 500,
    // Default time-to-live of five minutes, in milliseconds.
    ttl: 5 * 60 * 1000,
    // Total size budget (100 * 1024 * 1024) in the units reported by sizeCalculation.
    maxSize: 100 * 1024 * 1024,
    // An entry's size is the character length of its serialized JSON.
    sizeCalculation(cachedEntry) {
        return JSON.stringify(cachedEntry).length;
    },
});
43
- // ---------------------------------------------------------------------------
44
- // Helper functions for brand extraction
45
- // ---------------------------------------------------------------------------
46
/**
 * Collect the distinct CSS hex colours (`#rgb` or `#rrggbb`) found in `content`.
 *
 * Colours are returned in order of first appearance, keeping the casing of the
 * first occurrence. Duplicates are detected case-insensitively, and at most
 * ten colours are returned.
 *
 * @param {string} content - Text to scan (e.g. page markup or markdown).
 * @returns {string[]} Up to ten unique hex colour tokens; empty when none are
 *   found or when `content` is not a non-empty string.
 */
function extractColorsFromContent(content) {
    if (typeof content !== 'string' || content.length === 0) {
        return [];
    }
    // The trailing lookahead rejects tokens that merely *start* with hex digits
    // (e.g. "#1234567", "#facebook", "#added-section"), which the bare
    // alternation would otherwise truncate into a bogus colour.
    const hexColorPattern = /#(?:[0-9a-f]{6}|[0-9a-f]{3})(?![0-9a-z_-])/gi;
    const maxColors = 10;
    const seen = new Set();
    const colors = [];
    for (const [color] of content.matchAll(hexColorPattern)) {
        const key = color.toLowerCase();
        if (seen.has(key)) {
            continue;
        }
        seen.add(key);
        colors.push(color);
        if (colors.length === maxColors) {
            break;
        }
    }
    return colors;
}
55
/**
 * Collect the distinct `font-family` declaration values found in `content`.
 *
 * Each returned entry is the raw, trimmed value of one declaration (for
 * example `"Helvetica Neue", Arial`), in order of first appearance. Quoted
 * family names are kept with their quotes. At most five values are returned.
 *
 * @param {string} content - Text to scan (e.g. page markup or CSS).
 * @returns {string[]} Up to five unique font-family values; empty when none
 *   are found or when `content` is not a non-empty string.
 */
function extractFontsFromContent(content) {
    if (typeof content !== 'string' || content.length === 0) {
        return [];
    }
    // One family name: a quoted string, or an unquoted run that starts with a
    // non-space character. Quotes are only accepted at the start of a name, so
    // the quote that closes an inline style attribute still ends the value.
    const familyName = String.raw`(?:"[^"\n]*"|'[^'\n]*'|[^\s;}"',][^;}"',\n]*)`;
    // A declaration value is a comma-separated list of family names.
    const declaration = new RegExp(String.raw`font-family\s*:\s*(${familyName}(?:\s*,\s*${familyName})*)`, 'gi');
    const maxFonts = 5;
    const fonts = new Set();
    for (const [, value] of content.matchAll(declaration)) {
        const cleaned = value.trim();
        if (cleaned.length > 0) {
            fonts.add(cleaned);
        }
        if (fonts.size === maxFonts) {
            break;
        }
    }
    return [...fonts];
}
64
- // ---------------------------------------------------------------------------
65
- // Tool definitions — 7 consolidated tools (matches src/mcp/server.ts)
66
- // ---------------------------------------------------------------------------
67
/**
 * Build the list of MCP tool descriptors advertised by this endpoint.
 *
 * Every call returns a freshly constructed array of seven descriptors, each
 * with `name`, `description`, `annotations` and a JSON-schema `inputSchema`.
 *
 * @returns {Array<object>} Tool descriptors in their advertised order.
 */
function getTools() {
    // Annotation hints: in this list, read-only tools are also the idempotent ones.
    const annotate = (title, readOnly) => ({
        title,
        readOnlyHint: readOnly,
        destructiveHint: false,
        idempotentHint: readOnly,
        openWorldHint: true,
    });
    // Object schema; the `required` key is omitted entirely when not supplied.
    const objectSchema = (properties, required) => (required === undefined
        ? { type: 'object', properties }
        : { type: 'object', properties, required });
    const defineTool = (name, description, annotations, inputSchema) => ({
        name,
        description,
        annotations,
        inputSchema,
    });

    const smartTool = defineTool(
        'webpeel',
        [
            "Your complete web toolkit. Describe what you want in plain language. ",
            "Examples: 'read https://stripe.com', 'screenshot bbc.com on mobile', ",
            "'find best AI frameworks', 'extract prices from stripe.com/pricing', ",
            "'watch stripe.com/pricing for changes'",
        ].join(''),
        annotate('WebPeel Smart Web Tool', true),
        objectSchema({
            task: { type: 'string', description: 'Plain English description of what you want to do with the web.' },
        }, ['task']),
    );

    const readTool = defineTool(
        'webpeel_read',
        'Read any URL and return clean markdown. Handles web pages, YouTube videos, and PDFs automatically. Use question= for Q&A about the page, summary=true for a summary.',
        annotate('Read Web Page', true),
        objectSchema({
            url: { type: 'string', description: 'URL to fetch' },
            format: { type: 'string', enum: ['markdown', 'text', 'html'], description: 'Output format (default: markdown)', default: 'markdown' },
            render: { type: 'boolean', description: 'Force browser rendering for JS-heavy sites', default: false },
            question: { type: 'string', description: 'Ask a question about the page content (BM25, no LLM needed)' },
            summary: { type: 'boolean', description: 'Return a summary instead of full content', default: false },
            budget: { type: 'number', description: 'Smart token budget — distill content to N tokens' },
            readable: { type: 'boolean', description: 'Reader mode — extract only article content', default: false },
        }, ['url']),
    );

    const seeTool = defineTool(
        'webpeel_see',
        "See any page visually. Returns a screenshot. Use mode='design' for design analysis, mode='compare' with compare_url for visual comparison.",
        annotate('See Page Visually', true),
        objectSchema({
            url: { type: 'string', description: 'URL to screenshot' },
            mode: { type: 'string', enum: ['screenshot', 'design', 'compare'], description: "Mode: 'screenshot' (default), 'design' (analysis), 'compare' (visual diff)", default: 'screenshot' },
            compare_url: { type: 'string', description: "Second URL to compare against (for mode='compare')" },
            viewport: { type: 'string', enum: ['mobile', 'tablet', 'desktop'], description: 'Viewport size preset' },
            full_page: { type: 'boolean', description: 'Capture the full scrollable page', default: false },
        }, ['url']),
    );

    // The only tool without a `required` list: either `query` or `url` may be given.
    const findTool = defineTool(
        'webpeel_find',
        "Find anything on the web. Pass a query to search, or a url to discover all pages on that domain. Use depth='deep' for multi-source research.",
        annotate('Find on the Web', true),
        objectSchema({
            query: { type: 'string', description: 'Search query' },
            url: { type: 'string', description: 'Domain URL to map/discover all pages' },
            depth: { type: 'string', enum: ['quick', 'deep'], description: "Search depth: 'quick' = single search, 'deep' = multi-source research", default: 'quick' },
            limit: { type: 'number', description: 'Max results to return (default: 5)', default: 5 },
        }),
    );

    const extractTool = defineTool(
        'webpeel_extract',
        "Extract structured data from any URL. Pass fields=['price','title'] for specific data, or omit for auto-detection. Returns typed JSON.",
        annotate('Extract Structured Data', true),
        objectSchema({
            url: { type: 'string', description: 'URL to extract from' },
            schema: { type: 'object', description: 'JSON schema describing desired output structure' },
            fields: { type: 'array', items: { type: 'string' }, description: "Specific fields to extract, e.g. ['price', 'title', 'description']" },
            format: { type: 'string', enum: ['json', 'markdown'], description: 'Output format (default: json)', default: 'json' },
        }, ['url']),
    );

    const monitorTool = defineTool(
        'webpeel_monitor',
        'Watch a URL for changes. Returns diff on subsequent calls. Add webhook= for persistent monitoring with notifications.',
        annotate('Monitor URL for Changes', false),
        objectSchema({
            url: { type: 'string', description: 'URL to monitor' },
            webhook: { type: 'string', description: 'Webhook URL to notify when content changes' },
            interval: { type: 'string', description: "Check interval, e.g. '1h', '30m', '1d'", default: '1h' },
            selector: { type: 'string', description: 'CSS selector to monitor a specific part of the page' },
        }, ['url']),
    );

    // Shape of a single entry in the `actions` array of webpeel_act.
    const actionItemSchema = objectSchema({
        type: { type: 'string', enum: ['click', 'type', 'fill', 'scroll', 'wait', 'press', 'hover', 'select'] },
        selector: { type: 'string' },
        value: { type: 'string' },
        key: { type: 'string' },
        milliseconds: { type: 'number' },
    }, ['type']);

    const actTool = defineTool(
        'webpeel_act',
        'Interact with a web page. Click buttons, fill forms, navigate. Returns screenshot + extracted content after actions complete.',
        annotate('Act on Web Page', false),
        objectSchema({
            url: { type: 'string', description: 'URL to interact with' },
            actions: {
                type: 'array',
                description: 'Actions to perform, e.g. [{type:"click",selector:".btn"}, {type:"type",selector:"#q",value:"hello"}]',
                items: actionItemSchema,
            },
            extract_after: { type: 'boolean', description: 'Extract content after actions complete', default: true },
            screenshot_after: { type: 'boolean', description: 'Take screenshot after actions complete', default: false },
        }, ['url', 'actions']),
    );

    return [smartTool, readTool, seeTool, findTool, extractTool, monitorTool, actTool];
}
193
- // ---------------------------------------------------------------------------
194
- // Tool handler
195
- // ---------------------------------------------------------------------------
196
/**
 * Serialize a value to pretty-printed JSON without ever throwing, and always
 * return a string.
 *
 * @param {*} obj - Value to serialize.
 * @returns {string} Two-space indented JSON; `'null'` when the value has no
 *   JSON representation (undefined, a function, a symbol); or a JSON-encoded
 *   `serialization_error` object when serialization throws (for example on
 *   circular references or BigInt values).
 */
function safeStringify(obj) {
    try {
        // JSON.stringify yields undefined rather than a string for undefined,
        // functions and symbols; normalize that to valid JSON text.
        return JSON.stringify(obj, null, 2) ?? 'null';
    }
    catch {
        return JSON.stringify({ error: 'serialization_error', message: 'Failed to serialize result' });
    }
}
204
- async function handleToolCall(name, args, pool, req) {
205
- try {
206
- // webpeel_fetch
207
- if (name === 'webpeel_fetch') {
208
- const url = args.url;
209
- if (!url || typeof url !== 'string')
210
- throw new Error('Invalid URL');
211
- if (url.length > 2048)
212
- throw new Error('URL too long');
213
- // Normalize actions (handles Firecrawl-style aliases)
214
- const parsedActions = args.actions ? normalizeActions(args.actions) : undefined;
215
- const hasActions = parsedActions && parsedActions.length > 0;
216
- const options = {
217
- render: args.render || hasActions || false,
218
- stealth: args.stealth || false,
219
- wait: args.wait || 0,
220
- format: args.format || 'markdown',
221
- selector: args.selector,
222
- maxTokens: args.maxTokens,
223
- images: args.images,
224
- readable: args.readable || false,
225
- lite: args.lite || false,
226
- budget: args.budget,
227
- question: args.question,
228
- screenshot: args.screenshot || false,
229
- actions: parsedActions,
230
- };
231
- // Auto-budget: default to 4000 tokens for MCP when no budget specified
232
- // Lite mode disables auto-budget
233
- if (options.budget === undefined && !options.lite) {
234
- options.budget = 4000;
235
- }
236
- // Cache key and bypass logic
237
- const mcpNoCache = args.noCache === true;
238
- const mcpCacheTtlMs = typeof args.cacheTtl === 'number' ? args.cacheTtl * 1000 : 5 * 60 * 1000;
239
- const mcpActionsKey = parsedActions ? JSON.stringify(parsedActions) : '';
240
- const mcpCacheKey = `mcp:fetch:${url}:${options.render}:${options.wait}:${options.format}:${options.selector}:${options.images}:${mcpActionsKey}:${options.budget}`;
241
- // Check cache (skip for noCache or inline extraction requests)
242
- const hasInlineExtract = args.inlineExtract && (args.inlineExtract.schema || args.inlineExtract.prompt);
243
- if (!mcpNoCache && !hasInlineExtract) {
244
- const cached = mcpFetchCache.get(mcpCacheKey);
245
- if (cached) {
246
- const cacheAge = Date.now() - cached.timestamp;
247
- if (cacheAge < mcpCacheTtlMs) {
248
- const r = cached.result;
249
- const cachedOutput = {
250
- url: r.url || url,
251
- title: r.title || r.metadata?.title || '',
252
- tokens: r.tokens || 0,
253
- content: r.content,
254
- _cache: 'HIT',
255
- _cacheAge: Math.floor(cacheAge / 1000),
256
- };
257
- if (r.metadata && Object.keys(r.metadata).length > 0)
258
- cachedOutput.metadata = r.metadata;
259
- if (r.domainData)
260
- cachedOutput.domainData = r.domainData;
261
- if (r.readability)
262
- cachedOutput.readability = { readingTime: r.readability.readingTime, wordCount: r.readability.wordCount };
263
- if (r.quickAnswer)
264
- cachedOutput.quickAnswer = r.quickAnswer;
265
- if (r.json)
266
- cachedOutput.json = r.json;
267
- if (r.extracted)
268
- cachedOutput.extracted = r.extracted;
269
- if (r.images && r.images.length > 0)
270
- cachedOutput.images = r.images;
271
- if (r.screenshot)
272
- cachedOutput.screenshot = r.screenshot;
273
- if (r.fingerprint)
274
- cachedOutput.fingerprint = r.fingerprint;
275
- if (r.linkCount !== undefined)
276
- cachedOutput.linkCount = r.linkCount;
277
- if (r.quality !== undefined)
278
- cachedOutput.quality = r.quality;
279
- if (r.timing)
280
- cachedOutput.timing = r.timing;
281
- if (r.method)
282
- cachedOutput.method = r.method;
283
- if (r.freshness)
284
- cachedOutput.freshness = r.freshness;
285
- if (r.prunedPercent !== undefined)
286
- cachedOutput.prunedPercent = r.prunedPercent;
287
- return ok(safeStringify(cachedOutput));
288
- }
289
- }
290
- }
291
- const result = await Promise.race([
292
- peel(url, options),
293
- timeout(60000, 'Fetch timed out'),
294
- ]);
295
- // Inline LLM extraction (post-fetch, BYOK)
296
- const inlineExtract = args.inlineExtract;
297
- const llmProvider = args.llmProvider;
298
- const llmApiKey = args.llmApiKey;
299
- const llmModel = args.llmModel;
300
- if (inlineExtract && (inlineExtract.schema || inlineExtract.prompt) && llmApiKey && llmProvider) {
301
- const validProviders = ['openai', 'anthropic', 'google'];
302
- if (validProviders.includes(llmProvider)) {
303
- const extractResult = await extractInlineJson(result.content, {
304
- schema: inlineExtract.schema,
305
- prompt: inlineExtract.prompt,
306
- llmProvider: llmProvider,
307
- llmApiKey,
308
- llmModel,
309
- });
310
- result.json = extractResult.data;
311
- result.extractTokensUsed = extractResult.tokensUsed;
312
- }
313
- }
314
- // Store in cache (skip for inline extraction results — they depend on user's LLM keys)
315
- if (!mcpNoCache && !hasInlineExtract) {
316
- mcpFetchCache.set(mcpCacheKey, { result, timestamp: Date.now() }, { ttl: mcpCacheTtlMs });
317
- }
318
- // Build consistent output — always include url, title, tokens
319
- const output = {
320
- url: result.url || url,
321
- title: result.title || result.metadata?.title || '',
322
- tokens: result.tokens || 0,
323
- content: result.content,
324
- };
325
- if (result.metadata && Object.keys(result.metadata).length > 0)
326
- output.metadata = result.metadata;
327
- if (result.domainData)
328
- output.domainData = result.domainData;
329
- if (result.readability)
330
- output.readability = {
331
- readingTime: result.readability.readingTime,
332
- wordCount: result.readability.wordCount,
333
- };
334
- if (result.quickAnswer)
335
- output.quickAnswer = result.quickAnswer;
336
- if (result.json)
337
- output.json = result.json;
338
- if (result.extracted)
339
- output.extracted = result.extracted;
340
- if (result.images && result.images.length > 0)
341
- output.images = result.images;
342
- if (result.screenshot)
343
- output.screenshot = result.screenshot;
344
- if (result.fingerprint)
345
- output.fingerprint = result.fingerprint;
346
- if (result.extractTokensUsed)
347
- output.extractTokensUsed = result.extractTokensUsed;
348
- if (result._cache)
349
- output._cache = result._cache;
350
- if (result._cacheAge !== undefined)
351
- output._cacheAge = result._cacheAge;
352
- if (result.linkCount !== undefined)
353
- output.linkCount = result.linkCount;
354
- if (result.quality !== undefined)
355
- output.quality = result.quality;
356
- if (result.timing)
357
- output.timing = result.timing;
358
- if (result.method)
359
- output.method = result.method;
360
- if (result.freshness)
361
- output.freshness = result.freshness;
362
- if (result.prunedPercent !== undefined)
363
- output.prunedPercent = result.prunedPercent;
364
- return ok(safeStringify(output));
365
- }
366
- // webpeel_search
367
- if (name === 'webpeel_search') {
368
- const query = args.query;
369
- if (!query || typeof query !== 'string')
370
- throw new Error('Invalid query');
371
- const { getBestSearchProvider } = await import('../../core/search-provider.js');
372
- const { provider, apiKey } = getBestSearchProvider();
373
- const count = Math.min(Math.max(args.count || 5, 1), 10);
374
- const rawResults = await Promise.race([
375
- provider.searchWeb(query, { count, apiKey }),
376
- timeout(30000, 'Search timed out'),
377
- ]);
378
- // Normalize to consistent format
379
- const resultsList = Array.isArray(rawResults) ? rawResults : (rawResults?.results ?? []);
380
- const normalizedResults = resultsList.map((r) => ({
381
- title: r.title || '',
382
- url: r.url || r.link || '',
383
- snippet: r.snippet || r.description || r.body || '',
384
- ...(r.favicon ? { favicon: r.favicon } : {}),
385
- }));
386
- return ok(safeStringify({ query, count: normalizedResults.length, results: normalizedResults }));
387
- }
388
- // webpeel_crawl
389
- if (name === 'webpeel_crawl') {
390
- const url = args.url;
391
- if (!url || typeof url !== 'string')
392
- throw new Error('Invalid URL');
393
- const { crawl } = await import('../../core/crawler.js');
394
- const results = await Promise.race([
395
- crawl(url, {
396
- maxPages: args.maxPages,
397
- maxDepth: args.maxDepth,
398
- render: args.render || false,
399
- }),
400
- timeout(600000, 'Crawl timed out'),
401
- ]);
402
- return ok(safeStringify(results));
403
- }
404
- // webpeel_map
405
- if (name === 'webpeel_map') {
406
- const url = args.url;
407
- if (!url || typeof url !== 'string')
408
- throw new Error('Invalid URL');
409
- const { mapDomain } = await import('../../core/map.js');
410
- const results = await Promise.race([
411
- mapDomain(url, { maxUrls: args.maxUrls }),
412
- timeout(600000, 'Map timed out'),
413
- ]);
414
- return ok(safeStringify(results));
415
- }
416
- // webpeel_extract
417
- if (name === 'webpeel_extract') {
418
- const url = args.url;
419
- if (!url || typeof url !== 'string')
420
- throw new Error('Invalid URL');
421
- const options = {
422
- render: args.render || false,
423
- extract: {
424
- selectors: args.selectors,
425
- prompt: args.prompt,
426
- llmApiKey: args.llmApiKey,
427
- },
428
- };
429
- const result = await Promise.race([
430
- peel(url, options),
431
- timeout(60000, 'Extract timed out'),
432
- ]);
433
- return ok(safeStringify(result));
434
- }
435
- // webpeel_batch
436
- if (name === 'webpeel_batch') {
437
- const urls = args.urls;
438
- if (!urls || !Array.isArray(urls) || urls.length === 0)
439
- throw new Error('Invalid urls');
440
- if (urls.length > 50)
441
- throw new Error('Too many URLs (max 50)');
442
- const options = {
443
- concurrency: args.concurrency || 3,
444
- format: args.format || 'markdown',
445
- };
446
- const results = await Promise.race([
447
- peelBatch(urls, options),
448
- timeout(300000, 'Batch timed out'),
449
- ]);
450
- return ok(safeStringify(results));
451
- }
452
- // webpeel_research (and legacy alias webpeel_agent)
453
- if (name === 'webpeel_research') {
454
- const query = args.query;
455
- if (!query || typeof query !== 'string')
456
- throw new Error('Invalid query');
457
- const { research } = await import('../../core/research.js');
458
- const result = await Promise.race([
459
- research({
460
- query,
461
- maxSources: args.maxSources ?? 5,
462
- maxDepth: args.maxDepth ?? 1,
463
- apiKey: args.llmApiKey,
464
- model: args.llmModel,
465
- baseUrl: args.llmBaseUrl,
466
- outputFormat: args.outputFormat ?? 'report',
467
- timeout: args.timeout ?? 60000,
468
- }),
469
- timeout(180000, 'Research timed out'),
470
- ]);
471
- return ok(safeStringify(result));
472
- }
473
- if (name === 'webpeel_agent') {
474
- const prompt = args.prompt;
475
- const llmApiKey = args.llmApiKey;
476
- if (!prompt || typeof prompt !== 'string')
477
- throw new Error('Invalid prompt');
478
- if (!llmApiKey || typeof llmApiKey !== 'string')
479
- throw new Error('Invalid llmApiKey');
480
- const result = await Promise.race([
481
- runAgent({
482
- prompt,
483
- llmApiKey,
484
- llmModel: args.llmModel,
485
- depth: args.depth || 'basic',
486
- topic: args.topic || 'general',
487
- maxSources: args.maxSources,
488
- outputSchema: args.outputSchema,
489
- }),
490
- timeout(180000, 'Agent timed out'),
491
- ]);
492
- return ok(safeStringify(result));
493
- }
494
- // agent — LLM-free data agent: search + fetch + BM25 extraction
495
- if (name === 'agent') {
496
- const llmApiKey = args.llmApiKey;
497
- // LLM mode: delegate to existing runAgent
498
- if (llmApiKey) {
499
- const prompt = args.prompt;
500
- if (!prompt || typeof prompt !== 'string')
501
- throw new Error('Missing prompt for LLM agent mode');
502
- const result = await Promise.race([
503
- runAgent({
504
- prompt,
505
- llmApiKey,
506
- urls: args.urls,
507
- llmModel: args.llmModel,
508
- maxSources: args.maxResults || args.maxSources || undefined,
509
- }),
510
- timeout(180000, 'Agent timed out'),
511
- ]);
512
- return ok(safeStringify(result));
513
- }
514
- // LLM-free mode: search + fetch + BM25 quickAnswer
515
- const urls = args.urls || [];
516
- const search = args.search;
517
- if ((!urls || urls.length === 0) && !search) {
518
- throw new Error('Provide at least "urls" or "search". For LLM-powered research, also pass "llmApiKey".');
519
- }
520
- const prompt = args.prompt;
521
- const schema = args.schema;
522
- const budget = args.budget || 4000;
523
- const maxResults = Math.min(args.maxResults || 5, 20);
524
- const targetUrls = [...urls];
525
- if (search) {
526
- try {
527
- const { provider, apiKey } = getBestSearchProvider();
528
- const searchResults = await provider.searchWeb(search, { count: Math.max(maxResults, 5), apiKey });
529
- for (const r of searchResults) {
530
- if (!targetUrls.includes(r.url))
531
- targetUrls.push(r.url);
532
- }
533
- }
534
- catch { /* continue with provided URLs */ }
535
- }
536
- const urlsToFetch = targetUrls.slice(0, maxResults);
537
- const agentResults = [];
538
- await Promise.all(urlsToFetch.map(async (url) => {
539
- try {
540
- const page = await peel(url, { budget, format: 'markdown' });
541
- const content = page.content || '';
542
- const title = page.title || url;
543
- let extracted = null;
544
- let confidence = 0;
545
- if (schema && Object.keys(schema).length > 0) {
546
- extracted = {};
547
- let total = 0;
548
- for (const [field] of Object.entries(schema)) {
549
- const question = prompt ? `${prompt} — specifically: what is the ${field}?` : `What is the ${field}?`;
550
- const qa = quickAnswer({ question, content, maxPassages: 1, url });
551
- extracted[field] = qa.answer || '';
552
- total += qa.confidence;
553
- }
554
- if ('source' in schema)
555
- extracted['source'] = url;
556
- confidence = Object.keys(schema).length > 0 ? total / Object.keys(schema).length : 0;
557
- }
558
- else if (prompt) {
559
- const qa = quickAnswer({ question: prompt, content, maxPassages: 3, url });
560
- confidence = qa.confidence;
561
- }
562
- agentResults.push({ url, title, extracted, content: content.slice(0, 500) + (content.length > 500 ? '…' : ''), confidence });
563
- }
564
- catch { /* skip */ }
565
- }));
566
- return ok(safeStringify({
567
- success: true,
568
- data: { results: agentResults, totalSources: agentResults.length },
569
- }));
570
- }
571
- // webpeel_screenshot
572
- if (name === 'webpeel_screenshot') {
573
- const url = args.url;
574
- if (!url || typeof url !== 'string')
575
- throw new Error('Invalid URL');
576
- if (url.length > 2048)
577
- throw new Error('URL too long');
578
- const width = args.width;
579
- const height = args.height;
580
- const format = args.format || 'png';
581
- const quality = args.quality;
582
- const waitFor = args.waitFor || 0;
583
- const stealth = args.stealth || false;
584
- const fullPage = args.fullPage || false;
585
- if (width !== undefined && (width < 100 || width > 5000))
586
- throw new Error('Invalid width: must be 100–5000');
587
- if (height !== undefined && (height < 100 || height > 5000))
588
- throw new Error('Invalid height: must be 100–5000');
589
- if (!['png', 'jpeg'].includes(format))
590
- throw new Error('Invalid format');
591
- if (quality !== undefined && (quality < 1 || quality > 100))
592
- throw new Error('Invalid quality: must be 1–100');
593
- if (waitFor < 0 || waitFor > 60000)
594
- throw new Error('Invalid waitFor: must be 0–60000');
595
- const { takeScreenshot } = await import('../../core/screenshot.js');
596
- const result = await Promise.race([
597
- takeScreenshot(url, {
598
- fullPage,
599
- width,
600
- height,
601
- format,
602
- quality,
603
- waitFor,
604
- stealth,
605
- actions: args.actions,
606
- }),
607
- timeout(60000, 'Screenshot timed out'),
608
- ]);
609
- return ok(safeStringify({
610
- url: result.url,
611
- format: result.format,
612
- contentType: result.contentType,
613
- screenshot: result.screenshot,
614
- }));
615
- }
616
- // webpeel_design_analysis
617
- if (name === 'webpeel_design_analysis') {
618
- const url = args.url;
619
- if (!url || typeof url !== 'string')
620
- throw new Error('Invalid URL');
621
- if (url.length > 2048)
622
- throw new Error('URL too long');
623
- const { takeDesignAnalysis } = await import('../../core/screenshot.js');
624
- const result = await Promise.race([
625
- takeDesignAnalysis(url, {}),
626
- timeout(90000, 'Design analysis timed out'),
627
- ]);
628
- return ok(safeStringify({
629
- url: result.url,
630
- analysis: result.analysis,
631
- }));
632
- }
633
- // webpeel_design_compare
634
- if (name === 'webpeel_design_compare') {
635
- const url1 = args.url1;
636
- const url2 = args.url2;
637
- if (!url1 || typeof url1 !== 'string')
638
- throw new Error('Invalid url1');
639
- if (!url2 || typeof url2 !== 'string')
640
- throw new Error('Invalid url2');
641
- if (url1.length > 2048)
642
- throw new Error('url1 too long');
643
- if (url2.length > 2048)
644
- throw new Error('url2 too long');
645
- if (url1 === url2)
646
- throw new Error('url1 and url2 must be different URLs');
647
- const { takeDesignComparison } = await import('../../core/screenshot.js');
648
- const result = await Promise.race([
649
- takeDesignComparison(url1, url2, {}),
650
- timeout(120000, 'Design comparison timed out'),
651
- ]);
652
- return ok(safeStringify({
653
- subjectUrl: result.subjectUrl,
654
- referenceUrl: result.referenceUrl,
655
- score: result.comparison.score,
656
- summary: result.comparison.summary,
657
- gaps: result.comparison.gaps,
658
- subjectAnalysis: result.comparison.subjectAnalysis,
659
- referenceAnalysis: result.comparison.referenceAnalysis,
660
- }));
661
- }
662
- // webpeel_summarize
663
- if (name === 'webpeel_summarize') {
664
- const url = args.url;
665
- const llmApiKey = args.llmApiKey;
666
- if (!url || typeof url !== 'string')
667
- throw new Error('Invalid URL');
668
- if (url.length > 2048)
669
- throw new Error('URL too long');
670
- if (!llmApiKey || typeof llmApiKey !== 'string')
671
- throw new Error('Invalid llmApiKey');
672
- const options = {
673
- render: args.render || false,
674
- extract: {
675
- prompt: args.prompt || 'Summarize this webpage in 2-3 sentences.',
676
- llmApiKey,
677
- llmModel: args.llmModel || 'gpt-4o-mini',
678
- llmBaseUrl: args.llmBaseUrl || 'https://api.openai.com/v1',
679
- },
680
- };
681
- const result = await Promise.race([
682
- peel(url, options),
683
- timeout(60000, 'Summarize timed out'),
684
- ]);
685
- return ok(safeStringify({
686
- url: result.url,
687
- title: result.title,
688
- summary: result.extracted,
689
- }));
690
- }
691
- // webpeel_answer
692
- if (name === 'webpeel_answer') {
693
- const question = args.question;
694
- const llmProvider = args.llmProvider;
695
- const llmApiKey = args.llmApiKey;
696
- if (!question || typeof question !== 'string')
697
- throw new Error('Invalid question');
698
- if (question.length > 2000)
699
- throw new Error('Question too long (max 2000 characters)');
700
- const validLlmProviders = ['openai', 'anthropic', 'google'];
701
- if (!llmProvider || !validLlmProviders.includes(llmProvider)) {
702
- throw new Error('Invalid llmProvider: must be openai, anthropic, or google');
703
- }
704
- if (!llmApiKey || typeof llmApiKey !== 'string')
705
- throw new Error('Invalid llmApiKey');
706
- const _validSearchProviders = ['duckduckgo', 'brave', 'stealth', 'google'];
707
- const spId = _validSearchProviders.includes(args.searchProvider)
708
- ? args.searchProvider
709
- : 'duckduckgo';
710
- const maxSources = typeof args.maxSources === 'number' ? Math.min(Math.max(args.maxSources, 1), 10) : 5;
711
- const result = await Promise.race([
712
- answerQuestion({
713
- question,
714
- searchProvider: spId,
715
- searchApiKey: args.searchApiKey,
716
- llmProvider: llmProvider,
717
- llmApiKey,
718
- llmModel: args.llmModel,
719
- maxSources,
720
- stream: false,
721
- }),
722
- timeout(180000, 'Answer timed out'),
723
- ]);
724
- return ok(safeStringify(result));
725
- }
726
- // webpeel_brand
727
- if (name === 'webpeel_brand') {
728
- const url = args.url;
729
- if (!url || typeof url !== 'string')
730
- throw new Error('Invalid URL');
731
- if (url.length > 2048)
732
- throw new Error('URL too long');
733
- const options = {
734
- render: args.render || false,
735
- extract: {
736
- selectors: {
737
- primaryColor: 'meta[name="theme-color"]',
738
- title: 'title',
739
- logo: 'img[class*="logo"], img[alt*="logo"]',
740
- },
741
- },
742
- };
743
- const result = await Promise.race([
744
- peel(url, options),
745
- timeout(60000, 'Brand extraction timed out'),
746
- ]);
747
- return ok(safeStringify({
748
- url: result.url,
749
- title: result.title,
750
- extracted: result.extracted,
751
- metadata: result.metadata,
752
- colors: extractColorsFromContent(result.content || ''),
753
- fonts: extractFontsFromContent(result.content || ''),
754
- }));
755
- }
756
- // webpeel_change_track
757
- if (name === 'webpeel_change_track') {
758
- const url = args.url;
759
- if (!url || typeof url !== 'string')
760
- throw new Error('Invalid URL');
761
- if (url.length > 2048)
762
- throw new Error('URL too long');
763
- const options = {
764
- render: args.render || false,
765
- };
766
- const result = await Promise.race([
767
- peel(url, options),
768
- timeout(60000, 'Change tracking timed out'),
769
- ]);
770
- return ok(safeStringify({
771
- url: result.url,
772
- title: result.title,
773
- fingerprint: result.fingerprint,
774
- tokens: result.tokens,
775
- contentType: result.contentType,
776
- lastChecked: new Date().toISOString(),
777
- }));
778
- }
779
- // webpeel_deep_fetch
780
- if (name === 'webpeel_deep_fetch') {
781
- const query = args.query;
782
- if (!query || typeof query !== 'string')
783
- throw new Error('Invalid query');
784
- const count = Math.min(Math.max(args.count || 5, 1), 10);
785
- const rawFormat = args.format || 'markdown';
786
- const isComparison = rawFormat === 'comparison';
787
- const format = (isComparison ? 'markdown' : rawFormat);
788
- // Step 1: Search for the query using best available provider
789
- const { getBestSearchProvider } = await import('../../core/search-provider.js');
790
- const { provider, apiKey } = getBestSearchProvider();
791
- const searchResults = await Promise.race([
792
- provider.searchWeb(query, { count, apiKey }),
793
- timeout(30000, 'Search timed out'),
794
- ]);
795
- const results = searchResults?.results ?? searchResults ?? [];
796
- const topResults = Array.isArray(results) ? results.slice(0, count) : [];
797
- if (topResults.length === 0) {
798
- return ok(safeStringify({ query, sources: [], content: '', totalTokens: 0 }));
799
- }
800
- // Step 2: Fetch all URLs in parallel
801
- const urls = topResults.map((r) => r.url).filter(Boolean);
802
- const pages = await Promise.race([
803
- peelBatch(urls, { concurrency: 5, format }),
804
- timeout(120000, 'Batch fetch timed out'),
805
- ]);
806
- // Step 3: Merge content with source attribution
807
- const sources = [];
808
- const contentParts = [];
809
- let totalTokens = 0;
810
- for (let i = 0; i < pages.length; i++) {
811
- const page = pages[i];
812
- const searchResult = topResults[i];
813
- const pageUrl = urls[i];
814
- const title = page?.title || searchResult?.title || pageUrl;
815
- // Position-based relevance score (top result = 1.0, decreasing)
816
- const relevanceScore = Math.round((1 - i / Math.max(pages.length, 1)) * 100) / 100;
817
- sources.push({ url: pageUrl, title, relevanceScore, ...(searchResult?.snippet ? { snippet: searchResult.snippet } : {}) });
818
- if (page?.content) {
819
- contentParts.push(`## Source ${i + 1}: ${title}\n**URL:** ${pageUrl}\n\n${page.content}\n\n---\n`);
820
- totalTokens += page.tokens || 0;
821
- }
822
- else if (page?.error) {
823
- contentParts.push(`## Source ${i + 1}: ${title}\n**URL:** ${pageUrl}\n\n*(Failed to fetch: ${page.error})*\n\n---\n`);
824
- }
825
- }
826
- const mergedContent = contentParts.join('\n');
827
- const deepFetchOutput = {
828
- query,
829
- sources,
830
- content: mergedContent,
831
- totalTokens,
832
- };
833
- // For comparison format, add a structured comparison hint
834
- if (isComparison) {
835
- deepFetchOutput.format = 'comparison';
836
- deepFetchOutput.comparisonNote = 'Sources fetched and ranked by relevance. Review sources array and content sections for side-by-side comparison.';
837
- }
838
- return ok(safeStringify(deepFetchOutput));
839
- }
840
- // webpeel_quick_answer
841
- if (name === 'webpeel_quick_answer') {
842
- const url = args.url;
843
- const question = args.question;
844
- if (!url || typeof url !== 'string')
845
- throw new Error('Invalid URL');
846
- if (url.length > 2048)
847
- throw new Error('URL too long');
848
- if (!question || typeof question !== 'string')
849
- throw new Error('Invalid question');
850
- if (question.length > 1000)
851
- throw new Error('Question too long (max 1000 characters)');
852
- const maxPassages = typeof args.maxPassages === 'number' ? Math.min(Math.max(args.maxPassages, 1), 10) : 3;
853
- const peelResult = await Promise.race([
854
- peel(url, {
855
- render: args.render || false,
856
- format: 'markdown',
857
- budget: 8000,
858
- }),
859
- timeout(60000, 'Quick answer fetch timed out'),
860
- ]);
861
- const { quickAnswer } = await import('../../core/quick-answer.js');
862
- const qa = quickAnswer({
863
- question,
864
- content: peelResult.content || '',
865
- url: peelResult.url || url,
866
- maxPassages,
867
- });
868
- return ok(safeStringify({
869
- url: peelResult.url || url,
870
- title: peelResult.title,
871
- question: qa.question,
872
- answer: qa.answer,
873
- confidence: qa.confidence,
874
- passages: qa.passages,
875
- method: qa.method,
876
- }));
877
- }
878
- // webpeel_youtube
879
- if (name === 'webpeel_youtube') {
880
- const url = args.url;
881
- if (!url || typeof url !== 'string')
882
- throw new Error('Invalid URL');
883
- const { getYouTubeTranscript } = await import('../../core/youtube.js');
884
- const transcript = await Promise.race([
885
- getYouTubeTranscript(url, {
886
- language: args.language ?? 'en',
887
- }),
888
- timeout(60000, 'YouTube transcript extraction timed out'),
889
- ]);
890
- return ok(safeStringify(transcript));
891
- }
892
- // webpeel_auto_extract
893
- if (name === 'webpeel_auto_extract') {
894
- const url = args.url;
895
- if (!url)
896
- return { content: [{ type: 'text', text: JSON.stringify({ error: 'Missing url parameter' }) }] };
897
- const { autoExtract } = await import('../../core/auto-extract.js');
898
- const result = await peel(url, { format: 'html' });
899
- const extracted = autoExtract(result.content || '', url);
900
- return {
901
- content: [{ type: 'text', text: JSON.stringify({
902
- url,
903
- pageType: extracted.type,
904
- structured: extracted,
905
- }, null, 2) }],
906
- };
907
- }
908
- // webpeel_watch
909
- if (name === 'webpeel_watch') {
910
- const action = args.action;
911
- if (!pool) {
912
- return { content: [{ type: 'text', text: JSON.stringify({ error: 'Watch feature requires database connection. Use the REST API at /v1/watch instead.' }) }] };
913
- }
914
- const { WatchManager } = await import('../../core/watch-manager.js');
915
- const wm = new WatchManager(pool);
916
- const accountId = req?.auth?.keyInfo?.accountId || req?.auth?.keyInfo?.userId || 'anonymous';
917
- if (action === 'create') {
918
- const watch = await wm.create(accountId, args.url, {
919
- webhookUrl: args.webhookUrl,
920
- checkIntervalMinutes: args.intervalMinutes || 60,
921
- selector: args.selector,
922
- });
923
- return { content: [{ type: 'text', text: JSON.stringify(watch, null, 2) }] };
924
- }
925
- if (action === 'list') {
926
- const watches = await wm.list(accountId);
927
- return { content: [{ type: 'text', text: JSON.stringify(watches, null, 2) }] };
928
- }
929
- if (action === 'check') {
930
- const result = await wm.check(args.id);
931
- return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
932
- }
933
- if (action === 'delete') {
934
- await wm.delete(args.id);
935
- return { content: [{ type: 'text', text: JSON.stringify({ success: true }) }] };
936
- }
937
- return { content: [{ type: 'text', text: JSON.stringify({ error: `Unknown watch action: ${action}` }) }] };
938
- }
939
- // webpeel_hotels
940
- if (name === 'webpeel_hotels') {
941
- const { searchHotels, parseDate, addDays } = await import('../../core/hotel-search.js');
942
- const destination = args.destination;
943
- if (!destination)
944
- return { content: [{ type: 'text', text: safeStringify({ error: 'Missing destination' }) }] };
945
- const checkin = args.checkin ? parseDate(args.checkin) : parseDate('tomorrow');
946
- const checkout = args.checkout ? parseDate(args.checkout) : addDays(checkin, 1);
947
- const sort = (['price', 'rating', 'value'].includes(args.sort) ? args.sort : 'price');
948
- const limit = Math.max(1, Math.min(50, args.limit || 20));
949
- const result = await searchHotels({ destination, checkin, checkout, sort, limit, stealth: true });
950
- return { content: [{ type: 'text', text: safeStringify({ destination, checkin, checkout, sources: result.sources, count: result.results.length, results: result.results.slice(0, limit) }) }] };
951
- }
952
- // webpeel_act — page interaction (click, fill, scroll, screenshot)
953
- if (name === 'webpeel_act') {
954
- const url = args.url;
955
- const actions = args.actions || [];
956
- const extract = args.extract !== false;
957
- const screenshot = Boolean(args.screenshot);
958
- if (!url)
959
- return { content: [{ type: 'text', text: safeStringify({ error: 'url is required' }) }] };
960
- if (!actions.length)
961
- return { content: [{ type: 'text', text: safeStringify({ error: 'actions array is required' }) }] };
962
- const { peel } = await import('../../index.js');
963
- const { normalizeActions } = await import('../../core/actions.js');
964
- const normalized = normalizeActions(actions) || [];
965
- const result = await peel(url, {
966
- render: true,
967
- actions: normalized,
968
- screenshot,
969
- format: 'markdown',
970
- budget: 4000,
971
- timeout: 25000,
972
- });
973
- return {
974
- content: [{
975
- type: 'text',
976
- text: safeStringify({
977
- url: result.url,
978
- title: result.title,
979
- content: extract ? result.content : undefined,
980
- screenshot: result.screenshot,
981
- method: result.method,
982
- elapsed: result.elapsed,
983
- }),
984
- }],
985
- };
986
- }
987
- // ── Consolidated tools (route to existing specific handlers) ──
988
- // These are the 7 new public tools that map to the 20+ legacy handlers.
989
- // webpeel_read → webpeel_fetch (with YouTube auto-detect)
990
- if (name === 'webpeel_read') {
991
- const url = args.url;
992
- if (!url)
993
- return { content: [{ type: 'text', text: safeStringify({ error: 'url is required' }) }] };
994
- // YouTube auto-detect
995
- const ytMatch = url.match(/(?:youtube\.com\/watch\?v=|youtu\.be\/)([\w-]+)/);
996
- if (ytMatch) {
997
- // Route to YouTube handler
998
- const { getYouTubeTranscript } = await import('../../core/youtube.js');
999
- const transcript = await getYouTubeTranscript(url, { language: args.language || 'en' });
1000
- return { content: [{ type: 'text', text: safeStringify(transcript) }] };
1001
- }
1002
- // Standard fetch
1003
- const { peel } = await import('../../index.js');
1004
- const result = await peel(url, {
1005
- render: Boolean(args.render),
1006
- format: (args.format || 'markdown'),
1007
- budget: args.budget || 4000,
1008
- readable: Boolean(args.readable),
1009
- summary: Boolean(args.summary),
1010
- timeout: 30000,
1011
- });
1012
- const response = {
1013
- url: result.url,
1014
- title: result.title,
1015
- content: result.content,
1016
- tokens: result.tokens,
1017
- method: result.method,
1018
- elapsed: result.elapsed,
1019
- };
1020
- if (args.question && result.content) {
1021
- const { quickAnswer } = await import('../../core/quick-answer.js');
1022
- const qa = quickAnswer({ content: result.content, question: args.question, url: result.url });
1023
- response.answer = qa.answer;
1024
- response.confidence = qa.confidence;
1025
- }
1026
- if (args.summary && result.content) {
1027
- response.summary = result.content.slice(0, 500);
1028
- }
1029
- return { content: [{ type: 'text', text: safeStringify(response) }] };
1030
- }
1031
- // webpeel_see → screenshot / design analysis / design compare
1032
- if (name === 'webpeel_see') {
1033
- const url = args.url;
1034
- if (!url)
1035
- return { content: [{ type: 'text', text: safeStringify({ error: 'url is required' }) }] };
1036
- const mode = args.mode || 'screenshot';
1037
- const compareUrl = args.compare_url;
1038
- // Resolve viewport
1039
- let width = 1280, height = 720;
1040
- if (args.viewport === 'mobile') {
1041
- width = 390;
1042
- height = 844;
1043
- }
1044
- else if (args.viewport === 'tablet') {
1045
- width = 768;
1046
- height = 1024;
1047
- }
1048
- else if (args.viewport && typeof args.viewport === 'object') {
1049
- const vp = args.viewport;
1050
- width = vp.width ?? 1280;
1051
- height = vp.height ?? 720;
1052
- }
1053
- if (mode === 'design') {
1054
- const { takeDesignAnalysis } = await import('../../core/screenshot.js');
1055
- const analysis = await takeDesignAnalysis(url, { width, height });
1056
- return { content: [{ type: 'text', text: safeStringify(analysis) }] };
1057
- }
1058
- if (mode === 'compare' && compareUrl) {
1059
- const { takeDesignComparison } = await import('../../core/screenshot.js');
1060
- const comparison = await takeDesignComparison(url, compareUrl, {});
1061
- return { content: [{ type: 'text', text: safeStringify(comparison) }] };
1062
- }
1063
- // Default: screenshot
1064
- const { peel } = await import('../../index.js');
1065
- const result = await peel(url, {
1066
- render: true,
1067
- screenshot: true,
1068
- fullPage: Boolean(args.full_page),
1069
- timeout: 30000,
1070
- });
1071
- return { content: [{ type: 'text', text: safeStringify({ url: result.url, title: result.title, screenshot: result.screenshot }) }] };
1072
- }
1073
- // webpeel_find → search (query) or map (url without query)
1074
- if (name === 'webpeel_find') {
1075
- const query = args.query;
1076
- const url = args.url;
1077
- const limit = Math.min(Math.max(args.limit ?? 5, 1), 20);
1078
- // URL-only: map domain
1079
- if (url && !query) {
1080
- const { mapDomain } = await import('../../core/map.js');
1081
- const results = await mapDomain(url, { maxUrls: limit * 100 });
1082
- return { content: [{ type: 'text', text: safeStringify(results) }] };
1083
- }
1084
- if (!query)
1085
- return { content: [{ type: 'text', text: safeStringify({ error: 'Either query or url is required' }) }] };
1086
- // Question detection → BM25 Q&A (like /v1/ask)
1087
- const isQuestion = /\?$/.test(query.trim()) ||
1088
- /^(what|how|when|where|why|who|which|can|does|is|are|do|did|will|would|could|should)\b/i.test(query.trim());
1089
- if (isQuestion) {
1090
- const { getBestSearchProvider: getBSP } = await import('../../core/search-provider.js');
1091
- const { provider, apiKey: sKey } = getBSP();
1092
- const searchResults = await provider.searchWeb(query, { count: Math.min(limit, 5), apiKey: sKey });
1093
- if (searchResults.length > 0) {
1094
- const { peel } = await import('../../index.js');
1095
- const topUrl = searchResults[0].url;
1096
- const result = await peel(topUrl, { budget: 4000, timeout: 15000 });
1097
- const { quickAnswer } = await import('../../core/quick-answer.js');
1098
- const answer = quickAnswer({ content: result.content || '', question: query, url: topUrl });
1099
- return { content: [{ type: 'text', text: safeStringify({ question: query, answer: answer.answer, confidence: answer.confidence, sources: searchResults.slice(0, 3).map((r) => ({ url: r.url, title: r.title })), method: 'bm25' }) }] };
1100
- }
1101
- }
1102
- // Regular search
1103
- const { getBestSearchProvider: getBSP2 } = await import('../../core/search-provider.js');
1104
- const { provider: sp, apiKey: sk } = getBSP2();
1105
- const results = await sp.searchWeb(query, { count: limit, apiKey: sk });
1106
- return { content: [{ type: 'text', text: safeStringify({ query, results: results.slice(0, limit) }) }] };
1107
- }
1108
- // webpeel_monitor → watch/change detection
1109
- if (name === 'webpeel_monitor') {
1110
- const url = args.url;
1111
- if (!url)
1112
- return { content: [{ type: 'text', text: safeStringify({ error: 'url is required' }) }] };
1113
- const webhook = args.webhook;
1114
- if (webhook) {
1115
- return { content: [{ type: 'text', text: safeStringify({ message: 'Persistent webhook monitoring requires the hosted API. Use webpeel_monitor without webhook= for one-time change detection.', url }) }] };
1116
- }
1117
- // One-time change snapshot
1118
- const { peel } = await import('../../index.js');
1119
- const result = await peel(url, {
1120
- render: Boolean(args.render),
1121
- ...(args.selector ? { selector: args.selector } : {}),
1122
- timeout: 30000,
1123
- });
1124
- return {
1125
- content: [{
1126
- type: 'text',
1127
- text: safeStringify({
1128
- url: result.url,
1129
- title: result.title,
1130
- content: result.content?.slice(0, 2000),
1131
- tokens: result.tokens,
1132
- snapshot_at: new Date().toISOString(),
1133
- tip: 'Call again later to compare content manually, or use webhook= for persistent monitoring.',
1134
- }),
1135
- }],
1136
- };
1137
- }
1138
- throw new Error(`Unknown tool: ${name}`);
1139
- }
1140
- catch (error) {
1141
- const err = error;
1142
- return {
1143
- content: [{ type: 'text', text: safeStringify({ error: err.name || 'Error', message: err.message || 'Unknown error' }) }],
1144
- isError: true,
1145
- };
1146
- }
1147
- }
1148
/**
 * Wrap a string in an MCP tool-result envelope.
 *
 * @param {string} text - Payload to place in the single `text` content item.
 * @returns {{content: Array<{type: string, text: string}>}} Result object
 *   holding exactly one content entry of type `text`.
 */
function ok(text) {
    const textItem = { type: 'text', text };
    return { content: [textItem] };
}
1151
/**
 * Create a promise that never fulfils and rejects after `ms` milliseconds.
 * Meant to be passed to `Promise.race()` next to the real work so that the
 * work is bounded in time.
 *
 * @param {number} ms - Delay in milliseconds before the rejection fires.
 * @param {string} msg - Message of the `Error` used as the rejection reason.
 * @returns {Promise<never>} Promise that rejects with `new Error(msg)`.
 */
function timeout(ms, msg) {
    return new Promise((_resolve, reject) => {
        const timer = setTimeout(() => reject(new Error(msg)), ms);
        // Nothing clears this timer when the raced operation wins, so it stays
        // scheduled for the full delay. Unref it so a stale timeout can never
        // be the only thing keeping the Node.js process alive (e.g. during a
        // graceful shutdown). The timer still fires while the loop is running.
        timer.unref?.();
    });
}
1154
- // ---------------------------------------------------------------------------
1155
- // Create a fresh MCP server instance (stateless — one per request)
1156
- // ---------------------------------------------------------------------------
1157
/**
 * Build a new MCP `Server` instance wired to this module's tool set.
 *
 * Two request handlers are registered: one answering the tool-listing schema
 * with the result of `getTools()`, and one forwarding tool calls to
 * `handleToolCall` together with the supplied `pool` and `req`.
 *
 * @param {*} pool - Passed through unchanged to `handleToolCall`.
 * @param {*} req - Passed through unchanged to `handleToolCall`.
 * @returns {Server} The configured server (not yet connected to a transport).
 */
function createMcpServer(pool, req) {
    const serverInfo = { name: 'webpeel', version: pkgVersion };
    const serverOptions = { capabilities: { tools: {} } };
    const mcpServer = new Server(serverInfo, serverOptions);
    const toolList = getTools();
    mcpServer.setRequestHandler(ListToolsRequestSchema, async () => {
        return { tools: toolList };
    });
    mcpServer.setRequestHandler(CallToolRequestSchema, async ({ params }) => {
        // A call without arguments is treated as a call with an empty object.
        const toolArgs = params.arguments ?? {};
        return handleToolCall(params.name, toolArgs, pool, req);
    });
    return mcpServer;
}
1167
- // ---------------------------------------------------------------------------
1168
- // Express router
1169
- // ---------------------------------------------------------------------------
1170
- // ---------------------------------------------------------------------------
1171
- // Shared MCP handler logic
1172
- // ---------------------------------------------------------------------------
1173
/**
 * Handle one MCP JSON-RPC POST using a per-request server and transport.
 *
 * Unauthenticated requests (no `req.auth.keyInfo.accountId` and no
 * `req.user.userId`) are answered with HTTP 401 and a JSON-RPC error.
 * Otherwise a fresh server/transport pair is created, connected, and the
 * request is delegated to the transport. Any failure is logged and, if no
 * headers were sent yet, answered with HTTP 500 and a JSON-RPC internal error.
 *
 * The server and transport are released exactly once: when the HTTP response
 * closes, or immediately on the error path.
 *
 * @param {*} req - Express request; `req.body` is handed to the transport as
 *   the pre-parsed JSON body.
 * @param {*} res - Express response.
 * @param {*} pool - Forwarded to `createMcpServer`.
 * @returns {Promise<void>}
 */
async function handleMcpPost(req, res, pool) {
    // Require authentication — reject unauthenticated requests.
    const mcpAuthId = req.auth?.keyInfo?.accountId || req.user?.userId;
    if (!mcpAuthId) {
        res.status(401).json({
            jsonrpc: '2.0',
            error: { code: -32001, message: 'Authentication required. Pass API key via Authorization: Bearer <key> header or use /:apiKey/v2/mcp path.' },
            id: null,
        });
        return;
    }
    let server;
    let transport;
    let cleanedUp = false;
    // Idempotent, best-effort teardown; close() failures are deliberately ignored.
    const cleanup = () => {
        if (cleanedUp) {
            return;
        }
        cleanedUp = true;
        transport?.close().catch(() => { });
        server?.close().catch(() => { });
    };
    try {
        server = createMcpServer(pool, req);
        transport = new StreamableHTTPServerTransport({
            sessionIdGenerator: undefined, // stateless
        });
        // Release resources when the HTTP response is done rather than right
        // after handleRequest() returns: closing earlier risks tearing down the
        // transport while a response is still being produced.
        // NOTE(review): confirm against the installed MCP SDK version.
        res.on('close', cleanup);
        // Connect server ↔ transport
        await server.connect(transport);
        // Delegate to transport — it reads the JSON-RPC body and writes the response.
        // We pass req.body as the pre-parsed body (Express already parsed JSON).
        await transport.handleRequest(req, res, req.body);
        // If the response already finished or was torn down, 'close' may have
        // been emitted before the listener could observe it; clean up now.
        if (res.writableEnded || res.destroyed) {
            cleanup();
        }
    }
    catch (error) {
        console.error('MCP endpoint error:', error);
        if (!res.headersSent) {
            res.status(500).json({
                jsonrpc: '2.0',
                error: { code: -32603, message: 'Internal error' },
                id: null,
            });
        }
        // The success-path cleanup is never reached on failure; release here.
        cleanup();
    }
}
1211
/**
 * Answer a request with HTTP 405 and a JSON-RPC error telling the client to
 * use POST.
 *
 * @param {*} _req - Unused.
 * @param {*} res - Express response used to send the status and JSON body.
 * @returns {void}
 */
function mcpMethodNotAllowed(_req, res) {
    const rpcError = {
        code: -32000,
        message: 'Method not allowed. Use POST to send MCP JSON-RPC messages.',
    };
    const payload = { jsonrpc: '2.0', error: rpcError, id: null };
    res.status(405).json(payload);
}
1221
/**
 * Acknowledge a DELETE request with HTTP 200 and `{ ok: true }`.
 *
 * @param {*} _req - Unused.
 * @param {*} res - Express response used to send the status and JSON body.
 * @returns {void}
 */
function mcpDeleteOk(_req, res) {
    const ack = { ok: true };
    res.status(200).json(ack);
}
1224
- // ---------------------------------------------------------------------------
1225
- // Express router
1226
- // ---------------------------------------------------------------------------
1227
/**
 * Build the Express router exposing the MCP endpoints.
 *
 * Routes registered, in order:
 *  - `/mcp` (legacy) and `/v2/mcp` (canonical): POST runs `handleMcpPost`
 *    with the given `pool`, GET answers via `mcpMethodNotAllowed`, DELETE
 *    answers via `mcpDeleteOk`.
 *  - `/:apiKey/v2/mcp`: POST, GET and DELETE all answer HTTP 400 with an
 *    `insecure_auth` error, because API keys in URLs get recorded in server
 *    logs, browser history, and proxy access logs.
 *
 * @param {*} _authStore - Unused.
 * @param {*} pool - Forwarded to `handleMcpPost` for every POST.
 * @returns {*} The configured router.
 */
export function createMcpRouter(_authStore, pool) {
    const router = Router();
    const postHandler = (req, res) => handleMcpPost(req, res, pool);
    // Legacy path first, then the canonical v2 path; auth for both is expected
    // to come from the Authorization: Bearer <key> header.
    for (const path of ['/mcp', '/v2/mcp']) {
        router.post(path, postHandler);
        router.get(path, mcpMethodNotAllowed);
        router.delete(path, mcpDeleteOk);
    }
    // SECURITY: key-in-URL access is blocked for every method.
    const rejectKeyInUrl = (req, res) => {
        const errorInfo = {
            type: 'insecure_auth',
            message: 'API keys in URLs are insecure.',
            hint: 'Use the Authorization header instead: Authorization: Bearer wp_your_key',
            docs: 'https://webpeel.dev/docs/api-reference#authentication',
        };
        res.status(400).json({
            success: false,
            error: errorInfo,
            requestId: req.requestId,
        });
    };
    for (const verb of ['post', 'get', 'delete']) {
        router[verb]('/:apiKey/v2/mcp', rejectKeyInUrl);
    }
    return router;
}
1260
- //# sourceMappingURL=mcp.js.map