@iflow-mcp/jakeliume-webpeel 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (547)
  1. package/LICENSE +15 -0
  2. package/README.md +313 -0
  3. package/dist/cache.d.ts +30 -0
  4. package/dist/cache.js +139 -0
  5. package/dist/cli/commands/auth.d.ts +5 -0
  6. package/dist/cli/commands/auth.js +411 -0
  7. package/dist/cli/commands/doctor.d.ts +37 -0
  8. package/dist/cli/commands/doctor.js +371 -0
  9. package/dist/cli/commands/fetch.d.ts +6 -0
  10. package/dist/cli/commands/fetch.js +1345 -0
  11. package/dist/cli/commands/guide.d.ts +2 -0
  12. package/dist/cli/commands/guide.js +183 -0
  13. package/dist/cli/commands/interact.d.ts +5 -0
  14. package/dist/cli/commands/interact.js +840 -0
  15. package/dist/cli/commands/jobs.d.ts +5 -0
  16. package/dist/cli/commands/jobs.js +997 -0
  17. package/dist/cli/commands/monitor.d.ts +12 -0
  18. package/dist/cli/commands/monitor.js +197 -0
  19. package/dist/cli/commands/observe.d.ts +12 -0
  20. package/dist/cli/commands/observe.js +158 -0
  21. package/dist/cli/commands/screenshot.d.ts +5 -0
  22. package/dist/cli/commands/screenshot.js +282 -0
  23. package/dist/cli/commands/search.d.ts +5 -0
  24. package/dist/cli/commands/search.js +1021 -0
  25. package/dist/cli/commands/setup.d.ts +13 -0
  26. package/dist/cli/commands/setup.js +244 -0
  27. package/dist/cli/commands/skill.d.ts +15 -0
  28. package/dist/cli/commands/skill.js +195 -0
  29. package/dist/cli/utils.d.ts +84 -0
  30. package/dist/cli/utils.js +806 -0
  31. package/dist/cli-auth.d.ts +75 -0
  32. package/dist/cli-auth.js +369 -0
  33. package/dist/cli.d.ts +17 -0
  34. package/dist/cli.js +99 -0
  35. package/dist/core/actions.d.ts +69 -0
  36. package/dist/core/actions.js +495 -0
  37. package/dist/core/agent.d.ts +98 -0
  38. package/dist/core/agent.js +558 -0
  39. package/dist/core/answer.d.ts +42 -0
  40. package/dist/core/answer.js +395 -0
  41. package/dist/core/application-tracker.d.ts +84 -0
  42. package/dist/core/application-tracker.js +184 -0
  43. package/dist/core/apply.d.ts +162 -0
  44. package/dist/core/apply.js +816 -0
  45. package/dist/core/auth-detection.d.ts +35 -0
  46. package/dist/core/auth-detection.js +358 -0
  47. package/dist/core/auto-extract.d.ts +82 -0
  48. package/dist/core/auto-extract.js +604 -0
  49. package/dist/core/auto-interact.d.ts +23 -0
  50. package/dist/core/auto-interact.js +246 -0
  51. package/dist/core/bm25-filter.d.ts +66 -0
  52. package/dist/core/bm25-filter.js +288 -0
  53. package/dist/core/branding.d.ts +54 -0
  54. package/dist/core/branding.js +234 -0
  55. package/dist/core/browser-fetch.d.ts +323 -0
  56. package/dist/core/browser-fetch.js +1600 -0
  57. package/dist/core/browser-pool.d.ts +91 -0
  58. package/dist/core/browser-pool.js +550 -0
  59. package/dist/core/budget.d.ts +42 -0
  60. package/dist/core/budget.js +324 -0
  61. package/dist/core/business-intel.d.ts +47 -0
  62. package/dist/core/business-intel.js +279 -0
  63. package/dist/core/cache.d.ts +13 -0
  64. package/dist/core/cache.js +121 -0
  65. package/dist/core/cf-worker-proxy.d.ts +32 -0
  66. package/dist/core/cf-worker-proxy.js +87 -0
  67. package/dist/core/challenge-detection.d.ts +26 -0
  68. package/dist/core/challenge-detection.js +468 -0
  69. package/dist/core/change-tracking.d.ts +75 -0
  70. package/dist/core/change-tracking.js +276 -0
  71. package/dist/core/chunker.d.ts +46 -0
  72. package/dist/core/chunker.js +249 -0
  73. package/dist/core/chunking.d.ts +42 -0
  74. package/dist/core/chunking.js +181 -0
  75. package/dist/core/circuit-breaker.d.ts +44 -0
  76. package/dist/core/circuit-breaker.js +85 -0
  77. package/dist/core/content-pruner.d.ts +47 -0
  78. package/dist/core/content-pruner.js +425 -0
  79. package/dist/core/cookie-cache.d.ts +60 -0
  80. package/dist/core/cookie-cache.js +163 -0
  81. package/dist/core/crawl-checkpoint.d.ts +54 -0
  82. package/dist/core/crawl-checkpoint.js +104 -0
  83. package/dist/core/crawler.d.ts +84 -0
  84. package/dist/core/crawler.js +349 -0
  85. package/dist/core/cross-verify.d.ts +27 -0
  86. package/dist/core/cross-verify.js +93 -0
  87. package/dist/core/deep-fetch.d.ts +74 -0
  88. package/dist/core/deep-fetch.js +405 -0
  89. package/dist/core/deep-research.d.ts +141 -0
  90. package/dist/core/deep-research.js +972 -0
  91. package/dist/core/design-analysis.d.ts +70 -0
  92. package/dist/core/design-analysis.js +490 -0
  93. package/dist/core/design-compare.d.ts +38 -0
  94. package/dist/core/design-compare.js +264 -0
  95. package/dist/core/diff.d.ts +61 -0
  96. package/dist/core/diff.js +289 -0
  97. package/dist/core/dns-cache.d.ts +20 -0
  98. package/dist/core/dns-cache.js +198 -0
  99. package/dist/core/documents.d.ts +23 -0
  100. package/dist/core/documents.js +123 -0
  101. package/dist/core/domain-memory.d.ts +66 -0
  102. package/dist/core/domain-memory.js +163 -0
  103. package/dist/core/domain-verify.d.ts +40 -0
  104. package/dist/core/domain-verify.js +379 -0
  105. package/dist/core/engine-ranker.d.ts +112 -0
  106. package/dist/core/engine-ranker.js +395 -0
  107. package/dist/core/extract-inline.d.ts +38 -0
  108. package/dist/core/extract-inline.js +215 -0
  109. package/dist/core/extract-listings.d.ts +38 -0
  110. package/dist/core/extract-listings.js +461 -0
  111. package/dist/core/extract.d.ts +9 -0
  112. package/dist/core/extract.js +139 -0
  113. package/dist/core/fetch-cache.d.ts +57 -0
  114. package/dist/core/fetch-cache.js +95 -0
  115. package/dist/core/fetcher.d.ts +13 -0
  116. package/dist/core/fetcher.js +12 -0
  117. package/dist/core/google-cache.d.ts +29 -0
  118. package/dist/core/google-cache.js +180 -0
  119. package/dist/core/google-serp-parser.d.ts +82 -0
  120. package/dist/core/google-serp-parser.js +287 -0
  121. package/dist/core/hotel-search.d.ts +122 -0
  122. package/dist/core/hotel-search.js +382 -0
  123. package/dist/core/http-fetch.d.ts +72 -0
  124. package/dist/core/http-fetch.js +820 -0
  125. package/dist/core/human.d.ts +175 -0
  126. package/dist/core/human.js +680 -0
  127. package/dist/core/image-caption.d.ts +44 -0
  128. package/dist/core/image-caption.js +271 -0
  129. package/dist/core/jobs.d.ts +75 -0
  130. package/dist/core/jobs.js +634 -0
  131. package/dist/core/json-ld.d.ts +15 -0
  132. package/dist/core/json-ld.js +617 -0
  133. package/dist/core/language-detect.d.ts +18 -0
  134. package/dist/core/language-detect.js +135 -0
  135. package/dist/core/links.d.ts +10 -0
  136. package/dist/core/links.js +44 -0
  137. package/dist/core/llm-extract.d.ts +71 -0
  138. package/dist/core/llm-extract.js +507 -0
  139. package/dist/core/llm-provider.d.ts +100 -0
  140. package/dist/core/llm-provider.js +702 -0
  141. package/dist/core/local-search.d.ts +60 -0
  142. package/dist/core/local-search.js +308 -0
  143. package/dist/core/logger.d.ts +28 -0
  144. package/dist/core/logger.js +104 -0
  145. package/dist/core/map.d.ts +33 -0
  146. package/dist/core/map.js +127 -0
  147. package/dist/core/markdown.d.ts +92 -0
  148. package/dist/core/markdown.js +809 -0
  149. package/dist/core/metadata.d.ts +34 -0
  150. package/dist/core/metadata.js +422 -0
  151. package/dist/core/observe.d.ts +113 -0
  152. package/dist/core/observe.js +395 -0
  153. package/dist/core/ocr.d.ts +12 -0
  154. package/dist/core/ocr.js +33 -0
  155. package/dist/core/paginate.d.ts +31 -0
  156. package/dist/core/paginate.js +106 -0
  157. package/dist/core/pdf.d.ts +8 -0
  158. package/dist/core/pdf.js +25 -0
  159. package/dist/core/peel-tls.d.ts +25 -0
  160. package/dist/core/peel-tls.js +220 -0
  161. package/dist/core/pipeline.d.ts +132 -0
  162. package/dist/core/pipeline.js +1666 -0
  163. package/dist/core/profiles.d.ts +61 -0
  164. package/dist/core/profiles.js +350 -0
  165. package/dist/core/prompt-guard.d.ts +30 -0
  166. package/dist/core/prompt-guard.js +119 -0
  167. package/dist/core/proxy-config.d.ts +90 -0
  168. package/dist/core/proxy-config.js +172 -0
  169. package/dist/core/quick-answer.d.ts +53 -0
  170. package/dist/core/quick-answer.js +833 -0
  171. package/dist/core/rate-governor.d.ts +80 -0
  172. package/dist/core/rate-governor.js +238 -0
  173. package/dist/core/readability.d.ts +57 -0
  174. package/dist/core/readability.js +533 -0
  175. package/dist/core/research.d.ts +66 -0
  176. package/dist/core/research.js +270 -0
  177. package/dist/core/retry.d.ts +60 -0
  178. package/dist/core/retry.js +119 -0
  179. package/dist/core/safe-browsing.d.ts +30 -0
  180. package/dist/core/safe-browsing.js +206 -0
  181. package/dist/core/schema-extraction.d.ts +66 -0
  182. package/dist/core/schema-extraction.js +352 -0
  183. package/dist/core/schema-postprocess.d.ts +32 -0
  184. package/dist/core/schema-postprocess.js +469 -0
  185. package/dist/core/schema-templates.d.ts +19 -0
  186. package/dist/core/schema-templates.js +143 -0
  187. package/dist/core/screenshot.d.ts +224 -0
  188. package/dist/core/screenshot.js +207 -0
  189. package/dist/core/search-engines.d.ts +25 -0
  190. package/dist/core/search-engines.js +182 -0
  191. package/dist/core/search-provider.d.ts +243 -0
  192. package/dist/core/search-provider.js +1629 -0
  193. package/dist/core/searxng-provider.d.ts +35 -0
  194. package/dist/core/searxng-provider.js +105 -0
  195. package/dist/core/selective-evidence.d.ts +151 -0
  196. package/dist/core/selective-evidence.js +389 -0
  197. package/dist/core/site-search.d.ts +44 -0
  198. package/dist/core/site-search.js +252 -0
  199. package/dist/core/sitemap.d.ts +23 -0
  200. package/dist/core/sitemap.js +105 -0
  201. package/dist/core/source-credibility.d.ts +29 -0
  202. package/dist/core/source-credibility.js +584 -0
  203. package/dist/core/source-scoring.d.ts +166 -0
  204. package/dist/core/source-scoring.js +396 -0
  205. package/dist/core/stemmer.d.ts +38 -0
  206. package/dist/core/stemmer.js +509 -0
  207. package/dist/core/strategies.d.ts +104 -0
  208. package/dist/core/strategies.js +1044 -0
  209. package/dist/core/strategy-hooks.d.ts +145 -0
  210. package/dist/core/strategy-hooks.js +74 -0
  211. package/dist/core/structured-extract.d.ts +43 -0
  212. package/dist/core/structured-extract.js +550 -0
  213. package/dist/core/summarize.d.ts +17 -0
  214. package/dist/core/summarize.js +78 -0
  215. package/dist/core/synonyms.d.ts +42 -0
  216. package/dist/core/synonyms.js +184 -0
  217. package/dist/core/system-monitor.d.ts +61 -0
  218. package/dist/core/system-monitor.js +133 -0
  219. package/dist/core/table-format.d.ts +30 -0
  220. package/dist/core/table-format.js +146 -0
  221. package/dist/core/threat-feeds.d.ts +23 -0
  222. package/dist/core/threat-feeds.js +104 -0
  223. package/dist/core/timing.d.ts +21 -0
  224. package/dist/core/timing.js +33 -0
  225. package/dist/core/transcript-export.d.ts +47 -0
  226. package/dist/core/transcript-export.js +107 -0
  227. package/dist/core/user-agents.d.ts +82 -0
  228. package/dist/core/user-agents.js +239 -0
  229. package/dist/core/vertical-search.d.ts +54 -0
  230. package/dist/core/vertical-search.js +158 -0
  231. package/dist/core/watch-manager.d.ts +175 -0
  232. package/dist/core/watch-manager.js +416 -0
  233. package/dist/core/watch.d.ts +101 -0
  234. package/dist/core/watch.js +389 -0
  235. package/dist/core/youtube.d.ts +130 -0
  236. package/dist/core/youtube.js +1175 -0
  237. package/dist/ee/challenge-re-export.d.ts +1 -0
  238. package/dist/ee/challenge-re-export.js +1 -0
  239. package/dist/ee/challenge-solver.d.ts +72 -0
  240. package/dist/ee/challenge-solver.js +720 -0
  241. package/dist/ee/domain-extractors.d.ts +8 -0
  242. package/dist/ee/domain-extractors.js +8 -0
  243. package/dist/ee/domain-intel.d.ts +16 -0
  244. package/dist/ee/domain-intel.js +133 -0
  245. package/dist/ee/extractors/allrecipes.d.ts +2 -0
  246. package/dist/ee/extractors/allrecipes.js +120 -0
  247. package/dist/ee/extractors/amazon.d.ts +2 -0
  248. package/dist/ee/extractors/amazon.js +78 -0
  249. package/dist/ee/extractors/arxiv.d.ts +2 -0
  250. package/dist/ee/extractors/arxiv.js +137 -0
  251. package/dist/ee/extractors/bestbuy.d.ts +2 -0
  252. package/dist/ee/extractors/bestbuy.js +78 -0
  253. package/dist/ee/extractors/carscom.d.ts +2 -0
  254. package/dist/ee/extractors/carscom.js +121 -0
  255. package/dist/ee/extractors/coingecko.d.ts +2 -0
  256. package/dist/ee/extractors/coingecko.js +134 -0
  257. package/dist/ee/extractors/craigslist.d.ts +2 -0
  258. package/dist/ee/extractors/craigslist.js +92 -0
  259. package/dist/ee/extractors/devto.d.ts +2 -0
  260. package/dist/ee/extractors/devto.js +135 -0
  261. package/dist/ee/extractors/ebay.d.ts +2 -0
  262. package/dist/ee/extractors/ebay.js +90 -0
  263. package/dist/ee/extractors/espn.d.ts +2 -0
  264. package/dist/ee/extractors/espn.js +260 -0
  265. package/dist/ee/extractors/etsy.d.ts +2 -0
  266. package/dist/ee/extractors/etsy.js +52 -0
  267. package/dist/ee/extractors/facebook.d.ts +2 -0
  268. package/dist/ee/extractors/facebook.js +46 -0
  269. package/dist/ee/extractors/github.d.ts +2 -0
  270. package/dist/ee/extractors/github.js +196 -0
  271. package/dist/ee/extractors/google-flights.d.ts +2 -0
  272. package/dist/ee/extractors/google-flights.js +176 -0
  273. package/dist/ee/extractors/hackernews.d.ts +2 -0
  274. package/dist/ee/extractors/hackernews.js +147 -0
  275. package/dist/ee/extractors/imdb.d.ts +2 -0
  276. package/dist/ee/extractors/imdb.js +172 -0
  277. package/dist/ee/extractors/index.d.ts +26 -0
  278. package/dist/ee/extractors/index.js +247 -0
  279. package/dist/ee/extractors/instagram.d.ts +2 -0
  280. package/dist/ee/extractors/instagram.js +102 -0
  281. package/dist/ee/extractors/kalshi.d.ts +2 -0
  282. package/dist/ee/extractors/kalshi.js +121 -0
  283. package/dist/ee/extractors/kayak-cars.d.ts +2 -0
  284. package/dist/ee/extractors/kayak-cars.js +270 -0
  285. package/dist/ee/extractors/linkedin.d.ts +2 -0
  286. package/dist/ee/extractors/linkedin.js +113 -0
  287. package/dist/ee/extractors/medium.d.ts +2 -0
  288. package/dist/ee/extractors/medium.js +130 -0
  289. package/dist/ee/extractors/news.d.ts +4 -0
  290. package/dist/ee/extractors/news.js +173 -0
  291. package/dist/ee/extractors/npm.d.ts +2 -0
  292. package/dist/ee/extractors/npm.js +86 -0
  293. package/dist/ee/extractors/pdf.d.ts +2 -0
  294. package/dist/ee/extractors/pdf.js +108 -0
  295. package/dist/ee/extractors/pinterest.d.ts +2 -0
  296. package/dist/ee/extractors/pinterest.js +34 -0
  297. package/dist/ee/extractors/polymarket.d.ts +2 -0
  298. package/dist/ee/extractors/polymarket.js +358 -0
  299. package/dist/ee/extractors/producthunt.d.ts +2 -0
  300. package/dist/ee/extractors/producthunt.js +88 -0
  301. package/dist/ee/extractors/pubmed.d.ts +2 -0
  302. package/dist/ee/extractors/pubmed.js +162 -0
  303. package/dist/ee/extractors/pypi.d.ts +2 -0
  304. package/dist/ee/extractors/pypi.js +80 -0
  305. package/dist/ee/extractors/reddit.d.ts +2 -0
  306. package/dist/ee/extractors/reddit.js +438 -0
  307. package/dist/ee/extractors/redfin.d.ts +2 -0
  308. package/dist/ee/extractors/redfin.js +156 -0
  309. package/dist/ee/extractors/semanticscholar.d.ts +2 -0
  310. package/dist/ee/extractors/semanticscholar.js +131 -0
  311. package/dist/ee/extractors/shared.d.ts +12 -0
  312. package/dist/ee/extractors/shared.js +76 -0
  313. package/dist/ee/extractors/soundcloud.d.ts +2 -0
  314. package/dist/ee/extractors/soundcloud.js +34 -0
  315. package/dist/ee/extractors/sportsbetting.d.ts +2 -0
  316. package/dist/ee/extractors/sportsbetting.js +37 -0
  317. package/dist/ee/extractors/spotify.d.ts +2 -0
  318. package/dist/ee/extractors/spotify.js +34 -0
  319. package/dist/ee/extractors/stackoverflow.d.ts +2 -0
  320. package/dist/ee/extractors/stackoverflow.js +61 -0
  321. package/dist/ee/extractors/substack.d.ts +2 -0
  322. package/dist/ee/extractors/substack.js +115 -0
  323. package/dist/ee/extractors/substackroot.d.ts +2 -0
  324. package/dist/ee/extractors/substackroot.js +46 -0
  325. package/dist/ee/extractors/tiktok.d.ts +2 -0
  326. package/dist/ee/extractors/tiktok.js +29 -0
  327. package/dist/ee/extractors/tradingview.d.ts +2 -0
  328. package/dist/ee/extractors/tradingview.js +182 -0
  329. package/dist/ee/extractors/twitch.d.ts +2 -0
  330. package/dist/ee/extractors/twitch.js +36 -0
  331. package/dist/ee/extractors/twitter.d.ts +2 -0
  332. package/dist/ee/extractors/twitter.js +327 -0
  333. package/dist/ee/extractors/types.d.ts +14 -0
  334. package/dist/ee/extractors/types.js +1 -0
  335. package/dist/ee/extractors/walmart.d.ts +2 -0
  336. package/dist/ee/extractors/walmart.js +50 -0
  337. package/dist/ee/extractors/weather.d.ts +2 -0
  338. package/dist/ee/extractors/weather.js +133 -0
  339. package/dist/ee/extractors/wikipedia.d.ts +4 -0
  340. package/dist/ee/extractors/wikipedia.js +235 -0
  341. package/dist/ee/extractors/yelp.d.ts +2 -0
  342. package/dist/ee/extractors/yelp.js +216 -0
  343. package/dist/ee/extractors/youtube.d.ts +2 -0
  344. package/dist/ee/extractors/youtube.js +189 -0
  345. package/dist/ee/extractors/zillow.d.ts +54 -0
  346. package/dist/ee/extractors/zillow.js +247 -0
  347. package/dist/ee/extractors-re-export.d.ts +1 -0
  348. package/dist/ee/extractors-re-export.js +1 -0
  349. package/dist/ee/premium-hooks.d.ts +20 -0
  350. package/dist/ee/premium-hooks.js +50 -0
  351. package/dist/ee/spa-detection.d.ts +2 -0
  352. package/dist/ee/spa-detection.js +2 -0
  353. package/dist/ee/stability.d.ts +4 -0
  354. package/dist/ee/stability.js +29 -0
  355. package/dist/ee/swr-cache.d.ts +14 -0
  356. package/dist/ee/swr-cache.js +34 -0
  357. package/dist/index.d.ts +143 -0
  358. package/dist/index.js +291 -0
  359. package/dist/integrations/index.d.ts +2 -0
  360. package/dist/integrations/index.js +2 -0
  361. package/dist/integrations/langchain.d.ts +64 -0
  362. package/dist/integrations/langchain.js +115 -0
  363. package/dist/integrations/llamaindex.d.ts +50 -0
  364. package/dist/integrations/llamaindex.js +91 -0
  365. package/dist/mcp/handlers/act.d.ts +5 -0
  366. package/dist/mcp/handlers/act.js +34 -0
  367. package/dist/mcp/handlers/definitions.d.ts +6 -0
  368. package/dist/mcp/handlers/definitions.js +395 -0
  369. package/dist/mcp/handlers/extract.d.ts +7 -0
  370. package/dist/mcp/handlers/extract.js +135 -0
  371. package/dist/mcp/handlers/fetch.d.ts +6 -0
  372. package/dist/mcp/handlers/fetch.js +98 -0
  373. package/dist/mcp/handlers/find.d.ts +5 -0
  374. package/dist/mcp/handlers/find.js +137 -0
  375. package/dist/mcp/handlers/index.d.ts +13 -0
  376. package/dist/mcp/handlers/index.js +63 -0
  377. package/dist/mcp/handlers/legacy.d.ts +25 -0
  378. package/dist/mcp/handlers/legacy.js +450 -0
  379. package/dist/mcp/handlers/meta.d.ts +6 -0
  380. package/dist/mcp/handlers/meta.js +40 -0
  381. package/dist/mcp/handlers/monitor.d.ts +5 -0
  382. package/dist/mcp/handlers/monitor.js +41 -0
  383. package/dist/mcp/handlers/observe.d.ts +8 -0
  384. package/dist/mcp/handlers/observe.js +37 -0
  385. package/dist/mcp/handlers/read.d.ts +6 -0
  386. package/dist/mcp/handlers/read.js +78 -0
  387. package/dist/mcp/handlers/see.d.ts +5 -0
  388. package/dist/mcp/handlers/see.js +75 -0
  389. package/dist/mcp/handlers/types.d.ts +29 -0
  390. package/dist/mcp/handlers/types.js +28 -0
  391. package/dist/mcp/server.d.ts +7 -0
  392. package/dist/mcp/server.js +108 -0
  393. package/dist/mcp/smart-router.d.ts +23 -0
  394. package/dist/mcp/smart-router.js +178 -0
  395. package/dist/server/app.d.ts +14 -0
  396. package/dist/server/app.js +632 -0
  397. package/dist/server/auth-store.d.ts +28 -0
  398. package/dist/server/auth-store.js +88 -0
  399. package/dist/server/bull-queues.d.ts +60 -0
  400. package/dist/server/bull-queues.js +90 -0
  401. package/dist/server/email-service.d.ts +55 -0
  402. package/dist/server/email-service.js +291 -0
  403. package/dist/server/job-queue.d.ts +100 -0
  404. package/dist/server/job-queue.js +145 -0
  405. package/dist/server/logger.d.ts +10 -0
  406. package/dist/server/logger.js +37 -0
  407. package/dist/server/middleware/audit-log.d.ts +14 -0
  408. package/dist/server/middleware/audit-log.js +73 -0
  409. package/dist/server/middleware/auth.d.ts +35 -0
  410. package/dist/server/middleware/auth.js +225 -0
  411. package/dist/server/middleware/rate-limit.d.ts +50 -0
  412. package/dist/server/middleware/rate-limit.js +270 -0
  413. package/dist/server/middleware/scope-guard.d.ts +25 -0
  414. package/dist/server/middleware/scope-guard.js +45 -0
  415. package/dist/server/middleware/url-validator.d.ts +15 -0
  416. package/dist/server/middleware/url-validator.js +201 -0
  417. package/dist/server/openapi.yaml +6418 -0
  418. package/dist/server/pg-auth-store.d.ts +146 -0
  419. package/dist/server/pg-auth-store.js +576 -0
  420. package/dist/server/pg-job-queue.d.ts +59 -0
  421. package/dist/server/pg-job-queue.js +375 -0
  422. package/dist/server/routes/activity.d.ts +6 -0
  423. package/dist/server/routes/activity.js +79 -0
  424. package/dist/server/routes/admin-active.d.ts +7 -0
  425. package/dist/server/routes/admin-active.js +120 -0
  426. package/dist/server/routes/admin-stats.d.ts +7 -0
  427. package/dist/server/routes/admin-stats.js +176 -0
  428. package/dist/server/routes/agent.d.ts +24 -0
  429. package/dist/server/routes/agent.js +480 -0
  430. package/dist/server/routes/answer.d.ts +5 -0
  431. package/dist/server/routes/answer.js +125 -0
  432. package/dist/server/routes/ask.d.ts +28 -0
  433. package/dist/server/routes/ask.js +295 -0
  434. package/dist/server/routes/batch.d.ts +6 -0
  435. package/dist/server/routes/batch.js +493 -0
  436. package/dist/server/routes/cache-warm.d.ts +25 -0
  437. package/dist/server/routes/cache-warm.js +212 -0
  438. package/dist/server/routes/cli-usage.d.ts +6 -0
  439. package/dist/server/routes/cli-usage.js +127 -0
  440. package/dist/server/routes/compat.d.ts +23 -0
  441. package/dist/server/routes/compat.js +652 -0
  442. package/dist/server/routes/crawl.d.ts +13 -0
  443. package/dist/server/routes/crawl.js +287 -0
  444. package/dist/server/routes/deep-fetch.d.ts +8 -0
  445. package/dist/server/routes/deep-fetch.js +57 -0
  446. package/dist/server/routes/deep-research.d.ts +11 -0
  447. package/dist/server/routes/deep-research.js +232 -0
  448. package/dist/server/routes/demo.d.ts +24 -0
  449. package/dist/server/routes/demo.js +517 -0
  450. package/dist/server/routes/do.d.ts +8 -0
  451. package/dist/server/routes/do.js +72 -0
  452. package/dist/server/routes/extract.d.ts +14 -0
  453. package/dist/server/routes/extract.js +325 -0
  454. package/dist/server/routes/feed.d.ts +15 -0
  455. package/dist/server/routes/feed.js +311 -0
  456. package/dist/server/routes/fetch-queue.d.ts +13 -0
  457. package/dist/server/routes/fetch-queue.js +357 -0
  458. package/dist/server/routes/fetch.d.ts +7 -0
  459. package/dist/server/routes/fetch.js +1274 -0
  460. package/dist/server/routes/go.d.ts +14 -0
  461. package/dist/server/routes/go.js +81 -0
  462. package/dist/server/routes/health.d.ts +11 -0
  463. package/dist/server/routes/health.js +141 -0
  464. package/dist/server/routes/jobs.d.ts +7 -0
  465. package/dist/server/routes/jobs.js +574 -0
  466. package/dist/server/routes/map.d.ts +11 -0
  467. package/dist/server/routes/map.js +116 -0
  468. package/dist/server/routes/mcp.d.ts +14 -0
  469. package/dist/server/routes/mcp.js +197 -0
  470. package/dist/server/routes/metrics.d.ts +37 -0
  471. package/dist/server/routes/metrics.js +149 -0
  472. package/dist/server/routes/oauth.d.ts +9 -0
  473. package/dist/server/routes/oauth.js +396 -0
  474. package/dist/server/routes/playground.d.ts +17 -0
  475. package/dist/server/routes/playground.js +283 -0
  476. package/dist/server/routes/reader.d.ts +18 -0
  477. package/dist/server/routes/reader.js +192 -0
  478. package/dist/server/routes/research.d.ts +14 -0
  479. package/dist/server/routes/research.js +482 -0
  480. package/dist/server/routes/screenshot.d.ts +22 -0
  481. package/dist/server/routes/screenshot.js +820 -0
  482. package/dist/server/routes/search.d.ts +6 -0
  483. package/dist/server/routes/search.js +874 -0
  484. package/dist/server/routes/session.d.ts +17 -0
  485. package/dist/server/routes/session.js +548 -0
  486. package/dist/server/routes/share.d.ts +18 -0
  487. package/dist/server/routes/share.js +462 -0
  488. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  489. package/dist/server/routes/smart-search/handlers/cars.js +102 -0
  490. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  491. package/dist/server/routes/smart-search/handlers/flights.js +72 -0
  492. package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
  493. package/dist/server/routes/smart-search/handlers/general.js +717 -0
  494. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  495. package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
  496. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  497. package/dist/server/routes/smart-search/handlers/products.js +1309 -0
  498. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  499. package/dist/server/routes/smart-search/handlers/rental.js +154 -0
  500. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  501. package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
  502. package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
  503. package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
  504. package/dist/server/routes/smart-search/index.d.ts +19 -0
  505. package/dist/server/routes/smart-search/index.js +546 -0
  506. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  507. package/dist/server/routes/smart-search/intent.js +264 -0
  508. package/dist/server/routes/smart-search/llm.d.ts +16 -0
  509. package/dist/server/routes/smart-search/llm.js +70 -0
  510. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  511. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  512. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  513. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  514. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  515. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  516. package/dist/server/routes/smart-search/types.d.ts +81 -0
  517. package/dist/server/routes/smart-search/types.js +1 -0
  518. package/dist/server/routes/smart-search/utils.d.ts +20 -0
  519. package/dist/server/routes/smart-search/utils.js +146 -0
  520. package/dist/server/routes/stats.d.ts +6 -0
  521. package/dist/server/routes/stats.js +71 -0
  522. package/dist/server/routes/stripe.d.ts +15 -0
  523. package/dist/server/routes/stripe.js +296 -0
  524. package/dist/server/routes/transcript-export.d.ts +10 -0
  525. package/dist/server/routes/transcript-export.js +178 -0
  526. package/dist/server/routes/usage.d.ts +9 -0
  527. package/dist/server/routes/usage.js +279 -0
  528. package/dist/server/routes/users.d.ts +8 -0
  529. package/dist/server/routes/users.js +1867 -0
  530. package/dist/server/routes/watch.d.ts +15 -0
  531. package/dist/server/routes/watch.js +309 -0
  532. package/dist/server/routes/webhooks.d.ts +26 -0
  533. package/dist/server/routes/webhooks.js +170 -0
  534. package/dist/server/routes/youtube.d.ts +6 -0
  535. package/dist/server/routes/youtube.js +130 -0
  536. package/dist/server/sentry.d.ts +14 -0
  537. package/dist/server/sentry.js +104 -0
  538. package/dist/server/types.d.ts +15 -0
  539. package/dist/server/types.js +7 -0
  540. package/dist/server/utils/response.d.ts +44 -0
  541. package/dist/server/utils/response.js +69 -0
  542. package/dist/server/utils/sse.d.ts +22 -0
  543. package/dist/server/utils/sse.js +38 -0
  544. package/dist/types.d.ts +552 -0
  545. package/dist/types.js +39 -0
  546. package/llms.txt +105 -0
  547. package/package.json +189 -0
@@ -0,0 +1,395 @@
1
+ /**
2
+ * WebPeel Observe — Give agents structured "eyes" on a web page.
3
+ *
4
+ * Returns a machine-readable map of interactive elements (links, buttons,
5
+ * inputs, forms, selects, media) so agents can decide what to do next
6
+ * without needing a vision model.
7
+ *
8
+ * This bridges the gap between:
9
+ * - `peel()` / `webpeel_read` → markdown content (strips interaction cues)
10
+ * - `webpeel_act` → requires knowing selectors already
11
+ *
12
+ * With `observe()`, the loop becomes:
13
+ * 1. observe(url) → see what's on the page
14
+ * 2. decide which element to interact with
15
+ * 3. act(url, actions) → do it
16
+ * 4. observe again → see the result
17
+ */
18
+ // ── Serializable extraction logic (runs inside page.evaluate) ─────────────
/**
 * Collects the visible interactive elements of the current document.
 *
 * This function runs inside the browser context via page.evaluate().
 * It must be fully self-contained — no closures over Node variables; it may
 * only use its argument and browser globals (document, window, CSS, ...).
 *
 * @param args - `{ scopeSelector, maxPerCategory }`. `scopeSelector` is an
 *   optional CSS selector restricting the search root; when it is falsy or
 *   matches nothing the whole document is searched. `maxPerCategory` caps the
 *   number of visible elements returned for each category.
 * @returns `{ links, buttons, inputs, selects, forms, media }`, each an array
 *   of `{ tag, selector, text, role, attributes, inViewport, bbox }`.
 */
function extractInteractiveElements(args) {
    const { scopeSelector, maxPerCategory } = args;
    const root = scopeSelector
        ? document.querySelector(scopeSelector) || document
        : document;
    const vpW = window.innerWidth;
    const vpH = window.innerHeight;
    /** True when the element is rendered: not hidden via CSS and has a non-empty box. */
    function isVisible(el) {
        const style = window.getComputedStyle(el);
        if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0')
            return false;
        const rect = el.getBoundingClientRect();
        return rect.width > 0 && rect.height > 0;
    }
    /** True when any part of the element's box intersects the current viewport. */
    function isInViewport(el) {
        const rect = el.getBoundingClientRect();
        return rect.top < vpH && rect.bottom > 0 && rect.left < vpW && rect.right > 0;
    }
    /** Rounded bounding box in viewport coordinates, or null for a zero-size box. */
    function getBbox(el) {
        const rect = el.getBoundingClientRect();
        if (rect.width === 0 && rect.height === 0)
            return null;
        return {
            x: Math.round(rect.x),
            y: Math.round(rect.y),
            width: Math.round(rect.width),
            height: Math.round(rect.height),
        };
    }
    /** Human-readable label: aria-label, then text content, then value/placeholder, then title/alt. */
    function getText(el) {
        const ariaLabel = el.getAttribute('aria-label');
        if (ariaLabel)
            return ariaLabel.trim().slice(0, 120);
        const text = (el.textContent || '').trim().replace(/\s+/g, ' ');
        if (text && text.length <= 120)
            return text;
        if (text)
            return text.slice(0, 117) + '...';
        if (el instanceof HTMLInputElement) {
            // Never surface what was typed into a password field.
            const value = el.type === 'password' ? '' : el.value;
            return value || el.placeholder || '';
        }
        return el.getAttribute('title') || el.getAttribute('alt') || '';
    }
    /** Best-effort CSS selector: id > data-testid > aria-label > name > href > unique class > child path. */
    function buildSelector(el) {
        if (el.id)
            return `#${CSS.escape(el.id)}`;
        // data-testid is very reliable
        const testId = el.getAttribute('data-testid');
        if (testId)
            return `[data-testid="${CSS.escape(testId)}"]`;
        const tag = el.tagName.toLowerCase();
        // aria-label is good for buttons
        const ariaLabel = el.getAttribute('aria-label');
        if (ariaLabel)
            return `${tag}[aria-label="${CSS.escape(ariaLabel)}"]`;
        // name attribute for form elements
        const name = el.getAttribute('name');
        if (name)
            return `${tag}[name="${CSS.escape(name)}"]`;
        // href for links
        if (el.tagName === 'A') {
            const href = el.getAttribute('href');
            if (href && href.length < 100)
                return `a[href="${CSS.escape(href)}"]`;
        }
        // class-based with tag, only when that combination is unique in the document
        if (el.className && typeof el.className === 'string') {
            const cls = el.className.trim().split(/\s+/)[0];
            if (cls) {
                const candidate = `${tag}.${CSS.escape(cls)}`;
                if (document.querySelectorAll(candidate).length === 1)
                    return candidate;
            }
        }
        // Fallback: one-level child path, disambiguated with :nth-of-type when the
        // parent holds several elements of this tag (otherwise the selector would
        // match all of them and always resolve to the first).
        const parent = el.parentElement;
        if (!parent)
            return tag;
        const parentSelector = parent.id ? `#${CSS.escape(parent.id)}` : parent.tagName.toLowerCase();
        const sameTagSiblings = Array.from(parent.children).filter(c => c.tagName === el.tagName);
        if (sameTagSiblings.length === 1)
            return `${parentSelector} > ${tag}`;
        return `${parentSelector} > ${tag}:nth-of-type(${sameTagSiblings.indexOf(el) + 1})`;
    }
    /** Explicit `role` attribute if present, otherwise a role derived from the tag / input type. */
    function getRole(el) {
        const explicit = el.getAttribute('role');
        if (explicit)
            return explicit;
        const tag = el.tagName.toLowerCase();
        switch (tag) {
            case 'a':
                return 'link';
            case 'button':
                return 'button';
            case 'input':
                switch (el.type || 'text') {
                    case 'submit':
                        return 'submit';
                    case 'checkbox':
                        return 'checkbox';
                    case 'radio':
                        return 'radio';
                    case 'file':
                        return 'file-upload';
                    case 'search':
                        return 'search';
                    default:
                        return 'textbox';
                }
            case 'textarea':
                return 'textbox';
            case 'select':
                return 'listbox';
            case 'form':
                return 'form';
            case 'img':
                return 'image';
            case 'video':
                return 'video';
            case 'audio':
                return 'audio';
            default:
                return tag;
        }
    }
    /** Tag-specific attributes (all values are strings, long values are truncated). */
    function getAttributes(el) {
        const attrs = {};
        const tag = el.tagName.toLowerCase();
        if (tag === 'a') {
            const href = el.getAttribute('href');
            if (href)
                attrs.href = href.slice(0, 200);
            if (el.getAttribute('target') === '_blank')
                attrs.target = '_blank';
        }
        if (tag === 'input') {
            attrs.type = el.type || 'text';
            if (el.placeholder)
                attrs.placeholder = el.placeholder;
            if (el.name)
                attrs.name = el.name;
            if (el.required)
                attrs.required = 'true';
            if (el.disabled)
                attrs.disabled = 'true';
            // Password contents are deliberately not exported.
            if (el.value && el.type !== 'password')
                attrs.value = el.value.slice(0, 50);
        }
        if (tag === 'select') {
            // Only the first five options are listed to keep the payload small.
            const options = Array.from(el.options).slice(0, 5).map(o => o.text.trim());
            if (options.length > 0)
                attrs.options = options.join(' | ');
            if (el.name)
                attrs.name = el.name;
        }
        if (tag === 'textarea') {
            if (el.placeholder)
                attrs.placeholder = el.placeholder;
            if (el.name)
                attrs.name = el.name;
        }
        if (tag === 'form') {
            if (el.action)
                attrs.action = el.action.slice(0, 200);
            if (el.method)
                attrs.method = el.method;
            attrs.fields = String(el.elements.length);
        }
        if (tag === 'img') {
            if (el.alt)
                attrs.alt = el.alt.slice(0, 120);
            if (el.src)
                attrs.src = el.src.slice(0, 200);
        }
        if (tag === 'video' || tag === 'audio') {
            if (el.src)
                attrs.src = el.src.slice(0, 200);
            if (el.duration)
                attrs.duration = String(Math.round(el.duration));
        }
        if (tag === 'iframe') {
            // iframes are collected via `iframe[src]`, so report where they point.
            if (el.src)
                attrs.src = el.src.slice(0, 200);
        }
        return attrs;
    }
    /** Plain-object snapshot of one element (must be structured-cloneable). */
    function serialize(el) {
        return {
            tag: el.tagName.toLowerCase(),
            selector: buildSelector(el),
            text: getText(el),
            role: getRole(el),
            attributes: getAttributes(el),
            inViewport: isInViewport(el),
            bbox: getBbox(el),
        };
    }
    /** Visible matches of `selector` under the root, capped at maxPerCategory. */
    function collect(selector) {
        return Array.from(root.querySelectorAll(selector))
            .filter(isVisible)
            .slice(0, maxPerCategory)
            .map(serialize);
    }
    return {
        links: collect('a[href]'),
        buttons: collect('button, [role="button"], input[type="submit"], input[type="button"]'),
        inputs: collect('input:not([type="hidden"]):not([type="submit"]):not([type="button"]), textarea'),
        selects: collect('select'),
        forms: collect('form'),
        media: collect('img[src], video, audio, iframe[src]'),
    };
}
245
+ // ── Main observe function ─────────────────────────────────────────────────────
/**
 * Observe a web page and return a structured map of interactive elements.
 *
 * Loads the page through `browserFetch` (keeping the page open), extracts the
 * visible links, buttons, inputs, selects, forms and media from the live DOM,
 * optionally takes a PNG screenshot, and always closes the page and its
 * browser afterwards.
 *
 * @param options - `url` (required); `selector` (CSS scope, default whole
 *   page); `viewport` ('desktop' 1280x800 | 'mobile' 390x844 | 'tablet'
 *   768x1024 | `{ width, height }`); `screenshot` / `screenshotFullPage`;
 *   `maxElements` per category (default 50); `timeout` in ms (default 30000);
 *   `stealth` (default false).
 * @returns `{ url, title, viewport, scroll, elements, totalElements, summary,
 *   screenshot, elapsed }`; `screenshot` is base64 PNG or undefined.
 * @throws Error when `url` is missing or the browser page is not available.
 *
 * @example
 * ```typescript
 * import { observe } from 'webpeel';
 *
 * const result = await observe({ url: 'https://news.ycombinator.com' });
 * console.log(result.elements.links.length);      // e.g. 30
 * console.log(result.elements.links[0].ref);      // "link-0"
 * console.log(result.elements.links[0].text);     // "Show HN: ..."
 * console.log(result.elements.links[0].selector); // e.g. a[href="item\?id\=12345"]
 * console.log(result.summary);
 * // 'Page: "Hacker News" — 30 links, 1 input, 1 form. Key actions: ... 850ms.'
 * ```
 */
export async function observe(options) {
    // `?? {}` so a missing options object yields the documented error below
    // instead of a destructuring TypeError.
    const { url, selector = null, viewport = 'desktop', screenshot: wantScreenshot = false, screenshotFullPage = false, maxElements = 50, timeout = 30000, stealth = false, } = options ?? {};
    if (!url)
        throw new Error('observe() requires a url');
    const startTime = Date.now();
    // Resolve viewport dimensions
    let vpWidth = 1280;
    let vpHeight = 800;
    let deviceLabel = 'desktop';
    if (viewport === 'mobile') {
        vpWidth = 390;
        vpHeight = 844;
        deviceLabel = 'mobile';
    }
    else if (viewport === 'tablet') {
        vpWidth = 768;
        vpHeight = 1024;
        deviceLabel = 'tablet';
    }
    else if (viewport && typeof viewport === 'object') {
        // `viewport &&` guards against null, which also has typeof 'object'.
        vpWidth = viewport.width;
        vpHeight = viewport.height;
        deviceLabel = `${viewport.width}x${viewport.height}`;
    }
    // Use browserFetch with keepPageOpen so we can evaluate in the live page
    const { browserFetch } = await import('./browser-fetch.js');
    const fetchResult = await browserFetch(url, {
        timeoutMs: timeout,
        stealth,
        keepPageOpen: true,
        viewportWidth: vpWidth,
        viewportHeight: vpHeight,
        // Custom sizes are emulated as a desktop device.
        device: deviceLabel === 'mobile' ? 'mobile' : deviceLabel === 'tablet' ? 'tablet' : 'desktop',
    });
    const page = fetchResult.page;
    if (!page) {
        throw new Error('observe() failed: browser page not available');
    }
    try {
        // Extract interactive elements from the live DOM
        const raw = await page.evaluate(extractInteractiveElements, { scopeSelector: selector, maxPerCategory: maxElements });
        // Take optional screenshot
        let screenshotBase64;
        if (wantScreenshot) {
            const buf = await page.screenshot({ type: 'png', fullPage: screenshotFullPage });
            screenshotBase64 = buf.toString('base64');
        }
        const pageTitle = await page.title();
        const finalUrl = page.url();
        // Get scroll dimensions
        const scrollDims = await page.evaluate(() => ({
            width: document.documentElement.scrollWidth,
            height: document.documentElement.scrollHeight,
        }));
        const elapsed = Date.now() - startTime;
        // Give every element a stable per-category reference such as "link-0".
        const addRefs = (items, prefix) => items.map((item, i) => ({
            ref: `${prefix}-${i}`,
            tag: item.tag,
            selector: item.selector,
            text: item.text,
            role: item.role,
            attributes: item.attributes,
            inViewport: item.inViewport,
            bbox: item.bbox ?? undefined,
        }));
        const elements = {
            links: addRefs(raw.links, 'link'),
            buttons: addRefs(raw.buttons, 'button'),
            inputs: addRefs(raw.inputs, 'input'),
            selects: addRefs(raw.selects, 'select'),
            forms: addRefs(raw.forms, 'form'),
            media: addRefs(raw.media, 'media'),
        };
        const totalElements = elements.links.length +
            elements.buttons.length +
            elements.inputs.length +
            elements.selects.length +
            elements.forms.length +
            elements.media.length;
        // Build summary: "<n> <noun>" per non-empty category, singular when n === 1.
        const countLabel = (count, singular, plural = `${singular}s`) => `${count} ${count === 1 ? singular : plural}`;
        const categories = [
            [elements.links.length, 'link'],
            [elements.buttons.length, 'button'],
            [elements.inputs.length, 'input'],
            [elements.selects.length, 'select'],
            [elements.forms.length, 'form'],
            [elements.media.length, 'media', 'media'],
        ];
        const parts = categories
            .filter(([count]) => count > 0)
            .map(([count, singular, plural]) => countLabel(count, singular, plural));
        // Highlight key actionable items
        const keyActions = [];
        for (const btn of elements.buttons.slice(0, 3)) {
            if (btn.text)
                keyActions.push(`[${btn.ref}] "${btn.text}"`);
        }
        for (const inp of elements.inputs.slice(0, 2)) {
            const label = inp.text || inp.attributes.placeholder || inp.attributes.name || 'text field';
            keyActions.push(`[${inp.ref}] ${label} (${inp.attributes.type || 'text'})`);
        }
        let summary = `Page: "${pageTitle}" — ${parts.length > 0 ? parts.join(', ') : 'no interactive elements'}`;
        if (keyActions.length > 0) {
            summary += `. Key actions: ${keyActions.join(', ')}`;
        }
        summary += `. ${elapsed}ms.`;
        return {
            url: finalUrl,
            title: pageTitle,
            viewport: { width: vpWidth, height: vpHeight },
            scroll: scrollDims,
            elements,
            totalElements,
            summary,
            screenshot: screenshotBase64,
            elapsed,
        };
    }
    finally {
        // Clean up: close the page and its browser
        try {
            const browser = page.context().browser();
            await page.close().catch(() => { });
            if (browser)
                await browser.close().catch(() => { });
        }
        catch {
            // Best-effort cleanup
        }
    }
}
@@ -0,0 +1,12 @@
1
+ export declare const IMAGE_CONTENT_TYPES: string[];
2
+ /**
3
+ * Returns true if the given content-type string is a supported image type.
4
+ */
5
+ export declare function isImageContentType(contentType: string): boolean;
6
+ /**
7
+ * Extract text from an image buffer using Tesseract OCR.
8
+ * @param imageBuffer - Raw image bytes
9
+ * @param language - Tesseract language code (default: 'eng')
10
+ * @returns Extracted text, trimmed. Empty string when no text found.
11
+ */
12
+ export declare function extractTextFromImage(imageBuffer: Buffer, language?: string): Promise<string>;
@@ -0,0 +1,33 @@
1
+ /**
2
+ * OCR module — extract text from images using Tesseract.js (pure JS, no native deps).
3
+ */
4
+ import Tesseract from 'tesseract.js';
/**
 * Content types this module treats as OCR-capable images.
 * `image/jpg` is listed next to `image/jpeg` so that spelling is accepted too.
 */
export const IMAGE_CONTENT_TYPES = [
    'image/png',
    'image/jpeg',
    'image/jpg',
    'image/gif',
    'image/webp',
    'image/tiff',
    'image/bmp',
];
/**
 * Returns true if the given content-type string is a supported image type.
 *
 * Matching is case-insensitive and substring-based, so parameters such as
 * `; charset=...` after the media type do not prevent a match.
 *
 * @param contentType - Value of a Content-Type header. A missing header
 *   (`null` / `undefined`), a non-string or an empty string yields `false`
 *   instead of throwing.
 * @returns Whether the value names one of IMAGE_CONTENT_TYPES.
 */
export function isImageContentType(contentType) {
    if (typeof contentType !== 'string' || contentType === '')
        return false;
    const normalized = contentType.toLowerCase();
    return IMAGE_CONTENT_TYPES.some(type => normalized.includes(type));
}
/**
 * Run Tesseract OCR over an image and return the recognized text.
 * @param imageBuffer - Raw image bytes
 * @param language - Tesseract language code (default: 'eng')
 * @returns Extracted text, trimmed. Empty string when no text found.
 */
export async function extractTextFromImage(imageBuffer, language = 'eng') {
    // A no-op logger suppresses Tesseract's verbose logging.
    const silentLogger = () => { };
    const recognition = await Tesseract.recognize(imageBuffer, language, {
        logger: silentLogger,
    });
    const recognizedText = recognition.data.text;
    return recognizedText.trim();
}
@@ -0,0 +1,31 @@
1
+ /**
2
+ * Pagination link discovery.
3
+ *
4
+ * Given a page's HTML, finds the "Next" page URL by matching common
5
+ * pagination patterns (link text, ARIA labels, rel attributes, CSS classes).
6
+ *
7
+ * @module paginate
8
+ */
9
+ /**
10
+ * Attempt to find the URL of the "next" page in a paginated result set.
11
+ *
12
+ * Checks, in priority order:
13
+ * 1. `<a rel="next">` or `<link rel="next">`
14
+ * 2. `<a aria-label="...next...">`
15
+ * 3. `<a class="...next...">` (if the link text also looks "nexty")
16
+ * 4. `<a>` whose visible text matches a known next-page pattern
17
+ *
18
+ * @param html - Raw HTML of the current page.
19
+ * @param currentUrl - Absolute URL of the current page (used to resolve
20
+ * relative `href` values).
21
+ * @returns Absolute URL of the next page, or `null` if none found.
22
+ *
23
+ * @example
24
+ * ```typescript
25
+ * const next = findNextPageUrl(html, 'https://example.com/results?page=1');
26
+ * if (next) {
27
+ * // fetch next page
28
+ * }
29
+ * ```
30
+ */
31
+ export declare function findNextPageUrl(html: string, currentUrl: string): string | null;
@@ -0,0 +1,106 @@
1
+ /**
2
+ * Pagination link discovery.
3
+ *
4
+ * Given a page's HTML, finds the "Next" page URL by matching common
5
+ * pagination patterns (link text, ARIA labels, rel attributes, CSS classes).
6
+ *
7
+ * @module paginate
8
+ */
9
+ import { load } from 'cheerio';
10
+ /* ------------------------------------------------------------------ */
11
+ /* Next-page heuristics */
12
+ /* ------------------------------------------------------------------ */
13
+ /** Exact link-text values that identify a "Next" link (compared against the trimmed, lower-cased link text). */
14
+ const NEXT_TEXT_EXACT = new Set(['next', 'next page', '›', '»', '>', '>>', 'next ›', 'next »', 'next >', 'suivant', 'weiter', 'siguiente', '次へ']);
15
+ /** Substrings to look for in aria-label / class attributes. */
16
+ const NEXT_ATTR_SUBSTRINGS = ['next'];
17
+ /* ------------------------------------------------------------------ */
18
+ /* Public API */
19
+ /* ------------------------------------------------------------------ */
/**
 * Attempt to find the URL of the "next" page in a paginated result set.
 *
 * Checks, in priority order:
 *   1. `<a>` / `<link>` whose `rel` token list contains `next`
 *      (case-insensitive, so `rel="next nofollow"` and `rel="Next"` count)
 *   2. `<a aria-label="...next...">`
 *   3. `<a class="...next...">` / `<a class="...Next...">`, skipped when the
 *      link text contains "prev"
 *   4. `<a>` whose visible text matches a known next-page pattern
 *
 * Within each step the candidates are tried in document order and the first
 * one with a usable `href` wins; a link that resolves to the current page is
 * never returned.
 *
 * @param html - Raw HTML of the current page.
 * @param currentUrl - Absolute URL of the current page (used to resolve
 *   relative `href` values).
 * @returns Absolute URL of the next page, or `null` if none found.
 *
 * @example
 * ```typescript
 * const next = findNextPageUrl(html, 'https://example.com/results?page=1');
 * if (next) {
 *   // fetch next page
 * }
 * ```
 */
export function findNextPageUrl(html, currentUrl) {
    if (!html)
        return null;
    const $ = load(html);
    // Resolved hrefs are in normalised form, so also compare against the
    // normalised current URL (e.g. "https://a.com" vs "https://a.com/").
    let canonicalCurrent = currentUrl;
    try {
        canonicalCurrent = new URL(currentUrl).href;
    }
    catch {
        // Not an absolute URL — keep comparing against the raw string only.
    }
    /** Absolute URL for `href`, or null when unusable or pointing at the current page. */
    const usable = (href) => {
        const resolved = resolve(href, currentUrl);
        if (!resolved || resolved === currentUrl || resolved === canonicalCurrent)
            return null;
        return resolved;
    };
    /** First usable URL among the matched elements, in document order. */
    const firstUsable = (candidates) => {
        for (let i = 0; i < candidates.length; i++) {
            const next = usable(candidates.eq(i).attr('href'));
            if (next)
                return next;
        }
        return null;
    };
    // Each strategy is a thunk so later (broader) scans only run when needed.
    const strategies = [
        // 1. rel contains the "next" token (strongest signal)
        () => $('a[rel], link[rel]').filter((_, el) => {
            const tokens = ($(el).attr('rel') ?? '').toLowerCase().split(/\s+/);
            return tokens.includes('next');
        }),
        // 2. aria-label containing "next"
        () => $('a[aria-label]').filter((_, el) => {
            const label = $(el).attr('aria-label')?.toLowerCase() ?? '';
            return NEXT_ATTR_SUBSTRINGS.some(sub => label.includes(sub));
        }),
        // 3. class containing "next"; avoid "previous"/"prev" links that happen
        //    to also have "next" in their class
        () => $('a[class*="next"], a[class*="Next"]').filter((_, el) => {
            const text = $(el).text().trim().toLowerCase();
            return !text.includes('prev');
        }),
        // 4. Text-based match on all <a> tags (inner whitespace collapsed so
        //    "Next\n ›" still matches "next ›")
        () => $('a').filter((_, el) => {
            const text = $(el).text().trim().replace(/\s+/g, ' ').toLowerCase();
            return NEXT_TEXT_EXACT.has(text);
        }),
    ];
    for (const findCandidates of strategies) {
        const next = firstUsable(findCandidates());
        if (next)
            return next;
    }
    return null;
}
91
+ /* ------------------------------------------------------------------ */
92
+ /* Helpers */
93
+ /* ------------------------------------------------------------------ */
/**
 * Resolve `href` against `base`.
 *
 * Returns `null` for unresolvable / empty hrefs, for a bare `#`, and for
 * hrefs whose scheme does not lead to a fetchable page (`javascript:`,
 * `mailto:`, `tel:`, `sms:`, `data:`). The scheme check is done on the parsed
 * URL, so it is insensitive to case and surrounding whitespace.
 *
 * @param href - Raw `href` attribute value (may be undefined).
 * @param base - Absolute URL used to resolve relative hrefs.
 * @returns The absolute URL string, or `null`.
 */
function resolve(href, base) {
    const trimmed = typeof href === 'string' ? href.trim() : '';
    if (!trimmed || trimmed === '#')
        return null;
    const nonPageProtocols = ['javascript:', 'mailto:', 'tel:', 'sms:', 'data:'];
    try {
        const url = new URL(trimmed, base);
        // URL lower-cases the scheme, so "JavaScript:" is caught here as well.
        if (nonPageProtocols.includes(url.protocol))
            return null;
        return url.href;
    }
    catch {
        return null;
    }
}
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Extracts the text, document metadata, and page count from a PDF buffer using pdf-parse.
3
+ */
4
+ export declare function extractPdf(buffer: Buffer): Promise<{
5
+ text: string;
6
+ metadata: Record<string, any>;
7
+ pages: number;
8
+ }>;
@@ -0,0 +1,25 @@
/**
 * PDF extraction using pdf-parse
 */
/**
 * Extract the text, document-info metadata and page count from a PDF.
 *
 * `pdf-parse` is loaded lazily via dynamic import. Failing to load the module
 * and failing to parse the document are reported separately, so the
 * "npm install pdf-parse" hint only appears when the module itself could not
 * be loaded.
 *
 * @param buffer - Raw PDF bytes.
 * @returns `{ text, metadata, pages }` where `metadata` holds `title`,
 *   `author`, `creator`, `producer` and `creationDate` ('' when absent).
 * @throws Error whose message starts with "PDF parsing failed: ".
 */
export async function extractPdf(buffer) {
    const describe = (error) => (error instanceof Error ? error.message : 'Unknown error');
    let pdfParse;
    try {
        pdfParse = (await import('pdf-parse')).default;
    }
    catch (error) {
        throw new Error(`PDF parsing failed: ${describe(error)}. Install pdf-parse: npm install pdf-parse`);
    }
    let data;
    try {
        // pdf-parse (pdfjs) requires Uint8Array — passing a Node Buffer causes xref parse errors.
        // The type definitions say Buffer, but at runtime pdfjs needs a plain Uint8Array.
        data = await pdfParse(new Uint8Array(buffer));
    }
    catch (error) {
        throw new Error(`PDF parsing failed: ${describe(error)}`);
    }
    const info = data.info ?? {};
    return {
        text: data.text,
        metadata: {
            title: info.Title || '',
            author: info.Author || '',
            creator: info.Creator || '',
            producer: info.Producer || '',
            creationDate: info.CreationDate || '',
        },
        pages: data.numpages,
    };
}
@@ -0,0 +1,25 @@
1
+ /**
2
+ * PeelTLS — WebPeel's TLS Fingerprint Spoofing Engine
3
+ * BSD-licensed replacement for CycleTLS (GPL-3).
4
+ *
5
+ * Manages a singleton Go binary process that provides TLS fingerprint spoofing
6
+ * via uTLS (utls library). The binary exposes a local HTTP API that this module
7
+ * communicates with.
8
+ */
9
+ import type { FetchResult } from './fetcher.js';
10
+ export interface PeelTLSOptions {
11
+ timeout?: number;
12
+ proxy?: string;
13
+ headers?: Record<string, string>;
14
+ /** TLS fingerprint preset: 'chrome-133' | 'firefox-120' | 'safari-16' | raw JA3 string */
15
+ fingerprint?: string;
16
+ }
17
+ export interface PeelTLSResult extends FetchResult {
18
+ method: 'peeltls';
19
+ }
20
+ /** Check if the PeelTLS binary is available on this platform. */
21
+ export declare function isPeelTLSAvailable(): boolean;
22
+ /** Fetch a URL using PeelTLS TLS fingerprint spoofing. */
23
+ export declare function peelTLSFetch(url: string, options?: PeelTLSOptions): Promise<PeelTLSResult>;
24
+ /** Gracefully shut down the PeelTLS binary process. */
25
+ export declare function shutdownPeelTLS(): Promise<void>;