@iflow-mcp/jakeliume-webpeel 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (547) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +313 -0
  3. package/dist/cache.d.ts +30 -0
  4. package/dist/cache.js +139 -0
  5. package/dist/cli/commands/auth.d.ts +5 -0
  6. package/dist/cli/commands/auth.js +411 -0
  7. package/dist/cli/commands/doctor.d.ts +37 -0
  8. package/dist/cli/commands/doctor.js +371 -0
  9. package/dist/cli/commands/fetch.d.ts +6 -0
  10. package/dist/cli/commands/fetch.js +1345 -0
  11. package/dist/cli/commands/guide.d.ts +2 -0
  12. package/dist/cli/commands/guide.js +183 -0
  13. package/dist/cli/commands/interact.d.ts +5 -0
  14. package/dist/cli/commands/interact.js +840 -0
  15. package/dist/cli/commands/jobs.d.ts +5 -0
  16. package/dist/cli/commands/jobs.js +997 -0
  17. package/dist/cli/commands/monitor.d.ts +12 -0
  18. package/dist/cli/commands/monitor.js +197 -0
  19. package/dist/cli/commands/observe.d.ts +12 -0
  20. package/dist/cli/commands/observe.js +158 -0
  21. package/dist/cli/commands/screenshot.d.ts +5 -0
  22. package/dist/cli/commands/screenshot.js +282 -0
  23. package/dist/cli/commands/search.d.ts +5 -0
  24. package/dist/cli/commands/search.js +1021 -0
  25. package/dist/cli/commands/setup.d.ts +13 -0
  26. package/dist/cli/commands/setup.js +244 -0
  27. package/dist/cli/commands/skill.d.ts +15 -0
  28. package/dist/cli/commands/skill.js +195 -0
  29. package/dist/cli/utils.d.ts +84 -0
  30. package/dist/cli/utils.js +806 -0
  31. package/dist/cli-auth.d.ts +75 -0
  32. package/dist/cli-auth.js +369 -0
  33. package/dist/cli.d.ts +17 -0
  34. package/dist/cli.js +99 -0
  35. package/dist/core/actions.d.ts +69 -0
  36. package/dist/core/actions.js +495 -0
  37. package/dist/core/agent.d.ts +98 -0
  38. package/dist/core/agent.js +558 -0
  39. package/dist/core/answer.d.ts +42 -0
  40. package/dist/core/answer.js +395 -0
  41. package/dist/core/application-tracker.d.ts +84 -0
  42. package/dist/core/application-tracker.js +184 -0
  43. package/dist/core/apply.d.ts +162 -0
  44. package/dist/core/apply.js +816 -0
  45. package/dist/core/auth-detection.d.ts +35 -0
  46. package/dist/core/auth-detection.js +358 -0
  47. package/dist/core/auto-extract.d.ts +82 -0
  48. package/dist/core/auto-extract.js +604 -0
  49. package/dist/core/auto-interact.d.ts +23 -0
  50. package/dist/core/auto-interact.js +246 -0
  51. package/dist/core/bm25-filter.d.ts +66 -0
  52. package/dist/core/bm25-filter.js +288 -0
  53. package/dist/core/branding.d.ts +54 -0
  54. package/dist/core/branding.js +234 -0
  55. package/dist/core/browser-fetch.d.ts +323 -0
  56. package/dist/core/browser-fetch.js +1600 -0
  57. package/dist/core/browser-pool.d.ts +91 -0
  58. package/dist/core/browser-pool.js +550 -0
  59. package/dist/core/budget.d.ts +42 -0
  60. package/dist/core/budget.js +324 -0
  61. package/dist/core/business-intel.d.ts +47 -0
  62. package/dist/core/business-intel.js +279 -0
  63. package/dist/core/cache.d.ts +13 -0
  64. package/dist/core/cache.js +121 -0
  65. package/dist/core/cf-worker-proxy.d.ts +32 -0
  66. package/dist/core/cf-worker-proxy.js +87 -0
  67. package/dist/core/challenge-detection.d.ts +26 -0
  68. package/dist/core/challenge-detection.js +468 -0
  69. package/dist/core/change-tracking.d.ts +75 -0
  70. package/dist/core/change-tracking.js +276 -0
  71. package/dist/core/chunker.d.ts +46 -0
  72. package/dist/core/chunker.js +249 -0
  73. package/dist/core/chunking.d.ts +42 -0
  74. package/dist/core/chunking.js +181 -0
  75. package/dist/core/circuit-breaker.d.ts +44 -0
  76. package/dist/core/circuit-breaker.js +85 -0
  77. package/dist/core/content-pruner.d.ts +47 -0
  78. package/dist/core/content-pruner.js +425 -0
  79. package/dist/core/cookie-cache.d.ts +60 -0
  80. package/dist/core/cookie-cache.js +163 -0
  81. package/dist/core/crawl-checkpoint.d.ts +54 -0
  82. package/dist/core/crawl-checkpoint.js +104 -0
  83. package/dist/core/crawler.d.ts +84 -0
  84. package/dist/core/crawler.js +349 -0
  85. package/dist/core/cross-verify.d.ts +27 -0
  86. package/dist/core/cross-verify.js +93 -0
  87. package/dist/core/deep-fetch.d.ts +74 -0
  88. package/dist/core/deep-fetch.js +405 -0
  89. package/dist/core/deep-research.d.ts +141 -0
  90. package/dist/core/deep-research.js +972 -0
  91. package/dist/core/design-analysis.d.ts +70 -0
  92. package/dist/core/design-analysis.js +490 -0
  93. package/dist/core/design-compare.d.ts +38 -0
  94. package/dist/core/design-compare.js +264 -0
  95. package/dist/core/diff.d.ts +61 -0
  96. package/dist/core/diff.js +289 -0
  97. package/dist/core/dns-cache.d.ts +20 -0
  98. package/dist/core/dns-cache.js +198 -0
  99. package/dist/core/documents.d.ts +23 -0
  100. package/dist/core/documents.js +123 -0
  101. package/dist/core/domain-memory.d.ts +66 -0
  102. package/dist/core/domain-memory.js +163 -0
  103. package/dist/core/domain-verify.d.ts +40 -0
  104. package/dist/core/domain-verify.js +379 -0
  105. package/dist/core/engine-ranker.d.ts +112 -0
  106. package/dist/core/engine-ranker.js +395 -0
  107. package/dist/core/extract-inline.d.ts +38 -0
  108. package/dist/core/extract-inline.js +215 -0
  109. package/dist/core/extract-listings.d.ts +38 -0
  110. package/dist/core/extract-listings.js +461 -0
  111. package/dist/core/extract.d.ts +9 -0
  112. package/dist/core/extract.js +139 -0
  113. package/dist/core/fetch-cache.d.ts +57 -0
  114. package/dist/core/fetch-cache.js +95 -0
  115. package/dist/core/fetcher.d.ts +13 -0
  116. package/dist/core/fetcher.js +12 -0
  117. package/dist/core/google-cache.d.ts +29 -0
  118. package/dist/core/google-cache.js +180 -0
  119. package/dist/core/google-serp-parser.d.ts +82 -0
  120. package/dist/core/google-serp-parser.js +287 -0
  121. package/dist/core/hotel-search.d.ts +122 -0
  122. package/dist/core/hotel-search.js +382 -0
  123. package/dist/core/http-fetch.d.ts +72 -0
  124. package/dist/core/http-fetch.js +820 -0
  125. package/dist/core/human.d.ts +175 -0
  126. package/dist/core/human.js +680 -0
  127. package/dist/core/image-caption.d.ts +44 -0
  128. package/dist/core/image-caption.js +271 -0
  129. package/dist/core/jobs.d.ts +75 -0
  130. package/dist/core/jobs.js +634 -0
  131. package/dist/core/json-ld.d.ts +15 -0
  132. package/dist/core/json-ld.js +617 -0
  133. package/dist/core/language-detect.d.ts +18 -0
  134. package/dist/core/language-detect.js +135 -0
  135. package/dist/core/links.d.ts +10 -0
  136. package/dist/core/links.js +44 -0
  137. package/dist/core/llm-extract.d.ts +71 -0
  138. package/dist/core/llm-extract.js +507 -0
  139. package/dist/core/llm-provider.d.ts +100 -0
  140. package/dist/core/llm-provider.js +702 -0
  141. package/dist/core/local-search.d.ts +60 -0
  142. package/dist/core/local-search.js +308 -0
  143. package/dist/core/logger.d.ts +28 -0
  144. package/dist/core/logger.js +104 -0
  145. package/dist/core/map.d.ts +33 -0
  146. package/dist/core/map.js +127 -0
  147. package/dist/core/markdown.d.ts +92 -0
  148. package/dist/core/markdown.js +809 -0
  149. package/dist/core/metadata.d.ts +34 -0
  150. package/dist/core/metadata.js +422 -0
  151. package/dist/core/observe.d.ts +113 -0
  152. package/dist/core/observe.js +395 -0
  153. package/dist/core/ocr.d.ts +12 -0
  154. package/dist/core/ocr.js +33 -0
  155. package/dist/core/paginate.d.ts +31 -0
  156. package/dist/core/paginate.js +106 -0
  157. package/dist/core/pdf.d.ts +8 -0
  158. package/dist/core/pdf.js +25 -0
  159. package/dist/core/peel-tls.d.ts +25 -0
  160. package/dist/core/peel-tls.js +220 -0
  161. package/dist/core/pipeline.d.ts +132 -0
  162. package/dist/core/pipeline.js +1666 -0
  163. package/dist/core/profiles.d.ts +61 -0
  164. package/dist/core/profiles.js +350 -0
  165. package/dist/core/prompt-guard.d.ts +30 -0
  166. package/dist/core/prompt-guard.js +119 -0
  167. package/dist/core/proxy-config.d.ts +90 -0
  168. package/dist/core/proxy-config.js +172 -0
  169. package/dist/core/quick-answer.d.ts +53 -0
  170. package/dist/core/quick-answer.js +833 -0
  171. package/dist/core/rate-governor.d.ts +80 -0
  172. package/dist/core/rate-governor.js +238 -0
  173. package/dist/core/readability.d.ts +57 -0
  174. package/dist/core/readability.js +533 -0
  175. package/dist/core/research.d.ts +66 -0
  176. package/dist/core/research.js +270 -0
  177. package/dist/core/retry.d.ts +60 -0
  178. package/dist/core/retry.js +119 -0
  179. package/dist/core/safe-browsing.d.ts +30 -0
  180. package/dist/core/safe-browsing.js +206 -0
  181. package/dist/core/schema-extraction.d.ts +66 -0
  182. package/dist/core/schema-extraction.js +352 -0
  183. package/dist/core/schema-postprocess.d.ts +32 -0
  184. package/dist/core/schema-postprocess.js +469 -0
  185. package/dist/core/schema-templates.d.ts +19 -0
  186. package/dist/core/schema-templates.js +143 -0
  187. package/dist/core/screenshot.d.ts +224 -0
  188. package/dist/core/screenshot.js +207 -0
  189. package/dist/core/search-engines.d.ts +25 -0
  190. package/dist/core/search-engines.js +182 -0
  191. package/dist/core/search-provider.d.ts +243 -0
  192. package/dist/core/search-provider.js +1629 -0
  193. package/dist/core/searxng-provider.d.ts +35 -0
  194. package/dist/core/searxng-provider.js +105 -0
  195. package/dist/core/selective-evidence.d.ts +151 -0
  196. package/dist/core/selective-evidence.js +389 -0
  197. package/dist/core/site-search.d.ts +44 -0
  198. package/dist/core/site-search.js +252 -0
  199. package/dist/core/sitemap.d.ts +23 -0
  200. package/dist/core/sitemap.js +105 -0
  201. package/dist/core/source-credibility.d.ts +29 -0
  202. package/dist/core/source-credibility.js +584 -0
  203. package/dist/core/source-scoring.d.ts +166 -0
  204. package/dist/core/source-scoring.js +396 -0
  205. package/dist/core/stemmer.d.ts +38 -0
  206. package/dist/core/stemmer.js +509 -0
  207. package/dist/core/strategies.d.ts +104 -0
  208. package/dist/core/strategies.js +1044 -0
  209. package/dist/core/strategy-hooks.d.ts +145 -0
  210. package/dist/core/strategy-hooks.js +74 -0
  211. package/dist/core/structured-extract.d.ts +43 -0
  212. package/dist/core/structured-extract.js +550 -0
  213. package/dist/core/summarize.d.ts +17 -0
  214. package/dist/core/summarize.js +78 -0
  215. package/dist/core/synonyms.d.ts +42 -0
  216. package/dist/core/synonyms.js +184 -0
  217. package/dist/core/system-monitor.d.ts +61 -0
  218. package/dist/core/system-monitor.js +133 -0
  219. package/dist/core/table-format.d.ts +30 -0
  220. package/dist/core/table-format.js +146 -0
  221. package/dist/core/threat-feeds.d.ts +23 -0
  222. package/dist/core/threat-feeds.js +104 -0
  223. package/dist/core/timing.d.ts +21 -0
  224. package/dist/core/timing.js +33 -0
  225. package/dist/core/transcript-export.d.ts +47 -0
  226. package/dist/core/transcript-export.js +107 -0
  227. package/dist/core/user-agents.d.ts +82 -0
  228. package/dist/core/user-agents.js +239 -0
  229. package/dist/core/vertical-search.d.ts +54 -0
  230. package/dist/core/vertical-search.js +158 -0
  231. package/dist/core/watch-manager.d.ts +175 -0
  232. package/dist/core/watch-manager.js +416 -0
  233. package/dist/core/watch.d.ts +101 -0
  234. package/dist/core/watch.js +389 -0
  235. package/dist/core/youtube.d.ts +130 -0
  236. package/dist/core/youtube.js +1175 -0
  237. package/dist/ee/challenge-re-export.d.ts +1 -0
  238. package/dist/ee/challenge-re-export.js +1 -0
  239. package/dist/ee/challenge-solver.d.ts +72 -0
  240. package/dist/ee/challenge-solver.js +720 -0
  241. package/dist/ee/domain-extractors.d.ts +8 -0
  242. package/dist/ee/domain-extractors.js +8 -0
  243. package/dist/ee/domain-intel.d.ts +16 -0
  244. package/dist/ee/domain-intel.js +133 -0
  245. package/dist/ee/extractors/allrecipes.d.ts +2 -0
  246. package/dist/ee/extractors/allrecipes.js +120 -0
  247. package/dist/ee/extractors/amazon.d.ts +2 -0
  248. package/dist/ee/extractors/amazon.js +78 -0
  249. package/dist/ee/extractors/arxiv.d.ts +2 -0
  250. package/dist/ee/extractors/arxiv.js +137 -0
  251. package/dist/ee/extractors/bestbuy.d.ts +2 -0
  252. package/dist/ee/extractors/bestbuy.js +78 -0
  253. package/dist/ee/extractors/carscom.d.ts +2 -0
  254. package/dist/ee/extractors/carscom.js +121 -0
  255. package/dist/ee/extractors/coingecko.d.ts +2 -0
  256. package/dist/ee/extractors/coingecko.js +134 -0
  257. package/dist/ee/extractors/craigslist.d.ts +2 -0
  258. package/dist/ee/extractors/craigslist.js +92 -0
  259. package/dist/ee/extractors/devto.d.ts +2 -0
  260. package/dist/ee/extractors/devto.js +135 -0
  261. package/dist/ee/extractors/ebay.d.ts +2 -0
  262. package/dist/ee/extractors/ebay.js +90 -0
  263. package/dist/ee/extractors/espn.d.ts +2 -0
  264. package/dist/ee/extractors/espn.js +260 -0
  265. package/dist/ee/extractors/etsy.d.ts +2 -0
  266. package/dist/ee/extractors/etsy.js +52 -0
  267. package/dist/ee/extractors/facebook.d.ts +2 -0
  268. package/dist/ee/extractors/facebook.js +46 -0
  269. package/dist/ee/extractors/github.d.ts +2 -0
  270. package/dist/ee/extractors/github.js +196 -0
  271. package/dist/ee/extractors/google-flights.d.ts +2 -0
  272. package/dist/ee/extractors/google-flights.js +176 -0
  273. package/dist/ee/extractors/hackernews.d.ts +2 -0
  274. package/dist/ee/extractors/hackernews.js +147 -0
  275. package/dist/ee/extractors/imdb.d.ts +2 -0
  276. package/dist/ee/extractors/imdb.js +172 -0
  277. package/dist/ee/extractors/index.d.ts +26 -0
  278. package/dist/ee/extractors/index.js +247 -0
  279. package/dist/ee/extractors/instagram.d.ts +2 -0
  280. package/dist/ee/extractors/instagram.js +102 -0
  281. package/dist/ee/extractors/kalshi.d.ts +2 -0
  282. package/dist/ee/extractors/kalshi.js +121 -0
  283. package/dist/ee/extractors/kayak-cars.d.ts +2 -0
  284. package/dist/ee/extractors/kayak-cars.js +270 -0
  285. package/dist/ee/extractors/linkedin.d.ts +2 -0
  286. package/dist/ee/extractors/linkedin.js +113 -0
  287. package/dist/ee/extractors/medium.d.ts +2 -0
  288. package/dist/ee/extractors/medium.js +130 -0
  289. package/dist/ee/extractors/news.d.ts +4 -0
  290. package/dist/ee/extractors/news.js +173 -0
  291. package/dist/ee/extractors/npm.d.ts +2 -0
  292. package/dist/ee/extractors/npm.js +86 -0
  293. package/dist/ee/extractors/pdf.d.ts +2 -0
  294. package/dist/ee/extractors/pdf.js +108 -0
  295. package/dist/ee/extractors/pinterest.d.ts +2 -0
  296. package/dist/ee/extractors/pinterest.js +34 -0
  297. package/dist/ee/extractors/polymarket.d.ts +2 -0
  298. package/dist/ee/extractors/polymarket.js +358 -0
  299. package/dist/ee/extractors/producthunt.d.ts +2 -0
  300. package/dist/ee/extractors/producthunt.js +88 -0
  301. package/dist/ee/extractors/pubmed.d.ts +2 -0
  302. package/dist/ee/extractors/pubmed.js +162 -0
  303. package/dist/ee/extractors/pypi.d.ts +2 -0
  304. package/dist/ee/extractors/pypi.js +80 -0
  305. package/dist/ee/extractors/reddit.d.ts +2 -0
  306. package/dist/ee/extractors/reddit.js +438 -0
  307. package/dist/ee/extractors/redfin.d.ts +2 -0
  308. package/dist/ee/extractors/redfin.js +156 -0
  309. package/dist/ee/extractors/semanticscholar.d.ts +2 -0
  310. package/dist/ee/extractors/semanticscholar.js +131 -0
  311. package/dist/ee/extractors/shared.d.ts +12 -0
  312. package/dist/ee/extractors/shared.js +76 -0
  313. package/dist/ee/extractors/soundcloud.d.ts +2 -0
  314. package/dist/ee/extractors/soundcloud.js +34 -0
  315. package/dist/ee/extractors/sportsbetting.d.ts +2 -0
  316. package/dist/ee/extractors/sportsbetting.js +37 -0
  317. package/dist/ee/extractors/spotify.d.ts +2 -0
  318. package/dist/ee/extractors/spotify.js +34 -0
  319. package/dist/ee/extractors/stackoverflow.d.ts +2 -0
  320. package/dist/ee/extractors/stackoverflow.js +61 -0
  321. package/dist/ee/extractors/substack.d.ts +2 -0
  322. package/dist/ee/extractors/substack.js +115 -0
  323. package/dist/ee/extractors/substackroot.d.ts +2 -0
  324. package/dist/ee/extractors/substackroot.js +46 -0
  325. package/dist/ee/extractors/tiktok.d.ts +2 -0
  326. package/dist/ee/extractors/tiktok.js +29 -0
  327. package/dist/ee/extractors/tradingview.d.ts +2 -0
  328. package/dist/ee/extractors/tradingview.js +182 -0
  329. package/dist/ee/extractors/twitch.d.ts +2 -0
  330. package/dist/ee/extractors/twitch.js +36 -0
  331. package/dist/ee/extractors/twitter.d.ts +2 -0
  332. package/dist/ee/extractors/twitter.js +327 -0
  333. package/dist/ee/extractors/types.d.ts +14 -0
  334. package/dist/ee/extractors/types.js +1 -0
  335. package/dist/ee/extractors/walmart.d.ts +2 -0
  336. package/dist/ee/extractors/walmart.js +50 -0
  337. package/dist/ee/extractors/weather.d.ts +2 -0
  338. package/dist/ee/extractors/weather.js +133 -0
  339. package/dist/ee/extractors/wikipedia.d.ts +4 -0
  340. package/dist/ee/extractors/wikipedia.js +235 -0
  341. package/dist/ee/extractors/yelp.d.ts +2 -0
  342. package/dist/ee/extractors/yelp.js +216 -0
  343. package/dist/ee/extractors/youtube.d.ts +2 -0
  344. package/dist/ee/extractors/youtube.js +189 -0
  345. package/dist/ee/extractors/zillow.d.ts +54 -0
  346. package/dist/ee/extractors/zillow.js +247 -0
  347. package/dist/ee/extractors-re-export.d.ts +1 -0
  348. package/dist/ee/extractors-re-export.js +1 -0
  349. package/dist/ee/premium-hooks.d.ts +20 -0
  350. package/dist/ee/premium-hooks.js +50 -0
  351. package/dist/ee/spa-detection.d.ts +2 -0
  352. package/dist/ee/spa-detection.js +2 -0
  353. package/dist/ee/stability.d.ts +4 -0
  354. package/dist/ee/stability.js +29 -0
  355. package/dist/ee/swr-cache.d.ts +14 -0
  356. package/dist/ee/swr-cache.js +34 -0
  357. package/dist/index.d.ts +143 -0
  358. package/dist/index.js +291 -0
  359. package/dist/integrations/index.d.ts +2 -0
  360. package/dist/integrations/index.js +2 -0
  361. package/dist/integrations/langchain.d.ts +64 -0
  362. package/dist/integrations/langchain.js +115 -0
  363. package/dist/integrations/llamaindex.d.ts +50 -0
  364. package/dist/integrations/llamaindex.js +91 -0
  365. package/dist/mcp/handlers/act.d.ts +5 -0
  366. package/dist/mcp/handlers/act.js +34 -0
  367. package/dist/mcp/handlers/definitions.d.ts +6 -0
  368. package/dist/mcp/handlers/definitions.js +395 -0
  369. package/dist/mcp/handlers/extract.d.ts +7 -0
  370. package/dist/mcp/handlers/extract.js +135 -0
  371. package/dist/mcp/handlers/fetch.d.ts +6 -0
  372. package/dist/mcp/handlers/fetch.js +98 -0
  373. package/dist/mcp/handlers/find.d.ts +5 -0
  374. package/dist/mcp/handlers/find.js +137 -0
  375. package/dist/mcp/handlers/index.d.ts +13 -0
  376. package/dist/mcp/handlers/index.js +63 -0
  377. package/dist/mcp/handlers/legacy.d.ts +25 -0
  378. package/dist/mcp/handlers/legacy.js +450 -0
  379. package/dist/mcp/handlers/meta.d.ts +6 -0
  380. package/dist/mcp/handlers/meta.js +40 -0
  381. package/dist/mcp/handlers/monitor.d.ts +5 -0
  382. package/dist/mcp/handlers/monitor.js +41 -0
  383. package/dist/mcp/handlers/observe.d.ts +8 -0
  384. package/dist/mcp/handlers/observe.js +37 -0
  385. package/dist/mcp/handlers/read.d.ts +6 -0
  386. package/dist/mcp/handlers/read.js +78 -0
  387. package/dist/mcp/handlers/see.d.ts +5 -0
  388. package/dist/mcp/handlers/see.js +75 -0
  389. package/dist/mcp/handlers/types.d.ts +29 -0
  390. package/dist/mcp/handlers/types.js +28 -0
  391. package/dist/mcp/server.d.ts +7 -0
  392. package/dist/mcp/server.js +108 -0
  393. package/dist/mcp/smart-router.d.ts +23 -0
  394. package/dist/mcp/smart-router.js +178 -0
  395. package/dist/server/app.d.ts +14 -0
  396. package/dist/server/app.js +632 -0
  397. package/dist/server/auth-store.d.ts +28 -0
  398. package/dist/server/auth-store.js +88 -0
  399. package/dist/server/bull-queues.d.ts +60 -0
  400. package/dist/server/bull-queues.js +90 -0
  401. package/dist/server/email-service.d.ts +55 -0
  402. package/dist/server/email-service.js +291 -0
  403. package/dist/server/job-queue.d.ts +100 -0
  404. package/dist/server/job-queue.js +145 -0
  405. package/dist/server/logger.d.ts +10 -0
  406. package/dist/server/logger.js +37 -0
  407. package/dist/server/middleware/audit-log.d.ts +14 -0
  408. package/dist/server/middleware/audit-log.js +73 -0
  409. package/dist/server/middleware/auth.d.ts +35 -0
  410. package/dist/server/middleware/auth.js +225 -0
  411. package/dist/server/middleware/rate-limit.d.ts +50 -0
  412. package/dist/server/middleware/rate-limit.js +270 -0
  413. package/dist/server/middleware/scope-guard.d.ts +25 -0
  414. package/dist/server/middleware/scope-guard.js +45 -0
  415. package/dist/server/middleware/url-validator.d.ts +15 -0
  416. package/dist/server/middleware/url-validator.js +201 -0
  417. package/dist/server/openapi.yaml +6418 -0
  418. package/dist/server/pg-auth-store.d.ts +146 -0
  419. package/dist/server/pg-auth-store.js +576 -0
  420. package/dist/server/pg-job-queue.d.ts +59 -0
  421. package/dist/server/pg-job-queue.js +375 -0
  422. package/dist/server/routes/activity.d.ts +6 -0
  423. package/dist/server/routes/activity.js +79 -0
  424. package/dist/server/routes/admin-active.d.ts +7 -0
  425. package/dist/server/routes/admin-active.js +120 -0
  426. package/dist/server/routes/admin-stats.d.ts +7 -0
  427. package/dist/server/routes/admin-stats.js +176 -0
  428. package/dist/server/routes/agent.d.ts +24 -0
  429. package/dist/server/routes/agent.js +480 -0
  430. package/dist/server/routes/answer.d.ts +5 -0
  431. package/dist/server/routes/answer.js +125 -0
  432. package/dist/server/routes/ask.d.ts +28 -0
  433. package/dist/server/routes/ask.js +295 -0
  434. package/dist/server/routes/batch.d.ts +6 -0
  435. package/dist/server/routes/batch.js +493 -0
  436. package/dist/server/routes/cache-warm.d.ts +25 -0
  437. package/dist/server/routes/cache-warm.js +212 -0
  438. package/dist/server/routes/cli-usage.d.ts +6 -0
  439. package/dist/server/routes/cli-usage.js +127 -0
  440. package/dist/server/routes/compat.d.ts +23 -0
  441. package/dist/server/routes/compat.js +652 -0
  442. package/dist/server/routes/crawl.d.ts +13 -0
  443. package/dist/server/routes/crawl.js +287 -0
  444. package/dist/server/routes/deep-fetch.d.ts +8 -0
  445. package/dist/server/routes/deep-fetch.js +57 -0
  446. package/dist/server/routes/deep-research.d.ts +11 -0
  447. package/dist/server/routes/deep-research.js +232 -0
  448. package/dist/server/routes/demo.d.ts +24 -0
  449. package/dist/server/routes/demo.js +517 -0
  450. package/dist/server/routes/do.d.ts +8 -0
  451. package/dist/server/routes/do.js +72 -0
  452. package/dist/server/routes/extract.d.ts +14 -0
  453. package/dist/server/routes/extract.js +325 -0
  454. package/dist/server/routes/feed.d.ts +15 -0
  455. package/dist/server/routes/feed.js +311 -0
  456. package/dist/server/routes/fetch-queue.d.ts +13 -0
  457. package/dist/server/routes/fetch-queue.js +357 -0
  458. package/dist/server/routes/fetch.d.ts +7 -0
  459. package/dist/server/routes/fetch.js +1274 -0
  460. package/dist/server/routes/go.d.ts +14 -0
  461. package/dist/server/routes/go.js +81 -0
  462. package/dist/server/routes/health.d.ts +11 -0
  463. package/dist/server/routes/health.js +141 -0
  464. package/dist/server/routes/jobs.d.ts +7 -0
  465. package/dist/server/routes/jobs.js +574 -0
  466. package/dist/server/routes/map.d.ts +11 -0
  467. package/dist/server/routes/map.js +116 -0
  468. package/dist/server/routes/mcp.d.ts +14 -0
  469. package/dist/server/routes/mcp.js +197 -0
  470. package/dist/server/routes/metrics.d.ts +37 -0
  471. package/dist/server/routes/metrics.js +149 -0
  472. package/dist/server/routes/oauth.d.ts +9 -0
  473. package/dist/server/routes/oauth.js +396 -0
  474. package/dist/server/routes/playground.d.ts +17 -0
  475. package/dist/server/routes/playground.js +283 -0
  476. package/dist/server/routes/reader.d.ts +18 -0
  477. package/dist/server/routes/reader.js +192 -0
  478. package/dist/server/routes/research.d.ts +14 -0
  479. package/dist/server/routes/research.js +482 -0
  480. package/dist/server/routes/screenshot.d.ts +22 -0
  481. package/dist/server/routes/screenshot.js +820 -0
  482. package/dist/server/routes/search.d.ts +6 -0
  483. package/dist/server/routes/search.js +874 -0
  484. package/dist/server/routes/session.d.ts +17 -0
  485. package/dist/server/routes/session.js +548 -0
  486. package/dist/server/routes/share.d.ts +18 -0
  487. package/dist/server/routes/share.js +462 -0
  488. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  489. package/dist/server/routes/smart-search/handlers/cars.js +102 -0
  490. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  491. package/dist/server/routes/smart-search/handlers/flights.js +72 -0
  492. package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
  493. package/dist/server/routes/smart-search/handlers/general.js +717 -0
  494. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  495. package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
  496. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  497. package/dist/server/routes/smart-search/handlers/products.js +1309 -0
  498. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  499. package/dist/server/routes/smart-search/handlers/rental.js +154 -0
  500. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  501. package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
  502. package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
  503. package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
  504. package/dist/server/routes/smart-search/index.d.ts +19 -0
  505. package/dist/server/routes/smart-search/index.js +546 -0
  506. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  507. package/dist/server/routes/smart-search/intent.js +264 -0
  508. package/dist/server/routes/smart-search/llm.d.ts +16 -0
  509. package/dist/server/routes/smart-search/llm.js +70 -0
  510. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  511. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  512. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  513. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  514. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  515. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  516. package/dist/server/routes/smart-search/types.d.ts +81 -0
  517. package/dist/server/routes/smart-search/types.js +1 -0
  518. package/dist/server/routes/smart-search/utils.d.ts +20 -0
  519. package/dist/server/routes/smart-search/utils.js +146 -0
  520. package/dist/server/routes/stats.d.ts +6 -0
  521. package/dist/server/routes/stats.js +71 -0
  522. package/dist/server/routes/stripe.d.ts +15 -0
  523. package/dist/server/routes/stripe.js +296 -0
  524. package/dist/server/routes/transcript-export.d.ts +10 -0
  525. package/dist/server/routes/transcript-export.js +178 -0
  526. package/dist/server/routes/usage.d.ts +9 -0
  527. package/dist/server/routes/usage.js +279 -0
  528. package/dist/server/routes/users.d.ts +8 -0
  529. package/dist/server/routes/users.js +1867 -0
  530. package/dist/server/routes/watch.d.ts +15 -0
  531. package/dist/server/routes/watch.js +309 -0
  532. package/dist/server/routes/webhooks.d.ts +26 -0
  533. package/dist/server/routes/webhooks.js +170 -0
  534. package/dist/server/routes/youtube.d.ts +6 -0
  535. package/dist/server/routes/youtube.js +130 -0
  536. package/dist/server/sentry.d.ts +14 -0
  537. package/dist/server/sentry.js +104 -0
  538. package/dist/server/types.d.ts +15 -0
  539. package/dist/server/types.js +7 -0
  540. package/dist/server/utils/response.d.ts +44 -0
  541. package/dist/server/utils/response.js +69 -0
  542. package/dist/server/utils/sse.d.ts +22 -0
  543. package/dist/server/utils/sse.js +38 -0
  544. package/dist/types.d.ts +552 -0
  545. package/dist/types.js +39 -0
  546. package/llms.txt +105 -0
  547. package/package.json +189 -0
@@ -0,0 +1,34 @@
1
+ /**
2
+ * Extract structured metadata from HTML
3
+ */
4
+ import type { PageMetadata } from '../types.js';
5
+ /**
6
+ * Detect and fix concatenated titles where two titles are smashed together
7
+ * without a separator (e.g. "The Performance of Open Source SoftwareHigh Performance Networking in Chrome").
8
+ * Heuristic: split at lowercase→uppercase boundary if it looks like two distinct titles.
9
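+ * Returns the second segment (typically the page-specific title) when both segments are longer than 10 characters; otherwise returns the title unchanged.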
10
+ */
11
+ export declare function cleanConcatenatedTitle(title: string): string;
12
+ /**
13
+ * Extract all links from page
14
+ * Returns absolute URLs, deduplicated
15
+ */
16
+ export declare function extractLinks(html: string, baseUrl: string): string[];
17
+ /**
18
+ * Extract all images from HTML
19
+ * Resolves relative URLs to absolute and extracts metadata
20
+ *
21
+ * @param html - HTML to extract images from
22
+ * @param baseUrl - Base URL for resolving relative paths
23
+ * @returns Array of image information, deduplicated by src
24
+ */
25
+ export declare function extractImages(html: string, baseUrl: string): import('../types.js').ImageInfo[];
26
+ /**
27
+ * Extract all metadata from HTML.
28
+ * Optimization: only parse the <head> section with cheerio (avoids full DOM parse).
29
+ * Falls back to full HTML if head section is not found or produces no title.
30
+ */
31
+ export declare function extractMetadata(html: string, _url: string): {
32
+ title: string;
33
+ metadata: PageMetadata;
34
+ };
@@ -0,0 +1,422 @@
1
+ /**
2
+ * Extract structured metadata from HTML
3
+ */
4
+ import * as cheerio from 'cheerio';
5
/**
 * Repair a title that is really two titles glued together with no separator,
 * e.g. "The Performance of Open Source SoftwareHigh Performance Networking in Chrome".
 *
 * The title is split at the last lowercase→uppercase boundary that is followed
 * by a lowercase letter. When both halves are longer than 10 characters the
 * second half is returned (it is typically the page-specific title, the first
 * being the site/book title). In every other case the input comes back unchanged.
 *
 * NOTE(review): the pattern cannot tell a glued boundary from an ordinary
 * camelCase word such as "JavaScript"; only the length check guards against it.
 *
 * @param title - Raw title text.
 * @returns The second segment when a split applies, otherwise `title` as given.
 */
export function cleanConcatenatedTitle(title) {
    if (title) {
        // The first group is greedy, so the split lands on the LAST qualifying boundary.
        const segments = title.match(/^(.+[a-z])([A-Z][a-z].+)$/);
        const looksLikeTwoTitles = segments !== null
            && segments[1].length > 10
            && segments[2].length > 10;
        if (looksLikeTwoTitles) {
            return segments[2];
        }
    }
    return title;
}
31
/**
 * Extract the page title using the fallback chain
 * og:title → twitter:title → <title> tag → first <h1>.
 *
 * Every candidate is trimmed BEFORE it is tested, so a blank value
 * (e.g. `content="  "`) falls through to the next source instead of being
 * returned as an empty title. Only the <title> text is passed through
 * `cleanConcatenatedTitle`.
 *
 * @param $ - Loaded cheerio document.
 * @returns The first non-blank title found, or '' when none exists.
 */
function extractTitle($) {
    const metaSelectors = ['meta[property="og:title"]', 'meta[name="twitter:title"]'];
    for (const selector of metaSelectors) {
        const candidate = $(selector).attr('content')?.trim();
        if (candidate)
            return candidate;
    }
    const tagTitle = $('title').text().trim();
    if (tagTitle)
        return cleanConcatenatedTitle(tagTitle);
    // Last resort: first heading on the page ('' when there is none).
    return $('h1').first().text().trim();
}
54
+ /**
55
+ * Extract page description using fallback chain:
56
+ * og:description → twitter:description → meta description
57
+ */
58
+ function extractDescription($) {
59
+ // Try Open Graph description
60
+ let desc = $('meta[property="og:description"]').attr('content');
61
+ if (desc)
62
+ return desc.trim();
63
+ // Try Twitter description
64
+ desc = $('meta[name="twitter:description"]').attr('content');
65
+ if (desc)
66
+ return desc.trim();
67
+ // Try standard meta description
68
+ desc = $('meta[name="description"]').attr('content');
69
+ if (desc)
70
+ return desc.trim();
71
+ return undefined;
72
+ }
73
+ /**
74
+ * Extract author from meta tags
75
+ */
76
+ function extractAuthor($) {
77
+ // Try article:author
78
+ let author = $('meta[property="article:author"]').attr('content');
79
+ if (author)
80
+ return author.trim();
81
+ // Try og:article:author
82
+ author = $('meta[property="og:article:author"]').attr('content');
83
+ if (author)
84
+ return author.trim();
85
+ // Try author meta tag
86
+ author = $('meta[name="author"]').attr('content');
87
+ if (author)
88
+ return author.trim();
89
+ // Try twitter:creator
90
+ author = $('meta[name="twitter:creator"]').attr('content');
91
+ if (author)
92
+ return author.trim();
93
+ return undefined;
94
+ }
95
+ /**
96
+ * Extract publish date from rich meta sources
97
+ * Returns ISO 8601 date string if found
98
+ */
99
+ function extractPublishDate($, _html) {
100
+ // Try article:published_time
101
+ let published = $('meta[property="article:published_time"]').attr('content');
102
+ if (published) {
103
+ try {
104
+ return new Date(published).toISOString();
105
+ }
106
+ catch { /* ignore */ }
107
+ }
108
+ // Try meta name="date"
109
+ published = $('meta[name="date"]').attr('content');
110
+ if (published) {
111
+ try {
112
+ return new Date(published).toISOString();
113
+ }
114
+ catch { /* ignore */ }
115
+ }
116
+ // Try og:updated_time
117
+ published = $('meta[property="og:updated_time"]').attr('content');
118
+ if (published) {
119
+ try {
120
+ return new Date(published).toISOString();
121
+ }
122
+ catch { /* ignore */ }
123
+ }
124
+ // Try <time pubdate> or <time datetime> with pubdate attribute
125
+ const timeEl = $('time[pubdate], time[datetime][pubdate]').first();
126
+ const datetime = timeEl.attr('datetime') || timeEl.attr('content');
127
+ if (datetime) {
128
+ try {
129
+ return new Date(datetime).toISOString();
130
+ }
131
+ catch { /* ignore */ }
132
+ }
133
+ // Try JSON-LD datePublished
134
+ $('script[type="application/ld+json"]').each((_, el) => {
135
+ if (published)
136
+ return;
137
+ try {
138
+ const json = JSON.parse($(el).html() || '{}');
139
+ const date = json.datePublished || json.publishDate || (json['@graph'] && json['@graph'].find?.((n) => n.datePublished)?.datePublished);
140
+ if (date) {
141
+ published = new Date(date).toISOString();
142
+ }
143
+ }
144
+ catch { /* ignore */ }
145
+ });
146
+ if (published)
147
+ return published;
148
+ return undefined;
149
+ }
150
+ /**
151
+ * Extract page language
152
+ */
153
+ function extractLanguage($) {
154
+ // Try html lang attribute
155
+ const htmlLang = $('html').attr('lang');
156
+ if (htmlLang)
157
+ return htmlLang.trim();
158
+ // Try Content-Language meta
159
+ const contentLang = $('meta[http-equiv="Content-Language"]').attr('content');
160
+ if (contentLang)
161
+ return contentLang.trim();
162
+ // Try og:locale (convert underscore to hyphen, e.g. "en_US" → "en-US")
163
+ const ogLocale = $('meta[property="og:locale"]').attr('content');
164
+ if (ogLocale)
165
+ return ogLocale.trim().replace('_', '-');
166
+ return undefined;
167
+ }
168
/**
 * Count words in the visible text of an HTML string.
 *
 * Script/style blocks, HTML comments and all tags are removed, a handful of
 * common entities are decoded, and the remainder is split on whitespace.
 * Comments are stripped explicitly: the generic tag pattern stops at the first
 * `>` and would otherwise leave commented-out text (and the `-->` residue)
 * in the count.
 *
 * @param html - Raw HTML.
 * @returns Number of whitespace-separated words (0 for empty/blank input).
 */
function extractWordCount(html) {
    const text = html
        // Remove script and style content
        .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, ' ')
        .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, ' ')
        // Remove HTML comments (not visible text)
        .replace(/<!--[\s\S]*?-->/g, ' ')
        // Remove all remaining HTML tags
        .replace(/<[^>]+>/g, ' ')
        // Decode common entities; &amp; goes last so "&amp;nbsp;" is not decoded twice
        .replace(/&nbsp;/g, ' ')
        .replace(/&lt;/g, '<')
        .replace(/&gt;/g, '>')
        .replace(/&quot;/g, '"')
        .replace(/&amp;/g, '&')
        .trim();
    return text === '' ? 0 : text.split(/\s+/).length;
}
191
+ /**
192
+ * Extract published date from meta tags
193
+ * Returns ISO 8601 date string if found
194
+ */
195
+ function extractPublished($) {
196
+ // Try article:published_time
197
+ let published = $('meta[property="article:published_time"]').attr('content');
198
+ if (published) {
199
+ try {
200
+ return new Date(published).toISOString();
201
+ }
202
+ catch (e) {
203
+ if (process.env.DEBUG)
204
+ console.debug('[webpeel]', 'date parse failed:', e instanceof Error ? e.message : e);
205
+ }
206
+ }
207
+ // Try datePublished schema.org
208
+ published = $('meta[itemprop="datePublished"]').attr('content');
209
+ if (published) {
210
+ try {
211
+ return new Date(published).toISOString();
212
+ }
213
+ catch (e) {
214
+ if (process.env.DEBUG)
215
+ console.debug('[webpeel]', 'date parse failed:', e instanceof Error ? e.message : e);
216
+ }
217
+ }
218
+ return undefined;
219
+ }
220
+ /**
221
+ * Extract Open Graph image URL
222
+ */
223
+ function extractImage($) {
224
+ // Try og:image
225
+ let image = $('meta[property="og:image"]').attr('content');
226
+ if (image)
227
+ return image.trim();
228
+ // Try twitter:image
229
+ image = $('meta[name="twitter:image"]').attr('content');
230
+ if (image)
231
+ return image.trim();
232
+ return undefined;
233
+ }
234
+ /**
235
+ * Extract canonical URL
236
+ */
237
+ function extractCanonical($) {
238
+ const canonical = $('link[rel="canonical"]').attr('href');
239
+ if (canonical)
240
+ return canonical.trim();
241
+ // Fallback to og:url
242
+ const ogUrl = $('meta[property="og:url"]').attr('content');
243
+ if (ogUrl)
244
+ return ogUrl.trim();
245
+ return undefined;
246
+ }
247
/**
 * Extract all links (`<a href>`) from the page.
 *
 * Each href is resolved against `baseUrl`; only http/https results are kept,
 * and links that merely point at a fragment of the base page (same origin,
 * path and query, with a hash) are skipped.
 *
 * The base URL is parsed once up front instead of once per link. An invalid
 * base makes every resolution fail, so the result is an empty list either way.
 *
 * @param html - HTML to scan.
 * @param baseUrl - Absolute URL used to resolve relative hrefs.
 * @returns Absolute URLs, deduplicated and sorted.
 */
export function extractLinks(html, baseUrl) {
    let base;
    try {
        base = new URL(baseUrl);
    }
    catch (e) {
        if (process.env.DEBUG)
            console.debug('[webpeel]', 'url parse failed:', e instanceof Error ? e.message : e);
        return [];
    }
    const $ = cheerio.load(html);
    const links = new Set();
    $('a[href]').each((_, elem) => {
        const href = $(elem).attr('href');
        if (!href)
            return;
        try {
            const absoluteUrl = new URL(href, base);
            // SECURITY: Only allow HTTP and HTTPS protocols
            if (!['http:', 'https:'].includes(absoluteUrl.protocol)) {
                return;
            }
            // Skip anchor-only links (e.g., href="#section")
            const pointsIntoBasePage = absoluteUrl.hash !== ''
                && absoluteUrl.origin === base.origin
                && absoluteUrl.pathname === base.pathname
                && absoluteUrl.search === base.search;
            if (pointsIntoBasePage) {
                return;
            }
            links.add(absoluteUrl.href);
        }
        catch (e) {
            if (process.env.DEBUG)
                console.debug('[webpeel]', 'url parse failed:', e instanceof Error ? e.message : e);
        }
    });
    return Array.from(links).sort();
}
281
/**
 * Extract all images from HTML.
 *
 * Three sources are scanned, in this order:
 *   1. `<img src>` — with alt, title and numeric width/height attributes
 *   2. `<picture><source srcset>` — each srcset candidate, alt taken from the
 *      `<img>` inside the same `<picture>`
 *   3. inline `style` attributes mentioning "background" — every `url(...)`
 *
 * URLs are resolved against `baseUrl` and only http/https results are kept.
 * Results are deduplicated by absolute URL. Among `<img>` tags the last
 * occurrence wins; the `<source>` and background passes only ADD urls not seen
 * yet, so they never replace a richer `<img>` entry with a bare `{ src, alt }`.
 *
 * NOTE(review): srcset is split on ',' so candidate URLs that themselves
 * contain commas are not handled.
 *
 * @param html - HTML to extract images from
 * @param baseUrl - Base URL for resolving relative paths
 * @returns Array of image information, deduplicated by src
 */
export function extractImages(html, baseUrl) {
    const $ = cheerio.load(html);
    const images = new Map();
    // Resolve `rawUrl`, apply the protocol filter and store the entry.
    const addImage = (rawUrl, details, overwrite) => {
        try {
            const absoluteUrl = new URL(rawUrl, baseUrl);
            // SECURITY: Only allow HTTP and HTTPS protocols
            if (!['http:', 'https:'].includes(absoluteUrl.protocol)) {
                return;
            }
            if (!overwrite && images.has(absoluteUrl.href)) {
                return;
            }
            images.set(absoluteUrl.href, { src: absoluteUrl.href, ...details });
        }
        catch (e) {
            if (process.env.DEBUG)
                console.debug('[webpeel]', 'url parse failed:', e instanceof Error ? e.message : e);
        }
    };
    // Parse a width/height attribute; missing, non-numeric and zero values become undefined.
    const toDimension = (value) => {
        const parsed = value ? parseInt(value, 10) : undefined;
        return parsed && !isNaN(parsed) ? parsed : undefined;
    };
    // Extract <img> tags
    $('img[src]').each((_, elem) => {
        const $img = $(elem);
        const src = $img.attr('src');
        if (!src)
            return;
        addImage(src, {
            alt: $img.attr('alt') || '',
            title: $img.attr('title'),
            width: toDimension($img.attr('width')),
            height: toDimension($img.attr('height')),
        }, true);
    });
    // Extract <picture><source> tags
    $('picture source[srcset]').each((_, elem) => {
        const $source = $(elem);
        const srcset = $source.attr('srcset');
        if (!srcset)
            return;
        // Try to get alt from parent picture's img
        const alt = $source.closest('picture').find('img').attr('alt') || '';
        // Parse srcset (format: "url 1x, url 2x" or "url 100w, url 200w")
        for (const candidate of srcset.split(',')) {
            const url = candidate.trim().split(/\s+/)[0];
            if (url)
                addImage(url, { alt }, false);
        }
    });
    // Extract CSS background images
    $('[style*="background"]').each((_, elem) => {
        const style = $(elem).attr('style');
        if (!style)
            return;
        // Match url() in CSS; group 1 is the URL without quotes
        for (const match of style.matchAll(/url\(['"]?([^'")\s]+)['"]?\)/g)) {
            // Background images don't have alt text
            addImage(match[1], { alt: '' }, false);
        }
    });
    return Array.from(images.values());
}
390
/**
 * Extract the title and all metadata from HTML.
 *
 * Optimization: when a <head> section is present, only a reduced document is
 * parsed with cheerio — the original `<html ...>` opening tag (so attributes
 * such as `lang` survive), the <head>, and the first <h1> for the title
 * fallback. This avoids building the DOM for the whole body.
 *
 * Falls back to parsing the full HTML when no <head> section is found, or when
 * the reduced document yields no title.
 *
 * The word count is always computed from the full HTML.
 *
 * @param html - Raw page HTML.
 * @param _url - Page URL; currently unused.
 * @returns `{ title, metadata }`; `publishDate` and `language` are only
 *          present on `metadata` when a value was found.
 */
export function extractMetadata(html, _url) {
    // Extract only the <head> section for faster cheerio parsing
    let headHtml = html;
    const headMatch = html.match(/<head[\s>][\s\S]*?<\/head>/i);
    if (headMatch) {
        // Keep the real <html> tag: extractLanguage reads its lang attribute.
        const htmlTagMatch = html.match(/<html(?:\s[^>]*)?>/i);
        const htmlOpen = htmlTagMatch ? htmlTagMatch[0] : '<html>';
        // Minimal body shell carrying the first <h1> for the title fallback
        const h1Match = html.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
        headHtml = `${htmlOpen}${headMatch[0]}<body>${h1Match ? h1Match[0] : ''}</body></html>`;
    }
    let $ = cheerio.load(headHtml);
    let title = extractTitle($);
    if (!title && headMatch) {
        // Reduced document produced no title: retry against the full page.
        $ = cheerio.load(html);
        title = extractTitle($);
    }
    const publishDate = extractPublishDate($, html);
    const language = extractLanguage($);
    const wordCount = extractWordCount(html);
    const metadata = {
        description: extractDescription($),
        author: extractAuthor($),
        published: extractPublished($),
        image: extractImage($),
        canonical: extractCanonical($),
        ...(publishDate ? { publishDate } : {}),
        ...(language ? { language } : {}),
        wordCount,
    };
    return { title, metadata };
}
@@ -0,0 +1,113 @@
1
+ /**
2
+ * WebPeel Observe — Give agents structured "eyes" on a web page.
3
+ *
4
+ * Returns a machine-readable map of interactive elements (links, buttons,
5
+ * inputs, forms, selects, media) so agents can decide what to do next
6
+ * without needing a vision model.
7
+ *
8
+ * This bridges the gap between:
9
+ * - `peel()` / `webpeel_read` → markdown content (strips interaction cues)
10
+ * - `webpeel_act` → requires knowing selectors already
11
+ *
12
+ * With `observe()`, the loop becomes:
13
+ * 1. observe(url) → see what's on the page
14
+ * 2. decide which element to interact with
15
+ * 3. act(url, actions) → do it
16
+ * 4. observe again → see the result
17
+ */
18
+ export interface ObserveOptions {
19
+ /** URL to observe (required unless passing an existing Page). NOTE(review): no Page option is declared on this interface — confirm how a Page is supplied. */
20
+ url?: string;
21
+ /** Use browser rendering (default: true — observation inherently needs the rendered DOM) */
22
+ render?: boolean;
23
+ /** CSS selector to scope observation (e.g. 'main', '#content') */
24
+ selector?: string;
25
+ /** Viewport: a named preset ('desktop' | 'mobile' | 'tablet') or explicit {width, height} dimensions */
26
+ viewport?: 'desktop' | 'mobile' | 'tablet' | {
27
+ width: number;
28
+ height: number;
29
+ };
30
+ /** Include a screenshot alongside structured data (default: false) */
31
+ screenshot?: boolean;
32
+ /** Full-page screenshot (default: false); presumably only relevant when `screenshot` is enabled — confirm */
33
+ screenshotFullPage?: boolean;
34
+ /** Max elements to return per category (default: 50) */
35
+ maxElements?: number;
36
+ /** Timeout in ms (default: 30000) */
37
+ timeout?: number;
38
+ /** Use stealth mode (default: false) */
39
+ stealth?: boolean;
40
+ }
41
+ export interface ObservedElement {
42
+ /** Auto-generated reference of the form "<category>-<index>" for easy reference: "link-0", "button-3", "input-2" */
43
+ ref: string;
44
+ /** Element tag (a, button, input, select, textarea, etc.) */
45
+ tag: string;
46
+ /** Best CSS selector to target this element */
47
+ selector: string;
48
+ /** Visible text content (truncated to 120 chars) */
49
+ text: string;
50
+ /** Semantic role or purpose */
51
+ role: string;
52
+ /** Additional attributes that help identify purpose (attribute name → value) */
53
+ attributes: Record<string, string>;
54
+ /** Whether the element is visible in the current viewport */
55
+ inViewport: boolean;
56
+ /** Bounding box { x, y, width, height } relative to viewport; optional, may be absent */
57
+ bbox?: {
58
+ x: number;
59
+ y: number;
60
+ width: number;
61
+ height: number;
62
+ };
63
+ }
64
+ export interface ObserveResult {
65
+ /** Final URL after redirects */
66
+ url: string;
67
+ /** Page title */
68
+ title: string;
69
+ /** Current viewport dimensions */
70
+ viewport: {
71
+ width: number;
72
+ height: number;
73
+ };
74
+ /** Page scroll dimensions */
75
+ scroll: {
76
+ width: number;
77
+ height: number;
78
+ };
79
+ /** Interactive elements grouped by type; each list holds ObservedElement entries */
80
+ elements: {
81
+ links: ObservedElement[];
82
+ buttons: ObservedElement[];
83
+ inputs: ObservedElement[];
84
+ selects: ObservedElement[];
85
+ forms: ObservedElement[];
86
+ media: ObservedElement[];
87
+ };
88
+ /** Total count of discovered elements */
89
+ totalElements: number;
90
+ /** Plain-text summary for quick agent consumption, e.g. "30 links, 2 buttons, 1 input, 1 form. Key actions: ..." */
91
+ summary: string;
92
+ /** Optional screenshot (base64 PNG); see the `screenshot` option of ObserveOptions */
93
+ screenshot?: string;
94
+ /** Elapsed time in ms */
95
+ elapsed: number;
96
+ }
97
+ /**
98
+ * Observe a web page and return a structured map of interactive elements.
99
+ *
+ * @param options - What to observe and how (see ObserveOptions).
+ * @returns A promise resolving to the ObserveResult for the page.
+ *
100
+ * @example
101
+ * ```typescript
102
+ * import { observe } from 'webpeel';
103
+ *
104
+ * const result = await observe({ url: 'https://news.ycombinator.com' });
105
+ * console.log(result.elements.links.length); // e.g. 30
106
+ * console.log(result.elements.links[0].ref); // "link-0"
107
+ * console.log(result.elements.links[0].text); // "Show HN: ..."
108
+ * console.log(result.elements.links[0].selector); // "a[href='item?id=12345']"
109
+ * console.log(result.summary);
110
+ * // "30 links, 2 buttons, 1 input, 1 form. Key actions: ..."
111
+ * ```
112
+ */
113
+ export declare function observe(options: ObserveOptions): Promise<ObserveResult>;