@iflow-mcp/jakeliume-webpeel 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (547) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +313 -0
  3. package/dist/cache.d.ts +30 -0
  4. package/dist/cache.js +139 -0
  5. package/dist/cli/commands/auth.d.ts +5 -0
  6. package/dist/cli/commands/auth.js +411 -0
  7. package/dist/cli/commands/doctor.d.ts +37 -0
  8. package/dist/cli/commands/doctor.js +371 -0
  9. package/dist/cli/commands/fetch.d.ts +6 -0
  10. package/dist/cli/commands/fetch.js +1345 -0
  11. package/dist/cli/commands/guide.d.ts +2 -0
  12. package/dist/cli/commands/guide.js +183 -0
  13. package/dist/cli/commands/interact.d.ts +5 -0
  14. package/dist/cli/commands/interact.js +840 -0
  15. package/dist/cli/commands/jobs.d.ts +5 -0
  16. package/dist/cli/commands/jobs.js +997 -0
  17. package/dist/cli/commands/monitor.d.ts +12 -0
  18. package/dist/cli/commands/monitor.js +197 -0
  19. package/dist/cli/commands/observe.d.ts +12 -0
  20. package/dist/cli/commands/observe.js +158 -0
  21. package/dist/cli/commands/screenshot.d.ts +5 -0
  22. package/dist/cli/commands/screenshot.js +282 -0
  23. package/dist/cli/commands/search.d.ts +5 -0
  24. package/dist/cli/commands/search.js +1021 -0
  25. package/dist/cli/commands/setup.d.ts +13 -0
  26. package/dist/cli/commands/setup.js +244 -0
  27. package/dist/cli/commands/skill.d.ts +15 -0
  28. package/dist/cli/commands/skill.js +195 -0
  29. package/dist/cli/utils.d.ts +84 -0
  30. package/dist/cli/utils.js +806 -0
  31. package/dist/cli-auth.d.ts +75 -0
  32. package/dist/cli-auth.js +369 -0
  33. package/dist/cli.d.ts +17 -0
  34. package/dist/cli.js +99 -0
  35. package/dist/core/actions.d.ts +69 -0
  36. package/dist/core/actions.js +495 -0
  37. package/dist/core/agent.d.ts +98 -0
  38. package/dist/core/agent.js +558 -0
  39. package/dist/core/answer.d.ts +42 -0
  40. package/dist/core/answer.js +395 -0
  41. package/dist/core/application-tracker.d.ts +84 -0
  42. package/dist/core/application-tracker.js +184 -0
  43. package/dist/core/apply.d.ts +162 -0
  44. package/dist/core/apply.js +816 -0
  45. package/dist/core/auth-detection.d.ts +35 -0
  46. package/dist/core/auth-detection.js +358 -0
  47. package/dist/core/auto-extract.d.ts +82 -0
  48. package/dist/core/auto-extract.js +604 -0
  49. package/dist/core/auto-interact.d.ts +23 -0
  50. package/dist/core/auto-interact.js +246 -0
  51. package/dist/core/bm25-filter.d.ts +66 -0
  52. package/dist/core/bm25-filter.js +288 -0
  53. package/dist/core/branding.d.ts +54 -0
  54. package/dist/core/branding.js +234 -0
  55. package/dist/core/browser-fetch.d.ts +323 -0
  56. package/dist/core/browser-fetch.js +1600 -0
  57. package/dist/core/browser-pool.d.ts +91 -0
  58. package/dist/core/browser-pool.js +550 -0
  59. package/dist/core/budget.d.ts +42 -0
  60. package/dist/core/budget.js +324 -0
  61. package/dist/core/business-intel.d.ts +47 -0
  62. package/dist/core/business-intel.js +279 -0
  63. package/dist/core/cache.d.ts +13 -0
  64. package/dist/core/cache.js +121 -0
  65. package/dist/core/cf-worker-proxy.d.ts +32 -0
  66. package/dist/core/cf-worker-proxy.js +87 -0
  67. package/dist/core/challenge-detection.d.ts +26 -0
  68. package/dist/core/challenge-detection.js +468 -0
  69. package/dist/core/change-tracking.d.ts +75 -0
  70. package/dist/core/change-tracking.js +276 -0
  71. package/dist/core/chunker.d.ts +46 -0
  72. package/dist/core/chunker.js +249 -0
  73. package/dist/core/chunking.d.ts +42 -0
  74. package/dist/core/chunking.js +181 -0
  75. package/dist/core/circuit-breaker.d.ts +44 -0
  76. package/dist/core/circuit-breaker.js +85 -0
  77. package/dist/core/content-pruner.d.ts +47 -0
  78. package/dist/core/content-pruner.js +425 -0
  79. package/dist/core/cookie-cache.d.ts +60 -0
  80. package/dist/core/cookie-cache.js +163 -0
  81. package/dist/core/crawl-checkpoint.d.ts +54 -0
  82. package/dist/core/crawl-checkpoint.js +104 -0
  83. package/dist/core/crawler.d.ts +84 -0
  84. package/dist/core/crawler.js +349 -0
  85. package/dist/core/cross-verify.d.ts +27 -0
  86. package/dist/core/cross-verify.js +93 -0
  87. package/dist/core/deep-fetch.d.ts +74 -0
  88. package/dist/core/deep-fetch.js +405 -0
  89. package/dist/core/deep-research.d.ts +141 -0
  90. package/dist/core/deep-research.js +972 -0
  91. package/dist/core/design-analysis.d.ts +70 -0
  92. package/dist/core/design-analysis.js +490 -0
  93. package/dist/core/design-compare.d.ts +38 -0
  94. package/dist/core/design-compare.js +264 -0
  95. package/dist/core/diff.d.ts +61 -0
  96. package/dist/core/diff.js +289 -0
  97. package/dist/core/dns-cache.d.ts +20 -0
  98. package/dist/core/dns-cache.js +198 -0
  99. package/dist/core/documents.d.ts +23 -0
  100. package/dist/core/documents.js +123 -0
  101. package/dist/core/domain-memory.d.ts +66 -0
  102. package/dist/core/domain-memory.js +163 -0
  103. package/dist/core/domain-verify.d.ts +40 -0
  104. package/dist/core/domain-verify.js +379 -0
  105. package/dist/core/engine-ranker.d.ts +112 -0
  106. package/dist/core/engine-ranker.js +395 -0
  107. package/dist/core/extract-inline.d.ts +38 -0
  108. package/dist/core/extract-inline.js +215 -0
  109. package/dist/core/extract-listings.d.ts +38 -0
  110. package/dist/core/extract-listings.js +461 -0
  111. package/dist/core/extract.d.ts +9 -0
  112. package/dist/core/extract.js +139 -0
  113. package/dist/core/fetch-cache.d.ts +57 -0
  114. package/dist/core/fetch-cache.js +95 -0
  115. package/dist/core/fetcher.d.ts +13 -0
  116. package/dist/core/fetcher.js +12 -0
  117. package/dist/core/google-cache.d.ts +29 -0
  118. package/dist/core/google-cache.js +180 -0
  119. package/dist/core/google-serp-parser.d.ts +82 -0
  120. package/dist/core/google-serp-parser.js +287 -0
  121. package/dist/core/hotel-search.d.ts +122 -0
  122. package/dist/core/hotel-search.js +382 -0
  123. package/dist/core/http-fetch.d.ts +72 -0
  124. package/dist/core/http-fetch.js +820 -0
  125. package/dist/core/human.d.ts +175 -0
  126. package/dist/core/human.js +680 -0
  127. package/dist/core/image-caption.d.ts +44 -0
  128. package/dist/core/image-caption.js +271 -0
  129. package/dist/core/jobs.d.ts +75 -0
  130. package/dist/core/jobs.js +634 -0
  131. package/dist/core/json-ld.d.ts +15 -0
  132. package/dist/core/json-ld.js +617 -0
  133. package/dist/core/language-detect.d.ts +18 -0
  134. package/dist/core/language-detect.js +135 -0
  135. package/dist/core/links.d.ts +10 -0
  136. package/dist/core/links.js +44 -0
  137. package/dist/core/llm-extract.d.ts +71 -0
  138. package/dist/core/llm-extract.js +507 -0
  139. package/dist/core/llm-provider.d.ts +100 -0
  140. package/dist/core/llm-provider.js +702 -0
  141. package/dist/core/local-search.d.ts +60 -0
  142. package/dist/core/local-search.js +308 -0
  143. package/dist/core/logger.d.ts +28 -0
  144. package/dist/core/logger.js +104 -0
  145. package/dist/core/map.d.ts +33 -0
  146. package/dist/core/map.js +127 -0
  147. package/dist/core/markdown.d.ts +92 -0
  148. package/dist/core/markdown.js +809 -0
  149. package/dist/core/metadata.d.ts +34 -0
  150. package/dist/core/metadata.js +422 -0
  151. package/dist/core/observe.d.ts +113 -0
  152. package/dist/core/observe.js +395 -0
  153. package/dist/core/ocr.d.ts +12 -0
  154. package/dist/core/ocr.js +33 -0
  155. package/dist/core/paginate.d.ts +31 -0
  156. package/dist/core/paginate.js +106 -0
  157. package/dist/core/pdf.d.ts +8 -0
  158. package/dist/core/pdf.js +25 -0
  159. package/dist/core/peel-tls.d.ts +25 -0
  160. package/dist/core/peel-tls.js +220 -0
  161. package/dist/core/pipeline.d.ts +132 -0
  162. package/dist/core/pipeline.js +1666 -0
  163. package/dist/core/profiles.d.ts +61 -0
  164. package/dist/core/profiles.js +350 -0
  165. package/dist/core/prompt-guard.d.ts +30 -0
  166. package/dist/core/prompt-guard.js +119 -0
  167. package/dist/core/proxy-config.d.ts +90 -0
  168. package/dist/core/proxy-config.js +172 -0
  169. package/dist/core/quick-answer.d.ts +53 -0
  170. package/dist/core/quick-answer.js +833 -0
  171. package/dist/core/rate-governor.d.ts +80 -0
  172. package/dist/core/rate-governor.js +238 -0
  173. package/dist/core/readability.d.ts +57 -0
  174. package/dist/core/readability.js +533 -0
  175. package/dist/core/research.d.ts +66 -0
  176. package/dist/core/research.js +270 -0
  177. package/dist/core/retry.d.ts +60 -0
  178. package/dist/core/retry.js +119 -0
  179. package/dist/core/safe-browsing.d.ts +30 -0
  180. package/dist/core/safe-browsing.js +206 -0
  181. package/dist/core/schema-extraction.d.ts +66 -0
  182. package/dist/core/schema-extraction.js +352 -0
  183. package/dist/core/schema-postprocess.d.ts +32 -0
  184. package/dist/core/schema-postprocess.js +469 -0
  185. package/dist/core/schema-templates.d.ts +19 -0
  186. package/dist/core/schema-templates.js +143 -0
  187. package/dist/core/screenshot.d.ts +224 -0
  188. package/dist/core/screenshot.js +207 -0
  189. package/dist/core/search-engines.d.ts +25 -0
  190. package/dist/core/search-engines.js +182 -0
  191. package/dist/core/search-provider.d.ts +243 -0
  192. package/dist/core/search-provider.js +1629 -0
  193. package/dist/core/searxng-provider.d.ts +35 -0
  194. package/dist/core/searxng-provider.js +105 -0
  195. package/dist/core/selective-evidence.d.ts +151 -0
  196. package/dist/core/selective-evidence.js +389 -0
  197. package/dist/core/site-search.d.ts +44 -0
  198. package/dist/core/site-search.js +252 -0
  199. package/dist/core/sitemap.d.ts +23 -0
  200. package/dist/core/sitemap.js +105 -0
  201. package/dist/core/source-credibility.d.ts +29 -0
  202. package/dist/core/source-credibility.js +584 -0
  203. package/dist/core/source-scoring.d.ts +166 -0
  204. package/dist/core/source-scoring.js +396 -0
  205. package/dist/core/stemmer.d.ts +38 -0
  206. package/dist/core/stemmer.js +509 -0
  207. package/dist/core/strategies.d.ts +104 -0
  208. package/dist/core/strategies.js +1044 -0
  209. package/dist/core/strategy-hooks.d.ts +145 -0
  210. package/dist/core/strategy-hooks.js +74 -0
  211. package/dist/core/structured-extract.d.ts +43 -0
  212. package/dist/core/structured-extract.js +550 -0
  213. package/dist/core/summarize.d.ts +17 -0
  214. package/dist/core/summarize.js +78 -0
  215. package/dist/core/synonyms.d.ts +42 -0
  216. package/dist/core/synonyms.js +184 -0
  217. package/dist/core/system-monitor.d.ts +61 -0
  218. package/dist/core/system-monitor.js +133 -0
  219. package/dist/core/table-format.d.ts +30 -0
  220. package/dist/core/table-format.js +146 -0
  221. package/dist/core/threat-feeds.d.ts +23 -0
  222. package/dist/core/threat-feeds.js +104 -0
  223. package/dist/core/timing.d.ts +21 -0
  224. package/dist/core/timing.js +33 -0
  225. package/dist/core/transcript-export.d.ts +47 -0
  226. package/dist/core/transcript-export.js +107 -0
  227. package/dist/core/user-agents.d.ts +82 -0
  228. package/dist/core/user-agents.js +239 -0
  229. package/dist/core/vertical-search.d.ts +54 -0
  230. package/dist/core/vertical-search.js +158 -0
  231. package/dist/core/watch-manager.d.ts +175 -0
  232. package/dist/core/watch-manager.js +416 -0
  233. package/dist/core/watch.d.ts +101 -0
  234. package/dist/core/watch.js +389 -0
  235. package/dist/core/youtube.d.ts +130 -0
  236. package/dist/core/youtube.js +1175 -0
  237. package/dist/ee/challenge-re-export.d.ts +1 -0
  238. package/dist/ee/challenge-re-export.js +1 -0
  239. package/dist/ee/challenge-solver.d.ts +72 -0
  240. package/dist/ee/challenge-solver.js +720 -0
  241. package/dist/ee/domain-extractors.d.ts +8 -0
  242. package/dist/ee/domain-extractors.js +8 -0
  243. package/dist/ee/domain-intel.d.ts +16 -0
  244. package/dist/ee/domain-intel.js +133 -0
  245. package/dist/ee/extractors/allrecipes.d.ts +2 -0
  246. package/dist/ee/extractors/allrecipes.js +120 -0
  247. package/dist/ee/extractors/amazon.d.ts +2 -0
  248. package/dist/ee/extractors/amazon.js +78 -0
  249. package/dist/ee/extractors/arxiv.d.ts +2 -0
  250. package/dist/ee/extractors/arxiv.js +137 -0
  251. package/dist/ee/extractors/bestbuy.d.ts +2 -0
  252. package/dist/ee/extractors/bestbuy.js +78 -0
  253. package/dist/ee/extractors/carscom.d.ts +2 -0
  254. package/dist/ee/extractors/carscom.js +121 -0
  255. package/dist/ee/extractors/coingecko.d.ts +2 -0
  256. package/dist/ee/extractors/coingecko.js +134 -0
  257. package/dist/ee/extractors/craigslist.d.ts +2 -0
  258. package/dist/ee/extractors/craigslist.js +92 -0
  259. package/dist/ee/extractors/devto.d.ts +2 -0
  260. package/dist/ee/extractors/devto.js +135 -0
  261. package/dist/ee/extractors/ebay.d.ts +2 -0
  262. package/dist/ee/extractors/ebay.js +90 -0
  263. package/dist/ee/extractors/espn.d.ts +2 -0
  264. package/dist/ee/extractors/espn.js +260 -0
  265. package/dist/ee/extractors/etsy.d.ts +2 -0
  266. package/dist/ee/extractors/etsy.js +52 -0
  267. package/dist/ee/extractors/facebook.d.ts +2 -0
  268. package/dist/ee/extractors/facebook.js +46 -0
  269. package/dist/ee/extractors/github.d.ts +2 -0
  270. package/dist/ee/extractors/github.js +196 -0
  271. package/dist/ee/extractors/google-flights.d.ts +2 -0
  272. package/dist/ee/extractors/google-flights.js +176 -0
  273. package/dist/ee/extractors/hackernews.d.ts +2 -0
  274. package/dist/ee/extractors/hackernews.js +147 -0
  275. package/dist/ee/extractors/imdb.d.ts +2 -0
  276. package/dist/ee/extractors/imdb.js +172 -0
  277. package/dist/ee/extractors/index.d.ts +26 -0
  278. package/dist/ee/extractors/index.js +247 -0
  279. package/dist/ee/extractors/instagram.d.ts +2 -0
  280. package/dist/ee/extractors/instagram.js +102 -0
  281. package/dist/ee/extractors/kalshi.d.ts +2 -0
  282. package/dist/ee/extractors/kalshi.js +121 -0
  283. package/dist/ee/extractors/kayak-cars.d.ts +2 -0
  284. package/dist/ee/extractors/kayak-cars.js +270 -0
  285. package/dist/ee/extractors/linkedin.d.ts +2 -0
  286. package/dist/ee/extractors/linkedin.js +113 -0
  287. package/dist/ee/extractors/medium.d.ts +2 -0
  288. package/dist/ee/extractors/medium.js +130 -0
  289. package/dist/ee/extractors/news.d.ts +4 -0
  290. package/dist/ee/extractors/news.js +173 -0
  291. package/dist/ee/extractors/npm.d.ts +2 -0
  292. package/dist/ee/extractors/npm.js +86 -0
  293. package/dist/ee/extractors/pdf.d.ts +2 -0
  294. package/dist/ee/extractors/pdf.js +108 -0
  295. package/dist/ee/extractors/pinterest.d.ts +2 -0
  296. package/dist/ee/extractors/pinterest.js +34 -0
  297. package/dist/ee/extractors/polymarket.d.ts +2 -0
  298. package/dist/ee/extractors/polymarket.js +358 -0
  299. package/dist/ee/extractors/producthunt.d.ts +2 -0
  300. package/dist/ee/extractors/producthunt.js +88 -0
  301. package/dist/ee/extractors/pubmed.d.ts +2 -0
  302. package/dist/ee/extractors/pubmed.js +162 -0
  303. package/dist/ee/extractors/pypi.d.ts +2 -0
  304. package/dist/ee/extractors/pypi.js +80 -0
  305. package/dist/ee/extractors/reddit.d.ts +2 -0
  306. package/dist/ee/extractors/reddit.js +438 -0
  307. package/dist/ee/extractors/redfin.d.ts +2 -0
  308. package/dist/ee/extractors/redfin.js +156 -0
  309. package/dist/ee/extractors/semanticscholar.d.ts +2 -0
  310. package/dist/ee/extractors/semanticscholar.js +131 -0
  311. package/dist/ee/extractors/shared.d.ts +12 -0
  312. package/dist/ee/extractors/shared.js +76 -0
  313. package/dist/ee/extractors/soundcloud.d.ts +2 -0
  314. package/dist/ee/extractors/soundcloud.js +34 -0
  315. package/dist/ee/extractors/sportsbetting.d.ts +2 -0
  316. package/dist/ee/extractors/sportsbetting.js +37 -0
  317. package/dist/ee/extractors/spotify.d.ts +2 -0
  318. package/dist/ee/extractors/spotify.js +34 -0
  319. package/dist/ee/extractors/stackoverflow.d.ts +2 -0
  320. package/dist/ee/extractors/stackoverflow.js +61 -0
  321. package/dist/ee/extractors/substack.d.ts +2 -0
  322. package/dist/ee/extractors/substack.js +115 -0
  323. package/dist/ee/extractors/substackroot.d.ts +2 -0
  324. package/dist/ee/extractors/substackroot.js +46 -0
  325. package/dist/ee/extractors/tiktok.d.ts +2 -0
  326. package/dist/ee/extractors/tiktok.js +29 -0
  327. package/dist/ee/extractors/tradingview.d.ts +2 -0
  328. package/dist/ee/extractors/tradingview.js +182 -0
  329. package/dist/ee/extractors/twitch.d.ts +2 -0
  330. package/dist/ee/extractors/twitch.js +36 -0
  331. package/dist/ee/extractors/twitter.d.ts +2 -0
  332. package/dist/ee/extractors/twitter.js +327 -0
  333. package/dist/ee/extractors/types.d.ts +14 -0
  334. package/dist/ee/extractors/types.js +1 -0
  335. package/dist/ee/extractors/walmart.d.ts +2 -0
  336. package/dist/ee/extractors/walmart.js +50 -0
  337. package/dist/ee/extractors/weather.d.ts +2 -0
  338. package/dist/ee/extractors/weather.js +133 -0
  339. package/dist/ee/extractors/wikipedia.d.ts +4 -0
  340. package/dist/ee/extractors/wikipedia.js +235 -0
  341. package/dist/ee/extractors/yelp.d.ts +2 -0
  342. package/dist/ee/extractors/yelp.js +216 -0
  343. package/dist/ee/extractors/youtube.d.ts +2 -0
  344. package/dist/ee/extractors/youtube.js +189 -0
  345. package/dist/ee/extractors/zillow.d.ts +54 -0
  346. package/dist/ee/extractors/zillow.js +247 -0
  347. package/dist/ee/extractors-re-export.d.ts +1 -0
  348. package/dist/ee/extractors-re-export.js +1 -0
  349. package/dist/ee/premium-hooks.d.ts +20 -0
  350. package/dist/ee/premium-hooks.js +50 -0
  351. package/dist/ee/spa-detection.d.ts +2 -0
  352. package/dist/ee/spa-detection.js +2 -0
  353. package/dist/ee/stability.d.ts +4 -0
  354. package/dist/ee/stability.js +29 -0
  355. package/dist/ee/swr-cache.d.ts +14 -0
  356. package/dist/ee/swr-cache.js +34 -0
  357. package/dist/index.d.ts +143 -0
  358. package/dist/index.js +291 -0
  359. package/dist/integrations/index.d.ts +2 -0
  360. package/dist/integrations/index.js +2 -0
  361. package/dist/integrations/langchain.d.ts +64 -0
  362. package/dist/integrations/langchain.js +115 -0
  363. package/dist/integrations/llamaindex.d.ts +50 -0
  364. package/dist/integrations/llamaindex.js +91 -0
  365. package/dist/mcp/handlers/act.d.ts +5 -0
  366. package/dist/mcp/handlers/act.js +34 -0
  367. package/dist/mcp/handlers/definitions.d.ts +6 -0
  368. package/dist/mcp/handlers/definitions.js +395 -0
  369. package/dist/mcp/handlers/extract.d.ts +7 -0
  370. package/dist/mcp/handlers/extract.js +135 -0
  371. package/dist/mcp/handlers/fetch.d.ts +6 -0
  372. package/dist/mcp/handlers/fetch.js +98 -0
  373. package/dist/mcp/handlers/find.d.ts +5 -0
  374. package/dist/mcp/handlers/find.js +137 -0
  375. package/dist/mcp/handlers/index.d.ts +13 -0
  376. package/dist/mcp/handlers/index.js +63 -0
  377. package/dist/mcp/handlers/legacy.d.ts +25 -0
  378. package/dist/mcp/handlers/legacy.js +450 -0
  379. package/dist/mcp/handlers/meta.d.ts +6 -0
  380. package/dist/mcp/handlers/meta.js +40 -0
  381. package/dist/mcp/handlers/monitor.d.ts +5 -0
  382. package/dist/mcp/handlers/monitor.js +41 -0
  383. package/dist/mcp/handlers/observe.d.ts +8 -0
  384. package/dist/mcp/handlers/observe.js +37 -0
  385. package/dist/mcp/handlers/read.d.ts +6 -0
  386. package/dist/mcp/handlers/read.js +78 -0
  387. package/dist/mcp/handlers/see.d.ts +5 -0
  388. package/dist/mcp/handlers/see.js +75 -0
  389. package/dist/mcp/handlers/types.d.ts +29 -0
  390. package/dist/mcp/handlers/types.js +28 -0
  391. package/dist/mcp/server.d.ts +7 -0
  392. package/dist/mcp/server.js +108 -0
  393. package/dist/mcp/smart-router.d.ts +23 -0
  394. package/dist/mcp/smart-router.js +178 -0
  395. package/dist/server/app.d.ts +14 -0
  396. package/dist/server/app.js +632 -0
  397. package/dist/server/auth-store.d.ts +28 -0
  398. package/dist/server/auth-store.js +88 -0
  399. package/dist/server/bull-queues.d.ts +60 -0
  400. package/dist/server/bull-queues.js +90 -0
  401. package/dist/server/email-service.d.ts +55 -0
  402. package/dist/server/email-service.js +291 -0
  403. package/dist/server/job-queue.d.ts +100 -0
  404. package/dist/server/job-queue.js +145 -0
  405. package/dist/server/logger.d.ts +10 -0
  406. package/dist/server/logger.js +37 -0
  407. package/dist/server/middleware/audit-log.d.ts +14 -0
  408. package/dist/server/middleware/audit-log.js +73 -0
  409. package/dist/server/middleware/auth.d.ts +35 -0
  410. package/dist/server/middleware/auth.js +225 -0
  411. package/dist/server/middleware/rate-limit.d.ts +50 -0
  412. package/dist/server/middleware/rate-limit.js +270 -0
  413. package/dist/server/middleware/scope-guard.d.ts +25 -0
  414. package/dist/server/middleware/scope-guard.js +45 -0
  415. package/dist/server/middleware/url-validator.d.ts +15 -0
  416. package/dist/server/middleware/url-validator.js +201 -0
  417. package/dist/server/openapi.yaml +6418 -0
  418. package/dist/server/pg-auth-store.d.ts +146 -0
  419. package/dist/server/pg-auth-store.js +576 -0
  420. package/dist/server/pg-job-queue.d.ts +59 -0
  421. package/dist/server/pg-job-queue.js +375 -0
  422. package/dist/server/routes/activity.d.ts +6 -0
  423. package/dist/server/routes/activity.js +79 -0
  424. package/dist/server/routes/admin-active.d.ts +7 -0
  425. package/dist/server/routes/admin-active.js +120 -0
  426. package/dist/server/routes/admin-stats.d.ts +7 -0
  427. package/dist/server/routes/admin-stats.js +176 -0
  428. package/dist/server/routes/agent.d.ts +24 -0
  429. package/dist/server/routes/agent.js +480 -0
  430. package/dist/server/routes/answer.d.ts +5 -0
  431. package/dist/server/routes/answer.js +125 -0
  432. package/dist/server/routes/ask.d.ts +28 -0
  433. package/dist/server/routes/ask.js +295 -0
  434. package/dist/server/routes/batch.d.ts +6 -0
  435. package/dist/server/routes/batch.js +493 -0
  436. package/dist/server/routes/cache-warm.d.ts +25 -0
  437. package/dist/server/routes/cache-warm.js +212 -0
  438. package/dist/server/routes/cli-usage.d.ts +6 -0
  439. package/dist/server/routes/cli-usage.js +127 -0
  440. package/dist/server/routes/compat.d.ts +23 -0
  441. package/dist/server/routes/compat.js +652 -0
  442. package/dist/server/routes/crawl.d.ts +13 -0
  443. package/dist/server/routes/crawl.js +287 -0
  444. package/dist/server/routes/deep-fetch.d.ts +8 -0
  445. package/dist/server/routes/deep-fetch.js +57 -0
  446. package/dist/server/routes/deep-research.d.ts +11 -0
  447. package/dist/server/routes/deep-research.js +232 -0
  448. package/dist/server/routes/demo.d.ts +24 -0
  449. package/dist/server/routes/demo.js +517 -0
  450. package/dist/server/routes/do.d.ts +8 -0
  451. package/dist/server/routes/do.js +72 -0
  452. package/dist/server/routes/extract.d.ts +14 -0
  453. package/dist/server/routes/extract.js +325 -0
  454. package/dist/server/routes/feed.d.ts +15 -0
  455. package/dist/server/routes/feed.js +311 -0
  456. package/dist/server/routes/fetch-queue.d.ts +13 -0
  457. package/dist/server/routes/fetch-queue.js +357 -0
  458. package/dist/server/routes/fetch.d.ts +7 -0
  459. package/dist/server/routes/fetch.js +1274 -0
  460. package/dist/server/routes/go.d.ts +14 -0
  461. package/dist/server/routes/go.js +81 -0
  462. package/dist/server/routes/health.d.ts +11 -0
  463. package/dist/server/routes/health.js +141 -0
  464. package/dist/server/routes/jobs.d.ts +7 -0
  465. package/dist/server/routes/jobs.js +574 -0
  466. package/dist/server/routes/map.d.ts +11 -0
  467. package/dist/server/routes/map.js +116 -0
  468. package/dist/server/routes/mcp.d.ts +14 -0
  469. package/dist/server/routes/mcp.js +197 -0
  470. package/dist/server/routes/metrics.d.ts +37 -0
  471. package/dist/server/routes/metrics.js +149 -0
  472. package/dist/server/routes/oauth.d.ts +9 -0
  473. package/dist/server/routes/oauth.js +396 -0
  474. package/dist/server/routes/playground.d.ts +17 -0
  475. package/dist/server/routes/playground.js +283 -0
  476. package/dist/server/routes/reader.d.ts +18 -0
  477. package/dist/server/routes/reader.js +192 -0
  478. package/dist/server/routes/research.d.ts +14 -0
  479. package/dist/server/routes/research.js +482 -0
  480. package/dist/server/routes/screenshot.d.ts +22 -0
  481. package/dist/server/routes/screenshot.js +820 -0
  482. package/dist/server/routes/search.d.ts +6 -0
  483. package/dist/server/routes/search.js +874 -0
  484. package/dist/server/routes/session.d.ts +17 -0
  485. package/dist/server/routes/session.js +548 -0
  486. package/dist/server/routes/share.d.ts +18 -0
  487. package/dist/server/routes/share.js +462 -0
  488. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  489. package/dist/server/routes/smart-search/handlers/cars.js +102 -0
  490. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  491. package/dist/server/routes/smart-search/handlers/flights.js +72 -0
  492. package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
  493. package/dist/server/routes/smart-search/handlers/general.js +717 -0
  494. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  495. package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
  496. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  497. package/dist/server/routes/smart-search/handlers/products.js +1309 -0
  498. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  499. package/dist/server/routes/smart-search/handlers/rental.js +154 -0
  500. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  501. package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
  502. package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
  503. package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
  504. package/dist/server/routes/smart-search/index.d.ts +19 -0
  505. package/dist/server/routes/smart-search/index.js +546 -0
  506. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  507. package/dist/server/routes/smart-search/intent.js +264 -0
  508. package/dist/server/routes/smart-search/llm.d.ts +16 -0
  509. package/dist/server/routes/smart-search/llm.js +70 -0
  510. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  511. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  512. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  513. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  514. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  515. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  516. package/dist/server/routes/smart-search/types.d.ts +81 -0
  517. package/dist/server/routes/smart-search/types.js +1 -0
  518. package/dist/server/routes/smart-search/utils.d.ts +20 -0
  519. package/dist/server/routes/smart-search/utils.js +146 -0
  520. package/dist/server/routes/stats.d.ts +6 -0
  521. package/dist/server/routes/stats.js +71 -0
  522. package/dist/server/routes/stripe.d.ts +15 -0
  523. package/dist/server/routes/stripe.js +296 -0
  524. package/dist/server/routes/transcript-export.d.ts +10 -0
  525. package/dist/server/routes/transcript-export.js +178 -0
  526. package/dist/server/routes/usage.d.ts +9 -0
  527. package/dist/server/routes/usage.js +279 -0
  528. package/dist/server/routes/users.d.ts +8 -0
  529. package/dist/server/routes/users.js +1867 -0
  530. package/dist/server/routes/watch.d.ts +15 -0
  531. package/dist/server/routes/watch.js +309 -0
  532. package/dist/server/routes/webhooks.d.ts +26 -0
  533. package/dist/server/routes/webhooks.js +170 -0
  534. package/dist/server/routes/youtube.d.ts +6 -0
  535. package/dist/server/routes/youtube.js +130 -0
  536. package/dist/server/sentry.d.ts +14 -0
  537. package/dist/server/sentry.js +104 -0
  538. package/dist/server/types.d.ts +15 -0
  539. package/dist/server/types.js +7 -0
  540. package/dist/server/utils/response.d.ts +44 -0
  541. package/dist/server/utils/response.js +69 -0
  542. package/dist/server/utils/sse.d.ts +22 -0
  543. package/dist/server/utils/sse.js +38 -0
  544. package/dist/types.d.ts +552 -0
  545. package/dist/types.js +39 -0
  546. package/llms.txt +105 -0
  547. package/package.json +189 -0
@@ -0,0 +1,604 @@
1
+ /**
2
+ * Auto-extraction module — heuristic + CSS selector based structured data extraction.
3
+ * No LLM API key required.
4
+ *
5
+ * Supports:
6
+ * - pricing : pricing tables / plan cards
7
+ * - products : product grids / listings
8
+ * - contact : emails, phones, addresses, social links
9
+ * - article : blog posts / news articles
10
+ * - api_docs : REST API endpoint documentation
11
+ * - unknown : fallback when no type is detected
12
+ */
13
+ import { load } from 'cheerio';
14
+ // ---------------------------------------------------------------------------
15
+ // Page type detection
16
+ // ---------------------------------------------------------------------------
17
+ const PRICE_INLINE = /(\$|€|£)\s*\d+/; // a $, € or £ sign, optional whitespace, then at least one digit (e.g. "$29")
18
+ const FREE_PLAN = /\bfree\b/i; // the standalone word "free", case-insensitive
19
+ const HTTP_METHOD_PATTERN = /\b(GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS)\b/; // upper-case HTTP verb as a whole word; no `g` flag, so String.match() yields one match plus its capture group
20
+ const URL_PATH_PATTERN = /\/(v\d+\/)?[a-z_-]+(\/{[^}]+}|\/?[a-z_-]*)*\b/; // slash-led lowercase path, optional "vN/" prefix, segments may be "{param}" placeholders; no `g` flag
21
+ const EMAIL_PATTERN = /[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}/g; // global: String.match() returns every e-mail-like token
22
+ const PHONE_PATTERN = /(\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}|\+\d{1,3}[-.\s]?\d{2,4}[-.\s]?\d{4,}/g; // global: 3-3-4 digit numbers with optional leading 1, or "+<country code>" followed by two digit groups
23
+ /** Extract body text with spaces between elements (prevents regex over-matching adjacent tokens). */
24
+ function getBodyText($) {
25
+ const html = $('body').html() || '';
26
+ return html.replace(/<[^>]+>/g, ' ').replace(/&[a-z#\d]+;/gi, ' ').replace(/\s+/g, ' ').trim();
27
+ }
28
/**
 * Check whether a URL mentions any of the given keywords.
 *
 * When `url` parses as an absolute URL only its lower-cased pathname is
 * searched; otherwise the whole lower-cased string is searched instead.
 *
 * @param url - URL (or URL-like string) to inspect.
 * @param keywords - Lower-case fragments to look for, e.g. '/pricing'.
 * @returns true when at least one keyword occurs.
 */
function urlHas(url, ...keywords) {
    let haystack;
    try {
        haystack = new URL(url).pathname.toLowerCase();
    }
    catch {
        // Not an absolute URL: fall back to a plain substring search.
        haystack = url.toLowerCase();
    }
    return keywords.some((keyword) => haystack.includes(keyword));
}
38
/**
 * Detect the page type from HTML + URL.
 *
 * Checks run in a fixed priority order and the first match wins:
 * pricing → contact → article → api_docs → products → unknown.
 *
 * @param html - Raw HTML of the page.
 * @param url - Page URL; inspected for path keywords such as '/pricing'.
 * @returns One of: 'pricing' | 'products' | 'contact' | 'article' | 'api_docs' | 'unknown'
 */
export function detectPageType(html, url) {
    const $ = load(html);
    // Count *every* occurrence of a pattern. HTTP_METHOD_PATTERN and
    // URL_PATH_PATTERN carry no `g` flag, so String.match() on them returns a
    // single match plus its capture groups; using that array's length as a hit
    // count always produced 2 / 3 and let the ">= 2" thresholds below pass
    // after one hit. A global copy of the pattern gives the real count.
    const countMatches = (text, pattern) => {
        const flags = pattern.flags.includes('g') ? pattern.flags : `${pattern.flags}g`;
        return (text.match(new RegExp(pattern.source, flags)) || []).length;
    };
    // --- Pricing ---
    if (urlHas(url, '/pricing', '/plans', '/packages', '/tiers', '/billing')) {
        return 'pricing';
    }
    const bodyText = getBodyText($);
    const priceMatches = bodyText.match(/(\$|€|£)\s*\d+/g) || [];
    const perPeriodMatches = bodyText.match(/\/(mo|month|year|yr|annual|week)/gi) || [];
    if (priceMatches.length >= 2 && perPeriodMatches.length >= 1) {
        return 'pricing';
    }
    // --- Contact ---
    const emails = bodyText.match(EMAIL_PATTERN) || [];
    // A contact-like URL only needs one e-mail address on the page.
    if (urlHas(url, '/contact', '/about', '/reach', '/connect', '/support') && emails.length > 0) {
        return 'contact';
    }
    const phones = bodyText.match(PHONE_PATTERN) || [];
    const socialLinks = $('a[href*="twitter.com"], a[href*="linkedin.com"], a[href*="github.com"]').length;
    if (emails.length > 0 && (phones.length > 0 || socialLinks > 0)) {
        return 'contact';
    }
    // --- Article ---
    const hasArticleTag = $('article').length > 0;
    const hasTimeTag = $('time[datetime], time[pubdate]').length > 0;
    const hasAuthorMeta = $('meta[name="author"]').length > 0 ||
        $('[class*="author"], [itemprop="author"]').length > 0;
    if (hasArticleTag || (hasTimeTag && hasAuthorMeta)) {
        return 'article';
    }
    // Single <h1> + multiple paragraphs and a date-ish element
    const h1Count = $('h1').length;
    const paraCount = $('p').length;
    if (h1Count === 1 && paraCount >= 3 && hasTimeTag) {
        return 'article';
    }
    // --- API docs ---
    const codeText = $('code, pre').text();
    const httpMethodHits = countMatches(codeText, HTTP_METHOD_PATTERN);
    const urlPathHits = countMatches(codeText, URL_PATH_PATTERN);
    if (httpMethodHits >= 2 && urlPathHits >= 2) {
        return 'api_docs';
    }
    // Also check for common API doc patterns in normal text
    const headingText = $('h1, h2, h3').text();
    if (/endpoint|api reference|rest api|http method/i.test(headingText) &&
        httpMethodHits >= 1) {
        return 'api_docs';
    }
    // --- Products ---
    // Look for repeating card-like structures with prices + images
    const potentialProductContainers = [
        '.product', '.item', '.card', '[class*="product"]', '[class*="item"]', '[class*="card"]',
    ];
    for (const sel of potentialProductContainers) {
        const cards = $(sel);
        if (cards.length >= 3) {
            let withPrice = 0;
            cards.each((_, el) => {
                const text = $(el).text();
                if (PRICE_INLINE.test(text) || FREE_PLAN.test(text))
                    withPrice++;
            });
            if (withPrice >= 2)
                return 'products';
        }
    }
    // Fallback: many <img> elements with adjacent prices
    const imgs = $('img').length;
    if (imgs >= 4 && priceMatches.length >= 3) {
        return 'products';
    }
    return 'unknown';
}
118
+ // ---------------------------------------------------------------------------
119
+ // Pricing extractor
120
+ // ---------------------------------------------------------------------------
121
/**
 * Extract pricing plans from card-like containers.
 *
 * The first container selector that yields at least two price-bearing
 * elements supplies the cards. When none does, extraction falls back to
 * parsePricingFromText(). Each card contributes a name, price, optional
 * billing period, feature list and call-to-action label; cards that fail to
 * parse are skipped (logged only when DEBUG is set).
 *
 * @param $ - Loaded cheerio document.
 * @returns De-duplicated list of plans.
 */
function extractPricingPlans($) {
    const mentionsPrice = (text) => PRICE_INLINE.test(text) || FREE_PLAN.test(text);
    // Candidate card selectors (ordered from specific to broad).
    const cardSelectors = [
        '[class*="pricing-card"]',
        '[class*="price-card"]',
        '[class*="plan-card"]',
        '[class*="tier-card"]',
        '[class*="pricing__plan"]',
        '[class*="plan"]',
        '[class*="pricing-tier"]',
        '[class*="pricing-table"] td',
        '[class*="pricing-table"] th',
        '.card',
        '[class*="col-"]',
    ];
    // Where a plan name may live inside a card: dedicated hooks first, then headings.
    const nameSelectors = [
        '[data-plan-name]',
        '.plan-name',
        '[class*="plan-name"]',
        '[class*="plan__name"]',
        '[class*="tier-name"]',
        '[class*="pricing-header"] h2',
        '[class*="pricing-header"] h3',
        '[class*="pricing__title"]',
        '[class*="price__title"]',
        '[class*="card__title"]',
        '[class*="card-title"]',
        'h2',
        'h3',
        'h4',
        '[class*="name"]',
        '[class*="title"]',
        'h1',
        'h5',
        'h6',
    ];
    const ctaWords = /get started|sign up|buy|subscribe|choose|select|try|start|upgrade/i;
    let cards = null;
    for (const selector of cardSelectors) {
        const priced = $(selector).filter((_, node) => mentionsPrice($(node).text()));
        if (priced.length >= 2) {
            cards = priced;
            break;
        }
    }
    if (cards === null || cards.length === 0) {
        // Last resort: parse entire page for price-like text blocks
        return parsePricingFromText($);
    }
    const plans = [];
    cards.each((_, node) => {
        try {
            const $card = $(node);
            const cardText = $card.text().trim();
            // Plan name: first selector with a non-empty label other than the bare word "plan".
            let name = 'Plan';
            for (const selector of nameSelectors) {
                const label = $card.find(selector).first().text().trim();
                if (label && label.toLowerCase() !== 'plan') {
                    name = label;
                    break;
                }
            }
            // Price: an explicit amount wins; otherwise the card must mention "free".
            const amount = cardText.match(/(\$|€|£|free)\s*[\d,]+(\.\d+)?/i);
            let price;
            if (amount) {
                price = amount[0];
            }
            else if (FREE_PLAN.test(cardText)) {
                price = 'Free';
            }
            else {
                return; // Skip non-price containers
            }
            // Billing period such as "/mo" or "/year".
            const periodHit = cardText.match(/\/(mo(nth)?|yr|year|week|day|annual)/i);
            const period = periodHit ? periodHit[0] : undefined;
            // Features: every non-empty list item shorter than 200 characters.
            const features = $card
                .find('li')
                .toArray()
                .map((li) => $(li).text().trim())
                .filter((line) => line && line.length < 200);
            // Call-to-action: first link/button whose label contains a CTA verb.
            const ctaLabel = $card
                .find('a, button')
                .filter((_, btn) => ctaWords.test($(btn).text()))
                .first()
                .text()
                .trim();
            const cta = ctaLabel || undefined;
            plans.push({ name, price, period, features, cta });
        }
        catch (e) {
            if (process.env.DEBUG)
                console.debug('[webpeel]', 'pricing plan parse failed:', e instanceof Error ? e.message : e);
        }
    });
    return deduplicatePlans(plans);
}
221
/**
 * Fallback pricing parser: scan the flattened body text for price-like tokens.
 *
 * Each distinct price becomes one plan. Plan names are taken positionally from
 * the page's h1–h4 headings (the i-th heading names the i-th price), falling
 * back to 'Plan' when headings run out. No features are extracted.
 *
 * @param $ - Loaded cheerio document.
 * @returns One plan per unique price, in order of first appearance.
 */
function parsePricingFromText($) {
    const bodyText = getBodyText($);
    const priceRegex = /(\$|€|£)\s*(\d+(?:\.\d+)?)\s*(?:\/(mo(?:nth)?|yr|year|week|annual))?/gi;
    // The `\s*` before the optional "/period" also swallows trailing whitespace
    // when no period follows, so the raw match can be "$10 ". Trim each match so
    // "$10 " and "$10" de-duplicate to one plan and prices carry no stray space.
    const foundPrices = (bodyText.match(priceRegex) || []).map((price) => price.trim());
    // Candidate plan names: short headings other than the bare word "plan".
    const headings = [];
    $('h1, h2, h3, h4').each((_, el) => {
        const text = $(el).text().trim();
        if (text && text.toLowerCase() !== 'plan' && text.length < 60)
            headings.push(text);
    });
    // Simple heuristic: each unique price = 1 plan
    const uniquePrices = [...new Set(foundPrices)];
    return uniquePrices.map((price, i) => ({
        name: headings[i] || 'Plan',
        price,
        features: [],
    }));
}
247
/**
 * Drop plans that repeat an earlier plan's name and price.
 *
 * @param plans - Plans in extraction order.
 * @returns New array holding the first plan seen for each name/price pair.
 */
function deduplicatePlans(plans) {
    const seenKeys = new Set();
    const unique = [];
    for (const plan of plans) {
        const signature = `${plan.name}|${plan.price}`;
        if (seenKeys.has(signature))
            continue;
        seenKeys.add(signature);
        unique.push(plan);
    }
    return unique;
}
257
+ // ---------------------------------------------------------------------------
258
+ // Products extractor
259
+ // ---------------------------------------------------------------------------
260
/**
 * Extract product listings (name, price, image, link, rating) from a page.
 *
 * The first container selector that yields at least two elements containing
 * both a price (or the word "free") and an <img> supplies the product cards.
 * Cards without a name are skipped; cards that fail to parse are logged only
 * when DEBUG is set.
 *
 * @param $ - Loaded cheerio document.
 * @param baseUrl - URL of the page, used to resolve relative image/link URLs.
 * @returns At most 100 product items.
 */
function extractProducts($, baseUrl) {
    const items = [];
    const origin = (() => {
        try {
            return new URL(baseUrl).origin;
        }
        catch {
            return '';
        }
    })();
    // Resolve an href/src against the page URL. Absolute http(s) URLs pass
    // through untouched. Everything else goes through the URL parser, which
    // handles protocol-relative ("//cdn…"), path-relative ("../img.png") and
    // non-http schemes ("data:…") that plain string joining mangles. The old
    // origin-prefix join is kept only for when baseUrl itself is unparsable.
    const resolveUrl = (ref) => {
        if (!ref)
            return undefined;
        if (/^https?:\/\//i.test(ref))
            return ref;
        try {
            return new URL(ref, baseUrl).href;
        }
        catch {
            return ref.startsWith('http')
                ? ref
                : `${origin}${ref.startsWith('/') ? '' : '/'}${ref}`;
        }
    };
    const containerSelectors = [
        '[class*="product"]',
        '[class*="item"]',
        '[class*="card"]',
        'li',
        'article',
    ];
    let containers = null;
    for (const sel of containerSelectors) {
        const found = $(sel).filter((_, el) => {
            const text = $(el).text();
            return (PRICE_INLINE.test(text) || FREE_PLAN.test(text)) && $(el).find('img').length > 0;
        });
        if (found.length >= 2) {
            containers = found;
            break;
        }
    }
    if (!containers || containers.length === 0)
        return items;
    containers.each((_, el) => {
        try {
            const $el = $(el);
            const cardText = $el.text();
            // Name: first heading or name/title-classed element; required.
            const name = $el.find('h1,h2,h3,h4,h5,h6,[class*="name"],[class*="title"]').first().text().trim();
            if (!name)
                return;
            // Price
            const priceMatch = cardText.match(/(\$|€|£)\s*[\d,]+(\.\d+)?/);
            const price = priceMatch ? priceMatch[0].trim() : undefined;
            // Image: honour common lazy-loading attributes too.
            const imgEl = $el.find('img').first();
            const image = resolveUrl(imgEl.attr('src') || imgEl.attr('data-src') || imgEl.attr('data-lazy'));
            // URL: first link inside the card.
            const url = resolveUrl($el.find('a').first().attr('href'));
            // Rating such as "4.5/5", "4 stars" or "4.5★", normalised to "N/5".
            const ratingMatch = cardText.match(/(\d(\.\d)?)\s*(\/\s*5|stars?|★)/i);
            const rating = ratingMatch ? `${ratingMatch[1]}/5` : undefined;
            items.push({ name, price, image, url, rating });
        }
        catch (e) {
            if (process.env.DEBUG)
                console.debug('[webpeel]', 'product item parse failed:', e instanceof Error ? e.message : e);
        }
    });
    return items.slice(0, 100); // cap at 100
}
329
+ // ---------------------------------------------------------------------------
330
+ // Contact extractor
331
+ // ---------------------------------------------------------------------------
332
+ const SOCIAL_DOMAINS = { // link domain -> key under which extractContact() stores the link in its `social` result
333
+ 'twitter.com': 'twitter',
334
+ 'x.com': 'twitter', // shares the 'twitter' key with twitter.com
335
+ 'linkedin.com': 'linkedin',
336
+ 'github.com': 'github',
337
+ 'facebook.com': 'facebook',
338
+ 'instagram.com': 'instagram',
339
+ 'youtube.com': 'youtube',
340
+ 'tiktok.com': 'tiktok',
341
+ 'discord.gg': 'discord', // both Discord domains share the 'discord' key
342
+ 'discord.com': 'discord',
343
+ };
344
+ const ADDRESS_PATTERN = /\d{1,5}\s+[A-Za-z0-9\s,\.]+(?:street|st|avenue|ave|road|rd|blvd|boulevard|lane|ln|drive|dr|court|ct|way|wy|place|pl)\b[^<\n]{0,80}/i; // 1-5 digit number, free text, a street-type word, then up to 80 trailing characters; no `g` flag, so only the first match is found
345
/**
 * Extract contact details: e-mail addresses, phone numbers, postal addresses
 * and social-profile links.
 *
 * @param $ - Loaded cheerio document.
 * @returns `{ type: 'contact', emails, phones, addresses, social }` where
 *   e-mails are lower-cased and unique, phones are trimmed and unique, and
 *   `social` maps a network key to the first matching link found.
 */
function extractContact($) {
    const bodyText = getBodyText($);
    // Emails
    const emailMatches = bodyText.match(EMAIL_PATTERN) || [];
    const emails = [
        ...new Set(emailMatches.map((e) => e.toLowerCase())),
    ];
    // Phones
    const phoneMatches = bodyText.match(PHONE_PATTERN) || [];
    const phones = [...new Set(phoneMatches.map((p) => p.trim()))];
    // Addresses: semantic markup first
    const addresses = [];
    $('[class*="address"], [itemprop="address"], address').each((_, el) => {
        const addr = $(el).text().replace(/\s+/g, ' ').trim();
        if (addr.length > 10)
            addresses.push(addr);
    });
    // Also regex-based; skipped when its first 10 characters already appear
    // in an address collected from markup.
    const addrMatch = bodyText.match(ADDRESS_PATTERN);
    if (addrMatch) {
        const addr = addrMatch[0].trim();
        if (!addresses.some((a) => a.includes(addr.substring(0, 10)))) {
            addresses.push(addr);
        }
    }
    // Social links. Match on the parsed hostname rather than a substring of the
    // href: a substring test tags "https://dropbox.com" or "mailto:me@x.com" as
    // twitter because both contain "x.com".
    const social = {};
    $('a[href]').each((_, el) => {
        const href = $(el).attr('href') || '';
        let host;
        try {
            // The placeholder base lets protocol-relative links ("//twitter.com/…")
            // parse; site-relative links resolve to the placeholder host and
            // therefore never match a social domain.
            host = new URL(href, 'http://relative.invalid').hostname.toLowerCase();
        }
        catch {
            return; // unparsable href: skip this link, keep iterating
        }
        for (const [domain, key] of Object.entries(SOCIAL_DOMAINS)) {
            const onDomain = host === domain || host.endsWith(`.${domain}`);
            if (onDomain && !social[key]) {
                social[key] = href;
            }
        }
    });
    return { type: 'contact', emails, phones, addresses, social };
}
382
+ // ---------------------------------------------------------------------------
383
+ // Article extractor
384
+ // ---------------------------------------------------------------------------
385
/**
 * Extract article metadata and section structure.
 *
 * Every field is resolved through a chain of fallbacks (visible markup first,
 * then <meta> tags) and is `undefined` when nothing matches. Content-level
 * lookups (summary, sections) are scoped to the first <article>, or to the
 * first <main> when the page has no <article>.
 *
 * @param $ - Loaded cheerio document.
 * @returns `{ type: 'article', title, author, date, readingTime, summary, sections }`.
 */
function extractArticle($) {
    const firstText = (selector) => $(selector).first().text().trim();
    const metaContent = (selector) => $(selector).attr('content');
    // Content root used for the summary and the section walk.
    const articleRoot = $('article').first();
    const contentEl = articleRoot.length ? articleRoot : $('main').first();
    // Title
    const title = firstText('h1') ||
        metaContent('meta[property="og:title"]') ||
        $('title').text().trim() ||
        undefined;
    // Author
    const author = metaContent('meta[name="author"]') ||
        firstText('[itemprop="author"]') ||
        firstText('[class*="author"]') ||
        firstText('[rel="author"]') ||
        undefined;
    // Date: machine-readable attributes first, visible <time> text last
    const date = $('time[datetime]').first().attr('datetime') ||
        $('time[pubdate]').first().attr('datetime') ||
        metaContent('meta[name="date"]') ||
        metaContent('meta[property="article:published_time"]') ||
        firstText('time') ||
        undefined;
    // Reading time: use the page's own label when present, otherwise estimate
    const readingTimeLabel = $('[class*="reading-time"], [class*="read-time"], [class*="readtime"]').first();
    let readingTime;
    if (readingTimeLabel.length) {
        readingTime = readingTimeLabel.text().trim();
    }
    else {
        readingTime = estimateReadingTime($);
    }
    // Summary (first 2 sentences of the opening paragraph or meta description)
    const lead = contentEl.find('p').first().text().trim() ||
        metaContent('meta[name="description"]') ||
        metaContent('meta[property="og:description"]') ||
        '';
    const summary = lead ? extractFirstSentences(lead, 2) : undefined;
    // Sections: each h2/h3 plus the text of the siblings that follow it,
    // up to (not including) the next h2/h3.
    const sections = [];
    contentEl.find('h2, h3').each((_, el) => {
        const heading = $(el).text().trim();
        if (!heading)
            return;
        const parts = [];
        for (let node = $(el).next(); node.length && !node.is('h2, h3'); node = node.next()) {
            const chunk = node.text().trim();
            if (chunk)
                parts.push(chunk);
        }
        if (parts.length > 0) {
            sections.push({ heading, content: parts.join(' ') });
        }
    });
    return { type: 'article', title, author, date, readingTime, summary, sections };
}
437
/**
 * Return the first `count` sentences of `text`.
 *
 * A sentence ends at a run of '.', '!' or '?' followed by whitespace or by the
 * end of the text. Accepting end-of-text matters: without it the final
 * sentence is never counted, so "A. B." with count 2 would come back as "A.".
 *
 * @param text - Source text.
 * @param count - Maximum number of sentences to keep.
 * @returns The leading sentences, trimmed. When the text has no sentence
 *   terminator at all, the first 300 characters, trimmed.
 */
function extractFirstSentences(text, count) {
    // `[.!?]+` always consumes at least one character, so the `$` alternative
    // cannot produce a zero-length match and the exec loop always advances.
    const sentenceEnd = /[.!?]+(?:\s+|$)/g;
    let match;
    let lastIndex = 0;
    let sentenceCount = 0;
    while ((match = sentenceEnd.exec(text)) !== null) {
        lastIndex = match.index + match[0].length;
        sentenceCount++;
        if (sentenceCount >= count)
            break;
    }
    return sentenceCount > 0 ? text.slice(0, lastIndex).trim() : text.slice(0, 300).trim();
}
450
+ function estimateReadingTime($) {
451
+ const wordsPerMinute = 200;
452
+ const text = $('article, main, [class*="content"], body').first().text();
453
+ const wordCount = text.split(/\s+/).filter(Boolean).length;
454
+ const minutes = Math.max(1, Math.ceil(wordCount / wordsPerMinute));
455
+ return `${minutes} min`;
456
+ }
457
+ // ---------------------------------------------------------------------------
458
+ // API docs extractor
459
+ // ---------------------------------------------------------------------------
460
+ const HTTP_METHODS = ['GET', 'POST', 'PUT', 'PATCH', 'DELETE', 'HEAD', 'OPTIONS']; // upper-case verbs recognised by extractApiDocs(), both as line prefixes in code blocks and as badge text
461
/**
 * Extract REST endpoints (method + path) from an API documentation page.
 *
 * Two strategies feed one list, which is then de-duplicated by method and path:
 *  1. lines inside <code>/<pre> that start with an HTTP verb followed by a path
 *     or absolute URL; the nearest preceding heading becomes the description;
 *  2. "method badge" elements whose text is exactly an HTTP verb, paired with
 *     a neighbouring element whose text looks like a path.
 *
 * @param $ - Loaded cheerio document.
 * @param url - Page URL, used to guess a base URL when the page names none.
 * @returns `{ type: 'api_docs', baseUrl, endpoints }`.
 */
function extractApiDocs($, url) {
    // Base URL: an explicit "https://api.…" mention on the page wins; otherwise
    // guess "api.<page hostname>"; undefined when the page URL cannot be parsed.
    let baseUrl;
    const declaredBase = getBodyText($).match(/https?:\/\/api\.[a-zA-Z0-9.-]+/);
    if (declaredBase) {
        baseUrl = declaredBase[0];
    }
    else {
        try {
            const pageUrl = new URL(url);
            baseUrl = `${pageUrl.protocol}//api.${pageUrl.hostname}`;
        }
        catch {
            baseUrl = undefined;
        }
    }
    const endpoints = [];
    // Strategy 1: Parse code blocks for HTTP method + path patterns
    $('code, pre').each((_, block) => {
        const rows = $(block).text().trim().split(/\n/);
        for (const row of rows) {
            const line = row.trim();
            // No verb is a prefix of another, so at most one verb can lead a line.
            const method = HTTP_METHODS.find((verb) => line.startsWith(verb + ' ') || line.startsWith(verb + '\t'));
            if (!method)
                continue;
            // Extract path (first URL-like token after the verb)
            const target = line.slice(method.length).trim().match(/^(https?:\/\/[^\s]+|\/[^\s]*)/);
            if (!target)
                continue;
            let path = target[0];
            // Normalize: strip base URL prefix if present
            if (baseUrl && path.startsWith(baseUrl)) {
                path = path.slice(baseUrl.length);
            }
            // Strip query string
            path = path.split('?')[0];
            // Description: nearest heading above this code block, if any
            const description = findNearestHeading($(block)) || undefined;
            endpoints.push({ method, path, description });
        }
    });
    // Strategy 2: Scan for method badges + inline paths in regular text
    $('[class*="method"], [class*="http-method"], .badge, .label').each((_, badge) => {
        const methodText = $(badge).text().trim().toUpperCase();
        if (!HTTP_METHODS.includes(methodText))
            return;
        // Candidate path holders: a path-like next sibling, then the first
        // <code> under the badge's parent.
        const candidates = [
            $(badge).next('[class*="path"], [class*="endpoint"], [class*="route"], code'),
            $(badge).parent().find('code').first(),
        ];
        const holder = candidates.find((candidate) => candidate.length && URL_PATH_PATTERN.test(candidate.text().trim()));
        if (holder) {
            endpoints.push({ method: methodText, path: holder.text().trim() });
        }
    });
    // Deduplicate by method+path, keeping the first occurrence
    const seenKeys = new Set();
    const unique = [];
    for (const endpoint of endpoints) {
        const key = `${endpoint.method}:${endpoint.path}`;
        if (seenKeys.has(key))
            continue;
        seenKeys.add(key);
        unique.push(endpoint);
    }
    return { type: 'api_docs', baseUrl, endpoints: unique };
}
537
+ function findNearestHeading($el) {
538
+ // Walk backwards through siblings/parents to find closest heading
539
+ let current = $el.prev();
540
+ let depth = 0;
541
+ while (depth < 5) {
542
+ if (current.length === 0) {
543
+ const parent = $el.parent();
544
+ if (!parent.length)
545
+ break;
546
+ current = parent.prev();
547
+ }
548
+ else if (current.is('h1,h2,h3,h4,h5,h6')) {
549
+ return current.text().trim();
550
+ }
551
+ else {
552
+ current = current.prev();
553
+ }
554
+ depth++;
555
+ }
556
+ return null;
557
+ }
558
+ // ---------------------------------------------------------------------------
559
+ // Main entry points
560
+ // ---------------------------------------------------------------------------
561
+ /**
562
+ * Detect the type of a web page based on HTML content and URL.
563
+ */
564
+ export { detectPageType as default };
565
/**
 * Auto-extract structured data from a web page without an LLM API key.
 *
 * The page type is detected first and the matching extractor is then run. If
 * the extractor throws, an empty result of the same type is returned instead,
 * so callers always receive an object tagged with the detected `type`. The
 * failure is reported via console.debug when the DEBUG environment variable is
 * set, matching the per-item logging used by the individual extractors.
 *
 * @param html - Raw HTML of the page.
 * @param url - Page URL (used for type detection and URL resolution).
 * @returns Extraction result whose `type` is one of
 *   'pricing' | 'products' | 'contact' | 'article' | 'api_docs' | 'unknown'.
 */
export function autoExtract(html, url) {
    const type = detectPageType(html, url);
    const $ = load(html);
    try {
        switch (type) {
            case 'pricing':
                return { type: 'pricing', plans: extractPricingPlans($) };
            case 'products':
                return { type: 'products', items: extractProducts($, url) };
            case 'contact':
                return extractContact($);
            case 'article':
                return extractArticle($);
            case 'api_docs':
                return extractApiDocs($, url);
            default:
                return { type: 'unknown' };
        }
    }
    catch (e) {
        // Previously swallowed silently; surface it in DEBUG runs so a broken
        // extractor is distinguishable from a page that simply had no data.
        if (process.env.DEBUG)
            console.debug('[webpeel]', `auto-extract (${type}) failed:`, e instanceof Error ? e.message : e);
        // Return partial/empty result rather than crashing
        switch (type) {
            case 'pricing':
                return { type: 'pricing', plans: [] };
            case 'products':
                return { type: 'products', items: [] };
            case 'contact':
                return { type: 'contact', emails: [], phones: [], addresses: [], social: {} };
            case 'article':
                return { type: 'article', sections: [] };
            case 'api_docs':
                return { type: 'api_docs', endpoints: [] };
            default:
                return { type: 'unknown' };
        }
    }
}
@@ -0,0 +1,23 @@
1
+ /**
2
+ * Auto-interact: automatically dismiss cookie banners, consent popups,
3
+ * overlay modals, and optionally click "load more" / "show all" buttons.
4
+ *
5
+ * Runs after page.goto() and before content extraction.
6
+ * Never blocks extraction — each interaction has a tight timeout.
7
+ * Total budget: 3s max.
8
+ */
9
+ import type { Page } from 'playwright';
10
+ export interface AutoInteractResult { // summary object returned by autoInteract()
11
+ cookieBannerDismissed: boolean; // presumably true when a cookie banner was dismissed — confirm against the implementation
12
+ consentHandled: boolean; // presumably true when a consent popup was handled — confirm against the implementation
13
+ loadMoreClicked: number; // presumably the number of "load more" / "show all" clicks performed — confirm against the implementation
14
+ overlaysDismissed: number; // presumably the number of overlay modals dismissed — confirm against the implementation
15
+ }
16
+ /**
17
+ * Automatically interact with the page to dismiss common UI overlays before
18
+ * content extraction. Never throws — all errors are swallowed.
19
+ *
20
+ * @param page - Playwright page (already navigated)
21
+ * @returns Summary of what was dismissed
22
+ */
23
+ export declare function autoInteract(page: Page): Promise<AutoInteractResult>;