@iflow-mcp/jakeliume-webpeel 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (547)
  1. package/LICENSE +15 -0
  2. package/README.md +313 -0
  3. package/dist/cache.d.ts +30 -0
  4. package/dist/cache.js +139 -0
  5. package/dist/cli/commands/auth.d.ts +5 -0
  6. package/dist/cli/commands/auth.js +411 -0
  7. package/dist/cli/commands/doctor.d.ts +37 -0
  8. package/dist/cli/commands/doctor.js +371 -0
  9. package/dist/cli/commands/fetch.d.ts +6 -0
  10. package/dist/cli/commands/fetch.js +1345 -0
  11. package/dist/cli/commands/guide.d.ts +2 -0
  12. package/dist/cli/commands/guide.js +183 -0
  13. package/dist/cli/commands/interact.d.ts +5 -0
  14. package/dist/cli/commands/interact.js +840 -0
  15. package/dist/cli/commands/jobs.d.ts +5 -0
  16. package/dist/cli/commands/jobs.js +997 -0
  17. package/dist/cli/commands/monitor.d.ts +12 -0
  18. package/dist/cli/commands/monitor.js +197 -0
  19. package/dist/cli/commands/observe.d.ts +12 -0
  20. package/dist/cli/commands/observe.js +158 -0
  21. package/dist/cli/commands/screenshot.d.ts +5 -0
  22. package/dist/cli/commands/screenshot.js +282 -0
  23. package/dist/cli/commands/search.d.ts +5 -0
  24. package/dist/cli/commands/search.js +1021 -0
  25. package/dist/cli/commands/setup.d.ts +13 -0
  26. package/dist/cli/commands/setup.js +244 -0
  27. package/dist/cli/commands/skill.d.ts +15 -0
  28. package/dist/cli/commands/skill.js +195 -0
  29. package/dist/cli/utils.d.ts +84 -0
  30. package/dist/cli/utils.js +806 -0
  31. package/dist/cli-auth.d.ts +75 -0
  32. package/dist/cli-auth.js +369 -0
  33. package/dist/cli.d.ts +17 -0
  34. package/dist/cli.js +99 -0
  35. package/dist/core/actions.d.ts +69 -0
  36. package/dist/core/actions.js +495 -0
  37. package/dist/core/agent.d.ts +98 -0
  38. package/dist/core/agent.js +558 -0
  39. package/dist/core/answer.d.ts +42 -0
  40. package/dist/core/answer.js +395 -0
  41. package/dist/core/application-tracker.d.ts +84 -0
  42. package/dist/core/application-tracker.js +184 -0
  43. package/dist/core/apply.d.ts +162 -0
  44. package/dist/core/apply.js +816 -0
  45. package/dist/core/auth-detection.d.ts +35 -0
  46. package/dist/core/auth-detection.js +358 -0
  47. package/dist/core/auto-extract.d.ts +82 -0
  48. package/dist/core/auto-extract.js +604 -0
  49. package/dist/core/auto-interact.d.ts +23 -0
  50. package/dist/core/auto-interact.js +246 -0
  51. package/dist/core/bm25-filter.d.ts +66 -0
  52. package/dist/core/bm25-filter.js +288 -0
  53. package/dist/core/branding.d.ts +54 -0
  54. package/dist/core/branding.js +234 -0
  55. package/dist/core/browser-fetch.d.ts +323 -0
  56. package/dist/core/browser-fetch.js +1600 -0
  57. package/dist/core/browser-pool.d.ts +91 -0
  58. package/dist/core/browser-pool.js +550 -0
  59. package/dist/core/budget.d.ts +42 -0
  60. package/dist/core/budget.js +324 -0
  61. package/dist/core/business-intel.d.ts +47 -0
  62. package/dist/core/business-intel.js +279 -0
  63. package/dist/core/cache.d.ts +13 -0
  64. package/dist/core/cache.js +121 -0
  65. package/dist/core/cf-worker-proxy.d.ts +32 -0
  66. package/dist/core/cf-worker-proxy.js +87 -0
  67. package/dist/core/challenge-detection.d.ts +26 -0
  68. package/dist/core/challenge-detection.js +468 -0
  69. package/dist/core/change-tracking.d.ts +75 -0
  70. package/dist/core/change-tracking.js +276 -0
  71. package/dist/core/chunker.d.ts +46 -0
  72. package/dist/core/chunker.js +249 -0
  73. package/dist/core/chunking.d.ts +42 -0
  74. package/dist/core/chunking.js +181 -0
  75. package/dist/core/circuit-breaker.d.ts +44 -0
  76. package/dist/core/circuit-breaker.js +85 -0
  77. package/dist/core/content-pruner.d.ts +47 -0
  78. package/dist/core/content-pruner.js +425 -0
  79. package/dist/core/cookie-cache.d.ts +60 -0
  80. package/dist/core/cookie-cache.js +163 -0
  81. package/dist/core/crawl-checkpoint.d.ts +54 -0
  82. package/dist/core/crawl-checkpoint.js +104 -0
  83. package/dist/core/crawler.d.ts +84 -0
  84. package/dist/core/crawler.js +349 -0
  85. package/dist/core/cross-verify.d.ts +27 -0
  86. package/dist/core/cross-verify.js +93 -0
  87. package/dist/core/deep-fetch.d.ts +74 -0
  88. package/dist/core/deep-fetch.js +405 -0
  89. package/dist/core/deep-research.d.ts +141 -0
  90. package/dist/core/deep-research.js +972 -0
  91. package/dist/core/design-analysis.d.ts +70 -0
  92. package/dist/core/design-analysis.js +490 -0
  93. package/dist/core/design-compare.d.ts +38 -0
  94. package/dist/core/design-compare.js +264 -0
  95. package/dist/core/diff.d.ts +61 -0
  96. package/dist/core/diff.js +289 -0
  97. package/dist/core/dns-cache.d.ts +20 -0
  98. package/dist/core/dns-cache.js +198 -0
  99. package/dist/core/documents.d.ts +23 -0
  100. package/dist/core/documents.js +123 -0
  101. package/dist/core/domain-memory.d.ts +66 -0
  102. package/dist/core/domain-memory.js +163 -0
  103. package/dist/core/domain-verify.d.ts +40 -0
  104. package/dist/core/domain-verify.js +379 -0
  105. package/dist/core/engine-ranker.d.ts +112 -0
  106. package/dist/core/engine-ranker.js +395 -0
  107. package/dist/core/extract-inline.d.ts +38 -0
  108. package/dist/core/extract-inline.js +215 -0
  109. package/dist/core/extract-listings.d.ts +38 -0
  110. package/dist/core/extract-listings.js +461 -0
  111. package/dist/core/extract.d.ts +9 -0
  112. package/dist/core/extract.js +139 -0
  113. package/dist/core/fetch-cache.d.ts +57 -0
  114. package/dist/core/fetch-cache.js +95 -0
  115. package/dist/core/fetcher.d.ts +13 -0
  116. package/dist/core/fetcher.js +12 -0
  117. package/dist/core/google-cache.d.ts +29 -0
  118. package/dist/core/google-cache.js +180 -0
  119. package/dist/core/google-serp-parser.d.ts +82 -0
  120. package/dist/core/google-serp-parser.js +287 -0
  121. package/dist/core/hotel-search.d.ts +122 -0
  122. package/dist/core/hotel-search.js +382 -0
  123. package/dist/core/http-fetch.d.ts +72 -0
  124. package/dist/core/http-fetch.js +820 -0
  125. package/dist/core/human.d.ts +175 -0
  126. package/dist/core/human.js +680 -0
  127. package/dist/core/image-caption.d.ts +44 -0
  128. package/dist/core/image-caption.js +271 -0
  129. package/dist/core/jobs.d.ts +75 -0
  130. package/dist/core/jobs.js +634 -0
  131. package/dist/core/json-ld.d.ts +15 -0
  132. package/dist/core/json-ld.js +617 -0
  133. package/dist/core/language-detect.d.ts +18 -0
  134. package/dist/core/language-detect.js +135 -0
  135. package/dist/core/links.d.ts +10 -0
  136. package/dist/core/links.js +44 -0
  137. package/dist/core/llm-extract.d.ts +71 -0
  138. package/dist/core/llm-extract.js +507 -0
  139. package/dist/core/llm-provider.d.ts +100 -0
  140. package/dist/core/llm-provider.js +702 -0
  141. package/dist/core/local-search.d.ts +60 -0
  142. package/dist/core/local-search.js +308 -0
  143. package/dist/core/logger.d.ts +28 -0
  144. package/dist/core/logger.js +104 -0
  145. package/dist/core/map.d.ts +33 -0
  146. package/dist/core/map.js +127 -0
  147. package/dist/core/markdown.d.ts +92 -0
  148. package/dist/core/markdown.js +809 -0
  149. package/dist/core/metadata.d.ts +34 -0
  150. package/dist/core/metadata.js +422 -0
  151. package/dist/core/observe.d.ts +113 -0
  152. package/dist/core/observe.js +395 -0
  153. package/dist/core/ocr.d.ts +12 -0
  154. package/dist/core/ocr.js +33 -0
  155. package/dist/core/paginate.d.ts +31 -0
  156. package/dist/core/paginate.js +106 -0
  157. package/dist/core/pdf.d.ts +8 -0
  158. package/dist/core/pdf.js +25 -0
  159. package/dist/core/peel-tls.d.ts +25 -0
  160. package/dist/core/peel-tls.js +220 -0
  161. package/dist/core/pipeline.d.ts +132 -0
  162. package/dist/core/pipeline.js +1666 -0
  163. package/dist/core/profiles.d.ts +61 -0
  164. package/dist/core/profiles.js +350 -0
  165. package/dist/core/prompt-guard.d.ts +30 -0
  166. package/dist/core/prompt-guard.js +119 -0
  167. package/dist/core/proxy-config.d.ts +90 -0
  168. package/dist/core/proxy-config.js +172 -0
  169. package/dist/core/quick-answer.d.ts +53 -0
  170. package/dist/core/quick-answer.js +833 -0
  171. package/dist/core/rate-governor.d.ts +80 -0
  172. package/dist/core/rate-governor.js +238 -0
  173. package/dist/core/readability.d.ts +57 -0
  174. package/dist/core/readability.js +533 -0
  175. package/dist/core/research.d.ts +66 -0
  176. package/dist/core/research.js +270 -0
  177. package/dist/core/retry.d.ts +60 -0
  178. package/dist/core/retry.js +119 -0
  179. package/dist/core/safe-browsing.d.ts +30 -0
  180. package/dist/core/safe-browsing.js +206 -0
  181. package/dist/core/schema-extraction.d.ts +66 -0
  182. package/dist/core/schema-extraction.js +352 -0
  183. package/dist/core/schema-postprocess.d.ts +32 -0
  184. package/dist/core/schema-postprocess.js +469 -0
  185. package/dist/core/schema-templates.d.ts +19 -0
  186. package/dist/core/schema-templates.js +143 -0
  187. package/dist/core/screenshot.d.ts +224 -0
  188. package/dist/core/screenshot.js +207 -0
  189. package/dist/core/search-engines.d.ts +25 -0
  190. package/dist/core/search-engines.js +182 -0
  191. package/dist/core/search-provider.d.ts +243 -0
  192. package/dist/core/search-provider.js +1629 -0
  193. package/dist/core/searxng-provider.d.ts +35 -0
  194. package/dist/core/searxng-provider.js +105 -0
  195. package/dist/core/selective-evidence.d.ts +151 -0
  196. package/dist/core/selective-evidence.js +389 -0
  197. package/dist/core/site-search.d.ts +44 -0
  198. package/dist/core/site-search.js +252 -0
  199. package/dist/core/sitemap.d.ts +23 -0
  200. package/dist/core/sitemap.js +105 -0
  201. package/dist/core/source-credibility.d.ts +29 -0
  202. package/dist/core/source-credibility.js +584 -0
  203. package/dist/core/source-scoring.d.ts +166 -0
  204. package/dist/core/source-scoring.js +396 -0
  205. package/dist/core/stemmer.d.ts +38 -0
  206. package/dist/core/stemmer.js +509 -0
  207. package/dist/core/strategies.d.ts +104 -0
  208. package/dist/core/strategies.js +1044 -0
  209. package/dist/core/strategy-hooks.d.ts +145 -0
  210. package/dist/core/strategy-hooks.js +74 -0
  211. package/dist/core/structured-extract.d.ts +43 -0
  212. package/dist/core/structured-extract.js +550 -0
  213. package/dist/core/summarize.d.ts +17 -0
  214. package/dist/core/summarize.js +78 -0
  215. package/dist/core/synonyms.d.ts +42 -0
  216. package/dist/core/synonyms.js +184 -0
  217. package/dist/core/system-monitor.d.ts +61 -0
  218. package/dist/core/system-monitor.js +133 -0
  219. package/dist/core/table-format.d.ts +30 -0
  220. package/dist/core/table-format.js +146 -0
  221. package/dist/core/threat-feeds.d.ts +23 -0
  222. package/dist/core/threat-feeds.js +104 -0
  223. package/dist/core/timing.d.ts +21 -0
  224. package/dist/core/timing.js +33 -0
  225. package/dist/core/transcript-export.d.ts +47 -0
  226. package/dist/core/transcript-export.js +107 -0
  227. package/dist/core/user-agents.d.ts +82 -0
  228. package/dist/core/user-agents.js +239 -0
  229. package/dist/core/vertical-search.d.ts +54 -0
  230. package/dist/core/vertical-search.js +158 -0
  231. package/dist/core/watch-manager.d.ts +175 -0
  232. package/dist/core/watch-manager.js +416 -0
  233. package/dist/core/watch.d.ts +101 -0
  234. package/dist/core/watch.js +389 -0
  235. package/dist/core/youtube.d.ts +130 -0
  236. package/dist/core/youtube.js +1175 -0
  237. package/dist/ee/challenge-re-export.d.ts +1 -0
  238. package/dist/ee/challenge-re-export.js +1 -0
  239. package/dist/ee/challenge-solver.d.ts +72 -0
  240. package/dist/ee/challenge-solver.js +720 -0
  241. package/dist/ee/domain-extractors.d.ts +8 -0
  242. package/dist/ee/domain-extractors.js +8 -0
  243. package/dist/ee/domain-intel.d.ts +16 -0
  244. package/dist/ee/domain-intel.js +133 -0
  245. package/dist/ee/extractors/allrecipes.d.ts +2 -0
  246. package/dist/ee/extractors/allrecipes.js +120 -0
  247. package/dist/ee/extractors/amazon.d.ts +2 -0
  248. package/dist/ee/extractors/amazon.js +78 -0
  249. package/dist/ee/extractors/arxiv.d.ts +2 -0
  250. package/dist/ee/extractors/arxiv.js +137 -0
  251. package/dist/ee/extractors/bestbuy.d.ts +2 -0
  252. package/dist/ee/extractors/bestbuy.js +78 -0
  253. package/dist/ee/extractors/carscom.d.ts +2 -0
  254. package/dist/ee/extractors/carscom.js +121 -0
  255. package/dist/ee/extractors/coingecko.d.ts +2 -0
  256. package/dist/ee/extractors/coingecko.js +134 -0
  257. package/dist/ee/extractors/craigslist.d.ts +2 -0
  258. package/dist/ee/extractors/craigslist.js +92 -0
  259. package/dist/ee/extractors/devto.d.ts +2 -0
  260. package/dist/ee/extractors/devto.js +135 -0
  261. package/dist/ee/extractors/ebay.d.ts +2 -0
  262. package/dist/ee/extractors/ebay.js +90 -0
  263. package/dist/ee/extractors/espn.d.ts +2 -0
  264. package/dist/ee/extractors/espn.js +260 -0
  265. package/dist/ee/extractors/etsy.d.ts +2 -0
  266. package/dist/ee/extractors/etsy.js +52 -0
  267. package/dist/ee/extractors/facebook.d.ts +2 -0
  268. package/dist/ee/extractors/facebook.js +46 -0
  269. package/dist/ee/extractors/github.d.ts +2 -0
  270. package/dist/ee/extractors/github.js +196 -0
  271. package/dist/ee/extractors/google-flights.d.ts +2 -0
  272. package/dist/ee/extractors/google-flights.js +176 -0
  273. package/dist/ee/extractors/hackernews.d.ts +2 -0
  274. package/dist/ee/extractors/hackernews.js +147 -0
  275. package/dist/ee/extractors/imdb.d.ts +2 -0
  276. package/dist/ee/extractors/imdb.js +172 -0
  277. package/dist/ee/extractors/index.d.ts +26 -0
  278. package/dist/ee/extractors/index.js +247 -0
  279. package/dist/ee/extractors/instagram.d.ts +2 -0
  280. package/dist/ee/extractors/instagram.js +102 -0
  281. package/dist/ee/extractors/kalshi.d.ts +2 -0
  282. package/dist/ee/extractors/kalshi.js +121 -0
  283. package/dist/ee/extractors/kayak-cars.d.ts +2 -0
  284. package/dist/ee/extractors/kayak-cars.js +270 -0
  285. package/dist/ee/extractors/linkedin.d.ts +2 -0
  286. package/dist/ee/extractors/linkedin.js +113 -0
  287. package/dist/ee/extractors/medium.d.ts +2 -0
  288. package/dist/ee/extractors/medium.js +130 -0
  289. package/dist/ee/extractors/news.d.ts +4 -0
  290. package/dist/ee/extractors/news.js +173 -0
  291. package/dist/ee/extractors/npm.d.ts +2 -0
  292. package/dist/ee/extractors/npm.js +86 -0
  293. package/dist/ee/extractors/pdf.d.ts +2 -0
  294. package/dist/ee/extractors/pdf.js +108 -0
  295. package/dist/ee/extractors/pinterest.d.ts +2 -0
  296. package/dist/ee/extractors/pinterest.js +34 -0
  297. package/dist/ee/extractors/polymarket.d.ts +2 -0
  298. package/dist/ee/extractors/polymarket.js +358 -0
  299. package/dist/ee/extractors/producthunt.d.ts +2 -0
  300. package/dist/ee/extractors/producthunt.js +88 -0
  301. package/dist/ee/extractors/pubmed.d.ts +2 -0
  302. package/dist/ee/extractors/pubmed.js +162 -0
  303. package/dist/ee/extractors/pypi.d.ts +2 -0
  304. package/dist/ee/extractors/pypi.js +80 -0
  305. package/dist/ee/extractors/reddit.d.ts +2 -0
  306. package/dist/ee/extractors/reddit.js +438 -0
  307. package/dist/ee/extractors/redfin.d.ts +2 -0
  308. package/dist/ee/extractors/redfin.js +156 -0
  309. package/dist/ee/extractors/semanticscholar.d.ts +2 -0
  310. package/dist/ee/extractors/semanticscholar.js +131 -0
  311. package/dist/ee/extractors/shared.d.ts +12 -0
  312. package/dist/ee/extractors/shared.js +76 -0
  313. package/dist/ee/extractors/soundcloud.d.ts +2 -0
  314. package/dist/ee/extractors/soundcloud.js +34 -0
  315. package/dist/ee/extractors/sportsbetting.d.ts +2 -0
  316. package/dist/ee/extractors/sportsbetting.js +37 -0
  317. package/dist/ee/extractors/spotify.d.ts +2 -0
  318. package/dist/ee/extractors/spotify.js +34 -0
  319. package/dist/ee/extractors/stackoverflow.d.ts +2 -0
  320. package/dist/ee/extractors/stackoverflow.js +61 -0
  321. package/dist/ee/extractors/substack.d.ts +2 -0
  322. package/dist/ee/extractors/substack.js +115 -0
  323. package/dist/ee/extractors/substackroot.d.ts +2 -0
  324. package/dist/ee/extractors/substackroot.js +46 -0
  325. package/dist/ee/extractors/tiktok.d.ts +2 -0
  326. package/dist/ee/extractors/tiktok.js +29 -0
  327. package/dist/ee/extractors/tradingview.d.ts +2 -0
  328. package/dist/ee/extractors/tradingview.js +182 -0
  329. package/dist/ee/extractors/twitch.d.ts +2 -0
  330. package/dist/ee/extractors/twitch.js +36 -0
  331. package/dist/ee/extractors/twitter.d.ts +2 -0
  332. package/dist/ee/extractors/twitter.js +327 -0
  333. package/dist/ee/extractors/types.d.ts +14 -0
  334. package/dist/ee/extractors/types.js +1 -0
  335. package/dist/ee/extractors/walmart.d.ts +2 -0
  336. package/dist/ee/extractors/walmart.js +50 -0
  337. package/dist/ee/extractors/weather.d.ts +2 -0
  338. package/dist/ee/extractors/weather.js +133 -0
  339. package/dist/ee/extractors/wikipedia.d.ts +4 -0
  340. package/dist/ee/extractors/wikipedia.js +235 -0
  341. package/dist/ee/extractors/yelp.d.ts +2 -0
  342. package/dist/ee/extractors/yelp.js +216 -0
  343. package/dist/ee/extractors/youtube.d.ts +2 -0
  344. package/dist/ee/extractors/youtube.js +189 -0
  345. package/dist/ee/extractors/zillow.d.ts +54 -0
  346. package/dist/ee/extractors/zillow.js +247 -0
  347. package/dist/ee/extractors-re-export.d.ts +1 -0
  348. package/dist/ee/extractors-re-export.js +1 -0
  349. package/dist/ee/premium-hooks.d.ts +20 -0
  350. package/dist/ee/premium-hooks.js +50 -0
  351. package/dist/ee/spa-detection.d.ts +2 -0
  352. package/dist/ee/spa-detection.js +2 -0
  353. package/dist/ee/stability.d.ts +4 -0
  354. package/dist/ee/stability.js +29 -0
  355. package/dist/ee/swr-cache.d.ts +14 -0
  356. package/dist/ee/swr-cache.js +34 -0
  357. package/dist/index.d.ts +143 -0
  358. package/dist/index.js +291 -0
  359. package/dist/integrations/index.d.ts +2 -0
  360. package/dist/integrations/index.js +2 -0
  361. package/dist/integrations/langchain.d.ts +64 -0
  362. package/dist/integrations/langchain.js +115 -0
  363. package/dist/integrations/llamaindex.d.ts +50 -0
  364. package/dist/integrations/llamaindex.js +91 -0
  365. package/dist/mcp/handlers/act.d.ts +5 -0
  366. package/dist/mcp/handlers/act.js +34 -0
  367. package/dist/mcp/handlers/definitions.d.ts +6 -0
  368. package/dist/mcp/handlers/definitions.js +395 -0
  369. package/dist/mcp/handlers/extract.d.ts +7 -0
  370. package/dist/mcp/handlers/extract.js +135 -0
  371. package/dist/mcp/handlers/fetch.d.ts +6 -0
  372. package/dist/mcp/handlers/fetch.js +98 -0
  373. package/dist/mcp/handlers/find.d.ts +5 -0
  374. package/dist/mcp/handlers/find.js +137 -0
  375. package/dist/mcp/handlers/index.d.ts +13 -0
  376. package/dist/mcp/handlers/index.js +63 -0
  377. package/dist/mcp/handlers/legacy.d.ts +25 -0
  378. package/dist/mcp/handlers/legacy.js +450 -0
  379. package/dist/mcp/handlers/meta.d.ts +6 -0
  380. package/dist/mcp/handlers/meta.js +40 -0
  381. package/dist/mcp/handlers/monitor.d.ts +5 -0
  382. package/dist/mcp/handlers/monitor.js +41 -0
  383. package/dist/mcp/handlers/observe.d.ts +8 -0
  384. package/dist/mcp/handlers/observe.js +37 -0
  385. package/dist/mcp/handlers/read.d.ts +6 -0
  386. package/dist/mcp/handlers/read.js +78 -0
  387. package/dist/mcp/handlers/see.d.ts +5 -0
  388. package/dist/mcp/handlers/see.js +75 -0
  389. package/dist/mcp/handlers/types.d.ts +29 -0
  390. package/dist/mcp/handlers/types.js +28 -0
  391. package/dist/mcp/server.d.ts +7 -0
  392. package/dist/mcp/server.js +108 -0
  393. package/dist/mcp/smart-router.d.ts +23 -0
  394. package/dist/mcp/smart-router.js +178 -0
  395. package/dist/server/app.d.ts +14 -0
  396. package/dist/server/app.js +632 -0
  397. package/dist/server/auth-store.d.ts +28 -0
  398. package/dist/server/auth-store.js +88 -0
  399. package/dist/server/bull-queues.d.ts +60 -0
  400. package/dist/server/bull-queues.js +90 -0
  401. package/dist/server/email-service.d.ts +55 -0
  402. package/dist/server/email-service.js +291 -0
  403. package/dist/server/job-queue.d.ts +100 -0
  404. package/dist/server/job-queue.js +145 -0
  405. package/dist/server/logger.d.ts +10 -0
  406. package/dist/server/logger.js +37 -0
  407. package/dist/server/middleware/audit-log.d.ts +14 -0
  408. package/dist/server/middleware/audit-log.js +73 -0
  409. package/dist/server/middleware/auth.d.ts +35 -0
  410. package/dist/server/middleware/auth.js +225 -0
  411. package/dist/server/middleware/rate-limit.d.ts +50 -0
  412. package/dist/server/middleware/rate-limit.js +270 -0
  413. package/dist/server/middleware/scope-guard.d.ts +25 -0
  414. package/dist/server/middleware/scope-guard.js +45 -0
  415. package/dist/server/middleware/url-validator.d.ts +15 -0
  416. package/dist/server/middleware/url-validator.js +201 -0
  417. package/dist/server/openapi.yaml +6418 -0
  418. package/dist/server/pg-auth-store.d.ts +146 -0
  419. package/dist/server/pg-auth-store.js +576 -0
  420. package/dist/server/pg-job-queue.d.ts +59 -0
  421. package/dist/server/pg-job-queue.js +375 -0
  422. package/dist/server/routes/activity.d.ts +6 -0
  423. package/dist/server/routes/activity.js +79 -0
  424. package/dist/server/routes/admin-active.d.ts +7 -0
  425. package/dist/server/routes/admin-active.js +120 -0
  426. package/dist/server/routes/admin-stats.d.ts +7 -0
  427. package/dist/server/routes/admin-stats.js +176 -0
  428. package/dist/server/routes/agent.d.ts +24 -0
  429. package/dist/server/routes/agent.js +480 -0
  430. package/dist/server/routes/answer.d.ts +5 -0
  431. package/dist/server/routes/answer.js +125 -0
  432. package/dist/server/routes/ask.d.ts +28 -0
  433. package/dist/server/routes/ask.js +295 -0
  434. package/dist/server/routes/batch.d.ts +6 -0
  435. package/dist/server/routes/batch.js +493 -0
  436. package/dist/server/routes/cache-warm.d.ts +25 -0
  437. package/dist/server/routes/cache-warm.js +212 -0
  438. package/dist/server/routes/cli-usage.d.ts +6 -0
  439. package/dist/server/routes/cli-usage.js +127 -0
  440. package/dist/server/routes/compat.d.ts +23 -0
  441. package/dist/server/routes/compat.js +652 -0
  442. package/dist/server/routes/crawl.d.ts +13 -0
  443. package/dist/server/routes/crawl.js +287 -0
  444. package/dist/server/routes/deep-fetch.d.ts +8 -0
  445. package/dist/server/routes/deep-fetch.js +57 -0
  446. package/dist/server/routes/deep-research.d.ts +11 -0
  447. package/dist/server/routes/deep-research.js +232 -0
  448. package/dist/server/routes/demo.d.ts +24 -0
  449. package/dist/server/routes/demo.js +517 -0
  450. package/dist/server/routes/do.d.ts +8 -0
  451. package/dist/server/routes/do.js +72 -0
  452. package/dist/server/routes/extract.d.ts +14 -0
  453. package/dist/server/routes/extract.js +325 -0
  454. package/dist/server/routes/feed.d.ts +15 -0
  455. package/dist/server/routes/feed.js +311 -0
  456. package/dist/server/routes/fetch-queue.d.ts +13 -0
  457. package/dist/server/routes/fetch-queue.js +357 -0
  458. package/dist/server/routes/fetch.d.ts +7 -0
  459. package/dist/server/routes/fetch.js +1274 -0
  460. package/dist/server/routes/go.d.ts +14 -0
  461. package/dist/server/routes/go.js +81 -0
  462. package/dist/server/routes/health.d.ts +11 -0
  463. package/dist/server/routes/health.js +141 -0
  464. package/dist/server/routes/jobs.d.ts +7 -0
  465. package/dist/server/routes/jobs.js +574 -0
  466. package/dist/server/routes/map.d.ts +11 -0
  467. package/dist/server/routes/map.js +116 -0
  468. package/dist/server/routes/mcp.d.ts +14 -0
  469. package/dist/server/routes/mcp.js +197 -0
  470. package/dist/server/routes/metrics.d.ts +37 -0
  471. package/dist/server/routes/metrics.js +149 -0
  472. package/dist/server/routes/oauth.d.ts +9 -0
  473. package/dist/server/routes/oauth.js +396 -0
  474. package/dist/server/routes/playground.d.ts +17 -0
  475. package/dist/server/routes/playground.js +283 -0
  476. package/dist/server/routes/reader.d.ts +18 -0
  477. package/dist/server/routes/reader.js +192 -0
  478. package/dist/server/routes/research.d.ts +14 -0
  479. package/dist/server/routes/research.js +482 -0
  480. package/dist/server/routes/screenshot.d.ts +22 -0
  481. package/dist/server/routes/screenshot.js +820 -0
  482. package/dist/server/routes/search.d.ts +6 -0
  483. package/dist/server/routes/search.js +874 -0
  484. package/dist/server/routes/session.d.ts +17 -0
  485. package/dist/server/routes/session.js +548 -0
  486. package/dist/server/routes/share.d.ts +18 -0
  487. package/dist/server/routes/share.js +462 -0
  488. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  489. package/dist/server/routes/smart-search/handlers/cars.js +102 -0
  490. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  491. package/dist/server/routes/smart-search/handlers/flights.js +72 -0
  492. package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
  493. package/dist/server/routes/smart-search/handlers/general.js +717 -0
  494. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  495. package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
  496. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  497. package/dist/server/routes/smart-search/handlers/products.js +1309 -0
  498. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  499. package/dist/server/routes/smart-search/handlers/rental.js +154 -0
  500. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  501. package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
  502. package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
  503. package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
  504. package/dist/server/routes/smart-search/index.d.ts +19 -0
  505. package/dist/server/routes/smart-search/index.js +546 -0
  506. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  507. package/dist/server/routes/smart-search/intent.js +264 -0
  508. package/dist/server/routes/smart-search/llm.d.ts +16 -0
  509. package/dist/server/routes/smart-search/llm.js +70 -0
  510. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  511. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  512. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  513. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  514. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  515. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  516. package/dist/server/routes/smart-search/types.d.ts +81 -0
  517. package/dist/server/routes/smart-search/types.js +1 -0
  518. package/dist/server/routes/smart-search/utils.d.ts +20 -0
  519. package/dist/server/routes/smart-search/utils.js +146 -0
  520. package/dist/server/routes/stats.d.ts +6 -0
  521. package/dist/server/routes/stats.js +71 -0
  522. package/dist/server/routes/stripe.d.ts +15 -0
  523. package/dist/server/routes/stripe.js +296 -0
  524. package/dist/server/routes/transcript-export.d.ts +10 -0
  525. package/dist/server/routes/transcript-export.js +178 -0
  526. package/dist/server/routes/usage.d.ts +9 -0
  527. package/dist/server/routes/usage.js +279 -0
  528. package/dist/server/routes/users.d.ts +8 -0
  529. package/dist/server/routes/users.js +1867 -0
  530. package/dist/server/routes/watch.d.ts +15 -0
  531. package/dist/server/routes/watch.js +309 -0
  532. package/dist/server/routes/webhooks.d.ts +26 -0
  533. package/dist/server/routes/webhooks.js +170 -0
  534. package/dist/server/routes/youtube.d.ts +6 -0
  535. package/dist/server/routes/youtube.js +130 -0
  536. package/dist/server/sentry.d.ts +14 -0
  537. package/dist/server/sentry.js +104 -0
  538. package/dist/server/types.d.ts +15 -0
  539. package/dist/server/types.js +7 -0
  540. package/dist/server/utils/response.d.ts +44 -0
  541. package/dist/server/utils/response.js +69 -0
  542. package/dist/server/utils/sse.d.ts +22 -0
  543. package/dist/server/utils/sse.js +38 -0
  544. package/dist/types.d.ts +552 -0
  545. package/dist/types.js +39 -0
  546. package/llms.txt +105 -0
  547. package/package.json +189 -0
@@ -0,0 +1,324 @@
1
+ /**
2
+ * Smart content distillation for WebPeel
3
+ *
4
+ * Intelligently compresses content to fit within a token budget using
5
+ * heuristic-based techniques — no LLM required.
6
+ *
7
+ * This is NOT simple truncation: it prioritises information-dense content
8
+ * and progressively removes lower-value sections while preserving structure.
9
+ *
10
+ * @module budget
11
+ */
12
+ import { estimateTokens } from './markdown.js';
13
+ /* ------------------------------------------------------------------ */
14
+ /* Constants */
15
+ /* ------------------------------------------------------------------ */
16
+ /**
17
+ * Heading patterns that indicate low-value boilerplate sections.
18
+ * When a heading matches, its entire section is removed.
19
+ */
20
+ const BOILERPLATE_HEADING_PATTERNS = [
21
+ /^#{1,3}\s*(cookie(s| notice| policy| banner| consent)?|privacy( policy)?|terms( of (use|service))?|disclaimer|copyright)/i,
22
+ /^#{1,3}\s*(about us|contact( us)?|subscribe|newsletter|follow us|social media)/i,
23
+ /^#{1,3}\s*(related posts?|you may also|more from|popular posts?|trending|recent posts?)/i,
24
+ /^#{1,3}\s*(comments?|leave a (comment|reply)|tags?|categories?|share this)/i,
25
+ /^#{1,3}\s*(table of contents?|toc|index)/i,
26
+ /^#{1,3}\s*(advertisement|sponsored|promoted|ad(s| section)?)/i,
27
+ /^#{1,3}\s*(navigation|menu|sidebar|footer|header)/i,
28
+ /^#{1,3}\s*(sign[\s-]*up|log[\s-]*in|register|create( an)? account|get started)/i,
29
+ ];
30
+ /** Maximum data rows to keep when compressing a markdown table */
31
+ const MAX_TABLE_ROWS = 3;
32
+ /** Tokens per listing item used for budget estimation in extract-all mode */
33
+ export const TOKENS_PER_LISTING_ITEM = 50;
34
+ /* ------------------------------------------------------------------ */
35
+ /* Public API */
36
+ /* ------------------------------------------------------------------ */
37
/**
 * Shrink `content` so that its estimated token count fits `budget`.
 *
 * Content is returned untouched when it is empty, when the budget is not
 * positive, or when it already fits. Otherwise JSON input is handed to the
 * JSON distiller and every other format goes through the markdown/text
 * pipeline, which applies these steps in order and only while still over budget:
 *   1. Replace image markdown with a short text label (or drop it)
 *   2. Drop boilerplate sections (cookie banners, nav headings, etc.)
 *   3. Cap tables at MAX_TABLE_ROWS data rows
 *   4. Collapse redundant blank lines
 *   5. Drop low information-density paragraphs
 *   6. Hard-truncate with a notice as the last resort
 *
 * @param content The content string to distill
 * @param budget  Maximum token budget (rough: 1 token ≈ 4 chars)
 * @param format  Content format: 'markdown' | 'text' | 'json'
 * @returns Distilled content within the budget
 */
export function distillToBudget(content, budget, format) {
    const nothingToDo = !content || budget <= 0 || estimateTokens(content) <= budget;
    if (nothingToDo) {
        return content;
    }
    return format === 'json'
        ? distillJson(content, budget)
        : distillMarkdown(content, budget);
}
63
/**
 * Work out how many listing items can be returned for a given token budget,
 * assuming each item costs TOKENS_PER_LISTING_ITEM tokens. At least one item
 * is always allowed, however small the budget.
 *
 * @param totalItems Total available items
 * @param budget     Token budget
 * @returns `{ maxItems, truncated, totalAvailable }` — `maxItems` never exceeds
 *          `totalItems`; `truncated` is true when some items will not fit
 */
export function budgetListings(totalItems, budget) {
    const cap = Math.max(Math.floor(budget / TOKENS_PER_LISTING_ITEM), 1);
    if (cap < totalItems) {
        return { maxItems: cap, truncated: true, totalAvailable: totalItems };
    }
    return { maxItems: totalItems, truncated: false, totalAvailable: totalItems };
}
79
+ /* ------------------------------------------------------------------ */
80
+ /* Markdown / text distillation */
81
+ /* ------------------------------------------------------------------ */
82
/**
 * Run the markdown/text compression pipeline.
 *
 * Stages are ordered from least to most destructive. Each stage runs only if
 * the text is still over budget when its turn comes, so content that fits
 * early is never degraded further. The final result is trimmed.
 */
function distillMarkdown(content, budget) {
    const stages = [
        // 1. Decorative images (minimal info loss)
        (text) => removeImages(text),
        // 2. Boilerplate sections
        (text) => removeBoilerplateSections(text),
        // 3. Long tables
        (text) => compressTables(text),
        // 4. Redundant blank lines
        (text) => compressWhitespace(text),
        // 5. Low-density paragraphs
        (text) => removeWeakParagraphs(text, budget),
        // 6. Hard truncation with notice, last resort
        (text) => hardTruncate(text, budget),
    ];
    let distilled = content;
    for (const stage of stages) {
        if (estimateTokens(distilled) > budget) {
            distilled = stage(distilled);
        }
    }
    return distilled.trim();
}
110
/**
 * Strip markdown images (`![alt](url)`).
 *
 * An image whose trimmed alt text is 1–59 characters long is replaced by an
 * `[Image: alt]` label; images with empty or longer alt text are dropped.
 * Any empty `[Image: ]` label present afterwards is removed together with
 * the whitespace that follows it.
 */
function removeImages(content) {
    const IMAGE_MARKDOWN = /!\[([^\]]*)\]\([^)]+\)/g;
    const toLabel = (_match, altText) => {
        const label = altText.trim();
        if (label.length === 0 || label.length >= 60) {
            return '';
        }
        return `[Image: ${label}]`;
    };
    const labelled = content.replace(IMAGE_MARKDOWN, toLabel);
    return labelled.replace(/\[Image: \]\s*/g, '');
}
123
/**
 * Drop sections whose heading matches one of BOILERPLATE_HEADING_PATTERNS.
 *
 * A matching heading and every following line are discarded until a heading
 * of the same or a shallower level appears; that heading is then evaluated
 * normally (it may itself start another boilerplate section).
 */
function removeBoilerplateSections(content) {
    const kept = [];
    // Level of the boilerplate heading being skipped; 0 means "not skipping".
    let skippedLevel = 0;
    content.split('\n').forEach((line) => {
        const heading = /^(#{1,6})\s/.exec(line);
        if (heading !== null) {
            const level = heading[1].length;
            // A heading at the same or a shallower level ends the skipped section.
            if (skippedLevel !== 0 && level <= skippedLevel) {
                skippedLevel = 0;
            }
            const startsBoilerplate = skippedLevel === 0
                && BOILERPLATE_HEADING_PATTERNS.some((pattern) => pattern.test(line));
            if (startsBoilerplate) {
                skippedLevel = level;
                return;
            }
        }
        if (skippedLevel === 0) {
            kept.push(line);
        }
    });
    return kept.join('\n');
}
155
/**
 * Shorten markdown tables to a header, one separator and at most
 * MAX_TABLE_ROWS data rows.
 *
 * A table is a run of consecutive lines that start and end with `|`. The
 * first row of a run is always kept as the header. The first separator row
 * (only `|`, `:`, `-` and whitespace) is kept and later ones are dropped.
 * Once the row limit is reached a single "rows omitted" marker row is
 * emitted and the remaining rows are discarded.
 */
function compressTables(content) {
    const output = [];
    // State for the table currently being read; null outside a table.
    let table = null;
    for (const rawLine of content.split('\n')) {
        const text = rawLine.trim();
        const looksLikeRow = text.startsWith('|') && text.endsWith('|');
        if (!looksLikeRow) {
            table = null;
            output.push(rawLine);
            continue;
        }
        if (table === null) {
            table = { sawHeader: false, sawSeparator: false, rows: 0, noted: false };
        }
        if (!table.sawHeader) {
            output.push(rawLine);
            table.sawHeader = true;
            continue;
        }
        const isDivider = /^\|[\s|:-]+\|$/.test(text);
        if (isDivider) {
            if (!table.sawSeparator) {
                output.push(rawLine);
                table.sawSeparator = true;
            }
            // Repeated separator rows are dropped.
            continue;
        }
        if (table.rows < MAX_TABLE_ROWS) {
            output.push(rawLine);
            table.rows += 1;
        }
        else if (!table.noted) {
            output.push(`| ... | *(${MAX_TABLE_ROWS}+ rows — additional rows omitted)* | ... |`);
            table.noted = true;
        }
        // Any further data rows are silently discarded.
    }
    return output.join('\n');
}
206
/**
 * Collapse every run of three or more consecutive newlines into exactly two,
 * i.e. reduce multiple blank lines to a single blank line.
 */
function compressWhitespace(content) {
    return content.split(/\n{3,}/).join('\n\n');
}
212
+ /**
213
+ * Remove paragraphs scored as low information-density until within budget.
214
+ *
215
+ * Scoring heuristics:
216
+ * - Word count is the base score
217
+ * - Very short paragraphs (< 50 chars) are heavily penalised
218
+ * - Unusual avg word length penalised (nav menus, link lists)
219
+ * - Long bullet lists scored slightly lower
220
+ * - Headings and code blocks are never removed
221
+ */
222
+ function removeWeakParagraphs(content, budget) {
223
+ const paragraphs = content.split('\n\n');
224
+ const scored = paragraphs.map((para, i) => {
225
+ const trimmed = para.trim();
226
+ const isHeading = /^#{1,6}\s/.test(trimmed);
227
+ const isCodeBlock = trimmed.startsWith('```');
228
+ const isHtmlComment = trimmed.startsWith('<!--');
229
+ // Never remove structural elements
230
+ if (isHeading || isCodeBlock || isHtmlComment) {
231
+ return { para, score: Number.MAX_SAFE_INTEGER, i };
232
+ }
233
+ // Strip markdown formatting for text analysis
234
+ const textOnly = trimmed.replace(/[#*_\[\]\(\)\-`|>~]/g, '');
235
+ const words = textOnly.split(/\s+/).filter(w => w.length > 0);
236
+ let score = words.length;
237
+ // Heavily penalise very short paragraphs (likely nav labels / single words)
238
+ if (textOnly.length < 50)
239
+ score *= 0.15;
240
+ // Penalise unusual avg word lengths (short = icon labels, long = data URIs)
241
+ const avgWordLen = words.length > 0 ? textOnly.length / words.length : 0;
242
+ if (avgWordLen < 3 || avgWordLen > 15)
243
+ score *= 0.4;
244
+ // Slightly penalise long bullet lists (repetitive structure)
245
+ const lines = trimmed.split('\n');
246
+ const bulletLines = lines.filter(l => /^[-*]\s/.test(l.trim()));
247
+ if (bulletLines.length > 3 && bulletLines.length === lines.length) {
248
+ score *= 0.7;
249
+ }
250
+ return { para, score, i };
251
+ });
252
+ // Sort ascending — weakest paragraphs first
253
+ const byScore = [...scored].sort((a, b) => a.score - b.score);
254
+ const removed = new Set();
255
+ let current = content;
256
+ for (const item of byScore) {
257
+ if (estimateTokens(current) <= budget)
258
+ break;
259
+ // Don't remove paragraphs with reasonable content
260
+ if (item.score >= 8)
261
+ break;
262
+ removed.add(item.i);
263
+ current = scored
264
+ .filter(s => !removed.has(s.i))
265
+ .map(s => s.para)
266
+ .join('\n\n');
267
+ }
268
+ return current;
269
+ }
270
/**
 * Hard-truncate at a clean line boundary, appending a notice.
 * Used only as the last resort after all other compression steps fail.
 *
 * The cut is placed at the last newline at or before the character limit.
 * When there is no usable newline the text is cut at the limit itself,
 * stepping back one code unit if that would split a UTF-16 surrogate pair
 * (e.g. an emoji) and leave a lone surrogate in the output.
 *
 * @param content Text to truncate
 * @param budget  Token budget (1 token ≈ 4 chars)
 * @returns `content` unchanged when it already fits, otherwise the truncated
 *          text followed by "[Content distilled to fit budget]"
 */
function hardTruncate(content, budget) {
    // Leave ~15 tokens for the truncation notice
    const maxChars = Math.max((budget - 15) * 4, 0);
    if (content.length <= maxChars)
        return content;
    // Find the last newline before the character limit
    let cut = maxChars;
    while (cut > 0 && content[cut] !== '\n')
        cut--;
    if (cut === 0) {
        // No newline found — hard cut
        cut = maxChars;
        if (cut > 0) {
            const before = content.charCodeAt(cut - 1);
            const after = content.charCodeAt(cut);
            const splitsPair = before >= 0xd800 && before <= 0xdbff
                && after >= 0xdc00 && after <= 0xdfff;
            if (splitsPair)
                cut--;
        }
    }
    return content.slice(0, cut).trimEnd() + '\n\n[Content distilled to fit budget]';
}
287
+ /* ------------------------------------------------------------------ */
288
+ /* JSON distillation */
289
+ /* ------------------------------------------------------------------ */
290
/**
 * Distill JSON content to fit the budget.
 *
 *   - Arrays: keep the longest prefix of items whose pretty-printed form
 *     fits (located by binary search) and return it as JSON
 *   - Other JSON values: pretty-print, then hard-truncate if still too large
 *   - Anything that throws inside the try (e.g. unparseable input): treat
 *     the raw string as plain text and hard-truncate it
 */
function distillJson(content, budget) {
    const render = (value) => JSON.stringify(value, null, 2);
    try {
        const data = JSON.parse(content);
        if (!Array.isArray(data)) {
            // Non-array JSON — fall back to text truncation
            const pretty = render(data);
            return estimateTokens(pretty) <= budget ? pretty : hardTruncate(pretty, budget);
        }
        const fits = (count) => estimateTokens(render(data.slice(0, count))) <= budget;
        // Invariant: a prefix of `low` items is the best known fit (0 = empty array).
        let low = 0;
        let high = data.length;
        while (low < high) {
            // Round up so the probe is always above `low` and the loop advances.
            const probe = Math.ceil((low + high) / 2);
            if (fits(probe)) {
                low = probe;
            }
            else {
                high = probe - 1;
            }
        }
        return render(data.slice(0, low));
    }
    catch {
        // Invalid JSON — treat as plain text
        return hardTruncate(content, budget);
    }
}
@@ -0,0 +1,47 @@
1
+ /**
2
+ * business-intel.ts — Extract structured business intelligence from a URL.
3
+ *
4
+ * Uses peel() to fetch the website, then extracts:
5
+ * - Name, description, industry from schema.org + OG tags
6
+ * - Products and pricing from /pricing and /plans pages
7
+ * - Tech stack from headers and script patterns
8
+ * - Social media links
9
+ * - Review aggregates
10
+ */
11
/**
 * Structured business intelligence gathered for one website.
 * Every field is optional and is omitted when nothing could be extracted.
 */
export interface BusinessIntel {
    /** Business name. */
    name?: string;
    /** Short description of the business. */
    description?: string;
    /** Best-guess industry label. */
    industry?: string;
    /** Product or feature names. */
    products?: string[];
    /** Plan name / price pairs, with the price kept as display text (e.g. "$0"). */
    pricing?: Array<{
        plan: string;
        price: string;
    }>;
    /** Review aggregates, one entry per source. */
    reviews?: Array<{
        source: string;
        rating: number;
        count: number;
    }>;
    /** Social media profiles found for the business. */
    socialMedia?: Array<{
        platform: string;
        url: string;
    }>;
    /** Names of detected technologies. */
    techStack?: string[];
    /** Employee count or range, as text. */
    employees?: string;
    /** Founding date, as text. */
    founded?: string;
}
33
/**
 * Collect structured business intelligence for a website.
 *
 * The homepage is fetched first; pricing pages on the same origin are then
 * probed on a best-effort basis. The result combines schema.org / Open Graph
 * data with detected tech stack, social links, industry, products and pricing.
 *
 * @param url Website URL
 * @returns The extracted data; fields with no findings are left out
 *
 * @example
 * ```typescript
 * const intel = await getBusinessIntel('https://stripe.com');
 * console.log(intel.name);       // "Stripe"
 * console.log(intel.techStack);  // ["React", "Cloudflare", ...]
 * console.log(intel.pricing);    // [{plan: "Starter", price: "$0"}, ...]
 * ```
 */
export declare function getBusinessIntel(url: string): Promise<BusinessIntel>;
@@ -0,0 +1,279 @@
1
+ /**
2
+ * business-intel.ts — Extract structured business intelligence from a URL.
3
+ *
4
+ * Uses peel() to fetch the website, then extracts:
5
+ * - Name, description, industry from schema.org + OG tags
6
+ * - Products and pricing from /pricing and /plans pages
7
+ * - Tech stack from headers and script patterns
8
+ * - Social media links
9
+ * - Review aggregates
10
+ */
11
+ import { peel } from '../index.js';
12
+ // ─── Tech stack detectors ─────────────────────────────────────────────────
13
+ const TECH_PATTERNS = [
14
+ // Frontend frameworks
15
+ { name: 'React', pattern: /react(?:\.min)?\.js|__reactFiber|react-dom/i, type: 'script' },
16
+ { name: 'Vue.js', pattern: /vue(?:\.min)?\.js|Vue\.component|__vue_/i, type: 'script' },
17
+ { name: 'Angular', pattern: /angular(?:\.min)?\.js|ng-version|ng-app/i, type: 'script' },
18
+ { name: 'Next.js', pattern: /__NEXT_DATA__|next\/dist\/|_next\/static/i, type: 'script' },
19
+ { name: 'Nuxt.js', pattern: /__NUXT__|_nuxt\/|nuxtjs\.org/i, type: 'script' },
20
+ { name: 'Svelte', pattern: /svelte\/internal|SvelteComponent/i, type: 'script' },
21
+ // E-commerce & CMS
22
+ { name: 'Shopify', pattern: /shopify\.com|Shopify\.theme|cdn\.shopify/i, type: 'script' },
23
+ { name: 'WordPress', pattern: /wp-content\/|wp-includes\/|WordPress/i, type: 'script' },
24
+ { name: 'Webflow', pattern: /webflow\.com|Webflow\.require/i, type: 'script' },
25
+ { name: 'Squarespace', pattern: /squarespace\.com|SQUARESPACE_ROLLUPS/i, type: 'script' },
26
+ { name: 'Wix', pattern: /wix\.com|wixstatic\.com/i, type: 'script' },
27
+ // Analytics & marketing
28
+ { name: 'Google Analytics', pattern: /google-analytics\.com|gtag\(|ga\('send/i, type: 'script' },
29
+ { name: 'Segment', pattern: /segment\.com|analytics\.identify/i, type: 'script' },
30
+ { name: 'Mixpanel', pattern: /mixpanel\.com|mixpanel\.track/i, type: 'script' },
31
+ { name: 'Intercom', pattern: /intercom\.io|window\.Intercom/i, type: 'script' },
32
+ { name: 'Hubspot', pattern: /hubspot\.com|hs-scripts\.com/i, type: 'script' },
33
+ { name: 'Stripe', pattern: /js\.stripe\.com|Stripe\(/i, type: 'script' },
34
+ // Server / infrastructure (detected via headers)
35
+ { name: 'Vercel', pattern: /vercel/i, type: 'header' },
36
+ { name: 'Netlify', pattern: /netlify/i, type: 'header' },
37
+ { name: 'Cloudflare', pattern: /cloudflare/i, type: 'header' },
38
+ { name: 'AWS', pattern: /amazonaws\.com|x-amz-/i, type: 'header' },
39
+ { name: 'Nginx', pattern: /nginx/i, type: 'header' },
40
+ { name: 'Apache', pattern: /apache/i, type: 'header' },
41
+ ];
42
+ const SOCIAL_PATTERNS = [
43
+ { platform: 'Twitter/X', pattern: /(?:twitter\.com|x\.com)\/([^/"?\s]+)/i },
44
+ { platform: 'LinkedIn', pattern: /linkedin\.com\/(?:company|in)\/([^/"?\s]+)/i },
45
+ { platform: 'Facebook', pattern: /facebook\.com\/([^/"?\s]+)/i },
46
+ { platform: 'Instagram', pattern: /instagram\.com\/([^/"?\s]+)/i },
47
+ { platform: 'YouTube', pattern: /youtube\.com\/(?:channel|c|@)\/([^/"?\s]+)/i },
48
+ { platform: 'GitHub', pattern: /github\.com\/([^/"?\s]+)/i },
49
+ { platform: 'Discord', pattern: /discord\.(?:gg|com\/invite)\/([^/"?\s]+)/i },
50
+ { platform: 'TikTok', pattern: /tiktok\.com\/@([^/"?\s]+)/i },
51
+ ];
52
+ const INDUSTRY_KEYWORDS = {
53
+ 'SaaS / Software': ['software', 'saas', 'platform', 'api', 'developer', 'cloud', 'app'],
54
+ 'E-commerce': ['shop', 'store', 'buy', 'cart', 'checkout', 'product', 'shipping'],
55
+ 'Finance / Fintech': ['payment', 'invoice', 'banking', 'crypto', 'invest', 'finance', 'loan'],
56
+ 'Healthcare': ['health', 'medical', 'patient', 'clinic', 'doctor', 'hospital', 'pharma'],
57
+ 'Education': ['course', 'learn', 'training', 'education', 'school', 'university', 'tutor'],
58
+ 'Marketing': ['marketing', 'seo', 'email campaign', 'crm', 'lead', 'analytics'],
59
+ 'AI / Machine Learning': ['ai', 'machine learning', 'nlp', 'model', 'inference', 'llm'],
60
+ 'Food & Restaurant': ['restaurant', 'food', 'menu', 'delivery', 'catering', 'dining'],
61
+ 'Travel': ['travel', 'hotel', 'flight', 'booking', 'tourism', 'vacation'],
62
+ 'Real Estate': ['real estate', 'property', 'mortgage', 'rent', 'apartment', 'lease'],
63
+ 'Media / Content': ['news', 'blog', 'podcast', 'video', 'streaming', 'media', 'content'],
64
+ };
65
/**
 * Heuristically pull plan/price pairs out of page text.
 *
 * For every line that mentions a known plan word, the surrounding window
 * (one line before through four lines after) is searched for the first
 * dollar amount. A "free" plan with no nearby amount is recorded as "$0".
 * Duplicate plan/price pairs are skipped and at most eight results are returned.
 */
function extractPricing(content) {
    const PLAN_WORD = /\b(free|starter|basic|pro|professional|business|enterprise|premium|growth|scale|team|individual|personal)\b/i;
    const DOLLAR_AMOUNT = /\$([\d,]+(?:\.\d{2})?)/;
    const rows = content.split('\n');
    const found = [];
    const recorded = new Set();
    // Append a plan/price pair unless the same pair (plan name case-insensitive) exists.
    const record = (plan, price) => {
        const key = `${plan.toLowerCase()}:${price}`;
        if (recorded.has(key)) {
            return;
        }
        recorded.add(key);
        found.push({ plan, price });
    };
    rows.forEach((row, idx) => {
        const planHit = PLAN_WORD.exec(row);
        if (planHit === null) {
            return;
        }
        const planName = planHit[1];
        // Search nearby lines for a price
        const windowText = rows
            .slice(Math.max(0, idx - 1), Math.min(rows.length, idx + 5))
            .join(' ');
        const priceHit = DOLLAR_AMOUNT.exec(windowText);
        if (priceHit !== null) {
            record(planName, priceHit[0]);
        }
        else if (/\bfree\b/i.test(planName)) {
            record(planName, '$0');
        }
    });
    return found.slice(0, 8);
}
95
/**
 * Detect technologies from page content and response headers.
 *
 * Headers are flattened into lower-cased "name: value" lines. Each
 * TECH_PATTERNS entry is tested against that header text when its type is
 * 'header', otherwise against the page content.
 *
 * @returns Detected technology names, de-duplicated and sorted
 */
function detectTechStack(content, headers) {
    const headerBlob = Object.entries(headers)
        .map(([key, value]) => `${key}: ${value}`)
        .join('\n')
        .toLowerCase();
    const hits = new Set();
    for (const { name, pattern, type } of TECH_PATTERNS) {
        const haystack = type === 'header' ? headerBlob : content;
        if (pattern.test(haystack)) {
            hits.add(name);
        }
    }
    return Array.from(hits).sort();
}
115
/**
 * Find at most one social profile link per platform in the page content.
 *
 * For each SOCIAL_PATTERNS entry the first match that does not contain
 * "share" or "intent" (share / tweet-intent links) is used. Matches without
 * a scheme are prefixed with "https://".
 */
function extractSocialMedia(content) {
    const links = [];
    const alreadyTaken = new Set();
    for (const entry of SOCIAL_PATTERNS) {
        const globalPattern = new RegExp(entry.pattern.source, 'gi');
        const candidates = Array.from(content.matchAll(globalPattern), (m) => m[0]);
        const chosen = candidates.find((candidate) => !candidate.includes('share')
            && !candidate.includes('intent')
            && !alreadyTaken.has(candidate));
        if (chosen === undefined) {
            continue;
        }
        alreadyTaken.add(chosen);
        // Build full URL
        const url = chosen.startsWith('http') ? chosen : 'https://' + chosen;
        links.push({ platform: entry.platform, url });
    }
    return links;
}
135
+ function detectIndustry(text) {
136
+ const lower = text.toLowerCase();
137
+ const scores = {};
138
+ for (const [industry, keywords] of Object.entries(INDUSTRY_KEYWORDS)) {
139
+ scores[industry] = keywords.filter(kw => lower.includes(kw)).length;
140
+ }
141
+ const best = Object.entries(scores)
142
+ .filter(([, score]) => score > 0)
143
+ .sort(([, a], [, b]) => b - a)[0];
144
+ return best ? best[0] : undefined;
145
+ }
146
+ function extractSchemaOrgData(content) {
147
+ const result = {};
148
+ // JSON-LD: look for Organization or LocalBusiness schema
149
+ const jsonLdMatch = content.match(/<script[^>]*type="application\/ld\+json"[^>]*>([\s\S]*?)<\/script>/gi);
150
+ if (jsonLdMatch) {
151
+ for (const block of jsonLdMatch) {
152
+ try {
153
+ const json = JSON.parse(block.replace(/<script[^>]*>|<\/script>/gi, ''));
154
+ const items = Array.isArray(json) ? json : [json];
155
+ for (const item of items) {
156
+ if (!result.name && item.name)
157
+ result.name = item.name;
158
+ if (!result.description && item.description)
159
+ result.description = item.description;
160
+ if (!result.founded && item.foundingDate)
161
+ result.founded = String(item.foundingDate);
162
+ if (!result.employees && item.numberOfEmployees) {
163
+ const emp = item.numberOfEmployees;
164
+ result.employees = typeof emp === 'object' ? `${emp.minValue ?? ''}–${emp.maxValue ?? ''}` : String(emp);
165
+ }
166
+ // Review aggregate
167
+ if (item.aggregateRating) {
168
+ result.reviews = result.reviews || [];
169
+ result.reviews.push({
170
+ source: 'Schema.org',
171
+ rating: parseFloat(item.aggregateRating.ratingValue) || 0,
172
+ count: parseInt(item.aggregateRating.reviewCount) || 0,
173
+ });
174
+ }
175
+ }
176
+ }
177
+ catch {
178
+ // Invalid JSON-LD — skip
179
+ }
180
+ }
181
+ }
182
+ // Open Graph fallback
183
+ if (!result.name) {
184
+ const ogTitle = content.match(/property="og:title"\s+content="([^"]+)"/);
185
+ if (ogTitle)
186
+ result.name = ogTitle[1];
187
+ }
188
+ if (!result.description) {
189
+ const ogDesc = content.match(/(?:property="og:description"|name="description")\s+content="([^"]+)"/);
190
+ if (ogDesc)
191
+ result.description = ogDesc[1];
192
+ }
193
+ return result;
194
+ }
195
+ // ─── Main export ──────────────────────────────────────────────────────────
196
/**
 * Extract structured business intelligence from a website URL.
 *
 * Fetches the homepage, then probes a few common pricing paths on the same
 * origin (best-effort, stopping at the first page that yields plans), and
 * combines schema.org / Open Graph data with detected tech stack, social
 * links, industry, products and pricing.
 *
 * @param url Website URL; "https://" is prepended when no http(s) scheme is present
 * @returns The extracted data; fields with no findings are omitted
 * @throws If the URL cannot be parsed or the homepage fetch fails
 *
 * @example
 * ```typescript
 * const intel = await getBusinessIntel('https://stripe.com');
 * console.log(intel.name);       // "Stripe"
 * console.log(intel.techStack);  // ["React", "Cloudflare", ...]
 * console.log(intel.pricing);    // [{plan: "Starter", price: "$0"}, ...]
 * ```
 */
export async function getBusinessIntel(url) {
    // Normalize URL. Test for an explicit http(s) scheme: a bare
    // startsWith('http') check leaves hosts such as "httpbin.org" without a
    // scheme (new URL() then throws) and mishandles "HTTPS://…".
    const trimmedUrl = url.trim();
    const targetUrl = /^https?:\/\//i.test(trimmedUrl) ? trimmedUrl : 'https://' + trimmedUrl;
    const parsed = new URL(targetUrl);
    const origin = parsed.origin;
    // 1. Fetch homepage
    const homeResult = await peel(targetUrl, {
        format: 'html',
        timeout: 15000,
    });
    const homeContent = homeResult.content || '';
    const homeHtml = homeResult.rawHtml || homeContent;
    // 2. Extract schema.org / OG data
    const schemaData = extractSchemaOrgData(homeHtml);
    // 3. Detect tech stack from content + headers
    const responseHeaders = homeResult.headers || {};
    const techStack = detectTechStack(homeHtml + homeContent, responseHeaders);
    // 4. Extract social media links
    const socialMedia = extractSocialMedia(homeHtml + homeContent);
    // 5. Detect industry from description + content
    const textForIndustry = [schemaData.description, homeContent].filter(Boolean).join(' ');
    const industry = detectIndustry(textForIndustry);
    // 6. Try to fetch pricing page (best-effort). Paths are tried one at a
    //    time so that probing stops at the first page that yields plans.
    let pricing = [];
    const pricingPaths = ['/pricing', '/plans', '/pricing-plans', '/subscribe'];
    for (const path of pricingPaths) {
        try {
            const pricingUrl = origin + path;
            const pricingResult = await peel(pricingUrl, { timeout: 8000 });
            // Very short responses are treated as not being a real pricing page.
            if (pricingResult.content && pricingResult.content.length > 200) {
                pricing = extractPricing(pricingResult.content);
                if (pricing.length > 0)
                    break;
            }
        }
        catch {
            // Pricing page not found — continue
        }
    }
    // If no pricing found from pricing page, try extracting from homepage
    if (pricing.length === 0) {
        pricing = extractPricing(homeContent);
    }
    // 7. Extract products list from homepage: take up to ten non-empty lines
    //    following the first line that mentions products/features/solutions.
    const products = [];
    const productSection = homeContent.match(/(?:products?|features?|solutions?)[^\n]*\n((?:[^\n]+\n){1,10})/i);
    if (productSection) {
        const lines = productSection[1]
            .split('\n')
            .map(l => l.trim())
            .filter(l => l.length > 3 && l.length < 80 && !l.startsWith('#') && !l.startsWith('http'));
        products.push(...lines.slice(0, 8));
    }
    // Build final result
    const intel = {
        ...schemaData,
    };
    if (industry)
        intel.industry = industry;
    if (products.length > 0)
        intel.products = products;
    if (pricing.length > 0)
        intel.pricing = pricing;
    if (socialMedia.length > 0)
        intel.socialMedia = socialMedia;
    if (techStack.length > 0)
        intel.techStack = techStack;
    return intel;
}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * In-memory LRU response cache.
3
+ */
4
/**
 * Result of a cache lookup that also reports freshness.
 */
export interface CacheResult<T = unknown> {
    /** The cached value. */
    value: T;
    /** Staleness flag — presumably true once the entry has outlived its TTL; confirm against the implementation. */
    stale: boolean;
}
8
/** Look up the cached value for `url`; `null` when nothing usable is cached. */
export declare function getCached<T = unknown>(url: string): T | null;
/**
 * Look up the cached value for `url` together with a staleness flag.
 * NOTE(review): "SWR" presumably means stale-while-revalidate — confirm against the implementation.
 */
export declare function getCachedWithSWR<T = unknown>(url: string): CacheResult<T> | null;
/**
 * Flag `url` as being revalidated.
 * NOTE(review): the meaning of the boolean result is not visible from this declaration — confirm.
 */
export declare function markRevalidating(url: string): boolean;
/** Store `result` as the cached value for `url`. */
export declare function setCached<T = unknown>(url: string, result: T): void;
/** Remove all cached entries. */
export declare function clearCache(): void;
/** Set the cache time-to-live, in milliseconds. */
export declare function setCacheTTL(ms: number): void;