@iflow-mcp/jakeliume-webpeel 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (547) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +313 -0
  3. package/dist/cache.d.ts +30 -0
  4. package/dist/cache.js +139 -0
  5. package/dist/cli/commands/auth.d.ts +5 -0
  6. package/dist/cli/commands/auth.js +411 -0
  7. package/dist/cli/commands/doctor.d.ts +37 -0
  8. package/dist/cli/commands/doctor.js +371 -0
  9. package/dist/cli/commands/fetch.d.ts +6 -0
  10. package/dist/cli/commands/fetch.js +1345 -0
  11. package/dist/cli/commands/guide.d.ts +2 -0
  12. package/dist/cli/commands/guide.js +183 -0
  13. package/dist/cli/commands/interact.d.ts +5 -0
  14. package/dist/cli/commands/interact.js +840 -0
  15. package/dist/cli/commands/jobs.d.ts +5 -0
  16. package/dist/cli/commands/jobs.js +997 -0
  17. package/dist/cli/commands/monitor.d.ts +12 -0
  18. package/dist/cli/commands/monitor.js +197 -0
  19. package/dist/cli/commands/observe.d.ts +12 -0
  20. package/dist/cli/commands/observe.js +158 -0
  21. package/dist/cli/commands/screenshot.d.ts +5 -0
  22. package/dist/cli/commands/screenshot.js +282 -0
  23. package/dist/cli/commands/search.d.ts +5 -0
  24. package/dist/cli/commands/search.js +1021 -0
  25. package/dist/cli/commands/setup.d.ts +13 -0
  26. package/dist/cli/commands/setup.js +244 -0
  27. package/dist/cli/commands/skill.d.ts +15 -0
  28. package/dist/cli/commands/skill.js +195 -0
  29. package/dist/cli/utils.d.ts +84 -0
  30. package/dist/cli/utils.js +806 -0
  31. package/dist/cli-auth.d.ts +75 -0
  32. package/dist/cli-auth.js +369 -0
  33. package/dist/cli.d.ts +17 -0
  34. package/dist/cli.js +99 -0
  35. package/dist/core/actions.d.ts +69 -0
  36. package/dist/core/actions.js +495 -0
  37. package/dist/core/agent.d.ts +98 -0
  38. package/dist/core/agent.js +558 -0
  39. package/dist/core/answer.d.ts +42 -0
  40. package/dist/core/answer.js +395 -0
  41. package/dist/core/application-tracker.d.ts +84 -0
  42. package/dist/core/application-tracker.js +184 -0
  43. package/dist/core/apply.d.ts +162 -0
  44. package/dist/core/apply.js +816 -0
  45. package/dist/core/auth-detection.d.ts +35 -0
  46. package/dist/core/auth-detection.js +358 -0
  47. package/dist/core/auto-extract.d.ts +82 -0
  48. package/dist/core/auto-extract.js +604 -0
  49. package/dist/core/auto-interact.d.ts +23 -0
  50. package/dist/core/auto-interact.js +246 -0
  51. package/dist/core/bm25-filter.d.ts +66 -0
  52. package/dist/core/bm25-filter.js +288 -0
  53. package/dist/core/branding.d.ts +54 -0
  54. package/dist/core/branding.js +234 -0
  55. package/dist/core/browser-fetch.d.ts +323 -0
  56. package/dist/core/browser-fetch.js +1600 -0
  57. package/dist/core/browser-pool.d.ts +91 -0
  58. package/dist/core/browser-pool.js +550 -0
  59. package/dist/core/budget.d.ts +42 -0
  60. package/dist/core/budget.js +324 -0
  61. package/dist/core/business-intel.d.ts +47 -0
  62. package/dist/core/business-intel.js +279 -0
  63. package/dist/core/cache.d.ts +13 -0
  64. package/dist/core/cache.js +121 -0
  65. package/dist/core/cf-worker-proxy.d.ts +32 -0
  66. package/dist/core/cf-worker-proxy.js +87 -0
  67. package/dist/core/challenge-detection.d.ts +26 -0
  68. package/dist/core/challenge-detection.js +468 -0
  69. package/dist/core/change-tracking.d.ts +75 -0
  70. package/dist/core/change-tracking.js +276 -0
  71. package/dist/core/chunker.d.ts +46 -0
  72. package/dist/core/chunker.js +249 -0
  73. package/dist/core/chunking.d.ts +42 -0
  74. package/dist/core/chunking.js +181 -0
  75. package/dist/core/circuit-breaker.d.ts +44 -0
  76. package/dist/core/circuit-breaker.js +85 -0
  77. package/dist/core/content-pruner.d.ts +47 -0
  78. package/dist/core/content-pruner.js +425 -0
  79. package/dist/core/cookie-cache.d.ts +60 -0
  80. package/dist/core/cookie-cache.js +163 -0
  81. package/dist/core/crawl-checkpoint.d.ts +54 -0
  82. package/dist/core/crawl-checkpoint.js +104 -0
  83. package/dist/core/crawler.d.ts +84 -0
  84. package/dist/core/crawler.js +349 -0
  85. package/dist/core/cross-verify.d.ts +27 -0
  86. package/dist/core/cross-verify.js +93 -0
  87. package/dist/core/deep-fetch.d.ts +74 -0
  88. package/dist/core/deep-fetch.js +405 -0
  89. package/dist/core/deep-research.d.ts +141 -0
  90. package/dist/core/deep-research.js +972 -0
  91. package/dist/core/design-analysis.d.ts +70 -0
  92. package/dist/core/design-analysis.js +490 -0
  93. package/dist/core/design-compare.d.ts +38 -0
  94. package/dist/core/design-compare.js +264 -0
  95. package/dist/core/diff.d.ts +61 -0
  96. package/dist/core/diff.js +289 -0
  97. package/dist/core/dns-cache.d.ts +20 -0
  98. package/dist/core/dns-cache.js +198 -0
  99. package/dist/core/documents.d.ts +23 -0
  100. package/dist/core/documents.js +123 -0
  101. package/dist/core/domain-memory.d.ts +66 -0
  102. package/dist/core/domain-memory.js +163 -0
  103. package/dist/core/domain-verify.d.ts +40 -0
  104. package/dist/core/domain-verify.js +379 -0
  105. package/dist/core/engine-ranker.d.ts +112 -0
  106. package/dist/core/engine-ranker.js +395 -0
  107. package/dist/core/extract-inline.d.ts +38 -0
  108. package/dist/core/extract-inline.js +215 -0
  109. package/dist/core/extract-listings.d.ts +38 -0
  110. package/dist/core/extract-listings.js +461 -0
  111. package/dist/core/extract.d.ts +9 -0
  112. package/dist/core/extract.js +139 -0
  113. package/dist/core/fetch-cache.d.ts +57 -0
  114. package/dist/core/fetch-cache.js +95 -0
  115. package/dist/core/fetcher.d.ts +13 -0
  116. package/dist/core/fetcher.js +12 -0
  117. package/dist/core/google-cache.d.ts +29 -0
  118. package/dist/core/google-cache.js +180 -0
  119. package/dist/core/google-serp-parser.d.ts +82 -0
  120. package/dist/core/google-serp-parser.js +287 -0
  121. package/dist/core/hotel-search.d.ts +122 -0
  122. package/dist/core/hotel-search.js +382 -0
  123. package/dist/core/http-fetch.d.ts +72 -0
  124. package/dist/core/http-fetch.js +820 -0
  125. package/dist/core/human.d.ts +175 -0
  126. package/dist/core/human.js +680 -0
  127. package/dist/core/image-caption.d.ts +44 -0
  128. package/dist/core/image-caption.js +271 -0
  129. package/dist/core/jobs.d.ts +75 -0
  130. package/dist/core/jobs.js +634 -0
  131. package/dist/core/json-ld.d.ts +15 -0
  132. package/dist/core/json-ld.js +617 -0
  133. package/dist/core/language-detect.d.ts +18 -0
  134. package/dist/core/language-detect.js +135 -0
  135. package/dist/core/links.d.ts +10 -0
  136. package/dist/core/links.js +44 -0
  137. package/dist/core/llm-extract.d.ts +71 -0
  138. package/dist/core/llm-extract.js +507 -0
  139. package/dist/core/llm-provider.d.ts +100 -0
  140. package/dist/core/llm-provider.js +702 -0
  141. package/dist/core/local-search.d.ts +60 -0
  142. package/dist/core/local-search.js +308 -0
  143. package/dist/core/logger.d.ts +28 -0
  144. package/dist/core/logger.js +104 -0
  145. package/dist/core/map.d.ts +33 -0
  146. package/dist/core/map.js +127 -0
  147. package/dist/core/markdown.d.ts +92 -0
  148. package/dist/core/markdown.js +809 -0
  149. package/dist/core/metadata.d.ts +34 -0
  150. package/dist/core/metadata.js +422 -0
  151. package/dist/core/observe.d.ts +113 -0
  152. package/dist/core/observe.js +395 -0
  153. package/dist/core/ocr.d.ts +12 -0
  154. package/dist/core/ocr.js +33 -0
  155. package/dist/core/paginate.d.ts +31 -0
  156. package/dist/core/paginate.js +106 -0
  157. package/dist/core/pdf.d.ts +8 -0
  158. package/dist/core/pdf.js +25 -0
  159. package/dist/core/peel-tls.d.ts +25 -0
  160. package/dist/core/peel-tls.js +220 -0
  161. package/dist/core/pipeline.d.ts +132 -0
  162. package/dist/core/pipeline.js +1666 -0
  163. package/dist/core/profiles.d.ts +61 -0
  164. package/dist/core/profiles.js +350 -0
  165. package/dist/core/prompt-guard.d.ts +30 -0
  166. package/dist/core/prompt-guard.js +119 -0
  167. package/dist/core/proxy-config.d.ts +90 -0
  168. package/dist/core/proxy-config.js +172 -0
  169. package/dist/core/quick-answer.d.ts +53 -0
  170. package/dist/core/quick-answer.js +833 -0
  171. package/dist/core/rate-governor.d.ts +80 -0
  172. package/dist/core/rate-governor.js +238 -0
  173. package/dist/core/readability.d.ts +57 -0
  174. package/dist/core/readability.js +533 -0
  175. package/dist/core/research.d.ts +66 -0
  176. package/dist/core/research.js +270 -0
  177. package/dist/core/retry.d.ts +60 -0
  178. package/dist/core/retry.js +119 -0
  179. package/dist/core/safe-browsing.d.ts +30 -0
  180. package/dist/core/safe-browsing.js +206 -0
  181. package/dist/core/schema-extraction.d.ts +66 -0
  182. package/dist/core/schema-extraction.js +352 -0
  183. package/dist/core/schema-postprocess.d.ts +32 -0
  184. package/dist/core/schema-postprocess.js +469 -0
  185. package/dist/core/schema-templates.d.ts +19 -0
  186. package/dist/core/schema-templates.js +143 -0
  187. package/dist/core/screenshot.d.ts +224 -0
  188. package/dist/core/screenshot.js +207 -0
  189. package/dist/core/search-engines.d.ts +25 -0
  190. package/dist/core/search-engines.js +182 -0
  191. package/dist/core/search-provider.d.ts +243 -0
  192. package/dist/core/search-provider.js +1629 -0
  193. package/dist/core/searxng-provider.d.ts +35 -0
  194. package/dist/core/searxng-provider.js +105 -0
  195. package/dist/core/selective-evidence.d.ts +151 -0
  196. package/dist/core/selective-evidence.js +389 -0
  197. package/dist/core/site-search.d.ts +44 -0
  198. package/dist/core/site-search.js +252 -0
  199. package/dist/core/sitemap.d.ts +23 -0
  200. package/dist/core/sitemap.js +105 -0
  201. package/dist/core/source-credibility.d.ts +29 -0
  202. package/dist/core/source-credibility.js +584 -0
  203. package/dist/core/source-scoring.d.ts +166 -0
  204. package/dist/core/source-scoring.js +396 -0
  205. package/dist/core/stemmer.d.ts +38 -0
  206. package/dist/core/stemmer.js +509 -0
  207. package/dist/core/strategies.d.ts +104 -0
  208. package/dist/core/strategies.js +1044 -0
  209. package/dist/core/strategy-hooks.d.ts +145 -0
  210. package/dist/core/strategy-hooks.js +74 -0
  211. package/dist/core/structured-extract.d.ts +43 -0
  212. package/dist/core/structured-extract.js +550 -0
  213. package/dist/core/summarize.d.ts +17 -0
  214. package/dist/core/summarize.js +78 -0
  215. package/dist/core/synonyms.d.ts +42 -0
  216. package/dist/core/synonyms.js +184 -0
  217. package/dist/core/system-monitor.d.ts +61 -0
  218. package/dist/core/system-monitor.js +133 -0
  219. package/dist/core/table-format.d.ts +30 -0
  220. package/dist/core/table-format.js +146 -0
  221. package/dist/core/threat-feeds.d.ts +23 -0
  222. package/dist/core/threat-feeds.js +104 -0
  223. package/dist/core/timing.d.ts +21 -0
  224. package/dist/core/timing.js +33 -0
  225. package/dist/core/transcript-export.d.ts +47 -0
  226. package/dist/core/transcript-export.js +107 -0
  227. package/dist/core/user-agents.d.ts +82 -0
  228. package/dist/core/user-agents.js +239 -0
  229. package/dist/core/vertical-search.d.ts +54 -0
  230. package/dist/core/vertical-search.js +158 -0
  231. package/dist/core/watch-manager.d.ts +175 -0
  232. package/dist/core/watch-manager.js +416 -0
  233. package/dist/core/watch.d.ts +101 -0
  234. package/dist/core/watch.js +389 -0
  235. package/dist/core/youtube.d.ts +130 -0
  236. package/dist/core/youtube.js +1175 -0
  237. package/dist/ee/challenge-re-export.d.ts +1 -0
  238. package/dist/ee/challenge-re-export.js +1 -0
  239. package/dist/ee/challenge-solver.d.ts +72 -0
  240. package/dist/ee/challenge-solver.js +720 -0
  241. package/dist/ee/domain-extractors.d.ts +8 -0
  242. package/dist/ee/domain-extractors.js +8 -0
  243. package/dist/ee/domain-intel.d.ts +16 -0
  244. package/dist/ee/domain-intel.js +133 -0
  245. package/dist/ee/extractors/allrecipes.d.ts +2 -0
  246. package/dist/ee/extractors/allrecipes.js +120 -0
  247. package/dist/ee/extractors/amazon.d.ts +2 -0
  248. package/dist/ee/extractors/amazon.js +78 -0
  249. package/dist/ee/extractors/arxiv.d.ts +2 -0
  250. package/dist/ee/extractors/arxiv.js +137 -0
  251. package/dist/ee/extractors/bestbuy.d.ts +2 -0
  252. package/dist/ee/extractors/bestbuy.js +78 -0
  253. package/dist/ee/extractors/carscom.d.ts +2 -0
  254. package/dist/ee/extractors/carscom.js +121 -0
  255. package/dist/ee/extractors/coingecko.d.ts +2 -0
  256. package/dist/ee/extractors/coingecko.js +134 -0
  257. package/dist/ee/extractors/craigslist.d.ts +2 -0
  258. package/dist/ee/extractors/craigslist.js +92 -0
  259. package/dist/ee/extractors/devto.d.ts +2 -0
  260. package/dist/ee/extractors/devto.js +135 -0
  261. package/dist/ee/extractors/ebay.d.ts +2 -0
  262. package/dist/ee/extractors/ebay.js +90 -0
  263. package/dist/ee/extractors/espn.d.ts +2 -0
  264. package/dist/ee/extractors/espn.js +260 -0
  265. package/dist/ee/extractors/etsy.d.ts +2 -0
  266. package/dist/ee/extractors/etsy.js +52 -0
  267. package/dist/ee/extractors/facebook.d.ts +2 -0
  268. package/dist/ee/extractors/facebook.js +46 -0
  269. package/dist/ee/extractors/github.d.ts +2 -0
  270. package/dist/ee/extractors/github.js +196 -0
  271. package/dist/ee/extractors/google-flights.d.ts +2 -0
  272. package/dist/ee/extractors/google-flights.js +176 -0
  273. package/dist/ee/extractors/hackernews.d.ts +2 -0
  274. package/dist/ee/extractors/hackernews.js +147 -0
  275. package/dist/ee/extractors/imdb.d.ts +2 -0
  276. package/dist/ee/extractors/imdb.js +172 -0
  277. package/dist/ee/extractors/index.d.ts +26 -0
  278. package/dist/ee/extractors/index.js +247 -0
  279. package/dist/ee/extractors/instagram.d.ts +2 -0
  280. package/dist/ee/extractors/instagram.js +102 -0
  281. package/dist/ee/extractors/kalshi.d.ts +2 -0
  282. package/dist/ee/extractors/kalshi.js +121 -0
  283. package/dist/ee/extractors/kayak-cars.d.ts +2 -0
  284. package/dist/ee/extractors/kayak-cars.js +270 -0
  285. package/dist/ee/extractors/linkedin.d.ts +2 -0
  286. package/dist/ee/extractors/linkedin.js +113 -0
  287. package/dist/ee/extractors/medium.d.ts +2 -0
  288. package/dist/ee/extractors/medium.js +130 -0
  289. package/dist/ee/extractors/news.d.ts +4 -0
  290. package/dist/ee/extractors/news.js +173 -0
  291. package/dist/ee/extractors/npm.d.ts +2 -0
  292. package/dist/ee/extractors/npm.js +86 -0
  293. package/dist/ee/extractors/pdf.d.ts +2 -0
  294. package/dist/ee/extractors/pdf.js +108 -0
  295. package/dist/ee/extractors/pinterest.d.ts +2 -0
  296. package/dist/ee/extractors/pinterest.js +34 -0
  297. package/dist/ee/extractors/polymarket.d.ts +2 -0
  298. package/dist/ee/extractors/polymarket.js +358 -0
  299. package/dist/ee/extractors/producthunt.d.ts +2 -0
  300. package/dist/ee/extractors/producthunt.js +88 -0
  301. package/dist/ee/extractors/pubmed.d.ts +2 -0
  302. package/dist/ee/extractors/pubmed.js +162 -0
  303. package/dist/ee/extractors/pypi.d.ts +2 -0
  304. package/dist/ee/extractors/pypi.js +80 -0
  305. package/dist/ee/extractors/reddit.d.ts +2 -0
  306. package/dist/ee/extractors/reddit.js +438 -0
  307. package/dist/ee/extractors/redfin.d.ts +2 -0
  308. package/dist/ee/extractors/redfin.js +156 -0
  309. package/dist/ee/extractors/semanticscholar.d.ts +2 -0
  310. package/dist/ee/extractors/semanticscholar.js +131 -0
  311. package/dist/ee/extractors/shared.d.ts +12 -0
  312. package/dist/ee/extractors/shared.js +76 -0
  313. package/dist/ee/extractors/soundcloud.d.ts +2 -0
  314. package/dist/ee/extractors/soundcloud.js +34 -0
  315. package/dist/ee/extractors/sportsbetting.d.ts +2 -0
  316. package/dist/ee/extractors/sportsbetting.js +37 -0
  317. package/dist/ee/extractors/spotify.d.ts +2 -0
  318. package/dist/ee/extractors/spotify.js +34 -0
  319. package/dist/ee/extractors/stackoverflow.d.ts +2 -0
  320. package/dist/ee/extractors/stackoverflow.js +61 -0
  321. package/dist/ee/extractors/substack.d.ts +2 -0
  322. package/dist/ee/extractors/substack.js +115 -0
  323. package/dist/ee/extractors/substackroot.d.ts +2 -0
  324. package/dist/ee/extractors/substackroot.js +46 -0
  325. package/dist/ee/extractors/tiktok.d.ts +2 -0
  326. package/dist/ee/extractors/tiktok.js +29 -0
  327. package/dist/ee/extractors/tradingview.d.ts +2 -0
  328. package/dist/ee/extractors/tradingview.js +182 -0
  329. package/dist/ee/extractors/twitch.d.ts +2 -0
  330. package/dist/ee/extractors/twitch.js +36 -0
  331. package/dist/ee/extractors/twitter.d.ts +2 -0
  332. package/dist/ee/extractors/twitter.js +327 -0
  333. package/dist/ee/extractors/types.d.ts +14 -0
  334. package/dist/ee/extractors/types.js +1 -0
  335. package/dist/ee/extractors/walmart.d.ts +2 -0
  336. package/dist/ee/extractors/walmart.js +50 -0
  337. package/dist/ee/extractors/weather.d.ts +2 -0
  338. package/dist/ee/extractors/weather.js +133 -0
  339. package/dist/ee/extractors/wikipedia.d.ts +4 -0
  340. package/dist/ee/extractors/wikipedia.js +235 -0
  341. package/dist/ee/extractors/yelp.d.ts +2 -0
  342. package/dist/ee/extractors/yelp.js +216 -0
  343. package/dist/ee/extractors/youtube.d.ts +2 -0
  344. package/dist/ee/extractors/youtube.js +189 -0
  345. package/dist/ee/extractors/zillow.d.ts +54 -0
  346. package/dist/ee/extractors/zillow.js +247 -0
  347. package/dist/ee/extractors-re-export.d.ts +1 -0
  348. package/dist/ee/extractors-re-export.js +1 -0
  349. package/dist/ee/premium-hooks.d.ts +20 -0
  350. package/dist/ee/premium-hooks.js +50 -0
  351. package/dist/ee/spa-detection.d.ts +2 -0
  352. package/dist/ee/spa-detection.js +2 -0
  353. package/dist/ee/stability.d.ts +4 -0
  354. package/dist/ee/stability.js +29 -0
  355. package/dist/ee/swr-cache.d.ts +14 -0
  356. package/dist/ee/swr-cache.js +34 -0
  357. package/dist/index.d.ts +143 -0
  358. package/dist/index.js +291 -0
  359. package/dist/integrations/index.d.ts +2 -0
  360. package/dist/integrations/index.js +2 -0
  361. package/dist/integrations/langchain.d.ts +64 -0
  362. package/dist/integrations/langchain.js +115 -0
  363. package/dist/integrations/llamaindex.d.ts +50 -0
  364. package/dist/integrations/llamaindex.js +91 -0
  365. package/dist/mcp/handlers/act.d.ts +5 -0
  366. package/dist/mcp/handlers/act.js +34 -0
  367. package/dist/mcp/handlers/definitions.d.ts +6 -0
  368. package/dist/mcp/handlers/definitions.js +395 -0
  369. package/dist/mcp/handlers/extract.d.ts +7 -0
  370. package/dist/mcp/handlers/extract.js +135 -0
  371. package/dist/mcp/handlers/fetch.d.ts +6 -0
  372. package/dist/mcp/handlers/fetch.js +98 -0
  373. package/dist/mcp/handlers/find.d.ts +5 -0
  374. package/dist/mcp/handlers/find.js +137 -0
  375. package/dist/mcp/handlers/index.d.ts +13 -0
  376. package/dist/mcp/handlers/index.js +63 -0
  377. package/dist/mcp/handlers/legacy.d.ts +25 -0
  378. package/dist/mcp/handlers/legacy.js +450 -0
  379. package/dist/mcp/handlers/meta.d.ts +6 -0
  380. package/dist/mcp/handlers/meta.js +40 -0
  381. package/dist/mcp/handlers/monitor.d.ts +5 -0
  382. package/dist/mcp/handlers/monitor.js +41 -0
  383. package/dist/mcp/handlers/observe.d.ts +8 -0
  384. package/dist/mcp/handlers/observe.js +37 -0
  385. package/dist/mcp/handlers/read.d.ts +6 -0
  386. package/dist/mcp/handlers/read.js +78 -0
  387. package/dist/mcp/handlers/see.d.ts +5 -0
  388. package/dist/mcp/handlers/see.js +75 -0
  389. package/dist/mcp/handlers/types.d.ts +29 -0
  390. package/dist/mcp/handlers/types.js +28 -0
  391. package/dist/mcp/server.d.ts +7 -0
  392. package/dist/mcp/server.js +108 -0
  393. package/dist/mcp/smart-router.d.ts +23 -0
  394. package/dist/mcp/smart-router.js +178 -0
  395. package/dist/server/app.d.ts +14 -0
  396. package/dist/server/app.js +632 -0
  397. package/dist/server/auth-store.d.ts +28 -0
  398. package/dist/server/auth-store.js +88 -0
  399. package/dist/server/bull-queues.d.ts +60 -0
  400. package/dist/server/bull-queues.js +90 -0
  401. package/dist/server/email-service.d.ts +55 -0
  402. package/dist/server/email-service.js +291 -0
  403. package/dist/server/job-queue.d.ts +100 -0
  404. package/dist/server/job-queue.js +145 -0
  405. package/dist/server/logger.d.ts +10 -0
  406. package/dist/server/logger.js +37 -0
  407. package/dist/server/middleware/audit-log.d.ts +14 -0
  408. package/dist/server/middleware/audit-log.js +73 -0
  409. package/dist/server/middleware/auth.d.ts +35 -0
  410. package/dist/server/middleware/auth.js +225 -0
  411. package/dist/server/middleware/rate-limit.d.ts +50 -0
  412. package/dist/server/middleware/rate-limit.js +270 -0
  413. package/dist/server/middleware/scope-guard.d.ts +25 -0
  414. package/dist/server/middleware/scope-guard.js +45 -0
  415. package/dist/server/middleware/url-validator.d.ts +15 -0
  416. package/dist/server/middleware/url-validator.js +201 -0
  417. package/dist/server/openapi.yaml +6418 -0
  418. package/dist/server/pg-auth-store.d.ts +146 -0
  419. package/dist/server/pg-auth-store.js +576 -0
  420. package/dist/server/pg-job-queue.d.ts +59 -0
  421. package/dist/server/pg-job-queue.js +375 -0
  422. package/dist/server/routes/activity.d.ts +6 -0
  423. package/dist/server/routes/activity.js +79 -0
  424. package/dist/server/routes/admin-active.d.ts +7 -0
  425. package/dist/server/routes/admin-active.js +120 -0
  426. package/dist/server/routes/admin-stats.d.ts +7 -0
  427. package/dist/server/routes/admin-stats.js +176 -0
  428. package/dist/server/routes/agent.d.ts +24 -0
  429. package/dist/server/routes/agent.js +480 -0
  430. package/dist/server/routes/answer.d.ts +5 -0
  431. package/dist/server/routes/answer.js +125 -0
  432. package/dist/server/routes/ask.d.ts +28 -0
  433. package/dist/server/routes/ask.js +295 -0
  434. package/dist/server/routes/batch.d.ts +6 -0
  435. package/dist/server/routes/batch.js +493 -0
  436. package/dist/server/routes/cache-warm.d.ts +25 -0
  437. package/dist/server/routes/cache-warm.js +212 -0
  438. package/dist/server/routes/cli-usage.d.ts +6 -0
  439. package/dist/server/routes/cli-usage.js +127 -0
  440. package/dist/server/routes/compat.d.ts +23 -0
  441. package/dist/server/routes/compat.js +652 -0
  442. package/dist/server/routes/crawl.d.ts +13 -0
  443. package/dist/server/routes/crawl.js +287 -0
  444. package/dist/server/routes/deep-fetch.d.ts +8 -0
  445. package/dist/server/routes/deep-fetch.js +57 -0
  446. package/dist/server/routes/deep-research.d.ts +11 -0
  447. package/dist/server/routes/deep-research.js +232 -0
  448. package/dist/server/routes/demo.d.ts +24 -0
  449. package/dist/server/routes/demo.js +517 -0
  450. package/dist/server/routes/do.d.ts +8 -0
  451. package/dist/server/routes/do.js +72 -0
  452. package/dist/server/routes/extract.d.ts +14 -0
  453. package/dist/server/routes/extract.js +325 -0
  454. package/dist/server/routes/feed.d.ts +15 -0
  455. package/dist/server/routes/feed.js +311 -0
  456. package/dist/server/routes/fetch-queue.d.ts +13 -0
  457. package/dist/server/routes/fetch-queue.js +357 -0
  458. package/dist/server/routes/fetch.d.ts +7 -0
  459. package/dist/server/routes/fetch.js +1274 -0
  460. package/dist/server/routes/go.d.ts +14 -0
  461. package/dist/server/routes/go.js +81 -0
  462. package/dist/server/routes/health.d.ts +11 -0
  463. package/dist/server/routes/health.js +141 -0
  464. package/dist/server/routes/jobs.d.ts +7 -0
  465. package/dist/server/routes/jobs.js +574 -0
  466. package/dist/server/routes/map.d.ts +11 -0
  467. package/dist/server/routes/map.js +116 -0
  468. package/dist/server/routes/mcp.d.ts +14 -0
  469. package/dist/server/routes/mcp.js +197 -0
  470. package/dist/server/routes/metrics.d.ts +37 -0
  471. package/dist/server/routes/metrics.js +149 -0
  472. package/dist/server/routes/oauth.d.ts +9 -0
  473. package/dist/server/routes/oauth.js +396 -0
  474. package/dist/server/routes/playground.d.ts +17 -0
  475. package/dist/server/routes/playground.js +283 -0
  476. package/dist/server/routes/reader.d.ts +18 -0
  477. package/dist/server/routes/reader.js +192 -0
  478. package/dist/server/routes/research.d.ts +14 -0
  479. package/dist/server/routes/research.js +482 -0
  480. package/dist/server/routes/screenshot.d.ts +22 -0
  481. package/dist/server/routes/screenshot.js +820 -0
  482. package/dist/server/routes/search.d.ts +6 -0
  483. package/dist/server/routes/search.js +874 -0
  484. package/dist/server/routes/session.d.ts +17 -0
  485. package/dist/server/routes/session.js +548 -0
  486. package/dist/server/routes/share.d.ts +18 -0
  487. package/dist/server/routes/share.js +462 -0
  488. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  489. package/dist/server/routes/smart-search/handlers/cars.js +102 -0
  490. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  491. package/dist/server/routes/smart-search/handlers/flights.js +72 -0
  492. package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
  493. package/dist/server/routes/smart-search/handlers/general.js +717 -0
  494. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  495. package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
  496. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  497. package/dist/server/routes/smart-search/handlers/products.js +1309 -0
  498. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  499. package/dist/server/routes/smart-search/handlers/rental.js +154 -0
  500. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  501. package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
  502. package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
  503. package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
  504. package/dist/server/routes/smart-search/index.d.ts +19 -0
  505. package/dist/server/routes/smart-search/index.js +546 -0
  506. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  507. package/dist/server/routes/smart-search/intent.js +264 -0
  508. package/dist/server/routes/smart-search/llm.d.ts +16 -0
  509. package/dist/server/routes/smart-search/llm.js +70 -0
  510. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  511. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  512. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  513. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  514. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  515. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  516. package/dist/server/routes/smart-search/types.d.ts +81 -0
  517. package/dist/server/routes/smart-search/types.js +1 -0
  518. package/dist/server/routes/smart-search/utils.d.ts +20 -0
  519. package/dist/server/routes/smart-search/utils.js +146 -0
  520. package/dist/server/routes/stats.d.ts +6 -0
  521. package/dist/server/routes/stats.js +71 -0
  522. package/dist/server/routes/stripe.d.ts +15 -0
  523. package/dist/server/routes/stripe.js +296 -0
  524. package/dist/server/routes/transcript-export.d.ts +10 -0
  525. package/dist/server/routes/transcript-export.js +178 -0
  526. package/dist/server/routes/usage.d.ts +9 -0
  527. package/dist/server/routes/usage.js +279 -0
  528. package/dist/server/routes/users.d.ts +8 -0
  529. package/dist/server/routes/users.js +1867 -0
  530. package/dist/server/routes/watch.d.ts +15 -0
  531. package/dist/server/routes/watch.js +309 -0
  532. package/dist/server/routes/webhooks.d.ts +26 -0
  533. package/dist/server/routes/webhooks.js +170 -0
  534. package/dist/server/routes/youtube.d.ts +6 -0
  535. package/dist/server/routes/youtube.js +130 -0
  536. package/dist/server/sentry.d.ts +14 -0
  537. package/dist/server/sentry.js +104 -0
  538. package/dist/server/types.d.ts +15 -0
  539. package/dist/server/types.js +7 -0
  540. package/dist/server/utils/response.d.ts +44 -0
  541. package/dist/server/utils/response.js +69 -0
  542. package/dist/server/utils/sse.d.ts +22 -0
  543. package/dist/server/utils/sse.js +38 -0
  544. package/dist/types.d.ts +552 -0
  545. package/dist/types.js +39 -0
  546. package/llms.txt +105 -0
  547. package/package.json +189 -0
@@ -0,0 +1,246 @@
1
+ /**
2
+ * Auto-interact: automatically dismiss cookie banners, consent popups,
3
+ * overlay modals, and optionally click "load more" / "show all" buttons.
4
+ *
5
+ * Runs after page.goto() and before content extraction.
6
+ * Never blocks extraction — each interaction has a tight timeout.
7
+ * Total budget: 3s max.
8
+ */
9
+ // ── Selector lists ─────────────────────────────────────────────────────────
10
/**
 * CSS selectors for "accept / dismiss" buttons of cookie banners.
 *
 * Order matters: the list is scanned front to back and the first selector
 * whose first matching element is visible gets clicked. Vendor-specific and
 * narrowly scoped selectors therefore come first, broad heuristics last.
 */
const COOKIE_DISMISS_SELECTORS = [
    // OneTrust (very common consent management platform)
    '#onetrust-accept-btn-handler',
    // Cookiebot
    '#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll',
    // Cookie Consent library
    '.cc-btn.cc-dismiss',
    '.cc-btn.cc-allow',
    // Osano
    '.osano-cm-accept',
    '.osano-cm-accept-all',
    // TrustArc
    '#truste-consent-button',
    // Quantcast
    '#qc-cmp2-ui button[mode="primary"]',
    // Didomi
    '#didomi-notice-agree-button',
    // Testing library markers
    '[data-testid="cookie-policy-dialog-accept-button"]',
    '[data-testid="accept-cookies"]',
    '[data-testid="cookie-accept"]',
    // ARIA labels — the specific "accept cookie" match must precede the bare
    // "cookie" match; otherwise the broad selector always wins and may hit a
    // "Cookie settings" / "Reject cookies" button that appears earlier in the DOM.
    'button[aria-label*="accept cookie" i]',
    'button[aria-label*="cookie" i]',
    'button[aria-label*="agree" i]',
    'button[aria-label*="consent" i]',
    // Class-based matchers (broad)
    '[class*="cookie"] button[class*="accept"]',
    '[class*="cookie"] button[class*="dismiss"]',
    '[class*="cookie"] button[class*="close"]',
    '[class*="cookie"] button[class*="agree"]',
    '[class*="cookie"] button[class*="allow"]',
    '[class*="consent"] button[class*="accept"]',
    '[class*="consent"] button[class*="agree"]',
    '[class*="consent"] button[class*="allow"]',
    '[id*="cookie"] button[class*="accept"]',
    '[id*="cookie"] button[class*="agree"]',
    '.cookie-banner button:first-of-type',
    '.cookie-notice button:first-of-type',
    '#cookie-notice button:first-of-type',
];
51
/**
 * CSS selectors for "accept / agree / confirm" buttons of generic consent
 * dialogs. Scanned in order; the first visible match is clicked.
 */
const CONSENT_SELECTORS = [
    // -- GDPR and privacy notices
    '[class*="gdpr"] button[class*="accept"]',
    '[class*="gdpr"] button[class*="agree"]',
    '[class*="privacy"] button[class*="accept"]',
    '[class*="privacy"] button[class*="agree"]',

    // -- Consent rendered inside a modal or dialog
    '.modal-overlay [class*="accept"]',
    '[role="dialog"] button[class*="accept"]',
    '[role="dialog"] button[class*="agree"]',
    '[role="alertdialog"] button[class*="accept"]',

    // -- Age gates and terms-of-service prompts
    '[class*="age-gate"] button[class*="confirm"]',
    '[class*="terms"] button[class*="accept"]',
];
66
/**
 * CSS selectors for close/dismiss controls of modals, popups and banners.
 * Scanned in order; the first visible match is clicked.
 */
const OVERLAY_DISMISS_SELECTORS = [
    // -- Generic close controls
    '.modal-close',
    '.overlay-close',
    '[class*="modal"] [class*="close"]',
    '[class*="modal"] button[aria-label="Close"]',
    '[role="dialog"] [aria-label="Close"]',
    '[role="dialog"] [aria-label="close"]',
    '[role="dialog"] button[class*="close"]',
    '[class*="popup"] [class*="close"]',
    '[class*="popup"] button[aria-label="Close"]',
    'button[class*="dismiss"]',

    // -- Newsletter / e-mail capture popups
    '[class*="newsletter"] [class*="close"]',
    '[class*="subscribe"] [class*="close"]',
    '[class*="signup"] [class*="close"]',

    // -- Survey and feedback popups
    '[class*="survey"] [class*="close"]',
    '[class*="feedback"] [class*="close"]',

    // -- Notification and alert banners
    '[class*="notification"] button[class*="close"]',
    '[class*="alert"] button[class*="close"]',
    '[class*="banner"] button[class*="close"]',
];
90
/**
 * CSS selectors for "load more" / "show more" controls.
 * Scanned in order; the first visible match is clicked.
 */
const LOAD_MORE_SELECTORS = [
    // -- "load more" spelled with dash, no separator, or underscore
    'button[class*="load-more"]',
    'button[class*="loadmore"]',
    'button[class*="load_more"]',
    '[class*="load-more"] button',
    'a[class*="load-more"]',

    // -- "show more" variants
    'button[class*="show-more"]',
    'button[class*="show_more"]',
    'button[class*="showmore"]',
    '[class*="show-more"] button',

    // -- ARIA labels (case-insensitive)
    'button[aria-label*="load more" i]',
    'button[aria-label*="show more" i]',

    // -- Test-id markers
    '[data-testid*="load-more"]',
    '[data-testid*="show-more"]',
];
105
+ // ── Helpers ────────────────────────────────────────────────────────────────
106
/**
 * Report whether the first element matching `selector` is rendered visibly.
 *
 * The check runs inside the page via `page.evaluate`: the element must
 * exist, have a non-zero bounding box, and must not be styled with
 * `display: none`, `visibility: hidden` or `opacity: 0`.
 *
 * @param page - Object exposing `evaluate(fn, arg)` (a Playwright page)
 * @param selector - CSS selector to probe
 * @returns `true` only when the in-page check yields a truthy value;
 *          `false` otherwise, including when the evaluation throws
 */
async function isVisible(page, selector) {
    let outcome;
    try {
        outcome = await page.evaluate((sel) => {
            const node = document.querySelector(sel);
            if (!node)
                return false;
            const { width, height } = node.getBoundingClientRect();
            if (width === 0 || height === 0)
                return false;
            const { display, visibility, opacity } = window.getComputedStyle(node);
            const hidden = display === 'none' || visibility === 'hidden' || opacity === '0';
            return !hidden;
        }, selector);
    }
    catch {
        // Evaluation failures are treated as "not visible".
        return false;
    }
    return Boolean(outcome);
}
130
/**
 * Try to click the first element matching `selector`, giving up after 1s.
 *
 * The element is clicked only if `isVisible` reports it as visible. The
 * click is raced against a local deadline in addition to the `timeout`
 * option passed to `page.click`. After a successful click the function
 * pauses 300ms so the DOM can settle.
 *
 * @param page - Playwright page
 * @param selector - CSS selector of the element to click
 * @returns `true` if the click completed, `false` if the element was not
 *          visible, the click failed, or the deadline expired. Never throws.
 */
async function tryClick(page, selector) {
    const CLICK_TIMEOUT_MS = 1000;
    let deadlineTimer;
    try {
        const visible = await isVisible(page, selector);
        if (!visible)
            return false;
        const clickAttempt = page.click(selector, { timeout: CLICK_TIMEOUT_MS });
        const deadline = new Promise((_, reject) => {
            deadlineTimer = setTimeout(() => reject(new Error('click timeout')), CLICK_TIMEOUT_MS);
        });
        await Promise.race([clickAttempt, deadline]);
        // Brief pause to let DOM settle after click
        await page.waitForTimeout(300).catch(() => { });
        return true;
    }
    catch {
        return false;
    }
    finally {
        // Always cancel the deadline; otherwise a pending 1s timer is left
        // behind after every attempt (and keeps the event loop alive).
        clearTimeout(deadlineTimer);
    }
}
151
/**
 * Walk `selectors` in order and click the first one that `tryClick`
 * succeeds on; later selectors are not attempted after a success.
 *
 * When the `DEBUG` environment variable is set, the clicked selector is
 * logged via `console.debug`.
 *
 * @param page - Playwright page
 * @param selectors - Candidate CSS selectors, highest priority first
 * @returns The selector that was clicked, or `null` if none could be clicked
 */
async function tryClickFirst(page, selectors) {
    for (const candidate of selectors) {
        const succeeded = await tryClick(page, candidate);
        if (!succeeded)
            continue;
        if (process.env.DEBUG) {
            console.debug('[webpeel:auto-interact]', 'clicked:', candidate);
        }
        return candidate;
    }
    return null;
}
167
+ // ── Main export ────────────────────────────────────────────────────────────
168
/**
 * Dismiss common UI clutter on an already-navigated page before content
 * extraction. Runs four phases in order:
 *
 *   1. cookie banners   (runs while any of the 3000ms budget remains)
 *   2. consent popups   (needs > 500ms of budget left)
 *   3. overlays/modals  (needs > 500ms to start; at most 2 dismissals, each
 *                        needing > 300ms left)
 *   4. "load more"      (needs > 500ms left; at most one click, followed by
 *                        a 500ms wait for new content)
 *
 * The budget is checked before each phase (and before each overlay attempt),
 * not while a phase is scanning its selector list. Errors are caught and,
 * when the `DEBUG` environment variable is set, logged — this function does
 * not throw.
 *
 * @param page - Playwright page (already navigated)
 * @returns Summary object: `cookieBannerDismissed`, `consentHandled`,
 *          `loadMoreClicked` (0 or 1) and `overlaysDismissed` (0 to 2)
 */
export async function autoInteract(page) {
    const TOTAL_BUDGET_MS = 3000;
    const startedAt = Date.now();
    const msLeft = () => TOTAL_BUDGET_MS - (Date.now() - startedAt);
    const summary = {
        cookieBannerDismissed: false,
        consentHandled: false,
        loadMoreClicked: 0,
        overlaysDismissed: 0,
    };
    try {
        // Phase 1: cookie banners
        if (msLeft() > 0 && (await tryClickFirst(page, COOKIE_DISMISS_SELECTORS))) {
            summary.cookieBannerDismissed = true;
            if (process.env.DEBUG)
                console.debug('[webpeel:auto-interact]', 'cookie banner dismissed');
        }
        // Phase 2: consent popups
        if (msLeft() > 500 && (await tryClickFirst(page, CONSENT_SELECTORS))) {
            summary.consentHandled = true;
            if (process.env.DEBUG)
                console.debug('[webpeel:auto-interact]', 'consent handled');
        }
        // Phase 3: overlays / modals — capped at two so persistent UI
        // cannot keep the loop running
        if (msLeft() > 500) {
            let overlayCount = 0;
            while (overlayCount < 2 && msLeft() > 300) {
                const hit = await tryClickFirst(page, OVERLAY_DISMISS_SELECTORS);
                if (!hit)
                    break;
                overlayCount += 1;
            }
            summary.overlaysDismissed = overlayCount;
            if (overlayCount > 0 && process.env.DEBUG) {
                console.debug('[webpeel:auto-interact]', `overlays dismissed: ${overlayCount}`);
            }
        }
        // Phase 4: a single "load more" click, only if budget remains
        if (msLeft() > 500) {
            const expanded = (await tryClickFirst(page, LOAD_MORE_SELECTORS)) ? 1 : 0;
            if (expanded === 1) {
                // Give freshly loaded content a moment to render
                await page.waitForTimeout(500).catch(() => { });
            }
            summary.loadMoreClicked = expanded;
            if (expanded > 0 && process.env.DEBUG) {
                console.debug('[webpeel:auto-interact]', `load-more clicked: ${expanded}`);
            }
        }
    }
    catch (err) {
        // Auto-interaction is best-effort; never let it block extraction
        if (process.env.DEBUG) {
            console.debug('[webpeel:auto-interact]', 'error (ignored):', err instanceof Error ? err.message : err);
        }
    }
    const elapsed = Date.now() - startedAt;
    if (process.env.DEBUG) {
        console.debug('[webpeel:auto-interact]', 'complete in', elapsed, 'ms', JSON.stringify(summary));
    }
    return summary;
}
@@ -0,0 +1,66 @@
1
+ /**
2
+ * BM25 Query-Focused Content Filter
3
+ *
4
+ * Filters markdown content by BM25 relevance to a query, keeping only the
5
+ * blocks that are most relevant. This can reduce token usage by 70-90% for
6
+ * focused tasks (e.g., "find hotel prices" should not return navigation menus,
7
+ * footer text, or unrelated article sections).
8
+ *
9
+ * Algorithm: BM25 (Best Matching 25) — Okapi BM25
10
+ * score(D, Q) = Σ IDF(qi) * tf(qi,D)*(k1+1) / (tf(qi,D) + k1*(1 - b + b*|D|/avgdl))
11
+ */
12
/** Options accepted by `filterByRelevance`. */
export interface BM25FilterOptions {
    /** Free-text query the content blocks are scored against. */
    query: string;
    /**
     * Minimum BM25 score a block needs to be kept.
     * When omitted, half of the mean block score is used.
     */
    threshold?: number;
    /**
     * When true, each kept block is prefixed with an HTML comment carrying
     * its BM25 score. Default: false.
     */
    includeScores?: boolean;
}
20
/** Outcome of `filterByRelevance`. */
export interface BM25FilterResult {
    /** The kept blocks, in original document order, joined by blank lines. */
    content: string;
    /** How many blocks survived filtering. */
    kept: number;
    /** How many blocks the content was split into. */
    total: number;
    /** Share of characters removed relative to the input, as a rounded percentage. */
    reductionPercent: number;
}
30
/** One scoring unit produced by `splitIntoBlocks`. */
interface ContentBlock {
    /** Markdown source of the block, trimmed; emitted as-is in filtered output. */
    raw: string;
    /** Position of the block in the split result (used to keep document order). */
    index: number;
}
36
/**
 * Break markdown into the logical blocks that are scored individually:
 *   - a fenced code block (``` ... ```) stays in one piece
 *   - a heading is joined with the chunk that directly follows it
 *   - list chunks separated only by blank lines are joined into one block
 *   - a table is one block
 *   - every other paragraph is its own block
 *
 * @param content - Markdown text
 * @returns Blocks in document order
 */
export declare function splitIntoBlocks(content: string): ContentBlock[];
45
/**
 * Score every block against the query terms with BM25.
 *
 * @param blocks - Blocks to score; together they form the BM25 corpus
 * @param queryTerms - Already-tokenized query terms
 * @returns One score per block, index-aligned with `blocks`
 */
export declare function scoreBM25(blocks: ContentBlock[], queryTerms: string[]): number[];
50
/**
 * Rate how relevant a whole document is to a query, as a number in [0, 1].
 *
 * Blocks are scored with BM25, averaged with block length as the weight,
 * divided by the number of query terms so queries of different lengths are
 * comparable, and squashed into the unit range.
 *
 * For ranking search results this is more meaningful than
 * `reductionPercent`, because it reflects term overlap and importance
 * rather than how much content was filtered out.
 *
 * @param content - Markdown text to rate
 * @param query - Free-text query
 * @returns Relevance in [0, 1]
 */
export declare function computeRelevanceScore(content: string, query: string): number;
60
/**
 * Keep only the markdown blocks that are relevant to a query.
 *
 * The content is split into blocks (paragraphs, heading + body, lists),
 * each block is scored with BM25, and blocks scoring at or above the
 * threshold are returned in their original order.
 *
 * @param content - Markdown text to filter
 * @param options - Query plus optional threshold / score annotation flag
 * @returns Filtered content together with block counts and reduction percentage
 */
export declare function filterByRelevance(content: string, options: BM25FilterOptions): BM25FilterResult;
66
+ export {};
@@ -0,0 +1,288 @@
1
+ /**
2
+ * BM25 Query-Focused Content Filter
3
+ *
4
+ * Filters markdown content by BM25 relevance to a query, keeping only the
5
+ * blocks that are most relevant. This can reduce token usage by 70-90% for
6
+ * focused tasks (e.g., "find hotel prices" should not return navigation menus,
7
+ * footer text, or unrelated article sections).
8
+ *
9
+ * Algorithm: BM25 (Best Matching 25) — Okapi BM25
10
+ * score(D, Q) = Σ IDF(qi) * tf(qi,D)*(k1+1) / (tf(qi,D) + k1*(1 - b + b*|D|/avgdl))
11
+ */
12
// ── BM25 tuning parameters ──────────────────────────────────────────────────
/** Term-frequency saturation: how quickly repeated occurrences stop adding score. */
const K1 = 1.5;
/** Length normalization strength used in the BM25 denominator. */
const B = 0.75;
15
+ // ---------------------------------------------------------------------------
16
+ // Tokenization
17
+ // ---------------------------------------------------------------------------
18
/**
 * Tokenize text into lowercase terms.
 *
 * Markdown images, links and inline code spans are dropped entirely, and
 * markdown formatting characters and punctuation are turned into
 * separators. Letters, combining marks and digits of any script are kept,
 * so non-English text (accented Latin, Cyrillic, CJK, ...) still yields
 * tokens instead of being erased by an ASCII-only `\w` class.
 *
 * @param text - Text to tokenize
 * @returns Non-empty lowercase terms in order of appearance
 */
function tokenize(text) {
    return text
        .toLowerCase()
        // Strip markdown formatting (bold, italic, code, links, images, headings)
        .replace(/!\[.*?\]\(.*?\)/g, ' ') // images
        .replace(/\[.*?\]\(.*?\)/g, ' ') // links
        .replace(/`{1,3}[^`]*`{1,3}/g, ' ') // inline code
        .replace(/[#*_~`>|\\]/g, ' ') // formatting chars
        // Remaining punctuation/symbols. Unicode-aware: \p{L} letters,
        // \p{M} combining marks, \p{N} digits; a superset of ASCII \w.
        .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
        .split(/\s+/)
        .filter(t => t.length > 0);
}
34
/**
 * Reduce markdown to plain text for scoring.
 *
 * Rules are applied in order: fenced code blocks, inline code and images
 * are removed; links are replaced by their link text; heading markers,
 * formatting characters and list markers are dropped; finally all
 * whitespace runs collapse to a single space and the result is trimmed.
 *
 * @param text - Markdown source
 * @returns Single-line plain text
 */
function stripMarkdown(text) {
    const rules = [
        { pattern: /```[\s\S]*?```/g, replacement: ' ' }, // fenced code blocks
        { pattern: /`[^`]+`/g, replacement: ' ' }, // inline code
        { pattern: /!\[.*?\]\(.*?\)/g, replacement: ' ' }, // images
        { pattern: /\[([^\]]*)\]\([^)]*\)/g, replacement: '$1' }, // links → text
        { pattern: /^#{1,6}\s+/gm, replacement: '' }, // headings
        { pattern: /[*_~`>|\\]/g, replacement: ' ' }, // formatting
        { pattern: /^\s*[-*+]\s+/gm, replacement: ' ' }, // list bullets
        { pattern: /^\s*\d+\.\s+/gm, replacement: ' ' }, // numbered list
        { pattern: /\s+/g, replacement: ' ' }, // collapse whitespace
    ];
    let plain = text;
    for (const { pattern, replacement } of rules) {
        plain = plain.replace(pattern, replacement);
    }
    return plain.trim();
}
51
/**
 * Split markdown content into logical blocks for scoring:
 * - Code fences (``` ... ```) → single block (blank lines inside are kept)
 * - Heading + immediately following chunk → single block
 * - List chunks separated only by blank lines → merged into one block
 * - Tables → single block
 * - Paragraphs → one block each
 *
 * A list is merged only into a preceding block that itself contains list
 * lines. A list that follows a table stays a separate block, so the two are
 * scored independently and the blank line between them is not lost.
 *
 * @param content - Markdown text (any line-ending style)
 * @returns Blocks in document order; `raw` is trimmed and `index` is the
 *          block's position in the result
 */
export function splitIntoBlocks(content) {
    const HEADING = /^#{1,6}\s/;
    // "- x", "* x", "+ x", "1. x" or "1) x", optionally indented
    const LIST_LINE = /^\s*(?:[-*+]|\d+[.)])\s/;
    const isListLine = (line) => LIST_LINE.test(line);
    // Normalise line endings
    const text = content.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
    // Swap fenced code blocks for placeholders so blank lines inside a fence
    // do not split it; they are restored at the end.
    const codeBlocks = [];
    const withPlaceholders = text.replace(/```[\s\S]*?```/g, (match) => {
        const id = codeBlocks.length;
        codeBlocks.push(match);
        return `\x00CODE_BLOCK_${id}\x00`;
    });
    // Split on blank lines
    const rawChunks = withPlaceholders.split(/\n{2,}/);
    // Re-join each heading with the chunk that follows it
    const merged = [];
    for (let i = 0; i < rawChunks.length; i++) {
        const chunk = rawChunks[i].trim();
        if (!chunk)
            continue;
        const nextChunk = rawChunks[i + 1]?.trim();
        if (HEADING.test(chunk) && nextChunk && !HEADING.test(nextChunk)) {
            merged.push(chunk + '\n\n' + nextChunk);
            i++; // the following chunk has been consumed
        }
        else {
            merged.push(chunk);
        }
    }
    // Merge "loose" lists (items separated by blank lines) back together
    const regrouped = [];
    for (const chunk of merged) {
        const lines = chunk.split('\n');
        const isListBlock = lines.some(isListLine)
            && lines.every(l => l.trim() === '' || isListLine(l));
        const prev = regrouped[regrouped.length - 1];
        // Same-type check: only continue a block that already holds list lines
        if (isListBlock && prev !== undefined && prev.split('\n').some(isListLine)) {
            regrouped[regrouped.length - 1] = prev + '\n' + chunk;
            continue;
        }
        regrouped.push(chunk);
    }
    // Restore code blocks and build final ContentBlock array
    const blocks = [];
    for (let i = 0; i < regrouped.length; i++) {
        const raw = regrouped[i]
            .replace(/\x00CODE_BLOCK_(\d+)\x00/g, (_m, idx) => codeBlocks[Number(idx)])
            .trim();
        if (raw) {
            blocks.push({ raw, index: i });
        }
    }
    return blocks;
}
121
+ // ---------------------------------------------------------------------------
122
+ // BM25 Scoring
123
+ // ---------------------------------------------------------------------------
124
/**
 * Score each block against the query terms with Okapi BM25, treating the
 * blocks as the corpus.
 *
 * Block text is passed through `stripMarkdown` and `tokenize` before
 * counting. The IDF is `ln((N - n + 0.5) / (n + 0.5) + 1)`, where `n` is the
 * number of blocks containing the term. A term listed more than once in
 * `queryTerms` contributes once per occurrence.
 *
 * @param blocks - Blocks to score
 * @param queryTerms - Already-tokenized query terms
 * @returns Scores index-aligned with `blocks`; all zeros when either input is empty
 */
export function scoreBM25(blocks, queryTerms) {
    const docCount = blocks.length;
    if (docCount === 0 || queryTerms.length === 0) {
        return blocks.map(() => 0);
    }
    // Per-block term counts and token lengths
    const termCounts = [];
    const docLengths = [];
    let totalTokens = 0;
    for (const block of blocks) {
        const tokens = tokenize(stripMarkdown(block.raw));
        const counts = new Map();
        for (const tok of tokens) {
            counts.set(tok, (counts.get(tok) ?? 0) + 1);
        }
        termCounts.push(counts);
        docLengths.push(tokens.length);
        totalTokens += tokens.length;
    }
    // Guard against an all-empty corpus (average length 0)
    const avgdl = totalTokens / docCount || 1;
    const scores = new Array(docCount).fill(0);
    for (const term of queryTerms) {
        // Number of blocks that contain the term at least once
        const docFreq = termCounts.filter(counts => counts.has(term)).length;
        const idf = Math.log((docCount - docFreq + 0.5) / (docFreq + 0.5) + 1);
        termCounts.forEach((counts, d) => {
            const tf = counts.get(term) ?? 0;
            if (tf === 0)
                return;
            const dl = docLengths[d];
            scores[d] += idf * (tf * (K1 + 1)) / (tf + K1 * (1 - B + B * dl / avgdl));
        });
    }
    return scores;
}
168
+ // ---------------------------------------------------------------------------
169
+ // Relevance scoring (document-level)
170
+ // ---------------------------------------------------------------------------
171
/**
 * Rate how relevant a document is to a query, as a number in [0, 1].
 *
 * Steps: split into blocks, score the blocks with BM25, take the average
 * weighted by each block's stripped character length (so short blocks such
 * as headers do not dominate), divide by the number of query terms, then
 * squash with `2 / (1 + e^(-8x)) - 1` and clamp to [0, 1].
 *
 * @param content - Markdown text to rate
 * @param query - Free-text query
 * @returns 0 when content or query is empty/blank, when no blocks are found,
 *          or when the query yields no terms; otherwise the squashed score
 */
export function computeRelevanceScore(content, query) {
    if (!content || !query || !query.trim()) {
        return 0;
    }
    const blocks = splitIntoBlocks(content);
    if (blocks.length === 0) {
        return 0;
    }
    const queryTerms = tokenize(query);
    if (queryTerms.length === 0) {
        return 0;
    }
    const scores = scoreBM25(blocks, queryTerms);
    // Weights: character length of each block once markdown is stripped
    const charCounts = blocks.map(b => stripMarkdown(b.raw).length);
    const totalChars = charCounts.reduce((sum, len) => sum + len, 0) || 1;
    const weightedSum = scores.reduce((acc, score, i) => acc + score * (charCounts[i] / totalChars), 0);
    // Per-term normalisation keeps scores comparable across query lengths.
    // The factor 8 is the original tuning constant of the squashing curve.
    const perTermScore = weightedSum / queryTerms.length;
    const squashed = 2 / (1 + Math.exp(-perTermScore * 8)) - 1;
    return Math.max(0, Math.min(1, squashed));
}
208
+ // ---------------------------------------------------------------------------
209
+ // Main filter function
210
+ // ---------------------------------------------------------------------------
211
/**
 * Keep only the markdown blocks that are relevant to a query.
 *
 * Blocks scoring at or above the threshold are kept, in document order,
 * joined by blank lines. Without an explicit `threshold`, half of the mean
 * block score is used. If nothing passes, the three best-scoring blocks are
 * kept so the result is never empty.
 *
 * Short-circuits that return `content` untouched with `reductionPercent` 0:
 *   - blank/missing query  → `kept` 0, `total` 0
 *   - no blocks found      → `kept` 0, `total` 0
 *   - query has no terms   → `kept` and `total` both equal the block count
 *
 * @param content - Markdown text to filter
 * @param options - `query`, optional `threshold`, optional `includeScores`
 *                  (prefixes each kept block with `<!-- BM25: score -->`)
 * @returns Filtered content, block counts, and the rounded percentage of
 *          characters removed
 */
export function filterByRelevance(content, options) {
    const { query, threshold, includeScores = false } = options;
    // Empty query → return full content
    if (!query || !query.trim()) {
        return { content, kept: 0, total: 0, reductionPercent: 0 };
    }
    const blocks = splitIntoBlocks(content);
    const total = blocks.length;
    if (total === 0) {
        return { content, kept: 0, total: 0, reductionPercent: 0 };
    }
    const queryTerms = tokenize(query);
    if (queryTerms.length === 0) {
        return { content, kept: total, total, reductionPercent: 0 };
    }
    const scores = scoreBM25(blocks, queryTerms);
    // Explicit threshold wins; otherwise use half of the mean score
    const cutoff = threshold !== undefined
        ? threshold
        : (scores.reduce((sum, value) => sum + value, 0) / scores.length) * 0.5;
    let keptIndices = [];
    scores.forEach((score, i) => {
        if (score >= cutoff)
            keptIndices.push(i);
    });
    // Fallback: never return empty — keep the top 3 by score
    if (keptIndices.length === 0) {
        keptIndices = scores
            .map((score, i) => ({ score, i }))
            .sort((a, b) => b.score - a.score)
            .slice(0, 3)
            .map(({ i }) => i);
    }
    // Emit in original document order
    keptIndices.sort((a, b) => a - b);
    const keptBlocks = keptIndices.map(i => blocks[i]);
    const renderBlock = includeScores
        ? (b) => `<!-- BM25: ${scores[b.index].toFixed(4)} -->\n${b.raw}`
        : (b) => b.raw;
    const filteredContent = keptBlocks.map(renderBlock).join('\n\n');
    // Reduction is measured in characters, not blocks
    const originalLen = content.length;
    let reductionPercent = 0;
    if (originalLen > 0) {
        reductionPercent = Math.round(((originalLen - filteredContent.length) / originalLen) * 100);
    }
    return {
        content: filteredContent,
        kept: keptBlocks.length,
        total,
        reductionPercent,
    };
}
@@ -0,0 +1,54 @@
1
+ /**
2
+ * Branding and design system extraction from web pages
3
+ * Extracts colors, fonts, typography, spacing, components, and CSS variables
4
+ */
5
+ import type { Page } from 'playwright';
6
/**
 * Result shape of `extractBranding`.
 *
 * NOTE(review): field meanings below are read off the declared names and
 * types only; confirm details against the implementation.
 */
export interface BrandingProfile {
    /** Which color scheme(s) were detected. */
    colorScheme: 'light' | 'dark' | 'both';
    /** Logo reference, when one was found (presumably a URL — confirm). */
    logo?: string;
    /** Favicon reference, when one was found (presumably a URL — confirm). */
    favicon?: string;
    /** Named colors; beyond the well-known roles, arbitrary extra keys are allowed. */
    colors: {
        primary?: string;
        secondary?: string;
        accent?: string;
        background?: string;
        textPrimary?: string;
        textSecondary?: string;
        [key: string]: string | undefined;
    };
    /** Fonts, each with a family name and optional weights/source. */
    fonts: {
        family: string;
        weights?: number[];
        source?: string;
    }[];
    /** Typography lookup tables keyed by name. */
    typography: {
        fontFamilies: Record<string, string>;
        fontSizes: Record<string, string>;
        fontWeights: Record<string, number>;
        lineHeights?: Record<string, string>;
    };
    /** Layout metrics; every entry is optional. */
    spacing: {
        baseUnit?: number;
        borderRadius?: string;
        containerMaxWidth?: string;
    };
    /** Per-component string properties, keyed by component name then property name. */
    components: Record<string, Record<string, string>>;
    /** CSS variables as a name → value map. */
    cssVariables: Record<string, string>;
}
38
/**
 * Extract the branding / design-system profile of a web page.
 *
 * Needs a live Playwright page, because it relies on computed styles that
 * are only available inside a browser context.
 *
 * @param page - Playwright Page object
 * @returns Promise resolving to the page's branding profile
 *
 * @example
 * ```typescript
 * const browser = await chromium.launch();
 * const page = await browser.newPage();
 * await page.goto('https://example.com');
 * const branding = await extractBranding(page);
 * console.log(branding.colors.primary);
 * ```
 */
export declare function extractBranding(page: Page): Promise<BrandingProfile>;