@iflow-mcp/jakeliume-webpeel 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (547)
  1. package/LICENSE +15 -0
  2. package/README.md +313 -0
  3. package/dist/cache.d.ts +30 -0
  4. package/dist/cache.js +139 -0
  5. package/dist/cli/commands/auth.d.ts +5 -0
  6. package/dist/cli/commands/auth.js +411 -0
  7. package/dist/cli/commands/doctor.d.ts +37 -0
  8. package/dist/cli/commands/doctor.js +371 -0
  9. package/dist/cli/commands/fetch.d.ts +6 -0
  10. package/dist/cli/commands/fetch.js +1345 -0
  11. package/dist/cli/commands/guide.d.ts +2 -0
  12. package/dist/cli/commands/guide.js +183 -0
  13. package/dist/cli/commands/interact.d.ts +5 -0
  14. package/dist/cli/commands/interact.js +840 -0
  15. package/dist/cli/commands/jobs.d.ts +5 -0
  16. package/dist/cli/commands/jobs.js +997 -0
  17. package/dist/cli/commands/monitor.d.ts +12 -0
  18. package/dist/cli/commands/monitor.js +197 -0
  19. package/dist/cli/commands/observe.d.ts +12 -0
  20. package/dist/cli/commands/observe.js +158 -0
  21. package/dist/cli/commands/screenshot.d.ts +5 -0
  22. package/dist/cli/commands/screenshot.js +282 -0
  23. package/dist/cli/commands/search.d.ts +5 -0
  24. package/dist/cli/commands/search.js +1021 -0
  25. package/dist/cli/commands/setup.d.ts +13 -0
  26. package/dist/cli/commands/setup.js +244 -0
  27. package/dist/cli/commands/skill.d.ts +15 -0
  28. package/dist/cli/commands/skill.js +195 -0
  29. package/dist/cli/utils.d.ts +84 -0
  30. package/dist/cli/utils.js +806 -0
  31. package/dist/cli-auth.d.ts +75 -0
  32. package/dist/cli-auth.js +369 -0
  33. package/dist/cli.d.ts +17 -0
  34. package/dist/cli.js +99 -0
  35. package/dist/core/actions.d.ts +69 -0
  36. package/dist/core/actions.js +495 -0
  37. package/dist/core/agent.d.ts +98 -0
  38. package/dist/core/agent.js +558 -0
  39. package/dist/core/answer.d.ts +42 -0
  40. package/dist/core/answer.js +395 -0
  41. package/dist/core/application-tracker.d.ts +84 -0
  42. package/dist/core/application-tracker.js +184 -0
  43. package/dist/core/apply.d.ts +162 -0
  44. package/dist/core/apply.js +816 -0
  45. package/dist/core/auth-detection.d.ts +35 -0
  46. package/dist/core/auth-detection.js +358 -0
  47. package/dist/core/auto-extract.d.ts +82 -0
  48. package/dist/core/auto-extract.js +604 -0
  49. package/dist/core/auto-interact.d.ts +23 -0
  50. package/dist/core/auto-interact.js +246 -0
  51. package/dist/core/bm25-filter.d.ts +66 -0
  52. package/dist/core/bm25-filter.js +288 -0
  53. package/dist/core/branding.d.ts +54 -0
  54. package/dist/core/branding.js +234 -0
  55. package/dist/core/browser-fetch.d.ts +323 -0
  56. package/dist/core/browser-fetch.js +1600 -0
  57. package/dist/core/browser-pool.d.ts +91 -0
  58. package/dist/core/browser-pool.js +550 -0
  59. package/dist/core/budget.d.ts +42 -0
  60. package/dist/core/budget.js +324 -0
  61. package/dist/core/business-intel.d.ts +47 -0
  62. package/dist/core/business-intel.js +279 -0
  63. package/dist/core/cache.d.ts +13 -0
  64. package/dist/core/cache.js +121 -0
  65. package/dist/core/cf-worker-proxy.d.ts +32 -0
  66. package/dist/core/cf-worker-proxy.js +87 -0
  67. package/dist/core/challenge-detection.d.ts +26 -0
  68. package/dist/core/challenge-detection.js +468 -0
  69. package/dist/core/change-tracking.d.ts +75 -0
  70. package/dist/core/change-tracking.js +276 -0
  71. package/dist/core/chunker.d.ts +46 -0
  72. package/dist/core/chunker.js +249 -0
  73. package/dist/core/chunking.d.ts +42 -0
  74. package/dist/core/chunking.js +181 -0
  75. package/dist/core/circuit-breaker.d.ts +44 -0
  76. package/dist/core/circuit-breaker.js +85 -0
  77. package/dist/core/content-pruner.d.ts +47 -0
  78. package/dist/core/content-pruner.js +425 -0
  79. package/dist/core/cookie-cache.d.ts +60 -0
  80. package/dist/core/cookie-cache.js +163 -0
  81. package/dist/core/crawl-checkpoint.d.ts +54 -0
  82. package/dist/core/crawl-checkpoint.js +104 -0
  83. package/dist/core/crawler.d.ts +84 -0
  84. package/dist/core/crawler.js +349 -0
  85. package/dist/core/cross-verify.d.ts +27 -0
  86. package/dist/core/cross-verify.js +93 -0
  87. package/dist/core/deep-fetch.d.ts +74 -0
  88. package/dist/core/deep-fetch.js +405 -0
  89. package/dist/core/deep-research.d.ts +141 -0
  90. package/dist/core/deep-research.js +972 -0
  91. package/dist/core/design-analysis.d.ts +70 -0
  92. package/dist/core/design-analysis.js +490 -0
  93. package/dist/core/design-compare.d.ts +38 -0
  94. package/dist/core/design-compare.js +264 -0
  95. package/dist/core/diff.d.ts +61 -0
  96. package/dist/core/diff.js +289 -0
  97. package/dist/core/dns-cache.d.ts +20 -0
  98. package/dist/core/dns-cache.js +198 -0
  99. package/dist/core/documents.d.ts +23 -0
  100. package/dist/core/documents.js +123 -0
  101. package/dist/core/domain-memory.d.ts +66 -0
  102. package/dist/core/domain-memory.js +163 -0
  103. package/dist/core/domain-verify.d.ts +40 -0
  104. package/dist/core/domain-verify.js +379 -0
  105. package/dist/core/engine-ranker.d.ts +112 -0
  106. package/dist/core/engine-ranker.js +395 -0
  107. package/dist/core/extract-inline.d.ts +38 -0
  108. package/dist/core/extract-inline.js +215 -0
  109. package/dist/core/extract-listings.d.ts +38 -0
  110. package/dist/core/extract-listings.js +461 -0
  111. package/dist/core/extract.d.ts +9 -0
  112. package/dist/core/extract.js +139 -0
  113. package/dist/core/fetch-cache.d.ts +57 -0
  114. package/dist/core/fetch-cache.js +95 -0
  115. package/dist/core/fetcher.d.ts +13 -0
  116. package/dist/core/fetcher.js +12 -0
  117. package/dist/core/google-cache.d.ts +29 -0
  118. package/dist/core/google-cache.js +180 -0
  119. package/dist/core/google-serp-parser.d.ts +82 -0
  120. package/dist/core/google-serp-parser.js +287 -0
  121. package/dist/core/hotel-search.d.ts +122 -0
  122. package/dist/core/hotel-search.js +382 -0
  123. package/dist/core/http-fetch.d.ts +72 -0
  124. package/dist/core/http-fetch.js +820 -0
  125. package/dist/core/human.d.ts +175 -0
  126. package/dist/core/human.js +680 -0
  127. package/dist/core/image-caption.d.ts +44 -0
  128. package/dist/core/image-caption.js +271 -0
  129. package/dist/core/jobs.d.ts +75 -0
  130. package/dist/core/jobs.js +634 -0
  131. package/dist/core/json-ld.d.ts +15 -0
  132. package/dist/core/json-ld.js +617 -0
  133. package/dist/core/language-detect.d.ts +18 -0
  134. package/dist/core/language-detect.js +135 -0
  135. package/dist/core/links.d.ts +10 -0
  136. package/dist/core/links.js +44 -0
  137. package/dist/core/llm-extract.d.ts +71 -0
  138. package/dist/core/llm-extract.js +507 -0
  139. package/dist/core/llm-provider.d.ts +100 -0
  140. package/dist/core/llm-provider.js +702 -0
  141. package/dist/core/local-search.d.ts +60 -0
  142. package/dist/core/local-search.js +308 -0
  143. package/dist/core/logger.d.ts +28 -0
  144. package/dist/core/logger.js +104 -0
  145. package/dist/core/map.d.ts +33 -0
  146. package/dist/core/map.js +127 -0
  147. package/dist/core/markdown.d.ts +92 -0
  148. package/dist/core/markdown.js +809 -0
  149. package/dist/core/metadata.d.ts +34 -0
  150. package/dist/core/metadata.js +422 -0
  151. package/dist/core/observe.d.ts +113 -0
  152. package/dist/core/observe.js +395 -0
  153. package/dist/core/ocr.d.ts +12 -0
  154. package/dist/core/ocr.js +33 -0
  155. package/dist/core/paginate.d.ts +31 -0
  156. package/dist/core/paginate.js +106 -0
  157. package/dist/core/pdf.d.ts +8 -0
  158. package/dist/core/pdf.js +25 -0
  159. package/dist/core/peel-tls.d.ts +25 -0
  160. package/dist/core/peel-tls.js +220 -0
  161. package/dist/core/pipeline.d.ts +132 -0
  162. package/dist/core/pipeline.js +1666 -0
  163. package/dist/core/profiles.d.ts +61 -0
  164. package/dist/core/profiles.js +350 -0
  165. package/dist/core/prompt-guard.d.ts +30 -0
  166. package/dist/core/prompt-guard.js +119 -0
  167. package/dist/core/proxy-config.d.ts +90 -0
  168. package/dist/core/proxy-config.js +172 -0
  169. package/dist/core/quick-answer.d.ts +53 -0
  170. package/dist/core/quick-answer.js +833 -0
  171. package/dist/core/rate-governor.d.ts +80 -0
  172. package/dist/core/rate-governor.js +238 -0
  173. package/dist/core/readability.d.ts +57 -0
  174. package/dist/core/readability.js +533 -0
  175. package/dist/core/research.d.ts +66 -0
  176. package/dist/core/research.js +270 -0
  177. package/dist/core/retry.d.ts +60 -0
  178. package/dist/core/retry.js +119 -0
  179. package/dist/core/safe-browsing.d.ts +30 -0
  180. package/dist/core/safe-browsing.js +206 -0
  181. package/dist/core/schema-extraction.d.ts +66 -0
  182. package/dist/core/schema-extraction.js +352 -0
  183. package/dist/core/schema-postprocess.d.ts +32 -0
  184. package/dist/core/schema-postprocess.js +469 -0
  185. package/dist/core/schema-templates.d.ts +19 -0
  186. package/dist/core/schema-templates.js +143 -0
  187. package/dist/core/screenshot.d.ts +224 -0
  188. package/dist/core/screenshot.js +207 -0
  189. package/dist/core/search-engines.d.ts +25 -0
  190. package/dist/core/search-engines.js +182 -0
  191. package/dist/core/search-provider.d.ts +243 -0
  192. package/dist/core/search-provider.js +1629 -0
  193. package/dist/core/searxng-provider.d.ts +35 -0
  194. package/dist/core/searxng-provider.js +105 -0
  195. package/dist/core/selective-evidence.d.ts +151 -0
  196. package/dist/core/selective-evidence.js +389 -0
  197. package/dist/core/site-search.d.ts +44 -0
  198. package/dist/core/site-search.js +252 -0
  199. package/dist/core/sitemap.d.ts +23 -0
  200. package/dist/core/sitemap.js +105 -0
  201. package/dist/core/source-credibility.d.ts +29 -0
  202. package/dist/core/source-credibility.js +584 -0
  203. package/dist/core/source-scoring.d.ts +166 -0
  204. package/dist/core/source-scoring.js +396 -0
  205. package/dist/core/stemmer.d.ts +38 -0
  206. package/dist/core/stemmer.js +509 -0
  207. package/dist/core/strategies.d.ts +104 -0
  208. package/dist/core/strategies.js +1044 -0
  209. package/dist/core/strategy-hooks.d.ts +145 -0
  210. package/dist/core/strategy-hooks.js +74 -0
  211. package/dist/core/structured-extract.d.ts +43 -0
  212. package/dist/core/structured-extract.js +550 -0
  213. package/dist/core/summarize.d.ts +17 -0
  214. package/dist/core/summarize.js +78 -0
  215. package/dist/core/synonyms.d.ts +42 -0
  216. package/dist/core/synonyms.js +184 -0
  217. package/dist/core/system-monitor.d.ts +61 -0
  218. package/dist/core/system-monitor.js +133 -0
  219. package/dist/core/table-format.d.ts +30 -0
  220. package/dist/core/table-format.js +146 -0
  221. package/dist/core/threat-feeds.d.ts +23 -0
  222. package/dist/core/threat-feeds.js +104 -0
  223. package/dist/core/timing.d.ts +21 -0
  224. package/dist/core/timing.js +33 -0
  225. package/dist/core/transcript-export.d.ts +47 -0
  226. package/dist/core/transcript-export.js +107 -0
  227. package/dist/core/user-agents.d.ts +82 -0
  228. package/dist/core/user-agents.js +239 -0
  229. package/dist/core/vertical-search.d.ts +54 -0
  230. package/dist/core/vertical-search.js +158 -0
  231. package/dist/core/watch-manager.d.ts +175 -0
  232. package/dist/core/watch-manager.js +416 -0
  233. package/dist/core/watch.d.ts +101 -0
  234. package/dist/core/watch.js +389 -0
  235. package/dist/core/youtube.d.ts +130 -0
  236. package/dist/core/youtube.js +1175 -0
  237. package/dist/ee/challenge-re-export.d.ts +1 -0
  238. package/dist/ee/challenge-re-export.js +1 -0
  239. package/dist/ee/challenge-solver.d.ts +72 -0
  240. package/dist/ee/challenge-solver.js +720 -0
  241. package/dist/ee/domain-extractors.d.ts +8 -0
  242. package/dist/ee/domain-extractors.js +8 -0
  243. package/dist/ee/domain-intel.d.ts +16 -0
  244. package/dist/ee/domain-intel.js +133 -0
  245. package/dist/ee/extractors/allrecipes.d.ts +2 -0
  246. package/dist/ee/extractors/allrecipes.js +120 -0
  247. package/dist/ee/extractors/amazon.d.ts +2 -0
  248. package/dist/ee/extractors/amazon.js +78 -0
  249. package/dist/ee/extractors/arxiv.d.ts +2 -0
  250. package/dist/ee/extractors/arxiv.js +137 -0
  251. package/dist/ee/extractors/bestbuy.d.ts +2 -0
  252. package/dist/ee/extractors/bestbuy.js +78 -0
  253. package/dist/ee/extractors/carscom.d.ts +2 -0
  254. package/dist/ee/extractors/carscom.js +121 -0
  255. package/dist/ee/extractors/coingecko.d.ts +2 -0
  256. package/dist/ee/extractors/coingecko.js +134 -0
  257. package/dist/ee/extractors/craigslist.d.ts +2 -0
  258. package/dist/ee/extractors/craigslist.js +92 -0
  259. package/dist/ee/extractors/devto.d.ts +2 -0
  260. package/dist/ee/extractors/devto.js +135 -0
  261. package/dist/ee/extractors/ebay.d.ts +2 -0
  262. package/dist/ee/extractors/ebay.js +90 -0
  263. package/dist/ee/extractors/espn.d.ts +2 -0
  264. package/dist/ee/extractors/espn.js +260 -0
  265. package/dist/ee/extractors/etsy.d.ts +2 -0
  266. package/dist/ee/extractors/etsy.js +52 -0
  267. package/dist/ee/extractors/facebook.d.ts +2 -0
  268. package/dist/ee/extractors/facebook.js +46 -0
  269. package/dist/ee/extractors/github.d.ts +2 -0
  270. package/dist/ee/extractors/github.js +196 -0
  271. package/dist/ee/extractors/google-flights.d.ts +2 -0
  272. package/dist/ee/extractors/google-flights.js +176 -0
  273. package/dist/ee/extractors/hackernews.d.ts +2 -0
  274. package/dist/ee/extractors/hackernews.js +147 -0
  275. package/dist/ee/extractors/imdb.d.ts +2 -0
  276. package/dist/ee/extractors/imdb.js +172 -0
  277. package/dist/ee/extractors/index.d.ts +26 -0
  278. package/dist/ee/extractors/index.js +247 -0
  279. package/dist/ee/extractors/instagram.d.ts +2 -0
  280. package/dist/ee/extractors/instagram.js +102 -0
  281. package/dist/ee/extractors/kalshi.d.ts +2 -0
  282. package/dist/ee/extractors/kalshi.js +121 -0
  283. package/dist/ee/extractors/kayak-cars.d.ts +2 -0
  284. package/dist/ee/extractors/kayak-cars.js +270 -0
  285. package/dist/ee/extractors/linkedin.d.ts +2 -0
  286. package/dist/ee/extractors/linkedin.js +113 -0
  287. package/dist/ee/extractors/medium.d.ts +2 -0
  288. package/dist/ee/extractors/medium.js +130 -0
  289. package/dist/ee/extractors/news.d.ts +4 -0
  290. package/dist/ee/extractors/news.js +173 -0
  291. package/dist/ee/extractors/npm.d.ts +2 -0
  292. package/dist/ee/extractors/npm.js +86 -0
  293. package/dist/ee/extractors/pdf.d.ts +2 -0
  294. package/dist/ee/extractors/pdf.js +108 -0
  295. package/dist/ee/extractors/pinterest.d.ts +2 -0
  296. package/dist/ee/extractors/pinterest.js +34 -0
  297. package/dist/ee/extractors/polymarket.d.ts +2 -0
  298. package/dist/ee/extractors/polymarket.js +358 -0
  299. package/dist/ee/extractors/producthunt.d.ts +2 -0
  300. package/dist/ee/extractors/producthunt.js +88 -0
  301. package/dist/ee/extractors/pubmed.d.ts +2 -0
  302. package/dist/ee/extractors/pubmed.js +162 -0
  303. package/dist/ee/extractors/pypi.d.ts +2 -0
  304. package/dist/ee/extractors/pypi.js +80 -0
  305. package/dist/ee/extractors/reddit.d.ts +2 -0
  306. package/dist/ee/extractors/reddit.js +438 -0
  307. package/dist/ee/extractors/redfin.d.ts +2 -0
  308. package/dist/ee/extractors/redfin.js +156 -0
  309. package/dist/ee/extractors/semanticscholar.d.ts +2 -0
  310. package/dist/ee/extractors/semanticscholar.js +131 -0
  311. package/dist/ee/extractors/shared.d.ts +12 -0
  312. package/dist/ee/extractors/shared.js +76 -0
  313. package/dist/ee/extractors/soundcloud.d.ts +2 -0
  314. package/dist/ee/extractors/soundcloud.js +34 -0
  315. package/dist/ee/extractors/sportsbetting.d.ts +2 -0
  316. package/dist/ee/extractors/sportsbetting.js +37 -0
  317. package/dist/ee/extractors/spotify.d.ts +2 -0
  318. package/dist/ee/extractors/spotify.js +34 -0
  319. package/dist/ee/extractors/stackoverflow.d.ts +2 -0
  320. package/dist/ee/extractors/stackoverflow.js +61 -0
  321. package/dist/ee/extractors/substack.d.ts +2 -0
  322. package/dist/ee/extractors/substack.js +115 -0
  323. package/dist/ee/extractors/substackroot.d.ts +2 -0
  324. package/dist/ee/extractors/substackroot.js +46 -0
  325. package/dist/ee/extractors/tiktok.d.ts +2 -0
  326. package/dist/ee/extractors/tiktok.js +29 -0
  327. package/dist/ee/extractors/tradingview.d.ts +2 -0
  328. package/dist/ee/extractors/tradingview.js +182 -0
  329. package/dist/ee/extractors/twitch.d.ts +2 -0
  330. package/dist/ee/extractors/twitch.js +36 -0
  331. package/dist/ee/extractors/twitter.d.ts +2 -0
  332. package/dist/ee/extractors/twitter.js +327 -0
  333. package/dist/ee/extractors/types.d.ts +14 -0
  334. package/dist/ee/extractors/types.js +1 -0
  335. package/dist/ee/extractors/walmart.d.ts +2 -0
  336. package/dist/ee/extractors/walmart.js +50 -0
  337. package/dist/ee/extractors/weather.d.ts +2 -0
  338. package/dist/ee/extractors/weather.js +133 -0
  339. package/dist/ee/extractors/wikipedia.d.ts +4 -0
  340. package/dist/ee/extractors/wikipedia.js +235 -0
  341. package/dist/ee/extractors/yelp.d.ts +2 -0
  342. package/dist/ee/extractors/yelp.js +216 -0
  343. package/dist/ee/extractors/youtube.d.ts +2 -0
  344. package/dist/ee/extractors/youtube.js +189 -0
  345. package/dist/ee/extractors/zillow.d.ts +54 -0
  346. package/dist/ee/extractors/zillow.js +247 -0
  347. package/dist/ee/extractors-re-export.d.ts +1 -0
  348. package/dist/ee/extractors-re-export.js +1 -0
  349. package/dist/ee/premium-hooks.d.ts +20 -0
  350. package/dist/ee/premium-hooks.js +50 -0
  351. package/dist/ee/spa-detection.d.ts +2 -0
  352. package/dist/ee/spa-detection.js +2 -0
  353. package/dist/ee/stability.d.ts +4 -0
  354. package/dist/ee/stability.js +29 -0
  355. package/dist/ee/swr-cache.d.ts +14 -0
  356. package/dist/ee/swr-cache.js +34 -0
  357. package/dist/index.d.ts +143 -0
  358. package/dist/index.js +291 -0
  359. package/dist/integrations/index.d.ts +2 -0
  360. package/dist/integrations/index.js +2 -0
  361. package/dist/integrations/langchain.d.ts +64 -0
  362. package/dist/integrations/langchain.js +115 -0
  363. package/dist/integrations/llamaindex.d.ts +50 -0
  364. package/dist/integrations/llamaindex.js +91 -0
  365. package/dist/mcp/handlers/act.d.ts +5 -0
  366. package/dist/mcp/handlers/act.js +34 -0
  367. package/dist/mcp/handlers/definitions.d.ts +6 -0
  368. package/dist/mcp/handlers/definitions.js +395 -0
  369. package/dist/mcp/handlers/extract.d.ts +7 -0
  370. package/dist/mcp/handlers/extract.js +135 -0
  371. package/dist/mcp/handlers/fetch.d.ts +6 -0
  372. package/dist/mcp/handlers/fetch.js +98 -0
  373. package/dist/mcp/handlers/find.d.ts +5 -0
  374. package/dist/mcp/handlers/find.js +137 -0
  375. package/dist/mcp/handlers/index.d.ts +13 -0
  376. package/dist/mcp/handlers/index.js +63 -0
  377. package/dist/mcp/handlers/legacy.d.ts +25 -0
  378. package/dist/mcp/handlers/legacy.js +450 -0
  379. package/dist/mcp/handlers/meta.d.ts +6 -0
  380. package/dist/mcp/handlers/meta.js +40 -0
  381. package/dist/mcp/handlers/monitor.d.ts +5 -0
  382. package/dist/mcp/handlers/monitor.js +41 -0
  383. package/dist/mcp/handlers/observe.d.ts +8 -0
  384. package/dist/mcp/handlers/observe.js +37 -0
  385. package/dist/mcp/handlers/read.d.ts +6 -0
  386. package/dist/mcp/handlers/read.js +78 -0
  387. package/dist/mcp/handlers/see.d.ts +5 -0
  388. package/dist/mcp/handlers/see.js +75 -0
  389. package/dist/mcp/handlers/types.d.ts +29 -0
  390. package/dist/mcp/handlers/types.js +28 -0
  391. package/dist/mcp/server.d.ts +7 -0
  392. package/dist/mcp/server.js +108 -0
  393. package/dist/mcp/smart-router.d.ts +23 -0
  394. package/dist/mcp/smart-router.js +178 -0
  395. package/dist/server/app.d.ts +14 -0
  396. package/dist/server/app.js +632 -0
  397. package/dist/server/auth-store.d.ts +28 -0
  398. package/dist/server/auth-store.js +88 -0
  399. package/dist/server/bull-queues.d.ts +60 -0
  400. package/dist/server/bull-queues.js +90 -0
  401. package/dist/server/email-service.d.ts +55 -0
  402. package/dist/server/email-service.js +291 -0
  403. package/dist/server/job-queue.d.ts +100 -0
  404. package/dist/server/job-queue.js +145 -0
  405. package/dist/server/logger.d.ts +10 -0
  406. package/dist/server/logger.js +37 -0
  407. package/dist/server/middleware/audit-log.d.ts +14 -0
  408. package/dist/server/middleware/audit-log.js +73 -0
  409. package/dist/server/middleware/auth.d.ts +35 -0
  410. package/dist/server/middleware/auth.js +225 -0
  411. package/dist/server/middleware/rate-limit.d.ts +50 -0
  412. package/dist/server/middleware/rate-limit.js +270 -0
  413. package/dist/server/middleware/scope-guard.d.ts +25 -0
  414. package/dist/server/middleware/scope-guard.js +45 -0
  415. package/dist/server/middleware/url-validator.d.ts +15 -0
  416. package/dist/server/middleware/url-validator.js +201 -0
  417. package/dist/server/openapi.yaml +6418 -0
  418. package/dist/server/pg-auth-store.d.ts +146 -0
  419. package/dist/server/pg-auth-store.js +576 -0
  420. package/dist/server/pg-job-queue.d.ts +59 -0
  421. package/dist/server/pg-job-queue.js +375 -0
  422. package/dist/server/routes/activity.d.ts +6 -0
  423. package/dist/server/routes/activity.js +79 -0
  424. package/dist/server/routes/admin-active.d.ts +7 -0
  425. package/dist/server/routes/admin-active.js +120 -0
  426. package/dist/server/routes/admin-stats.d.ts +7 -0
  427. package/dist/server/routes/admin-stats.js +176 -0
  428. package/dist/server/routes/agent.d.ts +24 -0
  429. package/dist/server/routes/agent.js +480 -0
  430. package/dist/server/routes/answer.d.ts +5 -0
  431. package/dist/server/routes/answer.js +125 -0
  432. package/dist/server/routes/ask.d.ts +28 -0
  433. package/dist/server/routes/ask.js +295 -0
  434. package/dist/server/routes/batch.d.ts +6 -0
  435. package/dist/server/routes/batch.js +493 -0
  436. package/dist/server/routes/cache-warm.d.ts +25 -0
  437. package/dist/server/routes/cache-warm.js +212 -0
  438. package/dist/server/routes/cli-usage.d.ts +6 -0
  439. package/dist/server/routes/cli-usage.js +127 -0
  440. package/dist/server/routes/compat.d.ts +23 -0
  441. package/dist/server/routes/compat.js +652 -0
  442. package/dist/server/routes/crawl.d.ts +13 -0
  443. package/dist/server/routes/crawl.js +287 -0
  444. package/dist/server/routes/deep-fetch.d.ts +8 -0
  445. package/dist/server/routes/deep-fetch.js +57 -0
  446. package/dist/server/routes/deep-research.d.ts +11 -0
  447. package/dist/server/routes/deep-research.js +232 -0
  448. package/dist/server/routes/demo.d.ts +24 -0
  449. package/dist/server/routes/demo.js +517 -0
  450. package/dist/server/routes/do.d.ts +8 -0
  451. package/dist/server/routes/do.js +72 -0
  452. package/dist/server/routes/extract.d.ts +14 -0
  453. package/dist/server/routes/extract.js +325 -0
  454. package/dist/server/routes/feed.d.ts +15 -0
  455. package/dist/server/routes/feed.js +311 -0
  456. package/dist/server/routes/fetch-queue.d.ts +13 -0
  457. package/dist/server/routes/fetch-queue.js +357 -0
  458. package/dist/server/routes/fetch.d.ts +7 -0
  459. package/dist/server/routes/fetch.js +1274 -0
  460. package/dist/server/routes/go.d.ts +14 -0
  461. package/dist/server/routes/go.js +81 -0
  462. package/dist/server/routes/health.d.ts +11 -0
  463. package/dist/server/routes/health.js +141 -0
  464. package/dist/server/routes/jobs.d.ts +7 -0
  465. package/dist/server/routes/jobs.js +574 -0
  466. package/dist/server/routes/map.d.ts +11 -0
  467. package/dist/server/routes/map.js +116 -0
  468. package/dist/server/routes/mcp.d.ts +14 -0
  469. package/dist/server/routes/mcp.js +197 -0
  470. package/dist/server/routes/metrics.d.ts +37 -0
  471. package/dist/server/routes/metrics.js +149 -0
  472. package/dist/server/routes/oauth.d.ts +9 -0
  473. package/dist/server/routes/oauth.js +396 -0
  474. package/dist/server/routes/playground.d.ts +17 -0
  475. package/dist/server/routes/playground.js +283 -0
  476. package/dist/server/routes/reader.d.ts +18 -0
  477. package/dist/server/routes/reader.js +192 -0
  478. package/dist/server/routes/research.d.ts +14 -0
  479. package/dist/server/routes/research.js +482 -0
  480. package/dist/server/routes/screenshot.d.ts +22 -0
  481. package/dist/server/routes/screenshot.js +820 -0
  482. package/dist/server/routes/search.d.ts +6 -0
  483. package/dist/server/routes/search.js +874 -0
  484. package/dist/server/routes/session.d.ts +17 -0
  485. package/dist/server/routes/session.js +548 -0
  486. package/dist/server/routes/share.d.ts +18 -0
  487. package/dist/server/routes/share.js +462 -0
  488. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  489. package/dist/server/routes/smart-search/handlers/cars.js +102 -0
  490. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  491. package/dist/server/routes/smart-search/handlers/flights.js +72 -0
  492. package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
  493. package/dist/server/routes/smart-search/handlers/general.js +717 -0
  494. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  495. package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
  496. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  497. package/dist/server/routes/smart-search/handlers/products.js +1309 -0
  498. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  499. package/dist/server/routes/smart-search/handlers/rental.js +154 -0
  500. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  501. package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
  502. package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
  503. package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
  504. package/dist/server/routes/smart-search/index.d.ts +19 -0
  505. package/dist/server/routes/smart-search/index.js +546 -0
  506. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  507. package/dist/server/routes/smart-search/intent.js +264 -0
  508. package/dist/server/routes/smart-search/llm.d.ts +16 -0
  509. package/dist/server/routes/smart-search/llm.js +70 -0
  510. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  511. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  512. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  513. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  514. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  515. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  516. package/dist/server/routes/smart-search/types.d.ts +81 -0
  517. package/dist/server/routes/smart-search/types.js +1 -0
  518. package/dist/server/routes/smart-search/utils.d.ts +20 -0
  519. package/dist/server/routes/smart-search/utils.js +146 -0
  520. package/dist/server/routes/stats.d.ts +6 -0
  521. package/dist/server/routes/stats.js +71 -0
  522. package/dist/server/routes/stripe.d.ts +15 -0
  523. package/dist/server/routes/stripe.js +296 -0
  524. package/dist/server/routes/transcript-export.d.ts +10 -0
  525. package/dist/server/routes/transcript-export.js +178 -0
  526. package/dist/server/routes/usage.d.ts +9 -0
  527. package/dist/server/routes/usage.js +279 -0
  528. package/dist/server/routes/users.d.ts +8 -0
  529. package/dist/server/routes/users.js +1867 -0
  530. package/dist/server/routes/watch.d.ts +15 -0
  531. package/dist/server/routes/watch.js +309 -0
  532. package/dist/server/routes/webhooks.d.ts +26 -0
  533. package/dist/server/routes/webhooks.js +170 -0
  534. package/dist/server/routes/youtube.d.ts +6 -0
  535. package/dist/server/routes/youtube.js +130 -0
  536. package/dist/server/sentry.d.ts +14 -0
  537. package/dist/server/sentry.js +104 -0
  538. package/dist/server/types.d.ts +15 -0
  539. package/dist/server/types.js +7 -0
  540. package/dist/server/utils/response.d.ts +44 -0
  541. package/dist/server/utils/response.js +69 -0
  542. package/dist/server/utils/sse.d.ts +22 -0
  543. package/dist/server/utils/sse.js +38 -0
  544. package/dist/types.d.ts +552 -0
  545. package/dist/types.js +39 -0
  546. package/llms.txt +105 -0
  547. package/package.json +189 -0
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Domain-aware structured extractors for WebPeel.
3
+ *
4
+ * This file re-exports from individual extractor files for backward compatibility.
5
+ * Each extractor now lives in its own file under src/ee/extractors/.
6
+ */
7
+ export { getDomainExtractor, hasDomainExtractor, extractDomainData, clearExtractorCache, setExtractorRedis, } from './extractors/index.js';
8
+ export type { DomainExtractResult, DomainExtractor } from './extractors/index.js';
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Domain-aware structured extractors for WebPeel.
3
+ *
4
+ * This file re-exports from individual extractor files for backward compatibility.
5
+ * Each extractor now lives in its own file under src/ee/extractors/.
6
+ */
7
+ // Re-exported from individual extractor files for backward compatibility
8
+ export { getDomainExtractor, hasDomainExtractor, extractDomainData, clearExtractorCache, setExtractorRedis, } from './extractors/index.js';
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Domain Intelligence — premium server-only optimisation.
3
+ *
4
+ * Learns from historical fetch outcomes which domains require browser or
5
+ * stealth mode, so subsequent requests skip the slow simple→browser
6
+ * escalation path and go straight to the right strategy.
7
+ *
8
+ * Uses an exponential moving average for latency tracking and requires a
9
+ * minimum sample count before issuing recommendations to avoid false
10
+ * positives from one-off failures.
11
+ *
12
+ * This module is NOT shipped in the npm package.
13
+ */
14
+ import type { StrategyHooks } from '../core/strategy-hooks.js';
15
+ export declare function clearDomainIntel(): void;
16
+ export declare function createDomainIntelHooks(): Pick<StrategyHooks, 'getDomainRecommendation' | 'recordDomainResult'>;
@@ -0,0 +1,133 @@
1
+ /**
2
+ * Domain Intelligence — premium server-only optimisation.
3
+ *
4
+ * Learns from historical fetch outcomes which domains require browser or
5
+ * stealth mode, so subsequent requests skip the slow simple→browser
6
+ * escalation path and go straight to the right strategy.
7
+ *
8
+ * Uses an exponential moving average for latency tracking and requires a
9
+ * minimum sample count before issuing recommendations to avoid false
10
+ * positives from one-off failures.
11
+ *
12
+ * This module is NOT shipped in the npm package.
13
+ */
14
+ /* ---------- configuration ----------------------------------------------- */
15
+ const MAX_DOMAINS = 500;
16
+ const TTL_MS = 60 * 60 * 1000; // 1 hour
17
+ const EMA_ALPHA = 0.3;
18
+ const MIN_SAMPLES = 3;
19
+ /* ---------- state ------------------------------------------------------- */
20
+ const domainIntel = new Map();
21
+ const methodCounts = new Map();
22
+ /* ---------- internals --------------------------------------------------- */
23
/**
 * Derive the lookup key for a URL: its lower-cased hostname.
 *
 * @param url Absolute URL string.
 * @returns The lower-cased hostname, or an empty string when `url` cannot be
 *          parsed by the WHATWG `URL` constructor.
 */
function domainKey(url) {
    let host = '';
    try {
        const parsed = new URL(url);
        host = parsed.hostname.toLowerCase();
    }
    catch {
        // Unparseable input maps to the empty key, which callers treat as "skip".
        host = '';
    }
    return host;
}
31
/**
 * Drop every domain whose last observation is older than `TTL_MS`.
 *
 * Both maps are kept in step: an expired key is removed from `domainIntel`
 * and from `methodCounts`.
 *
 * @param now Current time in epoch milliseconds.
 */
function prune(now) {
    domainIntel.forEach((entry, host) => {
        const expired = now - entry.lastSeen > TTL_MS;
        if (expired) {
            domainIntel.delete(host);
            methodCounts.delete(host);
        }
    });
}
39
+ /* ---------- hook implementations ---------------------------------------- */
40
/**
 * Suggest a fetch mode for the domain of `url` based on recorded outcomes.
 *
 * @param url Absolute URL of the page about to be fetched.
 * @returns `{ mode: 'stealth' }` when every recorded sample used stealth,
 *          `{ mode: 'browser' }` when no sample ever succeeded with the simple
 *          method, otherwise `null` (unknown domain, expired entry, fewer than
 *          `MIN_SAMPLES` samples, or mixed outcomes).
 */
function getDomainRecommendation(url) {
    const host = domainKey(url);
    const record = host ? domainIntel.get(host) : undefined;
    if (!record) {
        return null;
    }
    // An entry past its TTL is discarded on read rather than trusted.
    const age = Date.now() - record.lastSeen;
    if (age > TTL_MS) {
        domainIntel.delete(host);
        methodCounts.delete(host);
        return null;
    }
    if (record.sampleCount < MIN_SAMPLES) {
        return null;
    }
    const tally = methodCounts.get(host);
    if (!tally) {
        return null;
    }
    // Re-insert so the entry moves to the most-recently-used end of the Map.
    domainIntel.delete(host);
    domainIntel.set(host, record);
    const total = record.sampleCount;
    const alwaysStealth = record.needsStealth && tally.stealth === total;
    if (alwaysStealth) {
        return { mode: 'stealth' };
    }
    const neverSimple = tally.simple === 0 && tally.browser + tally.stealth === total;
    if (neverSimple && record.needsBrowser) {
        return { mode: 'browser' };
    }
    return null;
}
73
/**
 * Record the outcome of one fetch for the domain of `url`.
 *
 * Expired entries are pruned first, then the domain's aggregate record and
 * per-method counters are updated, and finally the oldest entries are evicted
 * while more than `MAX_DOMAINS` domains are tracked.
 *
 * @param url       Absolute URL that was fetched.
 * @param method    Method that was used: 'simple', 'browser' or 'stealth'.
 * @param latencyMs Observed latency; non-finite or non-positive values fall
 *                  back to the domain's current average (or 0 for a new domain).
 */
function recordDomainResult(url, method, latencyMs) {
    const host = domainKey(url);
    if (!host) {
        return;
    }
    const timestamp = Date.now();
    prune(timestamp);
    const prior = domainIntel.get(host);
    let latency = prior?.avgLatencyMs ?? 0;
    if (Number.isFinite(latencyMs) && latencyMs > 0) {
        latency = latencyMs;
    }
    const viaStealth = method === 'stealth';
    const viaBrowser = method === 'browser' || viaStealth;
    let updated;
    if (!prior) {
        updated = {
            needsBrowser: viaBrowser,
            needsStealth: viaStealth,
            avgLatencyMs: latency,
            lastSeen: timestamp,
            sampleCount: 1,
        };
    }
    else {
        // Exponential moving average; an average of exactly 0 is treated as
        // "no latency recorded yet" and is replaced outright.
        const smoothed = prior.avgLatencyMs === 0
            ? latency
            : prior.avgLatencyMs * (1 - EMA_ALPHA) +
                latency * EMA_ALPHA;
        updated = {
            needsBrowser: prior.needsBrowser || viaBrowser,
            needsStealth: prior.needsStealth || viaStealth,
            avgLatencyMs: smoothed,
            lastSeen: timestamp,
            sampleCount: prior.sampleCount + 1,
        };
    }
    let tally = methodCounts.get(host);
    if (tally === undefined || tally === null) {
        tally = { simple: 0, browser: 0, stealth: 0 };
    }
    tally[method] += 1;
    // Re-insert so this domain becomes the most recently used Map entry.
    domainIntel.delete(host);
    domainIntel.set(host, updated);
    methodCounts.set(host, tally);
    // Map iteration follows insertion order, so the first key is the least recently used.
    while (domainIntel.size > MAX_DOMAINS) {
        const { value: stalest } = domainIntel.keys().next();
        if (!stalest) {
            break;
        }
        domainIntel.delete(stalest);
        methodCounts.delete(stalest);
    }
}
122
+ /* ---------- cleanup ----------------------------------------------------- */
123
/**
 * Forget everything learned so far: empties both the per-domain records and
 * the per-domain method counters.
 */
export function clearDomainIntel() {
    for (const store of [domainIntel, methodCounts]) {
        store.clear();
    }
}
127
+ /* ---------- public export ----------------------------------------------- */
128
/**
 * Build the strategy-hook object backed by this module's shared state.
 *
 * @returns An object exposing `getDomainRecommendation` and `recordDomainResult`.
 */
export function createDomainIntelHooks() {
    const hooks = {
        getDomainRecommendation: getDomainRecommendation,
        recordDomainResult: recordDomainResult,
    };
    return hooks;
}
@@ -0,0 +1,2 @@
1
+ import type { DomainExtractResult } from './types.js';
2
+ export declare function allrecipesExtractor(html: string, url: string): Promise<DomainExtractResult | null>;
@@ -0,0 +1,120 @@
1
+ import { tryParseJson } from './shared.js';
2
+ // ---------------------------------------------------------------------------
3
+ // 15. Allrecipes (Recipe Sites) extractor
4
+ // ---------------------------------------------------------------------------
5
/**
 * Extract a recipe from a page, preferring Schema.org `Recipe` JSON-LD and
 * falling back to HTML selectors when no JSON-LD recipe is present.
 *
 * @param html Raw page HTML.
 * @param url  Page URL; copied into the structured result.
 * @returns `{ domain, type: 'recipe', structured, cleanContent }`, or `null`
 *          when no title can be found or anything throws during extraction.
 */
export async function allrecipesExtractor(html, url) {
    try {
        const { load } = await import('cheerio');
        const $ = load(html);
        // JSON-LD values may be a single node or a list; normalise to a list.
        const asArray = (value) => (value == null ? [] : Array.isArray(value) ? value : [value]);
        const textOf = (value) => (typeof value === 'string' ? value.trim() : '');
        // `@type` may be a string or an array of strings.
        const isRecipe = (node) => asArray(node?.['@type']).includes('Recipe');
        // Try Schema.org Recipe JSON-LD first
        let recipe = null;
        $('script[type="application/ld+json"]').each((_, el) => {
            if (recipe)
                return;
            const parsed = tryParseJson($(el).html() || '');
            for (const item of asArray(parsed)) {
                if (isRecipe(item)) {
                    recipe = item;
                    break;
                }
                // Sometimes the recipe is nested in @graph
                const nested = asArray(item?.['@graph']).find(isRecipe);
                if (nested) {
                    recipe = nested;
                    break;
                }
            }
        });
        // Flatten instruction nodes: plain strings, objects carrying `text`,
        // and sections carrying `itemListElement` (recursively).
        const collectSteps = (node, out) => {
            if (typeof node === 'string') {
                const text = node.trim();
                if (text)
                    out.push(text);
                return;
            }
            if (!node || typeof node !== 'object')
                return;
            const text = textOf(node.text);
            if (text) {
                out.push(text);
                return;
            }
            for (const child of asArray(node.itemListElement)) {
                collectSteps(child, out);
            }
        };
        // Render an ISO 8601 duration (PT30M, PT1H30M) as "1h 30m".
        const parseDuration = (d) => {
            if (typeof d !== 'string' || !d)
                return '';
            const h = d.match(/(\d+)H/)?.[1];
            const m = d.match(/(\d+)M/)?.[1];
            return [h ? `${h}h` : '', m ? `${m}m` : ''].filter(Boolean).join(' ');
        };
        let title;
        let ingredients = [];
        const instructions = [];
        let prepTime = '';
        let cookTime = '';
        let totalTime = '';
        let servings = '';
        let rating = '';
        let reviewCount = '';
        let description = '';
        if (recipe) {
            title = recipe.name || '';
            description = recipe.description || '';
            ingredients = asArray(recipe.recipeIngredient).map(textOf).filter(Boolean);
            // asArray keeps a single-string `recipeInstructions` as one step
            // instead of letting it be iterated character by character.
            for (const step of asArray(recipe.recipeInstructions)) {
                collectSteps(step, instructions);
            }
            prepTime = parseDuration(recipe.prepTime || '');
            cookTime = parseDuration(recipe.cookTime || '');
            totalTime = parseDuration(recipe.totalTime || '');
            servings = String(recipe.recipeYield || '');
            rating = recipe.aggregateRating?.ratingValue ? String(recipe.aggregateRating.ratingValue) : '';
            reviewCount = recipe.aggregateRating?.reviewCount ? String(recipe.aggregateRating.reviewCount) : '';
        }
        else {
            // HTML fallback
            title = $('h1').first().text().trim() ||
                $('meta[property="og:title"]').attr('content') || '';
            description = $('meta[property="og:description"]').attr('content') || '';
            $('[class*="ingredient"]').each((_, el) => {
                const text = $(el).text().trim();
                // Long matches are skipped (under 200 chars only).
                if (text && text.length < 200)
                    ingredients.push(text);
            });
            $('[class*="instruction"] li, [class*="step"] li').each((_, el) => {
                const text = $(el).text().trim();
                if (text)
                    instructions.push(text);
            });
        }
        if (!title)
            return null;
        const structured = {
            title, description, ingredients, instructions,
            prepTime, cookTime, totalTime, servings, rating, reviewCount, url,
        };
        const timeParts = [
            prepTime ? `Prep: ${prepTime}` : '',
            cookTime ? `Cook: ${cookTime}` : '',
            totalTime ? `Total: ${totalTime}` : '',
        ].filter(Boolean).join(' | ');
        const metaLine = [
            timeParts,
            servings ? `Servings: ${servings}` : '',
            rating ? `Rating: ${rating}${reviewCount ? ` (${reviewCount} reviews)` : ''}` : '',
        ].filter(Boolean).join(' | ');
        const ingredientsMd = ingredients.length
            ? `## Ingredients\n\n${ingredients.map(i => `- ${i}`).join('\n')}`
            : '';
        const instructionsMd = instructions.length
            ? `## Instructions\n\n${instructions.map((s, i) => `${i + 1}. ${s}`).join('\n')}`
            : '';
        const cleanContent = `# 🍽️ ${title}\n\n${metaLine ? `*${metaLine}*\n\n` : ''}${description ? description + '\n\n' : ''}${ingredientsMd}\n\n${instructionsMd}`.trim();
        return { domain: 'allrecipes.com', type: 'recipe', structured, cleanContent };
    }
    catch {
        // Best-effort extractor: any failure means "no structured result".
        return null;
    }
}
@@ -0,0 +1,2 @@
1
+ import type { DomainExtractResult } from './types.js';
2
+ export declare function amazonExtractor(html: string, url: string): Promise<DomainExtractResult | null>;
@@ -0,0 +1,78 @@
1
+ import { tryParseJson } from './shared.js';
2
+ // ---------------------------------------------------------------------------
3
+ // 12. Amazon Products extractor
4
+ // ---------------------------------------------------------------------------
5
/**
 * Extract product details from an Amazon product page.
 *
 * Sources are tried in order: Schema.org `Product` JSON-LD, Amazon-specific
 * HTML selectors, then Open Graph meta tags.
 *
 * @param html Raw page HTML.
 * @param url  Page URL; the ASIN is read from its `/dp/<ASIN>` segment.
 * @returns `{ domain, type: 'product', structured, cleanContent }`, or `null`
 *          when no title can be found or anything throws during extraction.
 */
export async function amazonExtractor(html, url) {
    try {
        const { load } = await import('cheerio');
        const $ = load(html);
        // JSON-LD values may be a single node or a list; normalise to a list.
        const asArray = (value) => (value == null ? [] : Array.isArray(value) ? value : [value]);
        // `@type` may be a string or an array of strings.
        const isProduct = (node) => asArray(node?.['@type']).includes('Product');
        // Extract from JSON-LD first (top-level object, array, or @graph member)
        let jsonLdData = null;
        $('script[type="application/ld+json"]').each((_, el) => {
            if (jsonLdData)
                return;
            const parsed = tryParseJson($(el).html() || '');
            for (const item of asArray(parsed)) {
                const match = isProduct(item) ? item : asArray(item?.['@graph']).find(isProduct);
                if (match) {
                    jsonLdData = match;
                    break;
                }
            }
        });
        // Meta tag fallbacks
        const ogTitle = $('meta[property="og:title"]').attr('content') || '';
        const ogDescription = $('meta[property="og:description"]').attr('content') || '';
        const ogImage = $('meta[property="og:image"]').attr('content') || '';
        // HTML selectors
        const title = jsonLdData?.name ||
            $('#productTitle').text().trim() ||
            $('#title').text().trim() ||
            ogTitle;
        if (!title)
            return null;
        const priceWhole = $('#priceblock_ourprice').text().trim() ||
            $('.a-price .a-offscreen').first().text().trim() ||
            $('[data-asin-price]').first().attr('data-asin-price') || '';
        const rating = jsonLdData?.aggregateRating?.ratingValue ||
            $('#acrPopover .a-size-base.a-color-base').first().text().trim() ||
            $('span[data-hook="rating-out-of-text"]').text().trim() || '';
        const reviewCount = jsonLdData?.aggregateRating?.reviewCount ||
            $('#acrCustomerReviewText').text().replace(/[^0-9,]/g, '').trim() || '';
        // `offers` may be a single Offer or a list; availability is a schema.org
        // URL that can use either the http or the https scheme.
        const offer = asArray(jsonLdData?.offers)[0];
        const ldAvailability = typeof offer?.availability === 'string'
            ? offer.availability.replace(/^https?:\/\/schema\.org\//, '')
            : '';
        const availability = ldAvailability ||
            $('#availability span').first().text().trim() || '';
        // Coerced to a string so the substring() call below cannot throw on a
        // non-string JSON-LD description.
        const description = String(jsonLdData?.description ||
            $('#feature-bullets .a-list-item').map((_, el) => $(el).text().trim()).get().join('\n') ||
            $('#productDescription p').text().trim() ||
            ogDescription);
        const features = [];
        $('#feature-bullets li').each((_, el) => {
            const text = $(el).text().trim();
            if (text && !text.includes('Make sure this fits'))
                features.push(text);
        });
        // ASIN from URL
        const asinMatch = url.match(/\/dp\/([A-Z0-9]{10})/i);
        const asin = asinMatch?.[1] || '';
        const structured = {
            title,
            price: priceWhole,
            rating,
            reviewCount,
            availability,
            description,
            features,
            asin,
            image: ogImage,
            url,
        };
        const ratingLine = rating ? `\n**Rating:** ${rating}${reviewCount ? ` (${reviewCount} reviews)` : ''}` : '';
        const priceLine = priceWhole ? `\n**Price:** ${priceWhole}` : '';
        const availLine = availability ? `\n**Availability:** ${availability}` : '';
        const featuresSection = features.length
            ? `\n\n## Features\n\n${features.map(f => `- ${f}`).join('\n')}`
            : '';
        // The description is capped at 1000 characters in the markdown output only.
        const descSection = description ? `\n\n## Description\n\n${description.substring(0, 1000)}` : '';
        const cleanContent = `# 🛒 ${title}${priceLine}${ratingLine}${availLine}${descSection}${featuresSection}`;
        return { domain: 'amazon.com', type: 'product', structured, cleanContent };
    }
    catch {
        // Best-effort extractor: any failure means "no structured result".
        return null;
    }
}
@@ -0,0 +1,2 @@
1
+ import type { DomainExtractResult } from './types.js';
2
+ export declare function arxivExtractor(_html: string, url: string): Promise<DomainExtractResult | null>;
@@ -0,0 +1,137 @@
1
+ import { simpleFetch } from '../../core/fetcher.js';
2
+ import { stripHtml } from './shared.js';
3
+ // ---------------------------------------------------------------------------
4
+ // 7. ArXiv extractor (ArXiv API)
5
+ // ---------------------------------------------------------------------------
6
/**
 * Extract arXiv content through the arXiv export API instead of the page HTML.
 *
 * Two URL shapes are handled:
 *  - `/search...?query=...` returns a markdown table of up to 10 matching papers;
 *  - `/abs/<id>`, `/pdf/<id>`, `/html/<id>` returns metadata and abstract of one paper.
 *
 * @param _html Unused; all data comes from the API.
 * @param url   The arXiv URL being processed.
 * @returns A `search` or `paper` result, or `null` when the URL is malformed or
 *          unsupported, the API returns nothing usable, or the request fails.
 */
export async function arxivExtractor(_html, url) {
    let urlObj;
    try {
        urlObj = new URL(url);
    }
    catch {
        // A malformed URL yields "no result" rather than a rejected promise.
        return null;
    }
    const path = urlObj.pathname;
    // --- small regex-based XML helpers (sufficient for these known fields) ---
    const tagPattern = (tag, flags) => new RegExp(`<${tag}[^>]*>([\\s\\S]*?)</${tag}>`, flags);
    const firstTag = (source, tag) => {
        const match = source.match(tagPattern(tag));
        return match ? stripHtml(match[1]).trim() : '';
    };
    const allTags = (source, tag) => [...source.matchAll(tagPattern(tag, 'g'))]
        .map(m => stripHtml(m[1]).trim())
        .filter(Boolean);
    // The pattern matches any tag whose name contains "category", so a term
    // repeated across such tags would appear twice; a Set removes duplicates
    // while keeping first-seen order.
    const categoryTerms = (source) => [
        ...new Set([...source.matchAll(/category[^>]*term="([^"]+)"/g)].map(m => m[1])),
    ];
    // Collapse embedded line breaks so a value fits on one heading or table row.
    const oneLine = (text) => text.replace(/\s+/g, ' ').trim();
    // --- Search page: /search/?query=... or /search/?searchtype=all&query=... ---
    if (path.startsWith('/search')) {
        const rawQuery = urlObj.searchParams.get('query') || '';
        if (!rawQuery)
            return null;
        try {
            const searchQuery = encodeURIComponent(`all:${rawQuery}`);
            const apiUrl = `https://export.arxiv.org/api/query?search_query=${searchQuery}&max_results=10&sortBy=relevance`;
            const result = await simpleFetch(apiUrl, 'WebPeel/0.21', 20000, { Accept: 'application/xml' });
            if (!result?.html)
                return null;
            const xml = result.html;
            // Parse total results count from opensearch:totalResults
            const totalMatch = xml.match(/<opensearch:totalResults[^>]*>(\d+)<\/opensearch:totalResults>/);
            const total = totalMatch ? parseInt(totalMatch[1], 10) : 0;
            // Parse all entries
            const entries = [...xml.matchAll(/<entry[\s\S]*?<\/entry>/g)].map(m => m[0]);
            const papers = entries.map(entryXml => {
                // Extract arXiv ID from the <id> tag
                const idMatch2 = firstTag(entryXml, 'id').match(/abs\/(\d{4}\.\d{4,5}(?:v\d+)?)/);
                return {
                    title: oneLine(firstTag(entryXml, 'title')),
                    published: firstTag(entryXml, 'published').split('T')[0],
                    authors: allTags(entryXml, 'name').map(oneLine),
                    summary: firstTag(entryXml, 'summary'),
                    paperId: idMatch2 ? idMatch2[1] : '',
                    categories: categoryTerms(entryXml),
                };
            }).filter(p => p.title);
            if (papers.length === 0)
                return null;
            const rows = papers.map((p, i) => {
                const authorLine = p.authors.length === 0 ? '—'
                    : p.authors.length === 1 ? p.authors[0]
                        : `${p.authors[0]} et al.`;
                // A literal pipe in the title would split the table cell.
                const safeTitle = p.title.replace(/\|/g, '\\|');
                // The PDF link sits inside the Paper cell so the row keeps exactly
                // four cells; entries without a parsed ID are shown unlinked.
                const paperCell = p.paperId
                    ? `[${safeTitle}](https://arxiv.org/abs/${p.paperId}) [[PDF](https://arxiv.org/pdf/${p.paperId})]`
                    : safeTitle;
                return `| ${i + 1} | ${paperCell} | ${p.published || '?'} | ${authorLine} |`;
            }).join('\n');
            const cleanContent = `# 🔍 arXiv Search — "${rawQuery}"\n\n| # | Paper | Published | Authors |\n|---|-------|-----------|--------|\n${rows}\n\n*Source: arXiv API · Total results: ${total.toLocaleString()}*`;
            return {
                domain: 'arxiv.org',
                type: 'search',
                structured: { query: rawQuery, total, papers },
                cleanContent,
            };
        }
        catch (e) {
            if (process.env.DEBUG)
                console.debug('[webpeel]', 'ArXiv search failed:', e instanceof Error ? e.message : e);
            return null;
        }
    }
    // Extract paper ID from URL patterns:
    // /abs/2501.12948, /pdf/2501.12948, /abs/2501.12948v2
    const idMatch = path.match(/\/(abs|pdf|html)\/(\d{4}\.\d{4,5}(?:v\d+)?)/);
    if (!idMatch)
        return null;
    const paperId = idMatch[2];
    try {
        // Use ArXiv API
        const apiUrl = `https://export.arxiv.org/api/query?id_list=${paperId}`;
        const result = await simpleFetch(apiUrl, 'WebPeel/0.17.1', 15000, { Accept: 'application/xml' });
        if (!result?.html)
            return null;
        const xml = result.html;
        // ArXiv Atom feed: <feed><title>query URL</title> ... <entry><title>Paper Title</title>...
        // We must grab the entry title, not the feed title, so fields are read from
        // the <entry> first and from the whole document only as a fallback.
        const entryMatch = xml.match(/<entry[\s\S]*?<\/entry>/);
        const entryXml = entryMatch ? entryMatch[0] : xml;
        const title = oneLine(firstTag(entryXml, 'title') || firstTag(xml, 'title'));
        const summary = firstTag(entryXml, 'summary') || firstTag(xml, 'summary');
        const published = firstTag(entryXml, 'published') || firstTag(xml, 'published');
        const updated = firstTag(entryXml, 'updated') || firstTag(xml, 'updated');
        const authors = allTags(entryXml, 'name').map(oneLine);
        // Extract categories
        const categories = categoryTerms(xml);
        // Extract DOI and journal ref if available
        const doi = firstTag(xml, 'arxiv:doi');
        const journalRef = firstTag(xml, 'arxiv:journal_ref');
        if (!title)
            return null;
        const structured = {
            title,
            authors,
            abstract: summary,
            published: published || undefined,
            updated: updated || undefined,
            categories,
            doi: doi || undefined,
            journalRef: journalRef || undefined,
            paperId,
            pdfUrl: `https://arxiv.org/pdf/${paperId}`,
            absUrl: `https://arxiv.org/abs/${paperId}`,
        };
        // Long author lists are truncated to the first five in the markdown only.
        const authorLine = authors.length <= 5
            ? authors.join(', ')
            : `${authors.slice(0, 5).join(', ')} et al. (${authors.length} authors)`;
        const cleanContent = `# 📄 arXiv: ${title} (${paperId})\n\n**Authors:** ${authorLine}\n**Submitted:** ${published?.split('T')[0] || 'N/A'}${categories.length ? `\n**Categories:** ${categories.join(', ')}` : ''}${doi ? `\n**DOI:** ${doi}` : ''}${journalRef ? `\n**Journal:** ${journalRef}` : ''}\n\n## Abstract\n\n${summary}\n\n**PDF:** [Download](${structured.pdfUrl}) | **HTML:** [View](https://arxiv.org/html/${paperId})`;
        return { domain: 'arxiv.org', type: 'paper', structured, cleanContent };
    }
    catch (e) {
        if (process.env.DEBUG)
            console.debug('[webpeel]', 'ArXiv API failed:', e instanceof Error ? e.message : e);
        return null;
    }
}
@@ -0,0 +1,2 @@
1
+ import type { DomainExtractResult } from './types.js';
2
+ export declare function bestBuyExtractor(_html: string, url: string): Promise<DomainExtractResult | null>;
@@ -0,0 +1,78 @@
1
+ import { fetchJson } from './shared.js';
2
+ // ---------------------------------------------------------------------------
3
+ // 10. Best Buy extractor (Best Buy Products API)
4
+ // ---------------------------------------------------------------------------
5
/**
 * Extract product details for a Best Buy product URL via the Best Buy
 * Products API.
 *
 * @param _html Unused; all data comes from the API.
 * @param url   Product URL; the SKU is the 7+ digit number before `.p`
 *              (e.g. `/site/.../6587822.p`).
 * @returns `{ domain, type: 'product', structured, cleanContent }`, or `null`
 *          when `BESTBUY_API_KEY` is unset, the URL has no SKU, the API reports
 *          an error, or the request fails.
 */
export async function bestBuyExtractor(_html, url) {
    const apiKey = process.env.BESTBUY_API_KEY;
    if (!apiKey)
        return null; // No API key, skip
    // Extract SKU from URL: /site/.../6587822.p → 6587822
    const skuMatch = url.match(/\/(\d{7,})\.p/);
    if (!skuMatch)
        return null;
    const sku = skuMatch[1];
    // The key is URL-encoded so reserved characters cannot corrupt the query string.
    const apiUrl = `https://api.bestbuy.com/v1/products/${sku}.json?apiKey=${encodeURIComponent(apiKey)}&show=sku,name,salePrice,regularPrice,onSale,shortDescription,longDescription,image,largeFrontImage,url,customerReviewAverage,customerReviewCount,categoryPath,manufacturer,modelNumber,upc,freeShipping,inStoreAvailability,onlineAvailability,condition,features.feature`;
    try {
        const data = await fetchJson(apiUrl);
        if (!data || data.error)
            return null;
        // Accept `features` either as a list of `{ feature }` objects or as an
        // object holding a `feature` list/string.
        // NOTE(review): the array-of-objects form is what the Products API is
        // understood to return — confirm against a live response.
        const rawFeatures = Array.isArray(data.features)
            ? data.features
            : (data.features?.feature ?? []);
        const featureList = (Array.isArray(rawFeatures) ? rawFeatures : [rawFeatures])
            .map((f) => (typeof f === 'string' ? f : f?.feature))
            .filter((f) => typeof f === 'string' && f.trim() !== '');
        // Build clean markdown
        const lines = [];
        lines.push(`# ${data.name}`);
        lines.push('');
        if (data.onSale) {
            lines.push(`**Sale Price:** $${data.salePrice} (was $${data.regularPrice})`);
        }
        else {
            lines.push(`**Price:** $${data.regularPrice}`);
        }
        lines.push(`**SKU:** ${data.sku}`);
        if (data.manufacturer)
            lines.push(`**Brand:** ${data.manufacturer}`);
        if (data.modelNumber)
            lines.push(`**Model:** ${data.modelNumber}`);
        if (data.customerReviewAverage) {
            lines.push(`**Rating:** ${data.customerReviewAverage}/5 (${data.customerReviewCount} reviews)`);
        }
        lines.push(`**Availability:** ${data.onlineAvailability ? 'In Stock Online' : 'Out of Stock Online'} | ${data.inStoreAvailability ? 'Available In Store' : 'Not Available In Store'}`);
        if (data.freeShipping)
            lines.push('**Free Shipping:** Yes');
        lines.push('');
        if (data.shortDescription)
            lines.push(data.shortDescription);
        lines.push('');
        if (data.longDescription)
            lines.push(data.longDescription);
        if (featureList.length > 0) {
            lines.push('');
            lines.push('## Features');
            for (const feature of featureList) {
                lines.push(`- ${feature}`);
            }
        }
        const structured = {
            sku: data.sku,
            name: data.name,
            price: data.salePrice || data.regularPrice,
            regularPrice: data.regularPrice,
            onSale: data.onSale,
            brand: data.manufacturer,
            model: data.modelNumber,
            upc: data.upc,
            rating: data.customerReviewAverage,
            reviewCount: data.customerReviewCount,
            image: data.largeFrontImage || data.image,
            url: data.url,
            inStock: data.onlineAvailability,
            freeShipping: data.freeShipping,
            condition: data.condition,
            category: data.categoryPath?.map((c) => c.name).join(' > '),
        };
        return { domain: 'bestbuy.com', type: 'product', structured, cleanContent: lines.join('\n') };
    }
    catch (e) {
        if (process.env.DEBUG)
            console.debug('[webpeel]', 'Best Buy API failed:', e instanceof Error ? e.message : e);
        return null;
    }
}
@@ -0,0 +1,2 @@
1
+ import type { DomainExtractResult } from './types.js';
2
+ export declare function carsComExtractor(html: string, url: string): Promise<DomainExtractResult | null>;