@iflow-mcp/jakeliume-webpeel 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (547)
  1. package/LICENSE +15 -0
  2. package/README.md +313 -0
  3. package/dist/cache.d.ts +30 -0
  4. package/dist/cache.js +139 -0
  5. package/dist/cli/commands/auth.d.ts +5 -0
  6. package/dist/cli/commands/auth.js +411 -0
  7. package/dist/cli/commands/doctor.d.ts +37 -0
  8. package/dist/cli/commands/doctor.js +371 -0
  9. package/dist/cli/commands/fetch.d.ts +6 -0
  10. package/dist/cli/commands/fetch.js +1345 -0
  11. package/dist/cli/commands/guide.d.ts +2 -0
  12. package/dist/cli/commands/guide.js +183 -0
  13. package/dist/cli/commands/interact.d.ts +5 -0
  14. package/dist/cli/commands/interact.js +840 -0
  15. package/dist/cli/commands/jobs.d.ts +5 -0
  16. package/dist/cli/commands/jobs.js +997 -0
  17. package/dist/cli/commands/monitor.d.ts +12 -0
  18. package/dist/cli/commands/monitor.js +197 -0
  19. package/dist/cli/commands/observe.d.ts +12 -0
  20. package/dist/cli/commands/observe.js +158 -0
  21. package/dist/cli/commands/screenshot.d.ts +5 -0
  22. package/dist/cli/commands/screenshot.js +282 -0
  23. package/dist/cli/commands/search.d.ts +5 -0
  24. package/dist/cli/commands/search.js +1021 -0
  25. package/dist/cli/commands/setup.d.ts +13 -0
  26. package/dist/cli/commands/setup.js +244 -0
  27. package/dist/cli/commands/skill.d.ts +15 -0
  28. package/dist/cli/commands/skill.js +195 -0
  29. package/dist/cli/utils.d.ts +84 -0
  30. package/dist/cli/utils.js +806 -0
  31. package/dist/cli-auth.d.ts +75 -0
  32. package/dist/cli-auth.js +369 -0
  33. package/dist/cli.d.ts +17 -0
  34. package/dist/cli.js +99 -0
  35. package/dist/core/actions.d.ts +69 -0
  36. package/dist/core/actions.js +495 -0
  37. package/dist/core/agent.d.ts +98 -0
  38. package/dist/core/agent.js +558 -0
  39. package/dist/core/answer.d.ts +42 -0
  40. package/dist/core/answer.js +395 -0
  41. package/dist/core/application-tracker.d.ts +84 -0
  42. package/dist/core/application-tracker.js +184 -0
  43. package/dist/core/apply.d.ts +162 -0
  44. package/dist/core/apply.js +816 -0
  45. package/dist/core/auth-detection.d.ts +35 -0
  46. package/dist/core/auth-detection.js +358 -0
  47. package/dist/core/auto-extract.d.ts +82 -0
  48. package/dist/core/auto-extract.js +604 -0
  49. package/dist/core/auto-interact.d.ts +23 -0
  50. package/dist/core/auto-interact.js +246 -0
  51. package/dist/core/bm25-filter.d.ts +66 -0
  52. package/dist/core/bm25-filter.js +288 -0
  53. package/dist/core/branding.d.ts +54 -0
  54. package/dist/core/branding.js +234 -0
  55. package/dist/core/browser-fetch.d.ts +323 -0
  56. package/dist/core/browser-fetch.js +1600 -0
  57. package/dist/core/browser-pool.d.ts +91 -0
  58. package/dist/core/browser-pool.js +550 -0
  59. package/dist/core/budget.d.ts +42 -0
  60. package/dist/core/budget.js +324 -0
  61. package/dist/core/business-intel.d.ts +47 -0
  62. package/dist/core/business-intel.js +279 -0
  63. package/dist/core/cache.d.ts +13 -0
  64. package/dist/core/cache.js +121 -0
  65. package/dist/core/cf-worker-proxy.d.ts +32 -0
  66. package/dist/core/cf-worker-proxy.js +87 -0
  67. package/dist/core/challenge-detection.d.ts +26 -0
  68. package/dist/core/challenge-detection.js +468 -0
  69. package/dist/core/change-tracking.d.ts +75 -0
  70. package/dist/core/change-tracking.js +276 -0
  71. package/dist/core/chunker.d.ts +46 -0
  72. package/dist/core/chunker.js +249 -0
  73. package/dist/core/chunking.d.ts +42 -0
  74. package/dist/core/chunking.js +181 -0
  75. package/dist/core/circuit-breaker.d.ts +44 -0
  76. package/dist/core/circuit-breaker.js +85 -0
  77. package/dist/core/content-pruner.d.ts +47 -0
  78. package/dist/core/content-pruner.js +425 -0
  79. package/dist/core/cookie-cache.d.ts +60 -0
  80. package/dist/core/cookie-cache.js +163 -0
  81. package/dist/core/crawl-checkpoint.d.ts +54 -0
  82. package/dist/core/crawl-checkpoint.js +104 -0
  83. package/dist/core/crawler.d.ts +84 -0
  84. package/dist/core/crawler.js +349 -0
  85. package/dist/core/cross-verify.d.ts +27 -0
  86. package/dist/core/cross-verify.js +93 -0
  87. package/dist/core/deep-fetch.d.ts +74 -0
  88. package/dist/core/deep-fetch.js +405 -0
  89. package/dist/core/deep-research.d.ts +141 -0
  90. package/dist/core/deep-research.js +972 -0
  91. package/dist/core/design-analysis.d.ts +70 -0
  92. package/dist/core/design-analysis.js +490 -0
  93. package/dist/core/design-compare.d.ts +38 -0
  94. package/dist/core/design-compare.js +264 -0
  95. package/dist/core/diff.d.ts +61 -0
  96. package/dist/core/diff.js +289 -0
  97. package/dist/core/dns-cache.d.ts +20 -0
  98. package/dist/core/dns-cache.js +198 -0
  99. package/dist/core/documents.d.ts +23 -0
  100. package/dist/core/documents.js +123 -0
  101. package/dist/core/domain-memory.d.ts +66 -0
  102. package/dist/core/domain-memory.js +163 -0
  103. package/dist/core/domain-verify.d.ts +40 -0
  104. package/dist/core/domain-verify.js +379 -0
  105. package/dist/core/engine-ranker.d.ts +112 -0
  106. package/dist/core/engine-ranker.js +395 -0
  107. package/dist/core/extract-inline.d.ts +38 -0
  108. package/dist/core/extract-inline.js +215 -0
  109. package/dist/core/extract-listings.d.ts +38 -0
  110. package/dist/core/extract-listings.js +461 -0
  111. package/dist/core/extract.d.ts +9 -0
  112. package/dist/core/extract.js +139 -0
  113. package/dist/core/fetch-cache.d.ts +57 -0
  114. package/dist/core/fetch-cache.js +95 -0
  115. package/dist/core/fetcher.d.ts +13 -0
  116. package/dist/core/fetcher.js +12 -0
  117. package/dist/core/google-cache.d.ts +29 -0
  118. package/dist/core/google-cache.js +180 -0
  119. package/dist/core/google-serp-parser.d.ts +82 -0
  120. package/dist/core/google-serp-parser.js +287 -0
  121. package/dist/core/hotel-search.d.ts +122 -0
  122. package/dist/core/hotel-search.js +382 -0
  123. package/dist/core/http-fetch.d.ts +72 -0
  124. package/dist/core/http-fetch.js +820 -0
  125. package/dist/core/human.d.ts +175 -0
  126. package/dist/core/human.js +680 -0
  127. package/dist/core/image-caption.d.ts +44 -0
  128. package/dist/core/image-caption.js +271 -0
  129. package/dist/core/jobs.d.ts +75 -0
  130. package/dist/core/jobs.js +634 -0
  131. package/dist/core/json-ld.d.ts +15 -0
  132. package/dist/core/json-ld.js +617 -0
  133. package/dist/core/language-detect.d.ts +18 -0
  134. package/dist/core/language-detect.js +135 -0
  135. package/dist/core/links.d.ts +10 -0
  136. package/dist/core/links.js +44 -0
  137. package/dist/core/llm-extract.d.ts +71 -0
  138. package/dist/core/llm-extract.js +507 -0
  139. package/dist/core/llm-provider.d.ts +100 -0
  140. package/dist/core/llm-provider.js +702 -0
  141. package/dist/core/local-search.d.ts +60 -0
  142. package/dist/core/local-search.js +308 -0
  143. package/dist/core/logger.d.ts +28 -0
  144. package/dist/core/logger.js +104 -0
  145. package/dist/core/map.d.ts +33 -0
  146. package/dist/core/map.js +127 -0
  147. package/dist/core/markdown.d.ts +92 -0
  148. package/dist/core/markdown.js +809 -0
  149. package/dist/core/metadata.d.ts +34 -0
  150. package/dist/core/metadata.js +422 -0
  151. package/dist/core/observe.d.ts +113 -0
  152. package/dist/core/observe.js +395 -0
  153. package/dist/core/ocr.d.ts +12 -0
  154. package/dist/core/ocr.js +33 -0
  155. package/dist/core/paginate.d.ts +31 -0
  156. package/dist/core/paginate.js +106 -0
  157. package/dist/core/pdf.d.ts +8 -0
  158. package/dist/core/pdf.js +25 -0
  159. package/dist/core/peel-tls.d.ts +25 -0
  160. package/dist/core/peel-tls.js +220 -0
  161. package/dist/core/pipeline.d.ts +132 -0
  162. package/dist/core/pipeline.js +1666 -0
  163. package/dist/core/profiles.d.ts +61 -0
  164. package/dist/core/profiles.js +350 -0
  165. package/dist/core/prompt-guard.d.ts +30 -0
  166. package/dist/core/prompt-guard.js +119 -0
  167. package/dist/core/proxy-config.d.ts +90 -0
  168. package/dist/core/proxy-config.js +172 -0
  169. package/dist/core/quick-answer.d.ts +53 -0
  170. package/dist/core/quick-answer.js +833 -0
  171. package/dist/core/rate-governor.d.ts +80 -0
  172. package/dist/core/rate-governor.js +238 -0
  173. package/dist/core/readability.d.ts +57 -0
  174. package/dist/core/readability.js +533 -0
  175. package/dist/core/research.d.ts +66 -0
  176. package/dist/core/research.js +270 -0
  177. package/dist/core/retry.d.ts +60 -0
  178. package/dist/core/retry.js +119 -0
  179. package/dist/core/safe-browsing.d.ts +30 -0
  180. package/dist/core/safe-browsing.js +206 -0
  181. package/dist/core/schema-extraction.d.ts +66 -0
  182. package/dist/core/schema-extraction.js +352 -0
  183. package/dist/core/schema-postprocess.d.ts +32 -0
  184. package/dist/core/schema-postprocess.js +469 -0
  185. package/dist/core/schema-templates.d.ts +19 -0
  186. package/dist/core/schema-templates.js +143 -0
  187. package/dist/core/screenshot.d.ts +224 -0
  188. package/dist/core/screenshot.js +207 -0
  189. package/dist/core/search-engines.d.ts +25 -0
  190. package/dist/core/search-engines.js +182 -0
  191. package/dist/core/search-provider.d.ts +243 -0
  192. package/dist/core/search-provider.js +1629 -0
  193. package/dist/core/searxng-provider.d.ts +35 -0
  194. package/dist/core/searxng-provider.js +105 -0
  195. package/dist/core/selective-evidence.d.ts +151 -0
  196. package/dist/core/selective-evidence.js +389 -0
  197. package/dist/core/site-search.d.ts +44 -0
  198. package/dist/core/site-search.js +252 -0
  199. package/dist/core/sitemap.d.ts +23 -0
  200. package/dist/core/sitemap.js +105 -0
  201. package/dist/core/source-credibility.d.ts +29 -0
  202. package/dist/core/source-credibility.js +584 -0
  203. package/dist/core/source-scoring.d.ts +166 -0
  204. package/dist/core/source-scoring.js +396 -0
  205. package/dist/core/stemmer.d.ts +38 -0
  206. package/dist/core/stemmer.js +509 -0
  207. package/dist/core/strategies.d.ts +104 -0
  208. package/dist/core/strategies.js +1044 -0
  209. package/dist/core/strategy-hooks.d.ts +145 -0
  210. package/dist/core/strategy-hooks.js +74 -0
  211. package/dist/core/structured-extract.d.ts +43 -0
  212. package/dist/core/structured-extract.js +550 -0
  213. package/dist/core/summarize.d.ts +17 -0
  214. package/dist/core/summarize.js +78 -0
  215. package/dist/core/synonyms.d.ts +42 -0
  216. package/dist/core/synonyms.js +184 -0
  217. package/dist/core/system-monitor.d.ts +61 -0
  218. package/dist/core/system-monitor.js +133 -0
  219. package/dist/core/table-format.d.ts +30 -0
  220. package/dist/core/table-format.js +146 -0
  221. package/dist/core/threat-feeds.d.ts +23 -0
  222. package/dist/core/threat-feeds.js +104 -0
  223. package/dist/core/timing.d.ts +21 -0
  224. package/dist/core/timing.js +33 -0
  225. package/dist/core/transcript-export.d.ts +47 -0
  226. package/dist/core/transcript-export.js +107 -0
  227. package/dist/core/user-agents.d.ts +82 -0
  228. package/dist/core/user-agents.js +239 -0
  229. package/dist/core/vertical-search.d.ts +54 -0
  230. package/dist/core/vertical-search.js +158 -0
  231. package/dist/core/watch-manager.d.ts +175 -0
  232. package/dist/core/watch-manager.js +416 -0
  233. package/dist/core/watch.d.ts +101 -0
  234. package/dist/core/watch.js +389 -0
  235. package/dist/core/youtube.d.ts +130 -0
  236. package/dist/core/youtube.js +1175 -0
  237. package/dist/ee/challenge-re-export.d.ts +1 -0
  238. package/dist/ee/challenge-re-export.js +1 -0
  239. package/dist/ee/challenge-solver.d.ts +72 -0
  240. package/dist/ee/challenge-solver.js +720 -0
  241. package/dist/ee/domain-extractors.d.ts +8 -0
  242. package/dist/ee/domain-extractors.js +8 -0
  243. package/dist/ee/domain-intel.d.ts +16 -0
  244. package/dist/ee/domain-intel.js +133 -0
  245. package/dist/ee/extractors/allrecipes.d.ts +2 -0
  246. package/dist/ee/extractors/allrecipes.js +120 -0
  247. package/dist/ee/extractors/amazon.d.ts +2 -0
  248. package/dist/ee/extractors/amazon.js +78 -0
  249. package/dist/ee/extractors/arxiv.d.ts +2 -0
  250. package/dist/ee/extractors/arxiv.js +137 -0
  251. package/dist/ee/extractors/bestbuy.d.ts +2 -0
  252. package/dist/ee/extractors/bestbuy.js +78 -0
  253. package/dist/ee/extractors/carscom.d.ts +2 -0
  254. package/dist/ee/extractors/carscom.js +121 -0
  255. package/dist/ee/extractors/coingecko.d.ts +2 -0
  256. package/dist/ee/extractors/coingecko.js +134 -0
  257. package/dist/ee/extractors/craigslist.d.ts +2 -0
  258. package/dist/ee/extractors/craigslist.js +92 -0
  259. package/dist/ee/extractors/devto.d.ts +2 -0
  260. package/dist/ee/extractors/devto.js +135 -0
  261. package/dist/ee/extractors/ebay.d.ts +2 -0
  262. package/dist/ee/extractors/ebay.js +90 -0
  263. package/dist/ee/extractors/espn.d.ts +2 -0
  264. package/dist/ee/extractors/espn.js +260 -0
  265. package/dist/ee/extractors/etsy.d.ts +2 -0
  266. package/dist/ee/extractors/etsy.js +52 -0
  267. package/dist/ee/extractors/facebook.d.ts +2 -0
  268. package/dist/ee/extractors/facebook.js +46 -0
  269. package/dist/ee/extractors/github.d.ts +2 -0
  270. package/dist/ee/extractors/github.js +196 -0
  271. package/dist/ee/extractors/google-flights.d.ts +2 -0
  272. package/dist/ee/extractors/google-flights.js +176 -0
  273. package/dist/ee/extractors/hackernews.d.ts +2 -0
  274. package/dist/ee/extractors/hackernews.js +147 -0
  275. package/dist/ee/extractors/imdb.d.ts +2 -0
  276. package/dist/ee/extractors/imdb.js +172 -0
  277. package/dist/ee/extractors/index.d.ts +26 -0
  278. package/dist/ee/extractors/index.js +247 -0
  279. package/dist/ee/extractors/instagram.d.ts +2 -0
  280. package/dist/ee/extractors/instagram.js +102 -0
  281. package/dist/ee/extractors/kalshi.d.ts +2 -0
  282. package/dist/ee/extractors/kalshi.js +121 -0
  283. package/dist/ee/extractors/kayak-cars.d.ts +2 -0
  284. package/dist/ee/extractors/kayak-cars.js +270 -0
  285. package/dist/ee/extractors/linkedin.d.ts +2 -0
  286. package/dist/ee/extractors/linkedin.js +113 -0
  287. package/dist/ee/extractors/medium.d.ts +2 -0
  288. package/dist/ee/extractors/medium.js +130 -0
  289. package/dist/ee/extractors/news.d.ts +4 -0
  290. package/dist/ee/extractors/news.js +173 -0
  291. package/dist/ee/extractors/npm.d.ts +2 -0
  292. package/dist/ee/extractors/npm.js +86 -0
  293. package/dist/ee/extractors/pdf.d.ts +2 -0
  294. package/dist/ee/extractors/pdf.js +108 -0
  295. package/dist/ee/extractors/pinterest.d.ts +2 -0
  296. package/dist/ee/extractors/pinterest.js +34 -0
  297. package/dist/ee/extractors/polymarket.d.ts +2 -0
  298. package/dist/ee/extractors/polymarket.js +358 -0
  299. package/dist/ee/extractors/producthunt.d.ts +2 -0
  300. package/dist/ee/extractors/producthunt.js +88 -0
  301. package/dist/ee/extractors/pubmed.d.ts +2 -0
  302. package/dist/ee/extractors/pubmed.js +162 -0
  303. package/dist/ee/extractors/pypi.d.ts +2 -0
  304. package/dist/ee/extractors/pypi.js +80 -0
  305. package/dist/ee/extractors/reddit.d.ts +2 -0
  306. package/dist/ee/extractors/reddit.js +438 -0
  307. package/dist/ee/extractors/redfin.d.ts +2 -0
  308. package/dist/ee/extractors/redfin.js +156 -0
  309. package/dist/ee/extractors/semanticscholar.d.ts +2 -0
  310. package/dist/ee/extractors/semanticscholar.js +131 -0
  311. package/dist/ee/extractors/shared.d.ts +12 -0
  312. package/dist/ee/extractors/shared.js +76 -0
  313. package/dist/ee/extractors/soundcloud.d.ts +2 -0
  314. package/dist/ee/extractors/soundcloud.js +34 -0
  315. package/dist/ee/extractors/sportsbetting.d.ts +2 -0
  316. package/dist/ee/extractors/sportsbetting.js +37 -0
  317. package/dist/ee/extractors/spotify.d.ts +2 -0
  318. package/dist/ee/extractors/spotify.js +34 -0
  319. package/dist/ee/extractors/stackoverflow.d.ts +2 -0
  320. package/dist/ee/extractors/stackoverflow.js +61 -0
  321. package/dist/ee/extractors/substack.d.ts +2 -0
  322. package/dist/ee/extractors/substack.js +115 -0
  323. package/dist/ee/extractors/substackroot.d.ts +2 -0
  324. package/dist/ee/extractors/substackroot.js +46 -0
  325. package/dist/ee/extractors/tiktok.d.ts +2 -0
  326. package/dist/ee/extractors/tiktok.js +29 -0
  327. package/dist/ee/extractors/tradingview.d.ts +2 -0
  328. package/dist/ee/extractors/tradingview.js +182 -0
  329. package/dist/ee/extractors/twitch.d.ts +2 -0
  330. package/dist/ee/extractors/twitch.js +36 -0
  331. package/dist/ee/extractors/twitter.d.ts +2 -0
  332. package/dist/ee/extractors/twitter.js +327 -0
  333. package/dist/ee/extractors/types.d.ts +14 -0
  334. package/dist/ee/extractors/types.js +1 -0
  335. package/dist/ee/extractors/walmart.d.ts +2 -0
  336. package/dist/ee/extractors/walmart.js +50 -0
  337. package/dist/ee/extractors/weather.d.ts +2 -0
  338. package/dist/ee/extractors/weather.js +133 -0
  339. package/dist/ee/extractors/wikipedia.d.ts +4 -0
  340. package/dist/ee/extractors/wikipedia.js +235 -0
  341. package/dist/ee/extractors/yelp.d.ts +2 -0
  342. package/dist/ee/extractors/yelp.js +216 -0
  343. package/dist/ee/extractors/youtube.d.ts +2 -0
  344. package/dist/ee/extractors/youtube.js +189 -0
  345. package/dist/ee/extractors/zillow.d.ts +54 -0
  346. package/dist/ee/extractors/zillow.js +247 -0
  347. package/dist/ee/extractors-re-export.d.ts +1 -0
  348. package/dist/ee/extractors-re-export.js +1 -0
  349. package/dist/ee/premium-hooks.d.ts +20 -0
  350. package/dist/ee/premium-hooks.js +50 -0
  351. package/dist/ee/spa-detection.d.ts +2 -0
  352. package/dist/ee/spa-detection.js +2 -0
  353. package/dist/ee/stability.d.ts +4 -0
  354. package/dist/ee/stability.js +29 -0
  355. package/dist/ee/swr-cache.d.ts +14 -0
  356. package/dist/ee/swr-cache.js +34 -0
  357. package/dist/index.d.ts +143 -0
  358. package/dist/index.js +291 -0
  359. package/dist/integrations/index.d.ts +2 -0
  360. package/dist/integrations/index.js +2 -0
  361. package/dist/integrations/langchain.d.ts +64 -0
  362. package/dist/integrations/langchain.js +115 -0
  363. package/dist/integrations/llamaindex.d.ts +50 -0
  364. package/dist/integrations/llamaindex.js +91 -0
  365. package/dist/mcp/handlers/act.d.ts +5 -0
  366. package/dist/mcp/handlers/act.js +34 -0
  367. package/dist/mcp/handlers/definitions.d.ts +6 -0
  368. package/dist/mcp/handlers/definitions.js +395 -0
  369. package/dist/mcp/handlers/extract.d.ts +7 -0
  370. package/dist/mcp/handlers/extract.js +135 -0
  371. package/dist/mcp/handlers/fetch.d.ts +6 -0
  372. package/dist/mcp/handlers/fetch.js +98 -0
  373. package/dist/mcp/handlers/find.d.ts +5 -0
  374. package/dist/mcp/handlers/find.js +137 -0
  375. package/dist/mcp/handlers/index.d.ts +13 -0
  376. package/dist/mcp/handlers/index.js +63 -0
  377. package/dist/mcp/handlers/legacy.d.ts +25 -0
  378. package/dist/mcp/handlers/legacy.js +450 -0
  379. package/dist/mcp/handlers/meta.d.ts +6 -0
  380. package/dist/mcp/handlers/meta.js +40 -0
  381. package/dist/mcp/handlers/monitor.d.ts +5 -0
  382. package/dist/mcp/handlers/monitor.js +41 -0
  383. package/dist/mcp/handlers/observe.d.ts +8 -0
  384. package/dist/mcp/handlers/observe.js +37 -0
  385. package/dist/mcp/handlers/read.d.ts +6 -0
  386. package/dist/mcp/handlers/read.js +78 -0
  387. package/dist/mcp/handlers/see.d.ts +5 -0
  388. package/dist/mcp/handlers/see.js +75 -0
  389. package/dist/mcp/handlers/types.d.ts +29 -0
  390. package/dist/mcp/handlers/types.js +28 -0
  391. package/dist/mcp/server.d.ts +7 -0
  392. package/dist/mcp/server.js +108 -0
  393. package/dist/mcp/smart-router.d.ts +23 -0
  394. package/dist/mcp/smart-router.js +178 -0
  395. package/dist/server/app.d.ts +14 -0
  396. package/dist/server/app.js +632 -0
  397. package/dist/server/auth-store.d.ts +28 -0
  398. package/dist/server/auth-store.js +88 -0
  399. package/dist/server/bull-queues.d.ts +60 -0
  400. package/dist/server/bull-queues.js +90 -0
  401. package/dist/server/email-service.d.ts +55 -0
  402. package/dist/server/email-service.js +291 -0
  403. package/dist/server/job-queue.d.ts +100 -0
  404. package/dist/server/job-queue.js +145 -0
  405. package/dist/server/logger.d.ts +10 -0
  406. package/dist/server/logger.js +37 -0
  407. package/dist/server/middleware/audit-log.d.ts +14 -0
  408. package/dist/server/middleware/audit-log.js +73 -0
  409. package/dist/server/middleware/auth.d.ts +35 -0
  410. package/dist/server/middleware/auth.js +225 -0
  411. package/dist/server/middleware/rate-limit.d.ts +50 -0
  412. package/dist/server/middleware/rate-limit.js +270 -0
  413. package/dist/server/middleware/scope-guard.d.ts +25 -0
  414. package/dist/server/middleware/scope-guard.js +45 -0
  415. package/dist/server/middleware/url-validator.d.ts +15 -0
  416. package/dist/server/middleware/url-validator.js +201 -0
  417. package/dist/server/openapi.yaml +6418 -0
  418. package/dist/server/pg-auth-store.d.ts +146 -0
  419. package/dist/server/pg-auth-store.js +576 -0
  420. package/dist/server/pg-job-queue.d.ts +59 -0
  421. package/dist/server/pg-job-queue.js +375 -0
  422. package/dist/server/routes/activity.d.ts +6 -0
  423. package/dist/server/routes/activity.js +79 -0
  424. package/dist/server/routes/admin-active.d.ts +7 -0
  425. package/dist/server/routes/admin-active.js +120 -0
  426. package/dist/server/routes/admin-stats.d.ts +7 -0
  427. package/dist/server/routes/admin-stats.js +176 -0
  428. package/dist/server/routes/agent.d.ts +24 -0
  429. package/dist/server/routes/agent.js +480 -0
  430. package/dist/server/routes/answer.d.ts +5 -0
  431. package/dist/server/routes/answer.js +125 -0
  432. package/dist/server/routes/ask.d.ts +28 -0
  433. package/dist/server/routes/ask.js +295 -0
  434. package/dist/server/routes/batch.d.ts +6 -0
  435. package/dist/server/routes/batch.js +493 -0
  436. package/dist/server/routes/cache-warm.d.ts +25 -0
  437. package/dist/server/routes/cache-warm.js +212 -0
  438. package/dist/server/routes/cli-usage.d.ts +6 -0
  439. package/dist/server/routes/cli-usage.js +127 -0
  440. package/dist/server/routes/compat.d.ts +23 -0
  441. package/dist/server/routes/compat.js +652 -0
  442. package/dist/server/routes/crawl.d.ts +13 -0
  443. package/dist/server/routes/crawl.js +287 -0
  444. package/dist/server/routes/deep-fetch.d.ts +8 -0
  445. package/dist/server/routes/deep-fetch.js +57 -0
  446. package/dist/server/routes/deep-research.d.ts +11 -0
  447. package/dist/server/routes/deep-research.js +232 -0
  448. package/dist/server/routes/demo.d.ts +24 -0
  449. package/dist/server/routes/demo.js +517 -0
  450. package/dist/server/routes/do.d.ts +8 -0
  451. package/dist/server/routes/do.js +72 -0
  452. package/dist/server/routes/extract.d.ts +14 -0
  453. package/dist/server/routes/extract.js +325 -0
  454. package/dist/server/routes/feed.d.ts +15 -0
  455. package/dist/server/routes/feed.js +311 -0
  456. package/dist/server/routes/fetch-queue.d.ts +13 -0
  457. package/dist/server/routes/fetch-queue.js +357 -0
  458. package/dist/server/routes/fetch.d.ts +7 -0
  459. package/dist/server/routes/fetch.js +1274 -0
  460. package/dist/server/routes/go.d.ts +14 -0
  461. package/dist/server/routes/go.js +81 -0
  462. package/dist/server/routes/health.d.ts +11 -0
  463. package/dist/server/routes/health.js +141 -0
  464. package/dist/server/routes/jobs.d.ts +7 -0
  465. package/dist/server/routes/jobs.js +574 -0
  466. package/dist/server/routes/map.d.ts +11 -0
  467. package/dist/server/routes/map.js +116 -0
  468. package/dist/server/routes/mcp.d.ts +14 -0
  469. package/dist/server/routes/mcp.js +197 -0
  470. package/dist/server/routes/metrics.d.ts +37 -0
  471. package/dist/server/routes/metrics.js +149 -0
  472. package/dist/server/routes/oauth.d.ts +9 -0
  473. package/dist/server/routes/oauth.js +396 -0
  474. package/dist/server/routes/playground.d.ts +17 -0
  475. package/dist/server/routes/playground.js +283 -0
  476. package/dist/server/routes/reader.d.ts +18 -0
  477. package/dist/server/routes/reader.js +192 -0
  478. package/dist/server/routes/research.d.ts +14 -0
  479. package/dist/server/routes/research.js +482 -0
  480. package/dist/server/routes/screenshot.d.ts +22 -0
  481. package/dist/server/routes/screenshot.js +820 -0
  482. package/dist/server/routes/search.d.ts +6 -0
  483. package/dist/server/routes/search.js +874 -0
  484. package/dist/server/routes/session.d.ts +17 -0
  485. package/dist/server/routes/session.js +548 -0
  486. package/dist/server/routes/share.d.ts +18 -0
  487. package/dist/server/routes/share.js +462 -0
  488. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  489. package/dist/server/routes/smart-search/handlers/cars.js +102 -0
  490. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  491. package/dist/server/routes/smart-search/handlers/flights.js +72 -0
  492. package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
  493. package/dist/server/routes/smart-search/handlers/general.js +717 -0
  494. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  495. package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
  496. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  497. package/dist/server/routes/smart-search/handlers/products.js +1309 -0
  498. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  499. package/dist/server/routes/smart-search/handlers/rental.js +154 -0
  500. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  501. package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
  502. package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
  503. package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
  504. package/dist/server/routes/smart-search/index.d.ts +19 -0
  505. package/dist/server/routes/smart-search/index.js +546 -0
  506. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  507. package/dist/server/routes/smart-search/intent.js +264 -0
  508. package/dist/server/routes/smart-search/llm.d.ts +16 -0
  509. package/dist/server/routes/smart-search/llm.js +70 -0
  510. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  511. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  512. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  513. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  514. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  515. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  516. package/dist/server/routes/smart-search/types.d.ts +81 -0
  517. package/dist/server/routes/smart-search/types.js +1 -0
  518. package/dist/server/routes/smart-search/utils.d.ts +20 -0
  519. package/dist/server/routes/smart-search/utils.js +146 -0
  520. package/dist/server/routes/stats.d.ts +6 -0
  521. package/dist/server/routes/stats.js +71 -0
  522. package/dist/server/routes/stripe.d.ts +15 -0
  523. package/dist/server/routes/stripe.js +296 -0
  524. package/dist/server/routes/transcript-export.d.ts +10 -0
  525. package/dist/server/routes/transcript-export.js +178 -0
  526. package/dist/server/routes/usage.d.ts +9 -0
  527. package/dist/server/routes/usage.js +279 -0
  528. package/dist/server/routes/users.d.ts +8 -0
  529. package/dist/server/routes/users.js +1867 -0
  530. package/dist/server/routes/watch.d.ts +15 -0
  531. package/dist/server/routes/watch.js +309 -0
  532. package/dist/server/routes/webhooks.d.ts +26 -0
  533. package/dist/server/routes/webhooks.js +170 -0
  534. package/dist/server/routes/youtube.d.ts +6 -0
  535. package/dist/server/routes/youtube.js +130 -0
  536. package/dist/server/sentry.d.ts +14 -0
  537. package/dist/server/sentry.js +104 -0
  538. package/dist/server/types.d.ts +15 -0
  539. package/dist/server/types.js +7 -0
  540. package/dist/server/utils/response.d.ts +44 -0
  541. package/dist/server/utils/response.js +69 -0
  542. package/dist/server/utils/sse.d.ts +22 -0
  543. package/dist/server/utils/sse.js +38 -0
  544. package/dist/types.d.ts +552 -0
  545. package/dist/types.js +39 -0
  546. package/llms.txt +105 -0
  547. package/package.json +189 -0
@@ -0,0 +1,395 @@
1
+ /**
2
+ * Engine Quality-Ranked Fallback System
3
+ *
4
+ * Ranks extraction strategies by quality for a given URL, producing a
5
+ * dynamic fallback chain. Inspired by Firecrawl's engine cascade approach
6
+ * but tailored to WebPeel's architecture.
7
+ *
8
+ * Usage:
9
+ * ```ts
10
+ * import { buildFallbackChain } from './engine-ranker.js';
11
+ * const chain = buildFallbackChain('https://twitter.com/user', { render: true });
12
+ * // Returns engines sorted by quality, with domain-specific adjustments
13
+ * ```
14
+ *
15
+ * @module engine-ranker
16
+ */
17
+ // ---------------------------------------------------------------------------
18
+ // Default engine configurations
19
+ // ---------------------------------------------------------------------------
20
/**
 * Builds one engine profile.
 *
 * Every capability flag starts out `false`; `capabilities` switches on the
 * ones the engine actually has. Each call produces a fresh `features`
 * object, so profiles never share state.
 *
 * @param type - Engine identifier, stored on the profile as `type`
 * @param scores - `quality`, `speed`, `cost` and `maxTimeoutMs` for the engine
 * @param capabilities - Capability flags to enable
 * @returns The assembled engine configuration
 */
function defineEngine(type, scores, capabilities = {}) {
    return {
        type,
        ...scores,
        features: { javascript: false, antibot: false, screenshots: false, stealth: false, ...capabilities },
    };
}
/**
 * Baseline profile for every extraction engine, keyed by engine type.
 *
 * `quality` expresses extraction fidelity only (speed and cost are scored
 * separately):
 *   - domain-api (95): structured data direct from source APIs
 *   - browser (85): full JS rendering captures dynamic content
 *   - stealth (80): same as browser but with anti-bot bypass
 *   - cloaked (75): heavy stealth, sometimes degrades content
 *   - simple (70): plain HTTP fetch, misses JS-rendered content
 *   - search-fallback (40): cached/snippet data from search engine caches
 *
 * The key order below is the order reported by `getAvailableEngines()`.
 */
const ENGINE_DEFAULTS = {
    'domain-api': defineEngine('domain-api', { quality: 95, speed: 95, cost: 5, maxTimeoutMs: 5000 }),
    'simple': defineEngine('simple', { quality: 70, speed: 90, cost: 10, maxTimeoutMs: 8000 }),
    'browser': defineEngine('browser', { quality: 85, speed: 40, cost: 60, maxTimeoutMs: 15000 }, { javascript: true, screenshots: true }),
    'stealth': defineEngine('stealth', { quality: 80, speed: 30, cost: 80, maxTimeoutMs: 20000 }, { javascript: true, antibot: true, screenshots: true, stealth: true }),
    'cloaked': defineEngine('cloaked', { quality: 75, speed: 20, cost: 90, maxTimeoutMs: 25000 }, { javascript: true, antibot: true, screenshots: true, stealth: true }),
    'search-fallback': defineEngine('search-fallback', { quality: 40, speed: 50, cost: 30, maxTimeoutMs: 10000 }),
};
81
/**
 * Pairs a hostname pattern with the per-engine score overrides it triggers.
 *
 * @param pattern - Domain suffix the rule applies to
 * @param overrides - Partial engine configs keyed by engine type
 * @returns A domain rule entry
 */
const domainRule = (pattern, overrides) => ({ pattern, overrides });
/**
 * Score adjustments for well-known site categories.
 *
 * A pattern is matched as a domain suffix: "twitter.com" applies to
 * "twitter.com" and "www.twitter.com", but never to "nottwitter.com".
 */
const DOMAIN_RULES = [
    // ── Social media: heavy JS, aggressive anti-bot ──────────────────────
    domainRule('twitter.com', {
        simple: { quality: 20, speed: 95 },
        stealth: { quality: 90 },
        cloaked: { quality: 85 },
        browser: { quality: 80 },
    }),
    domainRule('x.com', {
        simple: { quality: 20, speed: 95 },
        stealth: { quality: 90 },
        cloaked: { quality: 85 },
        browser: { quality: 80 },
    }),
    domainRule('instagram.com', {
        simple: { quality: 15 },
        stealth: { quality: 90 },
        cloaked: { quality: 88 },
        browser: { quality: 75 },
    }),
    domainRule('tiktok.com', {
        simple: { quality: 15 },
        stealth: { quality: 90 },
        cloaked: { quality: 88 },
        browser: { quality: 70 },
    }),
    domainRule('facebook.com', {
        simple: { quality: 20 },
        stealth: { quality: 88 },
        cloaked: { quality: 85 },
    }),
    domainRule('linkedin.com', {
        simple: { quality: 25 },
        stealth: { quality: 88 },
        browser: { quality: 78 },
    }),
    domainRule('reddit.com', {
        simple: { quality: 30 },
        browser: { quality: 88 },
        stealth: { quality: 85 },
    }),
    domainRule('threads.net', {
        simple: { quality: 15 },
        stealth: { quality: 90 },
        cloaked: { quality: 85 },
    }),
    // ── SPA-heavy / JS-rendered sites ────────────────────────────────────
    domainRule('vercel.app', {
        browser: { quality: 90 },
        simple: { quality: 50 },
    }),
    domainRule('netlify.app', {
        browser: { quality: 90 },
        simple: { quality: 50 },
    }),
    domainRule('notion.so', {
        browser: { quality: 92 },
        simple: { quality: 20 },
    }),
    domainRule('figma.com', {
        browser: { quality: 90 },
        simple: { quality: 15 },
    }),
    // ── Static / well-structured sites ───────────────────────────────────
    domainRule('wikipedia.org', {
        simple: { quality: 92 },
        browser: { quality: 80, cost: 70 },
    }),
    domainRule('github.com', {
        simple: { quality: 85 },
        browser: { quality: 78, cost: 65 },
    }),
    domainRule('stackoverflow.com', {
        simple: { quality: 88 },
        browser: { quality: 78 },
    }),
    domainRule('docs.python.org', {
        simple: { quality: 90 },
    }),
    domainRule('developer.mozilla.org', {
        simple: { quality: 90 },
    }),
    domainRule('news.ycombinator.com', {
        simple: { quality: 92 },
        browser: { quality: 75 },
    }),
    // ── Known-blocked / aggressive anti-bot ──────────────────────────────
    domainRule('zillow.com', {
        simple: { quality: 10 },
        browser: { quality: 50 },
        cloaked: { quality: 90 },
        stealth: { quality: 85 },
    }),
    domainRule('yelp.com', {
        simple: { quality: 15 },
        cloaked: { quality: 88 },
        stealth: { quality: 82 },
    }),
    domainRule('pinterest.com', {
        simple: { quality: 15 },
        cloaked: { quality: 88 },
        stealth: { quality: 85 },
    }),
    domainRule('ticketmaster.com', {
        simple: { quality: 10 },
        cloaked: { quality: 90 },
        stealth: { quality: 82 },
    }),
];
262
/**
 * Returns domain-specific engine config overrides for a given hostname.
 *
 * Each rule in DOMAIN_RULES is matched as a domain suffix, so the pattern
 * "twitter.com" applies to "twitter.com", "www.twitter.com",
 * "mobile.twitter.com", etc. Matching is case-insensitive and ignores the
 * trailing root dot of a fully-qualified name ("twitter.com.").
 *
 * When several rules match, their overrides are merged per engine in rule
 * order; for a field set by more than one rule the later rule wins.
 *
 * @param hostname - The hostname to look up (e.g. "www.twitter.com")
 * @returns Partial config overrides keyed by engine type, or an empty object
 */
export function getDomainOverrides(hostname) {
    // "twitter.com." is the same host as "twitter.com"; URL parsing keeps the
    // trailing dot, so drop it before comparing against the patterns.
    const host = hostname.toLowerCase().replace(/\.$/, '');
    const merged = {};
    for (const { pattern, overrides } of DOMAIN_RULES) {
        const matches = host === pattern || host.endsWith(`.${pattern}`);
        if (!matches)
            continue;
        for (const [engine, fields] of Object.entries(overrides)) {
            merged[engine] = { ...merged[engine], ...fields };
        }
    }
    return merged;
}
286
+ // ---------------------------------------------------------------------------
287
+ // Fallback chain builder
288
+ // ---------------------------------------------------------------------------
289
/**
 * Produces a new EngineConfig from `base` with `overrides` layered on top.
 *
 * Neither argument is modified. The nested `features` object is merged
 * separately so the result never shares it with `base`, and `type` is
 * always taken from `base` even if `overrides` tries to replace it.
 */
function applyOverrides(base, overrides) {
    const features = Object.assign({}, base.features, overrides.features);
    return Object.assign({}, base, overrides, { features, type: base.type });
}
304
/**
 * Builds an ordered fallback chain of extraction engines for a given URL.
 *
 * The chain is constructed by:
 * 1. Starting with default engine configurations
 * 2. Applying domain-specific quality/score overrides
 * 3. Filtering engines based on the provided options
 * 4. Sorting by quality descending (ties broken by speed descending)
 *
 * Every returned `config` (including its `features` object) is a fresh copy,
 * so callers may mutate it without affecting the shared engine defaults.
 *
 * An unparseable `url` is not an error: the chain is built from the defaults
 * alone, with no domain overrides.
 *
 * @param url - The target URL to build a fallback chain for
 * @param options - Controls which engines are eligible:
 *   - `noDomainApi`: drop the domain-api engine
 *   - `render`: when exactly `false`, drop engines that need JavaScript
 *   - `stealth`: with `render: false`, keep the stealth-capable engines anyway
 *     (has no effect unless `render` is `false`)
 * @returns Ordered array of `{ engine, config }` entries, highest quality first
 *
 * @example
 * ```ts
 * // Basic chain for a static site
 * const chain = buildFallbackChain('https://wikipedia.org/wiki/Test');
 * // → [domain-api, simple, browser, stealth, cloaked, search-fallback]
 *
 * // Chain for a social media URL with rendering
 * const chain = buildFallbackChain('https://twitter.com/user', { render: true });
 * // → [domain-api, stealth, cloaked, browser, search-fallback, simple]
 *
 * // No browser rendering, no domain API
 * const chain = buildFallbackChain('https://example.com', {
 *   render: false,
 *   noDomainApi: true,
 * });
 * // → [simple, search-fallback]
 * ```
 */
export function buildFallbackChain(url, options = {}) {
    const { render, stealth, noDomainApi } = options;
    // 1. Parse hostname for domain overrides
    let hostname = '';
    try {
        hostname = new URL(url).hostname;
    }
    catch {
        // Invalid URL — proceed with no domain overrides
    }
    const domainOverrides = hostname ? getDomainOverrides(hostname) : {};
    // 2. Build full config for each engine (base + domain overrides).
    // Always go through applyOverrides, even with nothing to override: it
    // copies the nested `features` object, so the configs handed to callers
    // never alias the shared ENGINE_DEFAULTS entries.
    const configs = Object.keys(ENGINE_DEFAULTS).map((engine) => ({
        engine,
        config: applyOverrides(ENGINE_DEFAULTS[engine], domainOverrides[engine] ?? {}),
    }));
    // 3. Filter engines based on options
    const eligible = configs.filter(({ engine, config }) => {
        if (noDomainApi && engine === 'domain-api')
            return false;
        // With rendering explicitly disabled, browser-backed engines are out
        // unless stealth was requested and the engine is stealth-capable.
        if (render === false && config.features.javascript)
            return Boolean(stealth && config.features.stealth);
        return true;
    });
    // 4. Sort by quality descending, tie-break by speed descending
    eligible.sort((a, b) => (b.config.quality - a.config.quality) || (b.config.speed - a.config.speed));
    return eligible;
}
380
/**
 * Looks up the baseline configuration of one engine, ignoring any domain
 * overrides.
 *
 * The result is a copy (including its `features` object), so changing it
 * leaves the shared defaults untouched.
 *
 * @param type - The engine type to look up
 * @returns A copy of the default EngineConfig
 */
export function getEngineDefaults(type) {
    const defaults = ENGINE_DEFAULTS[type];
    const features = { ...defaults.features };
    return { ...defaults, features };
}
390
/**
 * Lists every engine type that has a default configuration, in the order
 * the defaults are declared.
 */
export function getAvailableEngines() {
    return Object.entries(ENGINE_DEFAULTS).map(([engineType]) => engineType);
}
@@ -0,0 +1,38 @@
1
+ /**
2
+ * Inline structured extraction using BYOK LLM
3
+ *
4
+ * After fetching page content, pass it + a JSON schema + optional prompt
5
+ * to an LLM and get back structured JSON matching the schema.
6
+ *
7
+ * Supports OpenAI, Anthropic, and Google (same BYOK pattern as /v1/answer).
8
+ */
9
/** Identifier of a supported bring-your-own-key LLM backend. */
export type LLMProvider =
    | 'openai'
    | 'anthropic'
    | 'google';
/**
 * Settings for one inline extraction request.
 *
 * At least one of `schema` and `prompt` must be supplied.
 */
export interface InlineExtractOptions {
    /** JSON Schema the returned object should conform to */
    schema?: Record<string, any>;
    /** Plain-language description of what to pull out of the page */
    prompt?: string;
    /** Which LLM backend to call (required) */
    llmProvider: LLMProvider;
    /** Caller-supplied API key for that backend — BYOK (required) */
    llmApiKey: string;
    /** Model name; when omitted the provider's default model is used */
    llmModel?: string;
}
/** Outcome of an inline extraction request. */
export interface InlineExtractResult {
    /** The structured data parsed from the LLM response */
    data: Record<string, any>;
    /** Token counts reported by the provider for this call */
    tokensUsed: {
        input: number;
        output: number;
    };
}
/**
 * Runs structured extraction over page content with a caller-provided LLM
 * key (BYOK) and returns the parsed JSON.
 *
 * @param content - Page content (markdown or text)
 * @param options - Schema and/or prompt plus the LLM provider, key and model
 * @returns The extracted structured data together with token usage
 */
export declare function extractInlineJson(content: string, options: InlineExtractOptions): Promise<InlineExtractResult>;
@@ -0,0 +1,215 @@
1
+ /**
2
+ * Inline structured extraction using BYOK LLM
3
+ *
4
+ * After fetching page content, pass it + a JSON schema + optional prompt
5
+ * to an LLM and get back structured JSON matching the schema.
6
+ *
7
+ * Supports OpenAI, Anthropic, and Google (same BYOK pattern as /v1/answer).
8
+ */
9
/**
 * Picks the model to use when the caller did not name one.
 *
 * @param provider - 'openai', 'anthropic' or 'google'
 * @returns The provider's default model name, or `undefined` for any other
 *          provider value
 */
function defaultModel(provider) {
    if (provider === 'openai')
        return 'gpt-4o-mini';
    if (provider === 'anthropic')
        return 'claude-3-5-sonnet-latest';
    if (provider === 'google')
        return 'gemini-1.5-flash';
    return undefined;
}
19
/**
 * Assembles the system prompt sent to the LLM.
 *
 * The prompt always opens with the extraction role and a JSON-only rule and
 * closes with a JSON-only reminder. In between, the caller's instruction
 * and/or the pretty-printed JSON Schema are added when provided.
 *
 * @param schema - Optional JSON Schema the output should conform to
 * @param prompt - Optional natural-language extraction instruction
 * @returns The newline-joined system prompt
 */
function buildSystemPrompt(schema, prompt) {
    const instructionSection = prompt ? [`\nInstruction: ${prompt}`] : [];
    const schemaSection = schema
        ? [`\nReturn a JSON object that conforms to this JSON Schema:\n${JSON.stringify(schema, null, 2)}`]
        : [];
    return [
        'You are a structured data extraction assistant.',
        'Extract data from the provided web page content and return ONLY valid JSON — no markdown fences, no explanation, no extra text.',
        ...instructionSection,
        ...schemaSection,
        '\nReturn ONLY the JSON object.',
    ].join('\n');
}
33
/**
 * Caps page content at `maxChars` UTF-16 code units before it is sent to the
 * LLM, appending a "[Content truncated]" marker when anything was removed.
 *
 * The cut never lands in the middle of a surrogate pair: if the last kept
 * code unit would be a high surrogate, it is dropped as well, so the result
 * does not end in half of an emoji or other astral character.
 *
 * @param content - Page content (markdown or text)
 * @param maxChars - Maximum number of code units to keep (default 24,000)
 * @returns `content` unchanged if it fits, otherwise the truncated text plus
 *          the truncation marker
 */
function truncateContent(content, maxChars = 24_000) {
    if (content.length <= maxChars)
        return content;
    let cut = maxChars;
    // 0xD800–0xDBFF is the high-surrogate range; ending on one would leave a
    // lone surrogate (its low half is at `cut`, which is about to be dropped).
    const lastKept = content.charCodeAt(cut - 1);
    if (lastKept >= 0xd800 && lastKept <= 0xdbff)
        cut -= 1;
    return content.slice(0, cut) + '\n\n[Content truncated]';
}
38
/**
 * Parses the LLM's reply as JSON, tolerating common wrapping.
 *
 * Attempts, in order:
 *   1. the reply as-is;
 *   2. the body of the first markdown code fence (``` or ```json);
 *   3. the text between the first "{" and the last "}".
 * The first attempt that parses wins. Failures of attempts 2 and 3 are
 * reported through console.debug when the DEBUG environment variable is set.
 *
 * @param text - Raw text returned by the LLM
 * @returns The parsed JSON value
 * @throws Error when no attempt yields valid JSON; the message carries the
 *         first 300 characters of the reply
 */
function parseJsonResponse(text) {
    // Wrap JSON.parse so that a legitimately parsed `null`/`false` is not
    // mistaken for a failure.
    const attempt = (candidate, debugLabel) => {
        try {
            return { ok: true, value: JSON.parse(candidate) };
        }
        catch (e) {
            if (debugLabel && process.env.DEBUG)
                console.debug('[webpeel]', debugLabel, e instanceof Error ? e.message : e);
            return { ok: false, value: undefined };
        }
    };
    // Try direct parse first
    const direct = attempt(text, '');
    if (direct.ok)
        return direct.value;
    // Strip markdown code fences if present
    const fenceMatch = text.match(/```(?:json)?\s*([\s\S]*?)```/);
    if (fenceMatch) {
        const fenced = attempt(fenceMatch[1].trim(), 'fence json parse failed:');
        if (fenced.ok)
            return fenced.value;
    }
    // Try to find the first { ... } block
    const open = text.indexOf('{');
    const close = text.lastIndexOf('}');
    if (open !== -1 && close > open) {
        const braced = attempt(text.slice(open, close + 1), 'brace json parse failed:');
        if (braced.ok)
            return braced.value;
    }
    throw new Error(`LLM returned invalid JSON: ${text.slice(0, 300)}`);
}
70
+ // ---------------------------------------------------------------------------
71
+ // Provider-specific calls (mirrors core/answer.ts patterns)
72
+ // ---------------------------------------------------------------------------
73
/**
 * Sends the extraction request to OpenAI's chat completions endpoint in JSON
 * response mode.
 *
 * @param apiKey - Caller's OpenAI API key, sent as a Bearer token
 * @param model - Model name
 * @param systemPrompt - Extraction instructions (system message)
 * @param userContent - Page content (user message)
 * @returns The trimmed text of the first choice and the token usage
 *          (0 for any count the response omits)
 * @throws Error on a non-2xx response, including the HTTP status and the
 *         response body when it can be read
 */
async function callOpenAI(apiKey, model, systemPrompt, userContent) {
    const payload = {
        model,
        messages: [
            { role: 'system', content: systemPrompt },
            { role: 'user', content: userContent },
        ],
        temperature: 0,
        response_format: { type: 'json_object' },
    };
    const resp = await fetch('https://api.openai.com/v1/chat/completions', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            Authorization: `Bearer ${apiKey}`,
        },
        body: JSON.stringify(payload),
    });
    if (!resp.ok) {
        // The body is best-effort detail; an unreadable body must not mask the HTTP error.
        const errText = await resp.text().catch(() => '');
        const detail = errText ? ` - ${errText}` : '';
        throw new Error(`OpenAI API error: HTTP ${resp.status}${detail}`);
    }
    const json = await resp.json();
    const reply = json?.choices?.[0]?.message?.content;
    const usage = json?.usage;
    return {
        text: String(reply || '').trim(),
        usage: {
            input: Number(usage?.prompt_tokens || 0),
            output: Number(usage?.completion_tokens || 0),
        },
    };
}
103
/**
 * Sends the extraction request to Anthropic's Messages API.
 *
 * @param apiKey - Caller's Anthropic API key (sent in the x-api-key header)
 * @param model - Model name
 * @param systemPrompt - Extraction instructions (the `system` field)
 * @param userContent - Page content (single user message)
 * @returns The concatenated, trimmed text of all text blocks in the reply
 *          and the token usage (0 for any count the response omits)
 * @throws Error on a non-2xx response, including the HTTP status and the
 *         response body when it can be read
 */
async function callAnthropic(apiKey, model, systemPrompt, userContent) {
    const payload = {
        model,
        system: systemPrompt,
        messages: [{ role: 'user', content: userContent }],
        max_tokens: 4096,
        temperature: 0,
    };
    const resp = await fetch('https://api.anthropic.com/v1/messages', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'x-api-key': apiKey,
            'anthropic-version': '2023-06-01',
        },
        body: JSON.stringify(payload),
    });
    if (!resp.ok) {
        // The body is best-effort detail; an unreadable body must not mask the HTTP error.
        const errText = await resp.text().catch(() => '');
        const detail = errText ? ` - ${errText}` : '';
        throw new Error(`Anthropic API error: HTTP ${resp.status}${detail}`);
    }
    const json = await resp.json();
    // The reply is a list of content blocks; only those carrying string text contribute.
    let text = '';
    if (Array.isArray(json?.content)) {
        for (const block of json.content) {
            if (typeof block?.text === 'string')
                text += block.text;
        }
    }
    const usage = json?.usage;
    return {
        text: text.trim(),
        usage: {
            input: Number(usage?.input_tokens || 0),
            output: Number(usage?.output_tokens || 0),
        },
    };
}
137
/**
 * Sends the extraction request to Google's Generative Language API
 * (`generateContent`) with a JSON response MIME type.
 *
 * The API key travels in the `x-goog-api-key` request header rather than in
 * the URL query string, so it does not end up in request URLs that get
 * logged or echoed in error traces.
 *
 * @param apiKey - Caller's Google API key
 * @param model - Model name (URL-encoded into the request path)
 * @param systemPrompt - Extraction instructions; prepended to the user content
 *                       in a single user turn
 * @param userContent - Page content
 * @returns The concatenated, trimmed text of the first candidate's parts and
 *          the token usage (0 for any count the response omits)
 * @throws Error on a non-2xx response, including the HTTP status and the
 *         response body when it can be read
 */
async function callGoogle(apiKey, model, systemPrompt, userContent) {
    const url = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(model)}:generateContent`;
    const resp = await fetch(url, {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'x-goog-api-key': apiKey,
        },
        body: JSON.stringify({
            contents: [
                {
                    role: 'user',
                    parts: [{ text: `${systemPrompt}\n\n${userContent}` }],
                },
            ],
            generationConfig: {
                temperature: 0,
                responseMimeType: 'application/json',
            },
        }),
    });
    if (!resp.ok) {
        // The body is best-effort detail; an unreadable body must not mask the HTTP error.
        const errText = await resp.text().catch(() => '');
        throw new Error(`Google API error: HTTP ${resp.status}${errText ? ` - ${errText}` : ''}`);
    }
    const json = await resp.json();
    const parts = json?.candidates?.[0]?.content?.parts;
    const text = Array.isArray(parts)
        ? parts.map((p) => (typeof p?.text === 'string' ? p.text : '')).join('')
        : '';
    return {
        text: String(text || '').trim(),
        usage: {
            input: Number(json?.usageMetadata?.promptTokenCount || 0),
            output: Number(json?.usageMetadata?.candidatesTokenCount || 0),
        },
    };
}
172
+ // ---------------------------------------------------------------------------
173
+ // Public API
174
+ // ---------------------------------------------------------------------------
175
/**
 * Turns page content into structured JSON by asking the caller's LLM (BYOK).
 *
 * The content is truncated to the module's size cap, sent to the selected
 * provider together with a system prompt built from `schema` / `prompt`, and
 * the provider's reply is parsed as JSON.
 *
 * @param content - Page content (markdown or text)
 * @param options - Extraction options including schema, prompt, and LLM credentials
 * @returns Extracted structured data + token usage
 * @throws Error when `llmApiKey` or `llmProvider` is missing, when neither
 *         `schema` nor `prompt` is given, when the provider is not supported,
 *         when the provider call fails, or when the reply is not valid JSON
 */
export async function extractInlineJson(content, options) {
    const { schema, prompt, llmProvider, llmApiKey, llmModel } = options;
    if (!llmApiKey) {
        throw new Error('Inline extraction requires "llmApiKey" (BYOK)');
    }
    if (!llmProvider) {
        throw new Error('Inline extraction requires "llmProvider" (openai, anthropic, or google)');
    }
    if (!(schema || prompt)) {
        throw new Error('Inline extraction requires "schema" or "prompt" (or both)');
    }
    // A blank or whitespace-only model name falls back to the provider default.
    const model = (llmModel || '').trim() || defaultModel(llmProvider);
    const systemPrompt = buildSystemPrompt(schema, prompt);
    const userContent = truncateContent(content);
    const providerCalls = new Map([
        ['openai', callOpenAI],
        ['anthropic', callAnthropic],
        ['google', callGoogle],
    ]);
    const callProvider = providerCalls.get(llmProvider);
    if (!callProvider) {
        throw new Error(`Unsupported llmProvider: ${llmProvider}`);
    }
    const { text, usage } = await callProvider(llmApiKey, model, systemPrompt, userContent);
    return {
        data: parseJsonResponse(text),
        tokensUsed: usage,
    };
}
@@ -0,0 +1,38 @@
1
+ /**
2
+ * Auto-extract repeated listing patterns from HTML pages.
3
+ *
4
+ * Given raw HTML (e.g. an eBay search results page), this module detects the
5
+ * largest group of sibling elements with a consistent internal structure and
6
+ * extracts structured fields (title, price, image, link, description, rating)
7
+ * from each item.
8
+ *
9
+ * @module extract-listings
10
+ */
11
/**
 * One item pulled out of a repeated listing on a page.
 *
 * Every field is optional. Keys beyond the named ones are allowed and hold
 * string values.
 */
export interface ListingItem {
    title?: string;
    price?: string;
    image?: string;
    link?: string;
    description?: string;
    rating?: string;
    [key: string]: string | undefined;
}
/**
 * Finds the repeated listing pattern in a raw HTML document and returns one
 * structured item per listing entry.
 *
 * @param html - Raw HTML string to parse.
 * @param url - Optional base URL for resolving relative links and images.
 * @returns Array of extracted listing items (may be empty).
 *
 * @example
 * ```typescript
 * import { extractListings } from 'webpeel';
 *
 * const items = extractListings(ebayHtml, 'https://ebay.com/sch?q=card');
 * console.log(items[0].title); // "Charizard VMAX 020/189"
 * console.log(items[0].price); // "$24.99"
 * ```
 */
export declare function extractListings(html: string, url?: string): ListingItem[];