@iflow-mcp/jakeliume-webpeel 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (547) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +313 -0
  3. package/dist/cache.d.ts +30 -0
  4. package/dist/cache.js +139 -0
  5. package/dist/cli/commands/auth.d.ts +5 -0
  6. package/dist/cli/commands/auth.js +411 -0
  7. package/dist/cli/commands/doctor.d.ts +37 -0
  8. package/dist/cli/commands/doctor.js +371 -0
  9. package/dist/cli/commands/fetch.d.ts +6 -0
  10. package/dist/cli/commands/fetch.js +1345 -0
  11. package/dist/cli/commands/guide.d.ts +2 -0
  12. package/dist/cli/commands/guide.js +183 -0
  13. package/dist/cli/commands/interact.d.ts +5 -0
  14. package/dist/cli/commands/interact.js +840 -0
  15. package/dist/cli/commands/jobs.d.ts +5 -0
  16. package/dist/cli/commands/jobs.js +997 -0
  17. package/dist/cli/commands/monitor.d.ts +12 -0
  18. package/dist/cli/commands/monitor.js +197 -0
  19. package/dist/cli/commands/observe.d.ts +12 -0
  20. package/dist/cli/commands/observe.js +158 -0
  21. package/dist/cli/commands/screenshot.d.ts +5 -0
  22. package/dist/cli/commands/screenshot.js +282 -0
  23. package/dist/cli/commands/search.d.ts +5 -0
  24. package/dist/cli/commands/search.js +1021 -0
  25. package/dist/cli/commands/setup.d.ts +13 -0
  26. package/dist/cli/commands/setup.js +244 -0
  27. package/dist/cli/commands/skill.d.ts +15 -0
  28. package/dist/cli/commands/skill.js +195 -0
  29. package/dist/cli/utils.d.ts +84 -0
  30. package/dist/cli/utils.js +806 -0
  31. package/dist/cli-auth.d.ts +75 -0
  32. package/dist/cli-auth.js +369 -0
  33. package/dist/cli.d.ts +17 -0
  34. package/dist/cli.js +99 -0
  35. package/dist/core/actions.d.ts +69 -0
  36. package/dist/core/actions.js +495 -0
  37. package/dist/core/agent.d.ts +98 -0
  38. package/dist/core/agent.js +558 -0
  39. package/dist/core/answer.d.ts +42 -0
  40. package/dist/core/answer.js +395 -0
  41. package/dist/core/application-tracker.d.ts +84 -0
  42. package/dist/core/application-tracker.js +184 -0
  43. package/dist/core/apply.d.ts +162 -0
  44. package/dist/core/apply.js +816 -0
  45. package/dist/core/auth-detection.d.ts +35 -0
  46. package/dist/core/auth-detection.js +358 -0
  47. package/dist/core/auto-extract.d.ts +82 -0
  48. package/dist/core/auto-extract.js +604 -0
  49. package/dist/core/auto-interact.d.ts +23 -0
  50. package/dist/core/auto-interact.js +246 -0
  51. package/dist/core/bm25-filter.d.ts +66 -0
  52. package/dist/core/bm25-filter.js +288 -0
  53. package/dist/core/branding.d.ts +54 -0
  54. package/dist/core/branding.js +234 -0
  55. package/dist/core/browser-fetch.d.ts +323 -0
  56. package/dist/core/browser-fetch.js +1600 -0
  57. package/dist/core/browser-pool.d.ts +91 -0
  58. package/dist/core/browser-pool.js +550 -0
  59. package/dist/core/budget.d.ts +42 -0
  60. package/dist/core/budget.js +324 -0
  61. package/dist/core/business-intel.d.ts +47 -0
  62. package/dist/core/business-intel.js +279 -0
  63. package/dist/core/cache.d.ts +13 -0
  64. package/dist/core/cache.js +121 -0
  65. package/dist/core/cf-worker-proxy.d.ts +32 -0
  66. package/dist/core/cf-worker-proxy.js +87 -0
  67. package/dist/core/challenge-detection.d.ts +26 -0
  68. package/dist/core/challenge-detection.js +468 -0
  69. package/dist/core/change-tracking.d.ts +75 -0
  70. package/dist/core/change-tracking.js +276 -0
  71. package/dist/core/chunker.d.ts +46 -0
  72. package/dist/core/chunker.js +249 -0
  73. package/dist/core/chunking.d.ts +42 -0
  74. package/dist/core/chunking.js +181 -0
  75. package/dist/core/circuit-breaker.d.ts +44 -0
  76. package/dist/core/circuit-breaker.js +85 -0
  77. package/dist/core/content-pruner.d.ts +47 -0
  78. package/dist/core/content-pruner.js +425 -0
  79. package/dist/core/cookie-cache.d.ts +60 -0
  80. package/dist/core/cookie-cache.js +163 -0
  81. package/dist/core/crawl-checkpoint.d.ts +54 -0
  82. package/dist/core/crawl-checkpoint.js +104 -0
  83. package/dist/core/crawler.d.ts +84 -0
  84. package/dist/core/crawler.js +349 -0
  85. package/dist/core/cross-verify.d.ts +27 -0
  86. package/dist/core/cross-verify.js +93 -0
  87. package/dist/core/deep-fetch.d.ts +74 -0
  88. package/dist/core/deep-fetch.js +405 -0
  89. package/dist/core/deep-research.d.ts +141 -0
  90. package/dist/core/deep-research.js +972 -0
  91. package/dist/core/design-analysis.d.ts +70 -0
  92. package/dist/core/design-analysis.js +490 -0
  93. package/dist/core/design-compare.d.ts +38 -0
  94. package/dist/core/design-compare.js +264 -0
  95. package/dist/core/diff.d.ts +61 -0
  96. package/dist/core/diff.js +289 -0
  97. package/dist/core/dns-cache.d.ts +20 -0
  98. package/dist/core/dns-cache.js +198 -0
  99. package/dist/core/documents.d.ts +23 -0
  100. package/dist/core/documents.js +123 -0
  101. package/dist/core/domain-memory.d.ts +66 -0
  102. package/dist/core/domain-memory.js +163 -0
  103. package/dist/core/domain-verify.d.ts +40 -0
  104. package/dist/core/domain-verify.js +379 -0
  105. package/dist/core/engine-ranker.d.ts +112 -0
  106. package/dist/core/engine-ranker.js +395 -0
  107. package/dist/core/extract-inline.d.ts +38 -0
  108. package/dist/core/extract-inline.js +215 -0
  109. package/dist/core/extract-listings.d.ts +38 -0
  110. package/dist/core/extract-listings.js +461 -0
  111. package/dist/core/extract.d.ts +9 -0
  112. package/dist/core/extract.js +139 -0
  113. package/dist/core/fetch-cache.d.ts +57 -0
  114. package/dist/core/fetch-cache.js +95 -0
  115. package/dist/core/fetcher.d.ts +13 -0
  116. package/dist/core/fetcher.js +12 -0
  117. package/dist/core/google-cache.d.ts +29 -0
  118. package/dist/core/google-cache.js +180 -0
  119. package/dist/core/google-serp-parser.d.ts +82 -0
  120. package/dist/core/google-serp-parser.js +287 -0
  121. package/dist/core/hotel-search.d.ts +122 -0
  122. package/dist/core/hotel-search.js +382 -0
  123. package/dist/core/http-fetch.d.ts +72 -0
  124. package/dist/core/http-fetch.js +820 -0
  125. package/dist/core/human.d.ts +175 -0
  126. package/dist/core/human.js +680 -0
  127. package/dist/core/image-caption.d.ts +44 -0
  128. package/dist/core/image-caption.js +271 -0
  129. package/dist/core/jobs.d.ts +75 -0
  130. package/dist/core/jobs.js +634 -0
  131. package/dist/core/json-ld.d.ts +15 -0
  132. package/dist/core/json-ld.js +617 -0
  133. package/dist/core/language-detect.d.ts +18 -0
  134. package/dist/core/language-detect.js +135 -0
  135. package/dist/core/links.d.ts +10 -0
  136. package/dist/core/links.js +44 -0
  137. package/dist/core/llm-extract.d.ts +71 -0
  138. package/dist/core/llm-extract.js +507 -0
  139. package/dist/core/llm-provider.d.ts +100 -0
  140. package/dist/core/llm-provider.js +702 -0
  141. package/dist/core/local-search.d.ts +60 -0
  142. package/dist/core/local-search.js +308 -0
  143. package/dist/core/logger.d.ts +28 -0
  144. package/dist/core/logger.js +104 -0
  145. package/dist/core/map.d.ts +33 -0
  146. package/dist/core/map.js +127 -0
  147. package/dist/core/markdown.d.ts +92 -0
  148. package/dist/core/markdown.js +809 -0
  149. package/dist/core/metadata.d.ts +34 -0
  150. package/dist/core/metadata.js +422 -0
  151. package/dist/core/observe.d.ts +113 -0
  152. package/dist/core/observe.js +395 -0
  153. package/dist/core/ocr.d.ts +12 -0
  154. package/dist/core/ocr.js +33 -0
  155. package/dist/core/paginate.d.ts +31 -0
  156. package/dist/core/paginate.js +106 -0
  157. package/dist/core/pdf.d.ts +8 -0
  158. package/dist/core/pdf.js +25 -0
  159. package/dist/core/peel-tls.d.ts +25 -0
  160. package/dist/core/peel-tls.js +220 -0
  161. package/dist/core/pipeline.d.ts +132 -0
  162. package/dist/core/pipeline.js +1666 -0
  163. package/dist/core/profiles.d.ts +61 -0
  164. package/dist/core/profiles.js +350 -0
  165. package/dist/core/prompt-guard.d.ts +30 -0
  166. package/dist/core/prompt-guard.js +119 -0
  167. package/dist/core/proxy-config.d.ts +90 -0
  168. package/dist/core/proxy-config.js +172 -0
  169. package/dist/core/quick-answer.d.ts +53 -0
  170. package/dist/core/quick-answer.js +833 -0
  171. package/dist/core/rate-governor.d.ts +80 -0
  172. package/dist/core/rate-governor.js +238 -0
  173. package/dist/core/readability.d.ts +57 -0
  174. package/dist/core/readability.js +533 -0
  175. package/dist/core/research.d.ts +66 -0
  176. package/dist/core/research.js +270 -0
  177. package/dist/core/retry.d.ts +60 -0
  178. package/dist/core/retry.js +119 -0
  179. package/dist/core/safe-browsing.d.ts +30 -0
  180. package/dist/core/safe-browsing.js +206 -0
  181. package/dist/core/schema-extraction.d.ts +66 -0
  182. package/dist/core/schema-extraction.js +352 -0
  183. package/dist/core/schema-postprocess.d.ts +32 -0
  184. package/dist/core/schema-postprocess.js +469 -0
  185. package/dist/core/schema-templates.d.ts +19 -0
  186. package/dist/core/schema-templates.js +143 -0
  187. package/dist/core/screenshot.d.ts +224 -0
  188. package/dist/core/screenshot.js +207 -0
  189. package/dist/core/search-engines.d.ts +25 -0
  190. package/dist/core/search-engines.js +182 -0
  191. package/dist/core/search-provider.d.ts +243 -0
  192. package/dist/core/search-provider.js +1629 -0
  193. package/dist/core/searxng-provider.d.ts +35 -0
  194. package/dist/core/searxng-provider.js +105 -0
  195. package/dist/core/selective-evidence.d.ts +151 -0
  196. package/dist/core/selective-evidence.js +389 -0
  197. package/dist/core/site-search.d.ts +44 -0
  198. package/dist/core/site-search.js +252 -0
  199. package/dist/core/sitemap.d.ts +23 -0
  200. package/dist/core/sitemap.js +105 -0
  201. package/dist/core/source-credibility.d.ts +29 -0
  202. package/dist/core/source-credibility.js +584 -0
  203. package/dist/core/source-scoring.d.ts +166 -0
  204. package/dist/core/source-scoring.js +396 -0
  205. package/dist/core/stemmer.d.ts +38 -0
  206. package/dist/core/stemmer.js +509 -0
  207. package/dist/core/strategies.d.ts +104 -0
  208. package/dist/core/strategies.js +1044 -0
  209. package/dist/core/strategy-hooks.d.ts +145 -0
  210. package/dist/core/strategy-hooks.js +74 -0
  211. package/dist/core/structured-extract.d.ts +43 -0
  212. package/dist/core/structured-extract.js +550 -0
  213. package/dist/core/summarize.d.ts +17 -0
  214. package/dist/core/summarize.js +78 -0
  215. package/dist/core/synonyms.d.ts +42 -0
  216. package/dist/core/synonyms.js +184 -0
  217. package/dist/core/system-monitor.d.ts +61 -0
  218. package/dist/core/system-monitor.js +133 -0
  219. package/dist/core/table-format.d.ts +30 -0
  220. package/dist/core/table-format.js +146 -0
  221. package/dist/core/threat-feeds.d.ts +23 -0
  222. package/dist/core/threat-feeds.js +104 -0
  223. package/dist/core/timing.d.ts +21 -0
  224. package/dist/core/timing.js +33 -0
  225. package/dist/core/transcript-export.d.ts +47 -0
  226. package/dist/core/transcript-export.js +107 -0
  227. package/dist/core/user-agents.d.ts +82 -0
  228. package/dist/core/user-agents.js +239 -0
  229. package/dist/core/vertical-search.d.ts +54 -0
  230. package/dist/core/vertical-search.js +158 -0
  231. package/dist/core/watch-manager.d.ts +175 -0
  232. package/dist/core/watch-manager.js +416 -0
  233. package/dist/core/watch.d.ts +101 -0
  234. package/dist/core/watch.js +389 -0
  235. package/dist/core/youtube.d.ts +130 -0
  236. package/dist/core/youtube.js +1175 -0
  237. package/dist/ee/challenge-re-export.d.ts +1 -0
  238. package/dist/ee/challenge-re-export.js +1 -0
  239. package/dist/ee/challenge-solver.d.ts +72 -0
  240. package/dist/ee/challenge-solver.js +720 -0
  241. package/dist/ee/domain-extractors.d.ts +8 -0
  242. package/dist/ee/domain-extractors.js +8 -0
  243. package/dist/ee/domain-intel.d.ts +16 -0
  244. package/dist/ee/domain-intel.js +133 -0
  245. package/dist/ee/extractors/allrecipes.d.ts +2 -0
  246. package/dist/ee/extractors/allrecipes.js +120 -0
  247. package/dist/ee/extractors/amazon.d.ts +2 -0
  248. package/dist/ee/extractors/amazon.js +78 -0
  249. package/dist/ee/extractors/arxiv.d.ts +2 -0
  250. package/dist/ee/extractors/arxiv.js +137 -0
  251. package/dist/ee/extractors/bestbuy.d.ts +2 -0
  252. package/dist/ee/extractors/bestbuy.js +78 -0
  253. package/dist/ee/extractors/carscom.d.ts +2 -0
  254. package/dist/ee/extractors/carscom.js +121 -0
  255. package/dist/ee/extractors/coingecko.d.ts +2 -0
  256. package/dist/ee/extractors/coingecko.js +134 -0
  257. package/dist/ee/extractors/craigslist.d.ts +2 -0
  258. package/dist/ee/extractors/craigslist.js +92 -0
  259. package/dist/ee/extractors/devto.d.ts +2 -0
  260. package/dist/ee/extractors/devto.js +135 -0
  261. package/dist/ee/extractors/ebay.d.ts +2 -0
  262. package/dist/ee/extractors/ebay.js +90 -0
  263. package/dist/ee/extractors/espn.d.ts +2 -0
  264. package/dist/ee/extractors/espn.js +260 -0
  265. package/dist/ee/extractors/etsy.d.ts +2 -0
  266. package/dist/ee/extractors/etsy.js +52 -0
  267. package/dist/ee/extractors/facebook.d.ts +2 -0
  268. package/dist/ee/extractors/facebook.js +46 -0
  269. package/dist/ee/extractors/github.d.ts +2 -0
  270. package/dist/ee/extractors/github.js +196 -0
  271. package/dist/ee/extractors/google-flights.d.ts +2 -0
  272. package/dist/ee/extractors/google-flights.js +176 -0
  273. package/dist/ee/extractors/hackernews.d.ts +2 -0
  274. package/dist/ee/extractors/hackernews.js +147 -0
  275. package/dist/ee/extractors/imdb.d.ts +2 -0
  276. package/dist/ee/extractors/imdb.js +172 -0
  277. package/dist/ee/extractors/index.d.ts +26 -0
  278. package/dist/ee/extractors/index.js +247 -0
  279. package/dist/ee/extractors/instagram.d.ts +2 -0
  280. package/dist/ee/extractors/instagram.js +102 -0
  281. package/dist/ee/extractors/kalshi.d.ts +2 -0
  282. package/dist/ee/extractors/kalshi.js +121 -0
  283. package/dist/ee/extractors/kayak-cars.d.ts +2 -0
  284. package/dist/ee/extractors/kayak-cars.js +270 -0
  285. package/dist/ee/extractors/linkedin.d.ts +2 -0
  286. package/dist/ee/extractors/linkedin.js +113 -0
  287. package/dist/ee/extractors/medium.d.ts +2 -0
  288. package/dist/ee/extractors/medium.js +130 -0
  289. package/dist/ee/extractors/news.d.ts +4 -0
  290. package/dist/ee/extractors/news.js +173 -0
  291. package/dist/ee/extractors/npm.d.ts +2 -0
  292. package/dist/ee/extractors/npm.js +86 -0
  293. package/dist/ee/extractors/pdf.d.ts +2 -0
  294. package/dist/ee/extractors/pdf.js +108 -0
  295. package/dist/ee/extractors/pinterest.d.ts +2 -0
  296. package/dist/ee/extractors/pinterest.js +34 -0
  297. package/dist/ee/extractors/polymarket.d.ts +2 -0
  298. package/dist/ee/extractors/polymarket.js +358 -0
  299. package/dist/ee/extractors/producthunt.d.ts +2 -0
  300. package/dist/ee/extractors/producthunt.js +88 -0
  301. package/dist/ee/extractors/pubmed.d.ts +2 -0
  302. package/dist/ee/extractors/pubmed.js +162 -0
  303. package/dist/ee/extractors/pypi.d.ts +2 -0
  304. package/dist/ee/extractors/pypi.js +80 -0
  305. package/dist/ee/extractors/reddit.d.ts +2 -0
  306. package/dist/ee/extractors/reddit.js +438 -0
  307. package/dist/ee/extractors/redfin.d.ts +2 -0
  308. package/dist/ee/extractors/redfin.js +156 -0
  309. package/dist/ee/extractors/semanticscholar.d.ts +2 -0
  310. package/dist/ee/extractors/semanticscholar.js +131 -0
  311. package/dist/ee/extractors/shared.d.ts +12 -0
  312. package/dist/ee/extractors/shared.js +76 -0
  313. package/dist/ee/extractors/soundcloud.d.ts +2 -0
  314. package/dist/ee/extractors/soundcloud.js +34 -0
  315. package/dist/ee/extractors/sportsbetting.d.ts +2 -0
  316. package/dist/ee/extractors/sportsbetting.js +37 -0
  317. package/dist/ee/extractors/spotify.d.ts +2 -0
  318. package/dist/ee/extractors/spotify.js +34 -0
  319. package/dist/ee/extractors/stackoverflow.d.ts +2 -0
  320. package/dist/ee/extractors/stackoverflow.js +61 -0
  321. package/dist/ee/extractors/substack.d.ts +2 -0
  322. package/dist/ee/extractors/substack.js +115 -0
  323. package/dist/ee/extractors/substackroot.d.ts +2 -0
  324. package/dist/ee/extractors/substackroot.js +46 -0
  325. package/dist/ee/extractors/tiktok.d.ts +2 -0
  326. package/dist/ee/extractors/tiktok.js +29 -0
  327. package/dist/ee/extractors/tradingview.d.ts +2 -0
  328. package/dist/ee/extractors/tradingview.js +182 -0
  329. package/dist/ee/extractors/twitch.d.ts +2 -0
  330. package/dist/ee/extractors/twitch.js +36 -0
  331. package/dist/ee/extractors/twitter.d.ts +2 -0
  332. package/dist/ee/extractors/twitter.js +327 -0
  333. package/dist/ee/extractors/types.d.ts +14 -0
  334. package/dist/ee/extractors/types.js +1 -0
  335. package/dist/ee/extractors/walmart.d.ts +2 -0
  336. package/dist/ee/extractors/walmart.js +50 -0
  337. package/dist/ee/extractors/weather.d.ts +2 -0
  338. package/dist/ee/extractors/weather.js +133 -0
  339. package/dist/ee/extractors/wikipedia.d.ts +4 -0
  340. package/dist/ee/extractors/wikipedia.js +235 -0
  341. package/dist/ee/extractors/yelp.d.ts +2 -0
  342. package/dist/ee/extractors/yelp.js +216 -0
  343. package/dist/ee/extractors/youtube.d.ts +2 -0
  344. package/dist/ee/extractors/youtube.js +189 -0
  345. package/dist/ee/extractors/zillow.d.ts +54 -0
  346. package/dist/ee/extractors/zillow.js +247 -0
  347. package/dist/ee/extractors-re-export.d.ts +1 -0
  348. package/dist/ee/extractors-re-export.js +1 -0
  349. package/dist/ee/premium-hooks.d.ts +20 -0
  350. package/dist/ee/premium-hooks.js +50 -0
  351. package/dist/ee/spa-detection.d.ts +2 -0
  352. package/dist/ee/spa-detection.js +2 -0
  353. package/dist/ee/stability.d.ts +4 -0
  354. package/dist/ee/stability.js +29 -0
  355. package/dist/ee/swr-cache.d.ts +14 -0
  356. package/dist/ee/swr-cache.js +34 -0
  357. package/dist/index.d.ts +143 -0
  358. package/dist/index.js +291 -0
  359. package/dist/integrations/index.d.ts +2 -0
  360. package/dist/integrations/index.js +2 -0
  361. package/dist/integrations/langchain.d.ts +64 -0
  362. package/dist/integrations/langchain.js +115 -0
  363. package/dist/integrations/llamaindex.d.ts +50 -0
  364. package/dist/integrations/llamaindex.js +91 -0
  365. package/dist/mcp/handlers/act.d.ts +5 -0
  366. package/dist/mcp/handlers/act.js +34 -0
  367. package/dist/mcp/handlers/definitions.d.ts +6 -0
  368. package/dist/mcp/handlers/definitions.js +395 -0
  369. package/dist/mcp/handlers/extract.d.ts +7 -0
  370. package/dist/mcp/handlers/extract.js +135 -0
  371. package/dist/mcp/handlers/fetch.d.ts +6 -0
  372. package/dist/mcp/handlers/fetch.js +98 -0
  373. package/dist/mcp/handlers/find.d.ts +5 -0
  374. package/dist/mcp/handlers/find.js +137 -0
  375. package/dist/mcp/handlers/index.d.ts +13 -0
  376. package/dist/mcp/handlers/index.js +63 -0
  377. package/dist/mcp/handlers/legacy.d.ts +25 -0
  378. package/dist/mcp/handlers/legacy.js +450 -0
  379. package/dist/mcp/handlers/meta.d.ts +6 -0
  380. package/dist/mcp/handlers/meta.js +40 -0
  381. package/dist/mcp/handlers/monitor.d.ts +5 -0
  382. package/dist/mcp/handlers/monitor.js +41 -0
  383. package/dist/mcp/handlers/observe.d.ts +8 -0
  384. package/dist/mcp/handlers/observe.js +37 -0
  385. package/dist/mcp/handlers/read.d.ts +6 -0
  386. package/dist/mcp/handlers/read.js +78 -0
  387. package/dist/mcp/handlers/see.d.ts +5 -0
  388. package/dist/mcp/handlers/see.js +75 -0
  389. package/dist/mcp/handlers/types.d.ts +29 -0
  390. package/dist/mcp/handlers/types.js +28 -0
  391. package/dist/mcp/server.d.ts +7 -0
  392. package/dist/mcp/server.js +108 -0
  393. package/dist/mcp/smart-router.d.ts +23 -0
  394. package/dist/mcp/smart-router.js +178 -0
  395. package/dist/server/app.d.ts +14 -0
  396. package/dist/server/app.js +632 -0
  397. package/dist/server/auth-store.d.ts +28 -0
  398. package/dist/server/auth-store.js +88 -0
  399. package/dist/server/bull-queues.d.ts +60 -0
  400. package/dist/server/bull-queues.js +90 -0
  401. package/dist/server/email-service.d.ts +55 -0
  402. package/dist/server/email-service.js +291 -0
  403. package/dist/server/job-queue.d.ts +100 -0
  404. package/dist/server/job-queue.js +145 -0
  405. package/dist/server/logger.d.ts +10 -0
  406. package/dist/server/logger.js +37 -0
  407. package/dist/server/middleware/audit-log.d.ts +14 -0
  408. package/dist/server/middleware/audit-log.js +73 -0
  409. package/dist/server/middleware/auth.d.ts +35 -0
  410. package/dist/server/middleware/auth.js +225 -0
  411. package/dist/server/middleware/rate-limit.d.ts +50 -0
  412. package/dist/server/middleware/rate-limit.js +270 -0
  413. package/dist/server/middleware/scope-guard.d.ts +25 -0
  414. package/dist/server/middleware/scope-guard.js +45 -0
  415. package/dist/server/middleware/url-validator.d.ts +15 -0
  416. package/dist/server/middleware/url-validator.js +201 -0
  417. package/dist/server/openapi.yaml +6418 -0
  418. package/dist/server/pg-auth-store.d.ts +146 -0
  419. package/dist/server/pg-auth-store.js +576 -0
  420. package/dist/server/pg-job-queue.d.ts +59 -0
  421. package/dist/server/pg-job-queue.js +375 -0
  422. package/dist/server/routes/activity.d.ts +6 -0
  423. package/dist/server/routes/activity.js +79 -0
  424. package/dist/server/routes/admin-active.d.ts +7 -0
  425. package/dist/server/routes/admin-active.js +120 -0
  426. package/dist/server/routes/admin-stats.d.ts +7 -0
  427. package/dist/server/routes/admin-stats.js +176 -0
  428. package/dist/server/routes/agent.d.ts +24 -0
  429. package/dist/server/routes/agent.js +480 -0
  430. package/dist/server/routes/answer.d.ts +5 -0
  431. package/dist/server/routes/answer.js +125 -0
  432. package/dist/server/routes/ask.d.ts +28 -0
  433. package/dist/server/routes/ask.js +295 -0
  434. package/dist/server/routes/batch.d.ts +6 -0
  435. package/dist/server/routes/batch.js +493 -0
  436. package/dist/server/routes/cache-warm.d.ts +25 -0
  437. package/dist/server/routes/cache-warm.js +212 -0
  438. package/dist/server/routes/cli-usage.d.ts +6 -0
  439. package/dist/server/routes/cli-usage.js +127 -0
  440. package/dist/server/routes/compat.d.ts +23 -0
  441. package/dist/server/routes/compat.js +652 -0
  442. package/dist/server/routes/crawl.d.ts +13 -0
  443. package/dist/server/routes/crawl.js +287 -0
  444. package/dist/server/routes/deep-fetch.d.ts +8 -0
  445. package/dist/server/routes/deep-fetch.js +57 -0
  446. package/dist/server/routes/deep-research.d.ts +11 -0
  447. package/dist/server/routes/deep-research.js +232 -0
  448. package/dist/server/routes/demo.d.ts +24 -0
  449. package/dist/server/routes/demo.js +517 -0
  450. package/dist/server/routes/do.d.ts +8 -0
  451. package/dist/server/routes/do.js +72 -0
  452. package/dist/server/routes/extract.d.ts +14 -0
  453. package/dist/server/routes/extract.js +325 -0
  454. package/dist/server/routes/feed.d.ts +15 -0
  455. package/dist/server/routes/feed.js +311 -0
  456. package/dist/server/routes/fetch-queue.d.ts +13 -0
  457. package/dist/server/routes/fetch-queue.js +357 -0
  458. package/dist/server/routes/fetch.d.ts +7 -0
  459. package/dist/server/routes/fetch.js +1274 -0
  460. package/dist/server/routes/go.d.ts +14 -0
  461. package/dist/server/routes/go.js +81 -0
  462. package/dist/server/routes/health.d.ts +11 -0
  463. package/dist/server/routes/health.js +141 -0
  464. package/dist/server/routes/jobs.d.ts +7 -0
  465. package/dist/server/routes/jobs.js +574 -0
  466. package/dist/server/routes/map.d.ts +11 -0
  467. package/dist/server/routes/map.js +116 -0
  468. package/dist/server/routes/mcp.d.ts +14 -0
  469. package/dist/server/routes/mcp.js +197 -0
  470. package/dist/server/routes/metrics.d.ts +37 -0
  471. package/dist/server/routes/metrics.js +149 -0
  472. package/dist/server/routes/oauth.d.ts +9 -0
  473. package/dist/server/routes/oauth.js +396 -0
  474. package/dist/server/routes/playground.d.ts +17 -0
  475. package/dist/server/routes/playground.js +283 -0
  476. package/dist/server/routes/reader.d.ts +18 -0
  477. package/dist/server/routes/reader.js +192 -0
  478. package/dist/server/routes/research.d.ts +14 -0
  479. package/dist/server/routes/research.js +482 -0
  480. package/dist/server/routes/screenshot.d.ts +22 -0
  481. package/dist/server/routes/screenshot.js +820 -0
  482. package/dist/server/routes/search.d.ts +6 -0
  483. package/dist/server/routes/search.js +874 -0
  484. package/dist/server/routes/session.d.ts +17 -0
  485. package/dist/server/routes/session.js +548 -0
  486. package/dist/server/routes/share.d.ts +18 -0
  487. package/dist/server/routes/share.js +462 -0
  488. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  489. package/dist/server/routes/smart-search/handlers/cars.js +102 -0
  490. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  491. package/dist/server/routes/smart-search/handlers/flights.js +72 -0
  492. package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
  493. package/dist/server/routes/smart-search/handlers/general.js +717 -0
  494. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  495. package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
  496. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  497. package/dist/server/routes/smart-search/handlers/products.js +1309 -0
  498. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  499. package/dist/server/routes/smart-search/handlers/rental.js +154 -0
  500. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  501. package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
  502. package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
  503. package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
  504. package/dist/server/routes/smart-search/index.d.ts +19 -0
  505. package/dist/server/routes/smart-search/index.js +546 -0
  506. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  507. package/dist/server/routes/smart-search/intent.js +264 -0
  508. package/dist/server/routes/smart-search/llm.d.ts +16 -0
  509. package/dist/server/routes/smart-search/llm.js +70 -0
  510. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  511. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  512. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  513. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  514. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  515. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  516. package/dist/server/routes/smart-search/types.d.ts +81 -0
  517. package/dist/server/routes/smart-search/types.js +1 -0
  518. package/dist/server/routes/smart-search/utils.d.ts +20 -0
  519. package/dist/server/routes/smart-search/utils.js +146 -0
  520. package/dist/server/routes/stats.d.ts +6 -0
  521. package/dist/server/routes/stats.js +71 -0
  522. package/dist/server/routes/stripe.d.ts +15 -0
  523. package/dist/server/routes/stripe.js +296 -0
  524. package/dist/server/routes/transcript-export.d.ts +10 -0
  525. package/dist/server/routes/transcript-export.js +178 -0
  526. package/dist/server/routes/usage.d.ts +9 -0
  527. package/dist/server/routes/usage.js +279 -0
  528. package/dist/server/routes/users.d.ts +8 -0
  529. package/dist/server/routes/users.js +1867 -0
  530. package/dist/server/routes/watch.d.ts +15 -0
  531. package/dist/server/routes/watch.js +309 -0
  532. package/dist/server/routes/webhooks.d.ts +26 -0
  533. package/dist/server/routes/webhooks.js +170 -0
  534. package/dist/server/routes/youtube.d.ts +6 -0
  535. package/dist/server/routes/youtube.js +130 -0
  536. package/dist/server/sentry.d.ts +14 -0
  537. package/dist/server/sentry.js +104 -0
  538. package/dist/server/types.d.ts +15 -0
  539. package/dist/server/types.js +7 -0
  540. package/dist/server/utils/response.d.ts +44 -0
  541. package/dist/server/utils/response.js +69 -0
  542. package/dist/server/utils/sse.d.ts +22 -0
  543. package/dist/server/utils/sse.js +38 -0
  544. package/dist/types.d.ts +552 -0
  545. package/dist/types.js +39 -0
  546. package/llms.txt +105 -0
  547. package/package.json +189 -0
@@ -0,0 +1,507 @@
1
+ /**
2
+ * LLM-based extraction: sends markdown/text content to an LLM
3
+ * with instructions to extract structured data.
4
+ *
5
+ * Supports:
6
+ * - OpenAI-compatible APIs (OpenAI, custom models via baseUrl)
7
+ * - Anthropic (Claude Haiku, Sonnet, Opus)
8
+ * - Google (Gemini Flash, Pro)
9
+ */
10
+ /** Default models per provider (cheapest/fastest); extractWithLLM falls back to these when the caller supplies no model. */
11
+ export const DEFAULT_PROVIDER_MODELS = {
12
+ openai: 'gpt-4o-mini',
13
+ anthropic: 'claude-haiku-4-5',
14
+ google: 'gemini-2.0-flash',
15
+ };
16
+ // Cost per 1M tokens as [input, output] for known models; estimateCost matches model names against these keys by prefix and reports USD.
17
+ const MODEL_COSTS = {
18
+ 'gpt-4o-mini': [0.15, 0.60],
19
+ 'gpt-4o': [2.50, 10.0],
20
+ };
21
+ const GENERIC_SYSTEM_PROMPT = `You are a data extraction assistant. Extract structured data from the provided web content.
22
+ Return a JSON array of objects. Each object represents one item/listing found on the page.
23
+ Always include these fields when available: title, price, link, rating, description, image.
24
+ If the user provides additional instructions, follow them.
25
+ Return ONLY valid JSON — no markdown, no explanation, just the array.`; // system prompt for the OpenAI path when no schema is supplied
26
+ const SCHEMA_SYSTEM_PROMPT = `You are a data extraction assistant. Extract structured data from the web content below.
27
+ Return a JSON object that EXACTLY matches the provided schema structure.
28
+ Fill in the values from the page content. Use null for fields you can't find.
29
+ Return ONLY valid JSON matching the schema — no markdown, no explanation.`; // system prompt for the OpenAI path when a schema is supplied
30
/**
 * Tell whether `schema` is a "full" JSON Schema rather than a simple example
 * object: its `type` must be the string "object" and its `properties` must be
 * of JavaScript type "object".
 */
export function isFullJsonSchema(schema) {
    if (schema['type'] !== 'object') {
        return false;
    }
    return typeof schema['properties'] === 'object';
}
37
/**
 * Turn a simple example value into a JSON Schema description of its shape.
 *
 * - strings, null and undefined become { type: "string" }
 * - numbers become { type: "integer" } or { type: "number" }
 * - booleans become { type: "boolean" }
 * - arrays become { type: "array", items } where items is derived from the
 *   first element (or {} for an empty array)
 * - plain objects become { type: "object", properties } recursively
 */
export function convertSimpleToJsonSchema(example) {
    return buildSchemaFromValue(example);
}
/** Recursive worker behind convertSimpleToJsonSchema. */
function buildSchemaFromValue(value) {
    if (value === null || value === undefined) {
        return { type: 'string' };
    }
    if (Array.isArray(value)) {
        // Only the first element is inspected; it acts as the template for all items.
        const items = value.length === 0 ? {} : buildSchemaFromValue(value[0]);
        return { type: 'array', items };
    }
    switch (typeof value) {
        case 'number':
            return { type: Number.isInteger(value) ? 'integer' : 'number' };
        case 'boolean':
            return { type: 'boolean' };
        case 'object': {
            const properties = {};
            Object.entries(value).forEach(([name, child]) => {
                properties[name] = buildSchemaFromValue(child);
            });
            return { type: 'object', properties };
        }
        default:
            // Strings, and anything else without a JSON counterpart, are described as strings.
            return { type: 'string' };
    }
}
82
/**
 * Build the user message from content + optional instruction + optional schema.
 *
 * Content longer than 100,000 characters is cut down to its first 100,000
 * characters. (Previously the cut kept only 50,000 characters, so a page just
 * over the limit lost half of its content while a page exactly at the limit
 * was sent whole.)
 *
 * @param content - Page text/markdown to extract from.
 * @param instruction - Optional extra instruction appended after the schema.
 * @param schema - Optional schema, pretty-printed into the message.
 * @returns The complete user message string.
 */
export function buildUserMessage(content, instruction, schema) {
    const MAX_CONTENT_CHARS = 100_000;
    const truncated = content.length > MAX_CONTENT_CHARS
        ? content.slice(0, MAX_CONTENT_CHARS)
        : content;
    let msg = `Here is the web content to extract data from:\n\n${truncated}`;
    if (schema) {
        msg += `\n\nExtract data matching this schema: ${JSON.stringify(schema, null, 2)}`;
    }
    if (instruction) {
        msg += `\n\nAdditional instruction: ${instruction}`;
    }
    return msg;
}
97
/**
 * Estimate the USD cost of a call from its model name and token counts.
 *
 * The model is matched against the MODEL_COSTS keys by prefix, so dated
 * variants of a known model resolve to the base entry. The first matching key
 * in MODEL_COSTS declaration order wins. Returns undefined for unknown models.
 */
export function estimateCost(model, inputTokens, outputTokens) {
    for (const [knownModel, rates] of Object.entries(MODEL_COSTS)) {
        if (!model.startsWith(knownModel)) {
            continue;
        }
        const [inputRate, outputRate] = rates;
        const inputCost = (inputTokens / 1_000_000) * inputRate;
        const outputCost = (outputTokens / 1_000_000) * outputRate;
        return inputCost + outputCost;
    }
    return undefined;
}
108
/**
 * Parse the LLM response text into an items array.
 *
 * Accepted shapes:
 * - a bare array `[...]`, returned as-is
 * - an object wrapping an array under `items`, `data` or `results`
 * - any other object, wrapped in a one-element array
 * - any other JSON value (number, string, null, ...), which yields `[]`
 *
 * When the text is not valid JSON as a whole (e.g. the model added a preamble),
 * the outermost `{...}` and `[...]` spans are extracted and tried in the order
 * they appear in the text. The span that opens first is the outer container,
 * so an object holding an array is parsed as the object rather than as its
 * nested array, and a failure on one span still lets the other be tried.
 *
 * @param text - Raw response text from the model.
 * @param _schema - Unused; kept for signature compatibility.
 * @returns The extracted items.
 * @throws {Error} If no JSON value can be recovered from the text.
 */
export function parseItems(text, _schema) {
    const trimmed = text.trim();
    let parsed;
    try {
        parsed = JSON.parse(trimmed);
    }
    catch {
        // Non-global matches carry `.index`, which tells us which container opens first.
        const candidates = [trimmed.match(/\{[\s\S]*\}/), trimmed.match(/\[[\s\S]*\]/)]
            .filter((m) => m !== null)
            .sort((a, b) => a.index - b.index);
        for (const candidate of candidates) {
            try {
                parsed = JSON.parse(candidate[0]);
                break;
            }
            catch { /* try the next candidate */ }
        }
        if (parsed === undefined) {
            throw new Error(`Failed to parse LLM response as JSON: ${trimmed.slice(0, 200)}`);
        }
    }
    // Bare array
    if (Array.isArray(parsed)) {
        return parsed;
    }
    // { items: [...] } or { data: [...] } or { results: [...] }, else a single object
    if (parsed && typeof parsed === 'object') {
        const obj = parsed;
        if (Array.isArray(obj['items']))
            return obj['items'];
        if (Array.isArray(obj['data']))
            return obj['data'];
        if (Array.isArray(obj['results']))
            return obj['results'];
        return [obj];
    }
    return [];
}
158
/**
 * Check that the first parsed item roughly matches the expected schema shape.
 * Only logs a warning when top-level keys are missing; never throws and never
 * alters the result.
 *
 * Array-shaped schemas are unwrapped before comparing, because `result` holds
 * the individual items:
 * - a JSON Schema `{ type: "array", items: {...} }` is compared via `items`
 * - a simple example array `[{...}]` is compared via its first element
 * Without this, every response was reported as missing `type`/`items` or `0`.
 *
 * @param result - Items returned by the model.
 * @param schema - Full JSON Schema or simple example the caller asked for.
 */
function validateSchemaShape(result, schema) {
    if (result.length === 0)
        return;
    const firstItem = result[0];
    if (!firstItem)
        return;
    let shape = schema;
    let unwrappedJsonSchemaArray = false;
    if (Array.isArray(shape)) {
        shape = shape[0];
    }
    else if (shape && shape['type'] === 'array' && shape['items'] && typeof shape['items'] === 'object') {
        shape = shape['items'];
        unwrappedJsonSchemaArray = true;
    }
    if (!shape || typeof shape !== 'object')
        return;
    let expectedKeys;
    if (isFullJsonSchema(shape)) {
        // Full JSON Schema: the expected keys are the declared properties.
        expectedKeys = Object.keys(shape['properties'] || {});
    }
    else if (unwrappedJsonSchemaArray) {
        // Items of a JSON Schema array that are not object schemas have no keys to compare.
        return;
    }
    else {
        // Simple example object: its own keys are the expected keys.
        expectedKeys = Object.keys(shape);
    }
    if (expectedKeys.length === 0)
        return;
    const actualKeys = Object.keys(firstItem);
    const missingKeys = expectedKeys.filter(k => !actualKeys.includes(k));
    if (missingKeys.length > 0) {
        console.warn(`[webpeel] Schema validation warning: response missing expected keys: ${missingKeys.join(', ')}`);
    }
}
188
/**
 * Choose the `response_format` value for the OpenAI chat-completions request.
 *
 * A full JSON Schema (per isFullJsonSchema) is sent as a strict `json_schema`
 * format named "extraction". A missing schema or a simple example schema falls
 * back to plain `json_object` mode.
 */
function buildResponseFormat(schema) {
    const useStructuredOutput = Boolean(schema) && isFullJsonSchema(schema);
    if (!useStructuredOutput) {
        return { type: 'json_object' };
    }
    const jsonSchema = {
        name: 'extraction',
        strict: true,
        schema,
    };
    return { type: 'json_schema', json_schema: jsonSchema };
}
209
+ // ─── Multi-provider helpers ────────────────────────────────────────────────
210
/**
 * Remove a markdown code fence (```json ... ``` or ``` ... ```) from LLM
 * output, keeping the fenced content in place, then trim the result.
 * If stripping leaves nothing, the trimmed original text is returned instead.
 */
function stripMarkdownCodeBlocks(text) {
    const fencePattern = /^```(?:json)?\s*\n?([\s\S]*?)\n?```\s*$/m;
    const unfenced = text.replace(fencePattern, (_whole, inner) => inner).trim();
    if (unfenced) {
        return unfenced;
    }
    return text.trim();
}
219
/**
 * Attempt to fix common JSON issues: `//` and block comments, trailing commas.
 *
 * Both passes are string-aware. Each regex first tries to match a complete
 * double-quoted string literal and puts it back untouched, so comment markers
 * or commas inside values (for example "https://example.com" or "a,}") are
 * never altered. Only text outside string literals is cleaned.
 *
 * @param text - Possibly invalid JSON text.
 * @returns The cleaned, trimmed text (not guaranteed to be valid JSON).
 */
function fixJsonString(text) {
    const withoutComments = text.replace(
        /("(?:\\[\s\S]|[^"\\])*")|\/\/[^\n]*|\/\*[\s\S]*?\*\//g,
        // Group 1 is set only when a string literal matched; comments collapse to ''.
        (_match, stringLiteral) => stringLiteral ?? '');
    const withoutTrailingCommas = withoutComments.replace(
        /("(?:\\[\s\S]|[^"\\])*")|,(\s*[}\]])/g,
        // Keep string literals; for a trailing comma keep only the whitespace + closer.
        (_match, stringLiteral, closer) => stringLiteral ?? closer);
    return withoutTrailingCommas.trim();
}
229
/**
 * Parse a raw LLM response into a JSON value (object or array).
 *
 * Candidates are tried in this order, each first as-is and then after
 * fixJsonString: the fence-stripped text, the outermost `{...}` span, the
 * outermost `[...]` span. The first successful parse wins.
 *
 * Throws an Error carrying the original text on `rawOutput` when nothing parses.
 */
function parseJsonSafe(text) {
    const cleaned = stripMarkdownCodeBlocks(text);
    const objectSpan = cleaned.match(/\{[\s\S]*\}/);
    const arraySpan = cleaned.match(/\[[\s\S]*\]/);
    const candidates = [cleaned];
    if (objectSpan) {
        candidates.push(objectSpan[0]);
    }
    if (arraySpan) {
        candidates.push(arraySpan[0]);
    }
    // Applied lazily so the repair pass only runs after the raw attempt failed.
    const transforms = [(raw) => raw, (raw) => fixJsonString(raw)];
    for (const candidate of candidates) {
        for (const transform of transforms) {
            try {
                return JSON.parse(transform(candidate));
            }
            catch { /* continue */ }
        }
    }
    const failure = new Error(`Failed to parse LLM response as JSON: ${text.slice(0, 200)}`);
    failure.rawOutput = text;
    throw failure;
}
273
/**
 * Coerce a parsed JSON value into an array of items.
 *
 * Arrays pass through unchanged. Objects are unwrapped when they hold an array
 * under `items`, `data` or `results` (checked in that order); any other object
 * becomes a one-element array. Everything else yields an empty array.
 */
function normalizeToItems(parsed) {
    if (Array.isArray(parsed)) {
        return parsed;
    }
    if (!parsed || typeof parsed !== 'object') {
        return [];
    }
    for (const wrapperKey of ['items', 'data', 'results']) {
        const wrapped = parsed[wrapperKey];
        if (Array.isArray(wrapped)) {
            return wrapped;
        }
    }
    return [parsed];
}
291
/**
 * Run an extraction through the Anthropic Messages API.
 *
 * Sends a single user message containing the schema, the optional
 * instructions and the first 30,000 characters of the page content, then
 * parses the text blocks of the reply as JSON.
 *
 * Returns `{ items, tokens: { input, output }, model }`.
 * Throws on non-2xx responses (dedicated messages for 401 and 429) and throws
 * an `llm_parse_error` Error with `rawOutput` when the reply is not parseable.
 */
async function callAnthropicExtract(params) {
    const { content, schema, prompt, llmApiKey, llmModel } = params;
    const model = llmModel || DEFAULT_PROVIDER_MODELS.anthropic;
    const pageExcerpt = content.slice(0, 30_000);
    const userContent = [
        `Extract data from this webpage content according to the JSON schema.\n\n`,
        `Schema: ${JSON.stringify(schema)}\n`,
        prompt ? `Instructions: ${prompt}\n` : '',
        `\nWebpage content:\n${pageExcerpt}\n\n`,
        `Return ONLY valid JSON matching the schema. No explanation.`,
    ].join('');
    const requestBody = {
        model,
        max_tokens: 4096,
        messages: [{ role: 'user', content: userContent }],
    };
    const response = await fetch('https://api.anthropic.com/v1/messages', {
        method: 'POST',
        headers: {
            'x-api-key': llmApiKey,
            'anthropic-version': '2023-06-01',
            'content-type': 'application/json',
        },
        body: JSON.stringify(requestBody),
    });
    if (!response.ok) {
        // The body is read before branching; a failed read degrades to an empty string.
        const body = await response.text().catch(() => '');
        switch (response.status) {
            case 401:
                throw new Error('LLM API authentication failed (401). Check your Anthropic API key.');
            case 429:
                throw new Error('LLM API rate limit exceeded (429). Please wait and retry.');
            default: {
                const detail = body ? ` — ${body.slice(0, 200)}` : '';
                throw new Error(`Anthropic API error: HTTP ${response.status}${detail}`);
            }
        }
    }
    const data = await response.json();
    const textBlocks = (data.content ?? []).filter(block => block.type === 'text');
    const text = textBlocks.map(block => block.text).join('');
    let parsed;
    try {
        parsed = parseJsonSafe(text);
    }
    catch {
        const parseFailure = new Error('llm_parse_error');
        parseFailure.rawOutput = text;
        throw parseFailure;
    }
    const tokens = {
        input: data.usage?.input_tokens ?? 0,
        output: data.usage?.output_tokens ?? 0,
    };
    return {
        items: normalizeToItems(parsed),
        tokens,
        model: data.model || model,
    };
}
344
/**
 * Call the Google Gemini API (`generateContent`) for extraction.
 *
 * Sends one user part containing the schema, the optional instructions and the
 * first 30,000 characters of the page content, and requests a JSON response
 * via `responseMimeType`.
 *
 * The API key is sent in the `x-goog-api-key` header rather than a `?key=`
 * query parameter, so the secret does not end up in request URLs that proxies,
 * access logs or error reports may record.
 *
 * @param params - `{ content, schema, prompt, llmApiKey, llmModel }`.
 * @returns `{ items, tokens: { input, output }, model }`.
 * @throws {Error} On non-2xx responses (dedicated messages for 401/403 and
 *   429), or an `llm_parse_error` Error with `rawOutput` when the reply cannot
 *   be parsed as JSON.
 */
async function callGoogleExtract(params) {
    const { content, schema, prompt, llmApiKey, llmModel } = params;
    const model = llmModel || DEFAULT_PROVIDER_MODELS.google;
    const truncated = content.slice(0, 30_000);
    const userText = `Extract data from this webpage content according to the JSON schema.\n\n` +
        `Schema: ${JSON.stringify(schema)}\n` +
        (prompt ? `Instructions: ${prompt}\n` : '') +
        `\nWebpage content:\n${truncated}\n\n` +
        `Return ONLY valid JSON matching the schema. No explanation.`;
    const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent`, {
        method: 'POST',
        headers: {
            'content-type': 'application/json',
            'x-goog-api-key': llmApiKey,
        },
        body: JSON.stringify({
            contents: [{ parts: [{ text: userText }] }],
            generationConfig: { responseMimeType: 'application/json' },
        }),
    });
    if (!response.ok) {
        // Best-effort read of the error body; a failed read degrades to an empty string.
        const body = await response.text().catch(() => '');
        if (response.status === 401 || response.status === 403)
            throw new Error('LLM API authentication failed. Check your Google API key.');
        if (response.status === 429)
            throw new Error('LLM API rate limit exceeded (429). Please wait and retry.');
        throw new Error(`Google API error: HTTP ${response.status}${body ? ` — ${body.slice(0, 200)}` : ''}`);
    }
    const data = await response.json();
    // Only the first candidate is used; its parts are concatenated into one string.
    const text = (data.candidates?.[0]?.content?.parts ?? []).map(p => p.text).join('');
    let parsed;
    try {
        parsed = parseJsonSafe(text);
    }
    catch {
        const e = new Error('llm_parse_error');
        e.rawOutput = text;
        throw e;
    }
    return {
        items: normalizeToItems(parsed),
        tokens: {
            input: data.usageMetadata?.promptTokenCount ?? 0,
            output: data.usageMetadata?.candidatesTokenCount ?? 0,
        },
        model: data.modelVersion || model,
    };
}
392
+ // ─── Main export ───────────────────────────────────────────────────────────
393
/**
 * Extract structured data from content using an LLM.
 *
 * Supports OpenAI (default), Anthropic, and Google providers.
 * Pass `llmProvider` + `llmApiKey` to select a provider.
 * Falls back to the OpenAI-compatible path when no provider is specified (any
 * provider value other than 'anthropic' or 'google' also takes that path).
 *
 * API key resolution: `llmApiKey`, then the legacy `apiKey` option. The
 * `OPENAI_API_KEY` environment variable is consulted only on the
 * OpenAI-compatible path, so an OpenAI secret is never sent to Anthropic or
 * Google when the caller forgot to pass a key for those providers.
 *
 * @param options - Extraction options: `content`, optional `schema`,
 *   `prompt`/`instruction`, `llmProvider`, `llmApiKey`/`apiKey`,
 *   `llmModel`/`model`, `baseUrl` (OpenAI path only) and `maxTokens`
 *   (OpenAI path only).
 * @returns `{ items, tokensUsed, model, provider }`, plus `cost` on the OpenAI
 *   path (undefined when the model has no known pricing).
 * @throws {Error} When no API key can be resolved, when the provider responds
 *   with a non-2xx status, or when the response cannot be parsed as JSON.
 */
export async function extractWithLLM(options) {
    // Resolve aliases: new-style params take precedence over old-style
    const resolvedProvider = (options.llmProvider || 'openai');
    const usesDedicatedProvider = resolvedProvider === 'anthropic' || resolvedProvider === 'google';
    const explicitApiKey = options.llmApiKey || options.apiKey;
    const resolvedApiKey = usesDedicatedProvider
        ? explicitApiKey
        : explicitApiKey || process.env.OPENAI_API_KEY;
    const resolvedModel = options.llmModel || options.model;
    const resolvedInstruction = options.prompt || options.instruction;
    const { content, baseUrl = 'https://api.openai.com/v1', maxTokens = 4000, } = options;
    if (!resolvedApiKey) {
        throw new Error('LLM extraction requires an API key.\n' +
            (usesDedicatedProvider
                ? `Provide llmApiKey in the request for the "${resolvedProvider}" provider.`
                : 'Set OPENAI_API_KEY environment variable or provide llmApiKey in the request.'));
    }
    // ── Anthropic path ────────────────────────────────────────────────────────
    if (resolvedProvider === 'anthropic') {
        const schema = options.schema || {};
        const result = await callAnthropicExtract({
            content,
            schema,
            prompt: resolvedInstruction,
            llmApiKey: resolvedApiKey,
            llmModel: resolvedModel || DEFAULT_PROVIDER_MODELS.anthropic,
        });
        if (options.schema) {
            validateSchemaShape(result.items, options.schema);
        }
        return {
            items: result.items,
            tokensUsed: result.tokens,
            model: result.model,
            provider: 'anthropic',
        };
    }
    // ── Google path ───────────────────────────────────────────────────────────
    if (resolvedProvider === 'google') {
        const schema = options.schema || {};
        const result = await callGoogleExtract({
            content,
            schema,
            prompt: resolvedInstruction,
            llmApiKey: resolvedApiKey,
            llmModel: resolvedModel || DEFAULT_PROVIDER_MODELS.google,
        });
        if (options.schema) {
            validateSchemaShape(result.items, options.schema);
        }
        return {
            items: result.items,
            tokensUsed: result.tokens,
            model: result.model,
            provider: 'google',
        };
    }
    // ── OpenAI path (default, backward-compatible) ────────────────────────────
    const finalModel = resolvedModel || DEFAULT_PROVIDER_MODELS.openai;
    // Resolve schema: convert simple schemas to full JSON Schema if needed
    let resolvedSchema = options.schema;
    if (resolvedSchema && !isFullJsonSchema(resolvedSchema)) {
        resolvedSchema = convertSimpleToJsonSchema(resolvedSchema);
    }
    // Choose system prompt based on whether a schema is provided
    const systemPrompt = resolvedSchema ? SCHEMA_SYSTEM_PROMPT : GENERIC_SYSTEM_PROMPT;
    const userMessage = buildUserMessage(content, resolvedInstruction, resolvedSchema ?? options.schema);
    const responseFormat = buildResponseFormat(resolvedSchema);
    // Tolerate a baseUrl given with trailing slash(es) so the path never contains "//".
    const endpoint = `${baseUrl.replace(/\/+$/, '')}/chat/completions`;
    const response = await fetch(endpoint, {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'Authorization': `Bearer ${resolvedApiKey}`,
        },
        body: JSON.stringify({
            model: finalModel,
            messages: [
                { role: 'system', content: systemPrompt },
                { role: 'user', content: userMessage },
            ],
            temperature: 0,
            max_tokens: maxTokens,
            response_format: responseFormat,
        }),
    });
    if (!response.ok) {
        // Best-effort read of the error body; a failed read degrades to an empty string.
        const body = await response.text().catch(() => '');
        if (response.status === 401) {
            throw new Error(`LLM API authentication failed (401). Check your API key.`);
        }
        if (response.status === 429) {
            throw new Error(`LLM API rate limit exceeded (429). Please wait and retry.`);
        }
        throw new Error(`LLM API error: HTTP ${response.status}${body ? ` — ${body.slice(0, 200)}` : ''}`);
    }
    const data = await response.json();
    const rawText = data.choices?.[0]?.message?.content ?? '';
    const items = parseItems(rawText, resolvedSchema);
    // Validate schema shape and warn if mismatch
    if (resolvedSchema) {
        validateSchemaShape(items, resolvedSchema);
    }
    const inputTokens = data.usage?.prompt_tokens ?? 0;
    const outputTokens = data.usage?.completion_tokens ?? 0;
    // Prefer the model name reported by the API; it may be a more specific variant.
    const resolvedFinalModel = data.model ?? finalModel;
    const cost = estimateCost(resolvedFinalModel, inputTokens, outputTokens);
    return {
        items,
        tokensUsed: { input: inputTokens, output: outputTokens },
        model: resolvedFinalModel,
        cost,
        provider: 'openai',
    };
}
@@ -0,0 +1,100 @@
1
+ /**
2
+ * Unified LLM Provider Abstraction
3
+ *
4
+ * Supports 7 providers:
5
+ * 1. Cloudflare Workers AI (free default, with daily neuron cap)
6
+ * 2. OpenAI (BYOK — also handles any OpenAI-compatible endpoint via baseUrl)
7
+ * 3. Anthropic (BYOK)
8
+ * 4. Google Gemini (BYOK)
9
+ * 5. Ollama (local, OpenAI-compatible)
10
+ * 6. Cerebras (fast inference)
11
+ * 7. Any OpenAI-compatible gateway (Glama, OpenRouter, etc.) via provider='openai' + baseUrl
12
+ */
13
+ export type DeepResearchLLMProvider = 'cloudflare' | 'openai' | 'anthropic' | 'google' | 'ollama' | 'cerebras';
14
+ export interface LLMConfig { // Provider selection and credentials; first argument of callLLM.
15
+ provider: DeepResearchLLMProvider; // One of the DeepResearchLLMProvider literals.
16
+ apiKey?: string; // Optional; presumably the BYOK credential for the chosen provider — confirm against the implementation.
17
+ model?: string; // Optional; presumably a provider-specific model id with a per-provider default — confirm against the implementation.
18
+ /** For Ollama: base endpoint URL. Default: http://localhost:11434 */
19
+ endpoint?: string;
20
+ /**
21
+ * Base URL for OpenAI-compatible APIs (Glama, OpenRouter, custom deployments).
22
+ * Only used when provider='openai'. Default: https://api.openai.com/v1
23
+ *
24
+ * Examples:
25
+ * - 'https://glama.ai/api/gateway/openai/v1'
26
+ * - 'https://openrouter.ai/api/v1'
27
+ * - 'https://api.openai.com/v1' (default)
28
+ */
29
+ baseUrl?: string;
30
+ }
31
+ export interface LLMMessage { // One chat message; LLMCallOptions.messages is an array of these.
32
+ role: 'system' | 'user' | 'assistant'; // Author of the message; restricted to these three literals.
33
+ content: string; // Message text.
34
+ }
35
+ export interface LLMCallOptions { // Per-call options; second argument of callLLM.
36
+ messages: LLMMessage[]; // Conversation to send; the only required field.
37
+ stream?: boolean; // NOTE(review): presumably requests a streamed response — confirm against the implementation.
38
+ onChunk?: (text: string) => void; // Callback receiving text; presumably invoked per streamed chunk — confirm.
39
+ signal?: AbortSignal; // Standard AbortSignal; presumably used to cancel the request — confirm.
40
+ maxTokens?: number; // NOTE(review): presumably caps output tokens; default not visible here.
41
+ temperature?: number; // NOTE(review): presumably the sampling temperature; default not visible here.
42
+ }
43
+ export interface LLMCallResult { // Value the promise returned by callLLM resolves to.
44
+ text: string; // Text returned by the model.
45
+ usage: { // Usage figures for the call; presumably token counts — confirm against the implementation.
46
+ input: number;
47
+ output: number;
48
+ };
49
+ }
50
+ export interface FreeTierLimitError { // Shape thrown by callLLM when the Cloudflare free tier cap is exceeded; detect it with isFreeTierLimitError.
51
+ error: 'free_tier_limit'; // Literal tag identifying this error shape.
52
+ message: string; // Description of the failure.
53
+ }
54
+ /**
55
+ * Estimate neuron cost for a Cloudflare Workers AI call.
56
+ * Token count: split by whitespace * 1.3
57
+ */
58
+ export declare function estimateNeurons(inputText: string, outputText: string): number;
59
+ /** Get current neuron usage for today */
60
+ export declare function getNeuronUsage(): {
61
+ date: string;
62
+ neurons: number;
63
+ cap: number;
64
+ remaining: number;
65
+ };
66
+ /** Add neurons to today's usage (for testing / external tracking) */
67
+ export declare function addNeuronUsage(neurons: number): void;
68
+ /** Reset neuron usage (for testing) */
69
+ export declare function resetNeuronUsage(): void;
70
+ /**
71
+ * Call an LLM using the unified provider abstraction.
72
+ *
73
+ * @throws {FreeTierLimitError} if Cloudflare free tier cap is exceeded
74
+ * @throws {Error} for other failures
75
+ */
76
+ export declare function callLLM(config: LLMConfig, options: LLMCallOptions): Promise<LLMCallResult>;
77
+ /**
78
+ * Get the default LLM config based on available environment variables.
79
+ *
80
+ * Priority order (high-quality models first):
81
+ * Anthropic → OpenAI → Google → Cerebras → Glama → OpenRouter
82
+ * → Ollama → Cloudflare (free tier fallback).
83
+ *
84
+ * Glama/OpenRouter/custom OPENAI_BASE_URL are routed through the 'openai'
85
+ * provider with a custom `baseUrl`, so any OpenAI-compatible gateway works.
86
+ *
87
+ * If no BYOK key and no Cloudflare credentials are configured, returns a
88
+ * cloudflare config that will throw a clear error when callLLM is invoked.
89
+ */
90
+ export declare function getDefaultLLMConfig(): LLMConfig;
91
+ /**
92
+ * Get a fast/cheap LLM config suitable for quick classification & synthesis.
93
+ * Same priority as getDefaultLLMConfig but selects smaller models by default.
94
+ *
95
+ * Designed to replace ad-hoc provider detection in server routes.
96
+ * Returns null if no provider is configured.
97
+ */
98
+ export declare function getQuickLLMConfig(): LLMConfig | null;
99
+ /** Type guard: check if a thrown value is a FreeTierLimitError */
100
+ export declare function isFreeTierLimitError(err: unknown): err is FreeTierLimitError;