@iflow-mcp/jakeliume-webpeel 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (547) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +313 -0
  3. package/dist/cache.d.ts +30 -0
  4. package/dist/cache.js +139 -0
  5. package/dist/cli/commands/auth.d.ts +5 -0
  6. package/dist/cli/commands/auth.js +411 -0
  7. package/dist/cli/commands/doctor.d.ts +37 -0
  8. package/dist/cli/commands/doctor.js +371 -0
  9. package/dist/cli/commands/fetch.d.ts +6 -0
  10. package/dist/cli/commands/fetch.js +1345 -0
  11. package/dist/cli/commands/guide.d.ts +2 -0
  12. package/dist/cli/commands/guide.js +183 -0
  13. package/dist/cli/commands/interact.d.ts +5 -0
  14. package/dist/cli/commands/interact.js +840 -0
  15. package/dist/cli/commands/jobs.d.ts +5 -0
  16. package/dist/cli/commands/jobs.js +997 -0
  17. package/dist/cli/commands/monitor.d.ts +12 -0
  18. package/dist/cli/commands/monitor.js +197 -0
  19. package/dist/cli/commands/observe.d.ts +12 -0
  20. package/dist/cli/commands/observe.js +158 -0
  21. package/dist/cli/commands/screenshot.d.ts +5 -0
  22. package/dist/cli/commands/screenshot.js +282 -0
  23. package/dist/cli/commands/search.d.ts +5 -0
  24. package/dist/cli/commands/search.js +1021 -0
  25. package/dist/cli/commands/setup.d.ts +13 -0
  26. package/dist/cli/commands/setup.js +244 -0
  27. package/dist/cli/commands/skill.d.ts +15 -0
  28. package/dist/cli/commands/skill.js +195 -0
  29. package/dist/cli/utils.d.ts +84 -0
  30. package/dist/cli/utils.js +806 -0
  31. package/dist/cli-auth.d.ts +75 -0
  32. package/dist/cli-auth.js +369 -0
  33. package/dist/cli.d.ts +17 -0
  34. package/dist/cli.js +99 -0
  35. package/dist/core/actions.d.ts +69 -0
  36. package/dist/core/actions.js +495 -0
  37. package/dist/core/agent.d.ts +98 -0
  38. package/dist/core/agent.js +558 -0
  39. package/dist/core/answer.d.ts +42 -0
  40. package/dist/core/answer.js +395 -0
  41. package/dist/core/application-tracker.d.ts +84 -0
  42. package/dist/core/application-tracker.js +184 -0
  43. package/dist/core/apply.d.ts +162 -0
  44. package/dist/core/apply.js +816 -0
  45. package/dist/core/auth-detection.d.ts +35 -0
  46. package/dist/core/auth-detection.js +358 -0
  47. package/dist/core/auto-extract.d.ts +82 -0
  48. package/dist/core/auto-extract.js +604 -0
  49. package/dist/core/auto-interact.d.ts +23 -0
  50. package/dist/core/auto-interact.js +246 -0
  51. package/dist/core/bm25-filter.d.ts +66 -0
  52. package/dist/core/bm25-filter.js +288 -0
  53. package/dist/core/branding.d.ts +54 -0
  54. package/dist/core/branding.js +234 -0
  55. package/dist/core/browser-fetch.d.ts +323 -0
  56. package/dist/core/browser-fetch.js +1600 -0
  57. package/dist/core/browser-pool.d.ts +91 -0
  58. package/dist/core/browser-pool.js +550 -0
  59. package/dist/core/budget.d.ts +42 -0
  60. package/dist/core/budget.js +324 -0
  61. package/dist/core/business-intel.d.ts +47 -0
  62. package/dist/core/business-intel.js +279 -0
  63. package/dist/core/cache.d.ts +13 -0
  64. package/dist/core/cache.js +121 -0
  65. package/dist/core/cf-worker-proxy.d.ts +32 -0
  66. package/dist/core/cf-worker-proxy.js +87 -0
  67. package/dist/core/challenge-detection.d.ts +26 -0
  68. package/dist/core/challenge-detection.js +468 -0
  69. package/dist/core/change-tracking.d.ts +75 -0
  70. package/dist/core/change-tracking.js +276 -0
  71. package/dist/core/chunker.d.ts +46 -0
  72. package/dist/core/chunker.js +249 -0
  73. package/dist/core/chunking.d.ts +42 -0
  74. package/dist/core/chunking.js +181 -0
  75. package/dist/core/circuit-breaker.d.ts +44 -0
  76. package/dist/core/circuit-breaker.js +85 -0
  77. package/dist/core/content-pruner.d.ts +47 -0
  78. package/dist/core/content-pruner.js +425 -0
  79. package/dist/core/cookie-cache.d.ts +60 -0
  80. package/dist/core/cookie-cache.js +163 -0
  81. package/dist/core/crawl-checkpoint.d.ts +54 -0
  82. package/dist/core/crawl-checkpoint.js +104 -0
  83. package/dist/core/crawler.d.ts +84 -0
  84. package/dist/core/crawler.js +349 -0
  85. package/dist/core/cross-verify.d.ts +27 -0
  86. package/dist/core/cross-verify.js +93 -0
  87. package/dist/core/deep-fetch.d.ts +74 -0
  88. package/dist/core/deep-fetch.js +405 -0
  89. package/dist/core/deep-research.d.ts +141 -0
  90. package/dist/core/deep-research.js +972 -0
  91. package/dist/core/design-analysis.d.ts +70 -0
  92. package/dist/core/design-analysis.js +490 -0
  93. package/dist/core/design-compare.d.ts +38 -0
  94. package/dist/core/design-compare.js +264 -0
  95. package/dist/core/diff.d.ts +61 -0
  96. package/dist/core/diff.js +289 -0
  97. package/dist/core/dns-cache.d.ts +20 -0
  98. package/dist/core/dns-cache.js +198 -0
  99. package/dist/core/documents.d.ts +23 -0
  100. package/dist/core/documents.js +123 -0
  101. package/dist/core/domain-memory.d.ts +66 -0
  102. package/dist/core/domain-memory.js +163 -0
  103. package/dist/core/domain-verify.d.ts +40 -0
  104. package/dist/core/domain-verify.js +379 -0
  105. package/dist/core/engine-ranker.d.ts +112 -0
  106. package/dist/core/engine-ranker.js +395 -0
  107. package/dist/core/extract-inline.d.ts +38 -0
  108. package/dist/core/extract-inline.js +215 -0
  109. package/dist/core/extract-listings.d.ts +38 -0
  110. package/dist/core/extract-listings.js +461 -0
  111. package/dist/core/extract.d.ts +9 -0
  112. package/dist/core/extract.js +139 -0
  113. package/dist/core/fetch-cache.d.ts +57 -0
  114. package/dist/core/fetch-cache.js +95 -0
  115. package/dist/core/fetcher.d.ts +13 -0
  116. package/dist/core/fetcher.js +12 -0
  117. package/dist/core/google-cache.d.ts +29 -0
  118. package/dist/core/google-cache.js +180 -0
  119. package/dist/core/google-serp-parser.d.ts +82 -0
  120. package/dist/core/google-serp-parser.js +287 -0
  121. package/dist/core/hotel-search.d.ts +122 -0
  122. package/dist/core/hotel-search.js +382 -0
  123. package/dist/core/http-fetch.d.ts +72 -0
  124. package/dist/core/http-fetch.js +820 -0
  125. package/dist/core/human.d.ts +175 -0
  126. package/dist/core/human.js +680 -0
  127. package/dist/core/image-caption.d.ts +44 -0
  128. package/dist/core/image-caption.js +271 -0
  129. package/dist/core/jobs.d.ts +75 -0
  130. package/dist/core/jobs.js +634 -0
  131. package/dist/core/json-ld.d.ts +15 -0
  132. package/dist/core/json-ld.js +617 -0
  133. package/dist/core/language-detect.d.ts +18 -0
  134. package/dist/core/language-detect.js +135 -0
  135. package/dist/core/links.d.ts +10 -0
  136. package/dist/core/links.js +44 -0
  137. package/dist/core/llm-extract.d.ts +71 -0
  138. package/dist/core/llm-extract.js +507 -0
  139. package/dist/core/llm-provider.d.ts +100 -0
  140. package/dist/core/llm-provider.js +702 -0
  141. package/dist/core/local-search.d.ts +60 -0
  142. package/dist/core/local-search.js +308 -0
  143. package/dist/core/logger.d.ts +28 -0
  144. package/dist/core/logger.js +104 -0
  145. package/dist/core/map.d.ts +33 -0
  146. package/dist/core/map.js +127 -0
  147. package/dist/core/markdown.d.ts +92 -0
  148. package/dist/core/markdown.js +809 -0
  149. package/dist/core/metadata.d.ts +34 -0
  150. package/dist/core/metadata.js +422 -0
  151. package/dist/core/observe.d.ts +113 -0
  152. package/dist/core/observe.js +395 -0
  153. package/dist/core/ocr.d.ts +12 -0
  154. package/dist/core/ocr.js +33 -0
  155. package/dist/core/paginate.d.ts +31 -0
  156. package/dist/core/paginate.js +106 -0
  157. package/dist/core/pdf.d.ts +8 -0
  158. package/dist/core/pdf.js +25 -0
  159. package/dist/core/peel-tls.d.ts +25 -0
  160. package/dist/core/peel-tls.js +220 -0
  161. package/dist/core/pipeline.d.ts +132 -0
  162. package/dist/core/pipeline.js +1666 -0
  163. package/dist/core/profiles.d.ts +61 -0
  164. package/dist/core/profiles.js +350 -0
  165. package/dist/core/prompt-guard.d.ts +30 -0
  166. package/dist/core/prompt-guard.js +119 -0
  167. package/dist/core/proxy-config.d.ts +90 -0
  168. package/dist/core/proxy-config.js +172 -0
  169. package/dist/core/quick-answer.d.ts +53 -0
  170. package/dist/core/quick-answer.js +833 -0
  171. package/dist/core/rate-governor.d.ts +80 -0
  172. package/dist/core/rate-governor.js +238 -0
  173. package/dist/core/readability.d.ts +57 -0
  174. package/dist/core/readability.js +533 -0
  175. package/dist/core/research.d.ts +66 -0
  176. package/dist/core/research.js +270 -0
  177. package/dist/core/retry.d.ts +60 -0
  178. package/dist/core/retry.js +119 -0
  179. package/dist/core/safe-browsing.d.ts +30 -0
  180. package/dist/core/safe-browsing.js +206 -0
  181. package/dist/core/schema-extraction.d.ts +66 -0
  182. package/dist/core/schema-extraction.js +352 -0
  183. package/dist/core/schema-postprocess.d.ts +32 -0
  184. package/dist/core/schema-postprocess.js +469 -0
  185. package/dist/core/schema-templates.d.ts +19 -0
  186. package/dist/core/schema-templates.js +143 -0
  187. package/dist/core/screenshot.d.ts +224 -0
  188. package/dist/core/screenshot.js +207 -0
  189. package/dist/core/search-engines.d.ts +25 -0
  190. package/dist/core/search-engines.js +182 -0
  191. package/dist/core/search-provider.d.ts +243 -0
  192. package/dist/core/search-provider.js +1629 -0
  193. package/dist/core/searxng-provider.d.ts +35 -0
  194. package/dist/core/searxng-provider.js +105 -0
  195. package/dist/core/selective-evidence.d.ts +151 -0
  196. package/dist/core/selective-evidence.js +389 -0
  197. package/dist/core/site-search.d.ts +44 -0
  198. package/dist/core/site-search.js +252 -0
  199. package/dist/core/sitemap.d.ts +23 -0
  200. package/dist/core/sitemap.js +105 -0
  201. package/dist/core/source-credibility.d.ts +29 -0
  202. package/dist/core/source-credibility.js +584 -0
  203. package/dist/core/source-scoring.d.ts +166 -0
  204. package/dist/core/source-scoring.js +396 -0
  205. package/dist/core/stemmer.d.ts +38 -0
  206. package/dist/core/stemmer.js +509 -0
  207. package/dist/core/strategies.d.ts +104 -0
  208. package/dist/core/strategies.js +1044 -0
  209. package/dist/core/strategy-hooks.d.ts +145 -0
  210. package/dist/core/strategy-hooks.js +74 -0
  211. package/dist/core/structured-extract.d.ts +43 -0
  212. package/dist/core/structured-extract.js +550 -0
  213. package/dist/core/summarize.d.ts +17 -0
  214. package/dist/core/summarize.js +78 -0
  215. package/dist/core/synonyms.d.ts +42 -0
  216. package/dist/core/synonyms.js +184 -0
  217. package/dist/core/system-monitor.d.ts +61 -0
  218. package/dist/core/system-monitor.js +133 -0
  219. package/dist/core/table-format.d.ts +30 -0
  220. package/dist/core/table-format.js +146 -0
  221. package/dist/core/threat-feeds.d.ts +23 -0
  222. package/dist/core/threat-feeds.js +104 -0
  223. package/dist/core/timing.d.ts +21 -0
  224. package/dist/core/timing.js +33 -0
  225. package/dist/core/transcript-export.d.ts +47 -0
  226. package/dist/core/transcript-export.js +107 -0
  227. package/dist/core/user-agents.d.ts +82 -0
  228. package/dist/core/user-agents.js +239 -0
  229. package/dist/core/vertical-search.d.ts +54 -0
  230. package/dist/core/vertical-search.js +158 -0
  231. package/dist/core/watch-manager.d.ts +175 -0
  232. package/dist/core/watch-manager.js +416 -0
  233. package/dist/core/watch.d.ts +101 -0
  234. package/dist/core/watch.js +389 -0
  235. package/dist/core/youtube.d.ts +130 -0
  236. package/dist/core/youtube.js +1175 -0
  237. package/dist/ee/challenge-re-export.d.ts +1 -0
  238. package/dist/ee/challenge-re-export.js +1 -0
  239. package/dist/ee/challenge-solver.d.ts +72 -0
  240. package/dist/ee/challenge-solver.js +720 -0
  241. package/dist/ee/domain-extractors.d.ts +8 -0
  242. package/dist/ee/domain-extractors.js +8 -0
  243. package/dist/ee/domain-intel.d.ts +16 -0
  244. package/dist/ee/domain-intel.js +133 -0
  245. package/dist/ee/extractors/allrecipes.d.ts +2 -0
  246. package/dist/ee/extractors/allrecipes.js +120 -0
  247. package/dist/ee/extractors/amazon.d.ts +2 -0
  248. package/dist/ee/extractors/amazon.js +78 -0
  249. package/dist/ee/extractors/arxiv.d.ts +2 -0
  250. package/dist/ee/extractors/arxiv.js +137 -0
  251. package/dist/ee/extractors/bestbuy.d.ts +2 -0
  252. package/dist/ee/extractors/bestbuy.js +78 -0
  253. package/dist/ee/extractors/carscom.d.ts +2 -0
  254. package/dist/ee/extractors/carscom.js +121 -0
  255. package/dist/ee/extractors/coingecko.d.ts +2 -0
  256. package/dist/ee/extractors/coingecko.js +134 -0
  257. package/dist/ee/extractors/craigslist.d.ts +2 -0
  258. package/dist/ee/extractors/craigslist.js +92 -0
  259. package/dist/ee/extractors/devto.d.ts +2 -0
  260. package/dist/ee/extractors/devto.js +135 -0
  261. package/dist/ee/extractors/ebay.d.ts +2 -0
  262. package/dist/ee/extractors/ebay.js +90 -0
  263. package/dist/ee/extractors/espn.d.ts +2 -0
  264. package/dist/ee/extractors/espn.js +260 -0
  265. package/dist/ee/extractors/etsy.d.ts +2 -0
  266. package/dist/ee/extractors/etsy.js +52 -0
  267. package/dist/ee/extractors/facebook.d.ts +2 -0
  268. package/dist/ee/extractors/facebook.js +46 -0
  269. package/dist/ee/extractors/github.d.ts +2 -0
  270. package/dist/ee/extractors/github.js +196 -0
  271. package/dist/ee/extractors/google-flights.d.ts +2 -0
  272. package/dist/ee/extractors/google-flights.js +176 -0
  273. package/dist/ee/extractors/hackernews.d.ts +2 -0
  274. package/dist/ee/extractors/hackernews.js +147 -0
  275. package/dist/ee/extractors/imdb.d.ts +2 -0
  276. package/dist/ee/extractors/imdb.js +172 -0
  277. package/dist/ee/extractors/index.d.ts +26 -0
  278. package/dist/ee/extractors/index.js +247 -0
  279. package/dist/ee/extractors/instagram.d.ts +2 -0
  280. package/dist/ee/extractors/instagram.js +102 -0
  281. package/dist/ee/extractors/kalshi.d.ts +2 -0
  282. package/dist/ee/extractors/kalshi.js +121 -0
  283. package/dist/ee/extractors/kayak-cars.d.ts +2 -0
  284. package/dist/ee/extractors/kayak-cars.js +270 -0
  285. package/dist/ee/extractors/linkedin.d.ts +2 -0
  286. package/dist/ee/extractors/linkedin.js +113 -0
  287. package/dist/ee/extractors/medium.d.ts +2 -0
  288. package/dist/ee/extractors/medium.js +130 -0
  289. package/dist/ee/extractors/news.d.ts +4 -0
  290. package/dist/ee/extractors/news.js +173 -0
  291. package/dist/ee/extractors/npm.d.ts +2 -0
  292. package/dist/ee/extractors/npm.js +86 -0
  293. package/dist/ee/extractors/pdf.d.ts +2 -0
  294. package/dist/ee/extractors/pdf.js +108 -0
  295. package/dist/ee/extractors/pinterest.d.ts +2 -0
  296. package/dist/ee/extractors/pinterest.js +34 -0
  297. package/dist/ee/extractors/polymarket.d.ts +2 -0
  298. package/dist/ee/extractors/polymarket.js +358 -0
  299. package/dist/ee/extractors/producthunt.d.ts +2 -0
  300. package/dist/ee/extractors/producthunt.js +88 -0
  301. package/dist/ee/extractors/pubmed.d.ts +2 -0
  302. package/dist/ee/extractors/pubmed.js +162 -0
  303. package/dist/ee/extractors/pypi.d.ts +2 -0
  304. package/dist/ee/extractors/pypi.js +80 -0
  305. package/dist/ee/extractors/reddit.d.ts +2 -0
  306. package/dist/ee/extractors/reddit.js +438 -0
  307. package/dist/ee/extractors/redfin.d.ts +2 -0
  308. package/dist/ee/extractors/redfin.js +156 -0
  309. package/dist/ee/extractors/semanticscholar.d.ts +2 -0
  310. package/dist/ee/extractors/semanticscholar.js +131 -0
  311. package/dist/ee/extractors/shared.d.ts +12 -0
  312. package/dist/ee/extractors/shared.js +76 -0
  313. package/dist/ee/extractors/soundcloud.d.ts +2 -0
  314. package/dist/ee/extractors/soundcloud.js +34 -0
  315. package/dist/ee/extractors/sportsbetting.d.ts +2 -0
  316. package/dist/ee/extractors/sportsbetting.js +37 -0
  317. package/dist/ee/extractors/spotify.d.ts +2 -0
  318. package/dist/ee/extractors/spotify.js +34 -0
  319. package/dist/ee/extractors/stackoverflow.d.ts +2 -0
  320. package/dist/ee/extractors/stackoverflow.js +61 -0
  321. package/dist/ee/extractors/substack.d.ts +2 -0
  322. package/dist/ee/extractors/substack.js +115 -0
  323. package/dist/ee/extractors/substackroot.d.ts +2 -0
  324. package/dist/ee/extractors/substackroot.js +46 -0
  325. package/dist/ee/extractors/tiktok.d.ts +2 -0
  326. package/dist/ee/extractors/tiktok.js +29 -0
  327. package/dist/ee/extractors/tradingview.d.ts +2 -0
  328. package/dist/ee/extractors/tradingview.js +182 -0
  329. package/dist/ee/extractors/twitch.d.ts +2 -0
  330. package/dist/ee/extractors/twitch.js +36 -0
  331. package/dist/ee/extractors/twitter.d.ts +2 -0
  332. package/dist/ee/extractors/twitter.js +327 -0
  333. package/dist/ee/extractors/types.d.ts +14 -0
  334. package/dist/ee/extractors/types.js +1 -0
  335. package/dist/ee/extractors/walmart.d.ts +2 -0
  336. package/dist/ee/extractors/walmart.js +50 -0
  337. package/dist/ee/extractors/weather.d.ts +2 -0
  338. package/dist/ee/extractors/weather.js +133 -0
  339. package/dist/ee/extractors/wikipedia.d.ts +4 -0
  340. package/dist/ee/extractors/wikipedia.js +235 -0
  341. package/dist/ee/extractors/yelp.d.ts +2 -0
  342. package/dist/ee/extractors/yelp.js +216 -0
  343. package/dist/ee/extractors/youtube.d.ts +2 -0
  344. package/dist/ee/extractors/youtube.js +189 -0
  345. package/dist/ee/extractors/zillow.d.ts +54 -0
  346. package/dist/ee/extractors/zillow.js +247 -0
  347. package/dist/ee/extractors-re-export.d.ts +1 -0
  348. package/dist/ee/extractors-re-export.js +1 -0
  349. package/dist/ee/premium-hooks.d.ts +20 -0
  350. package/dist/ee/premium-hooks.js +50 -0
  351. package/dist/ee/spa-detection.d.ts +2 -0
  352. package/dist/ee/spa-detection.js +2 -0
  353. package/dist/ee/stability.d.ts +4 -0
  354. package/dist/ee/stability.js +29 -0
  355. package/dist/ee/swr-cache.d.ts +14 -0
  356. package/dist/ee/swr-cache.js +34 -0
  357. package/dist/index.d.ts +143 -0
  358. package/dist/index.js +291 -0
  359. package/dist/integrations/index.d.ts +2 -0
  360. package/dist/integrations/index.js +2 -0
  361. package/dist/integrations/langchain.d.ts +64 -0
  362. package/dist/integrations/langchain.js +115 -0
  363. package/dist/integrations/llamaindex.d.ts +50 -0
  364. package/dist/integrations/llamaindex.js +91 -0
  365. package/dist/mcp/handlers/act.d.ts +5 -0
  366. package/dist/mcp/handlers/act.js +34 -0
  367. package/dist/mcp/handlers/definitions.d.ts +6 -0
  368. package/dist/mcp/handlers/definitions.js +395 -0
  369. package/dist/mcp/handlers/extract.d.ts +7 -0
  370. package/dist/mcp/handlers/extract.js +135 -0
  371. package/dist/mcp/handlers/fetch.d.ts +6 -0
  372. package/dist/mcp/handlers/fetch.js +98 -0
  373. package/dist/mcp/handlers/find.d.ts +5 -0
  374. package/dist/mcp/handlers/find.js +137 -0
  375. package/dist/mcp/handlers/index.d.ts +13 -0
  376. package/dist/mcp/handlers/index.js +63 -0
  377. package/dist/mcp/handlers/legacy.d.ts +25 -0
  378. package/dist/mcp/handlers/legacy.js +450 -0
  379. package/dist/mcp/handlers/meta.d.ts +6 -0
  380. package/dist/mcp/handlers/meta.js +40 -0
  381. package/dist/mcp/handlers/monitor.d.ts +5 -0
  382. package/dist/mcp/handlers/monitor.js +41 -0
  383. package/dist/mcp/handlers/observe.d.ts +8 -0
  384. package/dist/mcp/handlers/observe.js +37 -0
  385. package/dist/mcp/handlers/read.d.ts +6 -0
  386. package/dist/mcp/handlers/read.js +78 -0
  387. package/dist/mcp/handlers/see.d.ts +5 -0
  388. package/dist/mcp/handlers/see.js +75 -0
  389. package/dist/mcp/handlers/types.d.ts +29 -0
  390. package/dist/mcp/handlers/types.js +28 -0
  391. package/dist/mcp/server.d.ts +7 -0
  392. package/dist/mcp/server.js +108 -0
  393. package/dist/mcp/smart-router.d.ts +23 -0
  394. package/dist/mcp/smart-router.js +178 -0
  395. package/dist/server/app.d.ts +14 -0
  396. package/dist/server/app.js +632 -0
  397. package/dist/server/auth-store.d.ts +28 -0
  398. package/dist/server/auth-store.js +88 -0
  399. package/dist/server/bull-queues.d.ts +60 -0
  400. package/dist/server/bull-queues.js +90 -0
  401. package/dist/server/email-service.d.ts +55 -0
  402. package/dist/server/email-service.js +291 -0
  403. package/dist/server/job-queue.d.ts +100 -0
  404. package/dist/server/job-queue.js +145 -0
  405. package/dist/server/logger.d.ts +10 -0
  406. package/dist/server/logger.js +37 -0
  407. package/dist/server/middleware/audit-log.d.ts +14 -0
  408. package/dist/server/middleware/audit-log.js +73 -0
  409. package/dist/server/middleware/auth.d.ts +35 -0
  410. package/dist/server/middleware/auth.js +225 -0
  411. package/dist/server/middleware/rate-limit.d.ts +50 -0
  412. package/dist/server/middleware/rate-limit.js +270 -0
  413. package/dist/server/middleware/scope-guard.d.ts +25 -0
  414. package/dist/server/middleware/scope-guard.js +45 -0
  415. package/dist/server/middleware/url-validator.d.ts +15 -0
  416. package/dist/server/middleware/url-validator.js +201 -0
  417. package/dist/server/openapi.yaml +6418 -0
  418. package/dist/server/pg-auth-store.d.ts +146 -0
  419. package/dist/server/pg-auth-store.js +576 -0
  420. package/dist/server/pg-job-queue.d.ts +59 -0
  421. package/dist/server/pg-job-queue.js +375 -0
  422. package/dist/server/routes/activity.d.ts +6 -0
  423. package/dist/server/routes/activity.js +79 -0
  424. package/dist/server/routes/admin-active.d.ts +7 -0
  425. package/dist/server/routes/admin-active.js +120 -0
  426. package/dist/server/routes/admin-stats.d.ts +7 -0
  427. package/dist/server/routes/admin-stats.js +176 -0
  428. package/dist/server/routes/agent.d.ts +24 -0
  429. package/dist/server/routes/agent.js +480 -0
  430. package/dist/server/routes/answer.d.ts +5 -0
  431. package/dist/server/routes/answer.js +125 -0
  432. package/dist/server/routes/ask.d.ts +28 -0
  433. package/dist/server/routes/ask.js +295 -0
  434. package/dist/server/routes/batch.d.ts +6 -0
  435. package/dist/server/routes/batch.js +493 -0
  436. package/dist/server/routes/cache-warm.d.ts +25 -0
  437. package/dist/server/routes/cache-warm.js +212 -0
  438. package/dist/server/routes/cli-usage.d.ts +6 -0
  439. package/dist/server/routes/cli-usage.js +127 -0
  440. package/dist/server/routes/compat.d.ts +23 -0
  441. package/dist/server/routes/compat.js +652 -0
  442. package/dist/server/routes/crawl.d.ts +13 -0
  443. package/dist/server/routes/crawl.js +287 -0
  444. package/dist/server/routes/deep-fetch.d.ts +8 -0
  445. package/dist/server/routes/deep-fetch.js +57 -0
  446. package/dist/server/routes/deep-research.d.ts +11 -0
  447. package/dist/server/routes/deep-research.js +232 -0
  448. package/dist/server/routes/demo.d.ts +24 -0
  449. package/dist/server/routes/demo.js +517 -0
  450. package/dist/server/routes/do.d.ts +8 -0
  451. package/dist/server/routes/do.js +72 -0
  452. package/dist/server/routes/extract.d.ts +14 -0
  453. package/dist/server/routes/extract.js +325 -0
  454. package/dist/server/routes/feed.d.ts +15 -0
  455. package/dist/server/routes/feed.js +311 -0
  456. package/dist/server/routes/fetch-queue.d.ts +13 -0
  457. package/dist/server/routes/fetch-queue.js +357 -0
  458. package/dist/server/routes/fetch.d.ts +7 -0
  459. package/dist/server/routes/fetch.js +1274 -0
  460. package/dist/server/routes/go.d.ts +14 -0
  461. package/dist/server/routes/go.js +81 -0
  462. package/dist/server/routes/health.d.ts +11 -0
  463. package/dist/server/routes/health.js +141 -0
  464. package/dist/server/routes/jobs.d.ts +7 -0
  465. package/dist/server/routes/jobs.js +574 -0
  466. package/dist/server/routes/map.d.ts +11 -0
  467. package/dist/server/routes/map.js +116 -0
  468. package/dist/server/routes/mcp.d.ts +14 -0
  469. package/dist/server/routes/mcp.js +197 -0
  470. package/dist/server/routes/metrics.d.ts +37 -0
  471. package/dist/server/routes/metrics.js +149 -0
  472. package/dist/server/routes/oauth.d.ts +9 -0
  473. package/dist/server/routes/oauth.js +396 -0
  474. package/dist/server/routes/playground.d.ts +17 -0
  475. package/dist/server/routes/playground.js +283 -0
  476. package/dist/server/routes/reader.d.ts +18 -0
  477. package/dist/server/routes/reader.js +192 -0
  478. package/dist/server/routes/research.d.ts +14 -0
  479. package/dist/server/routes/research.js +482 -0
  480. package/dist/server/routes/screenshot.d.ts +22 -0
  481. package/dist/server/routes/screenshot.js +820 -0
  482. package/dist/server/routes/search.d.ts +6 -0
  483. package/dist/server/routes/search.js +874 -0
  484. package/dist/server/routes/session.d.ts +17 -0
  485. package/dist/server/routes/session.js +548 -0
  486. package/dist/server/routes/share.d.ts +18 -0
  487. package/dist/server/routes/share.js +462 -0
  488. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  489. package/dist/server/routes/smart-search/handlers/cars.js +102 -0
  490. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  491. package/dist/server/routes/smart-search/handlers/flights.js +72 -0
  492. package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
  493. package/dist/server/routes/smart-search/handlers/general.js +717 -0
  494. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  495. package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
  496. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  497. package/dist/server/routes/smart-search/handlers/products.js +1309 -0
  498. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  499. package/dist/server/routes/smart-search/handlers/rental.js +154 -0
  500. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  501. package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
  502. package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
  503. package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
  504. package/dist/server/routes/smart-search/index.d.ts +19 -0
  505. package/dist/server/routes/smart-search/index.js +546 -0
  506. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  507. package/dist/server/routes/smart-search/intent.js +264 -0
  508. package/dist/server/routes/smart-search/llm.d.ts +16 -0
  509. package/dist/server/routes/smart-search/llm.js +70 -0
  510. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  511. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  512. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  513. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  514. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  515. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  516. package/dist/server/routes/smart-search/types.d.ts +81 -0
  517. package/dist/server/routes/smart-search/types.js +1 -0
  518. package/dist/server/routes/smart-search/utils.d.ts +20 -0
  519. package/dist/server/routes/smart-search/utils.js +146 -0
  520. package/dist/server/routes/stats.d.ts +6 -0
  521. package/dist/server/routes/stats.js +71 -0
  522. package/dist/server/routes/stripe.d.ts +15 -0
  523. package/dist/server/routes/stripe.js +296 -0
  524. package/dist/server/routes/transcript-export.d.ts +10 -0
  525. package/dist/server/routes/transcript-export.js +178 -0
  526. package/dist/server/routes/usage.d.ts +9 -0
  527. package/dist/server/routes/usage.js +279 -0
  528. package/dist/server/routes/users.d.ts +8 -0
  529. package/dist/server/routes/users.js +1867 -0
  530. package/dist/server/routes/watch.d.ts +15 -0
  531. package/dist/server/routes/watch.js +309 -0
  532. package/dist/server/routes/webhooks.d.ts +26 -0
  533. package/dist/server/routes/webhooks.js +170 -0
  534. package/dist/server/routes/youtube.d.ts +6 -0
  535. package/dist/server/routes/youtube.js +130 -0
  536. package/dist/server/sentry.d.ts +14 -0
  537. package/dist/server/sentry.js +104 -0
  538. package/dist/server/types.d.ts +15 -0
  539. package/dist/server/types.js +7 -0
  540. package/dist/server/utils/response.d.ts +44 -0
  541. package/dist/server/utils/response.js +69 -0
  542. package/dist/server/utils/sse.d.ts +22 -0
  543. package/dist/server/utils/sse.js +38 -0
  544. package/dist/types.d.ts +552 -0
  545. package/dist/types.js +39 -0
  546. package/llms.txt +105 -0
  547. package/package.json +189 -0
@@ -0,0 +1,198 @@
1
+ /**
2
+ * DNS Pre-Resolution Cache
3
+ *
4
+ * Warms a local Map<hostname, ip[]> (when startDnsWarmup/warmupDnsCache is called) with IPv4 addresses for a built-in list of popular domains
5
+ * and exposes a custom lookup function compatible with undici's Agent `connect.lookup`.
6
+ */
7
+ import dns from 'node:dns';
8
+ import net from 'node:net';
9
// Lifetime of a cache entry, in milliseconds (30 minutes).
const DNS_CACHE_TTL_MS = 1000 * 60 * 30;

// Hostnames resolved ahead of time by warmupDnsCache() when no explicit list is given.
const DNS_WARMUP_DOMAINS = [
    'github.com', 'www.github.com', 'raw.githubusercontent.com', 'api.github.com',
    'wikipedia.org', 'en.wikipedia.org',
    'news.ycombinator.com',
    'stackoverflow.com', 'www.stackoverflow.com',
    'developer.mozilla.org',
    'react.dev', 'nextjs.org', 'vercel.com', 'tailwindcss.com', 'supabase.com',
    'npmjs.com', 'www.npmjs.com',
    'reddit.com', 'www.reddit.com',
    'www.cloudflare.com',
    'medium.com',
    'linkedin.com', 'www.linkedin.com',
    'www.bloomberg.com', 'www.glassdoor.com',
    'arxiv.org', 'www.sec.gov',
    'w3.org', 'www.w3.org', 'tools.ietf.org', 'unicode.org',
    'www.bbc.com', 'bbc.co.uk',
    'stripe.com', 'docs.stripe.com',
    'vuejs.org', 'angular.io',
    'www.washingtonpost.com', 'www.theguardian.com',
    'techcrunch.com', 'www.wired.com', 'arstechnica.com',
    'docs.google.com', 'drive.google.com',
    'www.notion.so', 'www.producthunt.com', 'www.crunchbase.com',
    'news.google.com', 'www.youtube.com',
    'example.com', 'httpbin.org',
    'docs.python.org', 'nodejs.org',
    'openai.com', 'anthropic.com',
    'x.com', 'twitter.com',
    'www.nytimes.com', 'www.wsj.com', 'www.reuters.com', 'www.theverge.com', 'www.cnn.com',
    'www.amazon.com', 'www.apple.com', 'www.microsoft.com',
];

// normalized hostname -> { ips, expiresAt }
const dnsCache = new Map();

// Set once startDnsWarmup() has kicked off the background warm-up.
let warmupStarted = false;

// Shared counter used to rotate through the addresses of multi-IP hosts.
let roundRobinCursor = 0;
80
/**
 * Canonicalizes a hostname so it can be used as a cache key.
 *
 * @param {string} hostname Hostname as supplied by the caller.
 * @returns {string} The hostname with surrounding whitespace removed, lower-cased.
 */
function normalizeHostname(hostname) {
    const trimmed = hostname.trim();
    return trimmed.toLowerCase();
}
83
/**
 * Drops the cache entry for `hostname` when its expiry time has passed.
 * Does nothing if the hostname is not cached.
 *
 * @param {string} hostname Cache key; callers pass an already-normalized hostname.
 */
function pruneIfExpired(hostname) {
    const cached = dnsCache.get(hostname);
    if (cached && cached.expiresAt <= Date.now()) {
        dnsCache.delete(hostname);
    }
}
91
/**
 * Looks up the cached addresses for a hostname.
 *
 * An expired entry is evicted before the lookup, so stale addresses are never returned.
 *
 * @param {string} hostname Hostname to look up (normalized internally).
 * @returns {string[] | null} A copy of the cached address list, or `null`
 *   when nothing usable is cached.
 */
export function getCachedDns(hostname) {
    const key = normalizeHostname(hostname);
    pruneIfExpired(key);
    const hit = dnsCache.get(key);
    const hasAddresses = Boolean(hit) && hit.ips.length !== 0;
    // Hand out a copy so callers cannot mutate the cached array.
    return hasAddresses ? [...hit.ips] : null;
}
99
/**
 * Stores the addresses for a hostname with a fresh TTL.
 * Empty address lists are ignored, so the cache never holds negative entries.
 *
 * @param {string} hostname Hostname the addresses belong to (normalized internally).
 * @param {string[]} ips Resolved addresses; duplicates are removed before storing.
 */
function setCachedDns(hostname, ips) {
    if (ips.length > 0) {
        const key = normalizeHostname(hostname);
        const uniqueIps = Array.from(new Set(ips));
        const expiresAt = Date.now() + DNS_CACHE_TTL_MS;
        dnsCache.set(key, { ips: uniqueIps, expiresAt });
    }
}
108
/**
 * Returns the IPv4 addresses for a hostname, using the cache when possible.
 *
 * On a cache miss the hostname is resolved with `dns.promises.resolve4` and a
 * non-empty result is stored in the cache.
 *
 * @param {string} hostname Hostname to resolve (normalized internally).
 * @returns {Promise<string[]>} The addresses, or an empty array when
 *   resolution fails. This function does not reject on resolver errors.
 */
export async function resolveAndCache(hostname) {
    const key = normalizeHostname(hostname);
    const fromCache = getCachedDns(key);
    if (fromCache !== null) {
        return fromCache;
    }
    let resolved;
    try {
        resolved = await dns.promises.resolve4(key);
        if (resolved.length !== 0) {
            setCachedDns(key, resolved);
        }
    }
    catch {
        // Any failure is reported as "no addresses".
        resolved = [];
    }
    return resolved;
}
123
/**
 * Picks one address from a list, rotating through multi-address lists.
 *
 * The rotation counter is shared across all hostnames and only advances when
 * the list holds something other than exactly one address.
 *
 * @param {string[]} ips Candidate addresses; callers pass a non-empty list.
 * @returns {string} The chosen address.
 */
function selectCachedIp(ips) {
    const count = ips.length;
    if (count !== 1) {
        const index = roundRobinCursor % count;
        // Wrap the counter so it stays a safe integer.
        roundRobinCursor = (roundRobinCursor + 1) % Number.MAX_SAFE_INTEGER;
        return ips[index];
    }
    return ips[0];
}
130
/**
 * Custom lookup function compatible with undici's Agent `connect.lookup`.
 *
 * undici passes `{ hints: 1024, all: true }` — so when `all` is true the
 * callback must receive `(err, entries: { address, family }[])`.
 * When `all` is false (or absent), the callback is `(err, address, family)`.
 *
 * Resolution order:
 *  1. An IP literal is echoed back immediately with its own family.
 *  2. Unless IPv6 is explicitly requested (`options.family === 6`), cached
 *     IPv4 addresses are served; on a miss `resolveAndCache` is tried, and
 *     `dns.lookup` is used when that yields no addresses.
 *  3. IPv6 requests go straight to `dns.lookup`.
 *
 * @param {string} hostname Hostname or IP literal to resolve.
 * @param {object} [options] `dns.lookup`-style options; only `all` and
 *   `family` are inspected here, the object is forwarded unchanged on fallback.
 * @param {Function} callback Node-style lookup callback (shape depends on `all`).
 */
export function cachedLookup(hostname, options, callback) {
    // If hostname is already an IP, return immediately
    const ipFamily = net.isIP(hostname);
    if (ipFamily === 4 || ipFamily === 6) {
        if (options?.all) {
            callback(null, [{ address: hostname, family: ipFamily }]);
        }
        else {
            callback(null, hostname, ipFamily);
        }
        return;
    }
    // Only use cache for IPv4 lookups (family 0 or 4)
    const requestedFamily = typeof options?.family === 'number' ? options.family : 0;
    if (requestedFamily === 6) {
        // IPv6 requested — native lookup only
        dns.lookup(hostname, options, callback);
        return;
    }
    // Reports IPv4 addresses in the shape the caller asked for.
    const deliverIpv4 = (ips) => {
        if (options?.all) {
            callback(null, ips.map((ip) => ({ address: ip, family: 4 })));
        }
        else {
            callback(null, selectCachedIp(ips), 4);
        }
    };
    const cachedIps = getCachedDns(hostname);
    if (cachedIps && cachedIps.length > 0) {
        deliverIpv4(cachedIps);
        return;
    }
    // Async resolve, fall back to native lookup on failure.
    // `delivered` records that the callback has already been handed a result.
    // Without it, an exception thrown by the callback inside `.then` would be
    // caught by `.catch` below and trigger a second lookup and a second
    // callback invocation for the same request.
    let delivered = false;
    void resolveAndCache(hostname)
        .then((resolvedIps) => {
            if (resolvedIps.length === 0) {
                dns.lookup(hostname, options, callback);
                return;
            }
            delivered = true;
            deliverIpv4(resolvedIps);
        })
        .catch(() => {
            // A throw from the callback itself is still swallowed here, but
            // the callback is never invoked twice.
            if (!delivered) {
                dns.lookup(hostname, options, callback);
            }
        });
}
185
/**
 * Resolves a batch of hostnames concurrently so their addresses are cached.
 *
 * @param {string[]} [domains] Hostnames to resolve; defaults to DNS_WARMUP_DOMAINS.
 * @returns {Promise<void>} Settles once every lookup has settled; individual
 *   failures are ignored.
 */
export async function warmupDnsCache(domains = DNS_WARMUP_DOMAINS) {
    const lookups = domains.map((domain) => resolveAndCache(domain));
    await Promise.allSettled(lookups);
}
188
/**
 * Starts the default warm-up in the background, at most once per process.
 * Later calls are no-ops, and errors from the warm-up are ignored.
 */
export function startDnsWarmup() {
    if (!warmupStarted) {
        warmupStarted = true;
        void warmupDnsCache().catch(() => {
            // Best-effort only.
        });
    }
}
196
/**
 * Removes every entry from the DNS cache.
 * Only the cache map is touched; the warm-up flag and rotation counter are left as they are.
 */
export function clearDnsCache() {
    dnsCache.clear();
}
@@ -0,0 +1,23 @@
1
+ /**
2
+ * Document (PDF/DOCX) parsing utilities.
3
+ *
4
+ * Keeps binary/document parsing separate from the HTML scraping pipeline.
5
+ */
6
/** Output format accepted by the `format` option of `extractDocumentToFormat`. */
export type DocumentFormat =
    | 'markdown'
    | 'text'
    | 'html';
7
/** Result of `extractDocumentToFormat`: the rendered content plus metadata about it. */
export interface DocumentExtractionResult {
    /** Extracted document content, rendered in the requested format. */
    content: string;
    /** Descriptive metadata; format-specific keys may appear alongside the fixed ones. */
    metadata: {
        /** Document title. */
        title: string;
        /** Content type reported for the document. */
        contentType: string;
        /** Number of words in the extracted text. */
        wordCount: number;
        /** Any additional, format-specific metadata. */
        [key: string]: any;
    };
}
16
/**
 * Reduce a Content-Type header to its bare media type: the part before the
 * first `;`, trimmed and lower-cased. Returns `''` for a missing or empty header.
 */
export declare function normalizeContentType(contentTypeHeader: string | undefined | null): string;
/**
 * Whether the header's media type is `application/pdf` or ends with `+pdf`.
 */
export declare function isPdfContentType(contentTypeHeader: string | undefined | null): boolean;
/**
 * Whether the header's media type is
 * `application/vnd.openxmlformats-officedocument.wordprocessingml.document`.
 */
export declare function isDocxContentType(contentTypeHeader: string | undefined | null): boolean;
/**
 * Extract a PDF or DOCX buffer into the requested output format.
 *
 * @param buffer Raw document bytes.
 * @param options `url` and `contentType` are used to detect the document type
 *   (the URL also supplies a fallback title); `format` defaults to `'markdown'`.
 * @returns The extracted content and its metadata.
 * @throws Error "Unsupported document type: …" when the input is detected as
 *   neither PDF nor DOCX.
 */
export declare function extractDocumentToFormat(buffer: Buffer, options?: {
    url?: string;
    contentType?: string;
    format?: DocumentFormat;
}): Promise<DocumentExtractionResult>;
@@ -0,0 +1,123 @@
1
+ /**
2
+ * Document (PDF/DOCX) parsing utilities.
3
+ *
4
+ * Keeps binary/document parsing separate from the HTML scraping pipeline.
5
+ */
6
+ import { htmlToMarkdown, htmlToText } from './markdown.js';
7
+ import { extractPdf } from './pdf.js';
8
/**
 * Reduce a Content-Type header to its bare media type.
 *
 * @param contentTypeHeader Raw header value; may be empty, null or undefined.
 * @returns The text before the first `;`, trimmed and lower-cased, or `''`
 *   when there is nothing usable.
 */
export function normalizeContentType(contentTypeHeader) {
    if (!contentTypeHeader) {
        return '';
    }
    const [mediaType] = contentTypeHeader.split(';');
    return mediaType?.trim().toLowerCase() || '';
}
13
+ export function isPdfContentType(contentTypeHeader) {
14
+ const ct = normalizeContentType(contentTypeHeader);
15
+ return ct === 'application/pdf' || ct.endsWith('+pdf');
16
+ }
17
+ export function isDocxContentType(contentTypeHeader) {
18
+ const ct = normalizeContentType(contentTypeHeader);
19
+ return ct === 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
20
+ }
21
/**
 * Return the percent-decoded last path segment of a URL.
 *
 * @returns `''` when the URL is missing, cannot be parsed, has no path
 *   segments, or its last segment is not validly percent-encoded.
 */
function basenameFromUrl(url) {
    if (!url) {
        return '';
    }
    try {
        const segments = new URL(url).pathname.split('/').filter(Boolean);
        const lastSegment = segments.length > 0 ? segments[segments.length - 1] : '';
        return decodeURIComponent(lastSegment);
    }
    catch {
        // Invalid URL or malformed percent-encoding.
        return '';
    }
}
33
/** Drop a trailing `.pdf` or `.docx` extension (case-insensitive) from a file name. */
function stripExtension(name) {
    const documentExtension = /\.(pdf|docx)$/i;
    return name.replace(documentExtension, '');
}
36
/**
 * Escape the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`) so the
 * text can be embedded in markup.
 */
function escapeHtml(text) {
    const entityFor = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    };
    // Single pass: each special character is replaced exactly once.
    return text.replace(/[&<>"']/g, (ch) => entityFor[ch]);
}
44
/** Count the whitespace-separated words in `text` (0 for empty or blank text). */
function countWords(text) {
    const words = text.match(/\S+/g) ?? [];
    return words.length;
}
52
/**
 * Tidy extracted plain text while keeping paragraph breaks.
 *
 * pdf-parse returns lots of line breaks, so: CRLF becomes LF, runs of three or
 * more newlines collapse to a single blank line, runs of spaces/tabs collapse
 * to one space, and the result is trimmed.
 */
function normalizePlainText(text) {
    const unixNewlines = text.replace(/\r\n/g, '\n');
    const paragraphsOnly = unixNewlines.replace(/\n{3,}/g, '\n\n');
    const singleSpaced = paragraphsOnly.replace(/[ \t]+/g, ' ');
    return singleSpaced.trim();
}
60
+ export async function extractDocumentToFormat(buffer, options = {}) {
61
+ const { url, contentType, format = 'markdown' } = options;
62
+ const normalized = normalizeContentType(contentType);
63
+ const urlLower = (url || '').toLowerCase();
64
+ const isPdf = isPdfContentType(normalized) || urlLower.endsWith('.pdf');
65
+ const isDocx = isDocxContentType(normalized) || urlLower.endsWith('.docx');
66
+ if (isPdf) {
67
+ const pdf = await extractPdf(buffer);
68
+ const text = normalizePlainText(pdf.text || '');
69
+ const fallbackTitle = stripExtension(basenameFromUrl(url)) || 'PDF Document';
70
+ const title = pdf.metadata?.title || fallbackTitle;
71
+ const wordCount = countWords(text);
72
+ let content;
73
+ if (format === 'html') {
74
+ content = `<pre>${escapeHtml(text)}</pre>`;
75
+ }
76
+ else {
77
+ // markdown + text: return readable plain text.
78
+ content = text;
79
+ }
80
+ return {
81
+ content,
82
+ metadata: {
83
+ title,
84
+ contentType: normalized || 'application/pdf',
85
+ wordCount,
86
+ pages: pdf.pages,
87
+ ...pdf.metadata,
88
+ },
89
+ };
90
+ }
91
+ if (isDocx) {
92
+ // Mammoth returns clean semantic HTML.
93
+ const mammothMod = await import('mammoth');
94
+ const mammoth = mammothMod.default || mammothMod;
95
+ const result = await mammoth.convertToHtml({ buffer });
96
+ const html = (result?.value || '').trim();
97
+ const fallbackTitle = stripExtension(basenameFromUrl(url)) || 'Word Document';
98
+ const title = fallbackTitle;
99
+ // Word count should be based on plain text, not markdown formatting.
100
+ const plainText = htmlToText(html);
101
+ const wordCount = countWords(plainText);
102
+ let content;
103
+ if (format === 'html') {
104
+ content = html;
105
+ }
106
+ else if (format === 'text') {
107
+ content = plainText;
108
+ }
109
+ else {
110
+ content = htmlToMarkdown(html);
111
+ }
112
+ return {
113
+ content,
114
+ metadata: {
115
+ title,
116
+ contentType: normalized || 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
117
+ wordCount,
118
+ messages: result?.messages || [],
119
+ },
120
+ };
121
+ }
122
+ throw new Error(`Unsupported document type: ${normalized || contentType || 'unknown'}`);
123
+ }
@@ -0,0 +1,66 @@
1
+ /**
2
+ * Adaptive Domain Learning — Store and Reuse Successful Extraction Patterns
3
+ *
4
+ * Remembers which extraction method works best per domain and reuses that
5
+ * knowledge on subsequent fetches. Backed by an in-memory LRU cache with
6
+ * optional Postgres persistence (TODO).
7
+ */
8
/** What has been learned about fetching a single domain. */
export interface DomainMemoryEntry {
    /** Normalised domain this entry describes. */
    domain: string;
    /**
     * Method currently considered best:
     * 'simple' | 'browser' | 'stealth' | 'cloaked' | 'domain-api'.
     * Replaced when a recorded result's quality exceeds the running average quality.
     */
    bestMethod: string;
    /** Rolling average response time in ms, updated on every recorded result. */
    avgResponseMs: number;
    /** Number of results recorded for the domain. */
    successCount: number;
    /** Timestamp (ms since epoch) of the most recently recorded result. */
    lastSuccess: number;
    /** Rolling average content quality score (0-1), updated on every recorded result. */
    avgQuality: number;
    /** Whether this domain requires JavaScript rendering; once set it stays set. */
    requiresJs: boolean;
    /** Whether this domain has anti-bot protection; once set it stays set. */
    hasAntibot: boolean;
    /** Total fetch attempts across all methods. */
    totalAttempts: number;
}
27
/**
 * Record a fetch result for a domain.
 * Call this after every successful fetch to build up domain knowledge.
 *
 * @param url URL (or bare hostname) that was fetched.
 * @param result Method used, response time in ms, quality score, and whether
 *   the fetch was blocked or involved JavaScript.
 */
export declare function recordFetchResult(url: string, result: {
    method: string;
    responseMs: number;
    quality: number;
    wasBlocked: boolean;
    hadJavascript: boolean;
}): void;
/**
 * Get the recommended method for a domain based on past experience.
 *
 * `confidence` grows with the number of recorded results, from 0.3 (one
 * result) up to 0.95 (ten or more).
 *
 * @returns The recommendation, or null if no history exists.
 */
export declare function getRecommendedMethod(url: string): {
    method: string;
    confidence: number;
    avgResponseMs: number;
    requiresJs: boolean;
} | null;
/**
 * Get the full domain memory entry.
 *
 * @param domain Raw domain or full URL.
 * @returns The entry, or null when nothing is stored for the domain.
 */
export declare function getDomainMemory(domain: string): DomainMemoryEntry | null;
/**
 * Get stats about the domain memory cache: the number of cached domains and
 * the (up to) 20 domains with the highest success counts.
 */
export declare function getDomainMemoryStats(): {
    totalDomains: number;
    topDomains: Array<{
        domain: string;
        bestMethod: string;
        successCount: number;
    }>;
};
/** Persist current in-memory cache to Postgres. Currently a no-op stub (TODO). */
export declare function syncToPostgres(): Promise<void>;
/** Load domain memory from Postgres into the in-memory cache on startup. Currently a no-op stub (TODO). */
export declare function loadFromPostgres(): Promise<void>;
@@ -0,0 +1,163 @@
1
+ /**
2
+ * Adaptive Domain Learning — Store and Reuse Successful Extraction Patterns
3
+ *
4
+ * Remembers which extraction method works best per domain and reuses that
5
+ * knowledge on subsequent fetches. Backed by an in-memory LRU cache with
6
+ * optional Postgres persistence (TODO).
7
+ */
8
+ import { LRUCache } from 'lru-cache';
9
+ // ---------------------------------------------------------------------------
10
+ // Cache
11
+ // ---------------------------------------------------------------------------
12
/** Maximum number of domains kept in memory. */
const DOMAIN_MEMORY_MAX_ENTRIES = 5000;
/** Entry time-to-live: one hour, in milliseconds. */
const DOMAIN_MEMORY_TTL_MS = 60 * 60 * 1000;
/** In-memory cache: 5 000 domains, 1-hour TTL */
const memoryCache = new LRUCache({
    max: DOMAIN_MEMORY_MAX_ENTRIES,
    ttl: DOMAIN_MEMORY_TTL_MS,
});
17
+ // ---------------------------------------------------------------------------
18
+ // Helpers
19
+ // ---------------------------------------------------------------------------
20
/**
 * Extract and normalise the hostname from a URL string or bare host.
 *
 * Lower-cases the host and strips a leading "www." so that www.example.com
 * and example.com share an entry. Scheme-less inputs such as "example.com",
 * "example.com:8080" or "example.com/path" are retried with an assumed
 * "https://" prefix so that only the host is used as the key.
 *
 * @param url Full URL or bare hostname.
 * @returns The normalised hostname; when no hostname can be derived, the
 *   lower-cased input with a leading "www." removed (or '' for a URL that
 *   parses but has no host).
 */
function normaliseDomain(url) {
    const stripWww = (host) => host.toLowerCase().replace(/^www\./, '');
    // Parsed hostname, or null when `candidate` is not a valid absolute URL.
    const hostnameOf = (candidate) => {
        try {
            return new URL(candidate).hostname;
        }
        catch {
            return null;
        }
    };
    const direct = hostnameOf(url);
    if (direct) {
        return stripWww(direct);
    }
    // "host:port" parses as a URL whose scheme is "host:" and whose hostname is
    // empty, and "host/path" does not parse at all; retry both with a scheme.
    const assumedHttps = hostnameOf(`https://${url}`);
    if (assumedHttps) {
        return stripWww(assumedHttps);
    }
    // Parsed but host-less (e.g. "about:blank") -> ''; unparseable -> raw input.
    return direct === '' ? '' : stripWww(url);
}
34
/** Methods that imply JS rendering was needed. */
const JS_METHODS = new Set([
    'browser',
    'stealth',
]);
/** Methods that imply anti-bot protection. */
const ANTIBOT_METHODS = new Set([
    'stealth',
    'cloaked',
]);
38
+ // ---------------------------------------------------------------------------
39
+ // Core functions
40
+ // ---------------------------------------------------------------------------
41
+ /**
42
+ * Record a fetch result for a domain.
43
+ * Call this after every successful fetch to build up domain knowledge.
44
+ */
45
+ export function recordFetchResult(url, result) {
46
+ const domain = normaliseDomain(url);
47
+ const existing = memoryCache.get(domain);
48
+ if (existing) {
49
+ // Decide whether the incoming method should become the new bestMethod.
50
+ // We upgrade if the incoming quality is strictly higher.
51
+ const isBetterMethod = result.quality > existing.avgQuality;
52
+ if (isBetterMethod) {
53
+ existing.bestMethod = result.method;
54
+ }
55
+ // Rolling averages: (old * count + new) / (count + 1)
56
+ const count = existing.successCount;
57
+ existing.avgResponseMs =
58
+ (existing.avgResponseMs * count + result.responseMs) / (count + 1);
59
+ existing.avgQuality =
60
+ (existing.avgQuality * count + result.quality) / (count + 1);
61
+ existing.successCount += 1;
62
+ existing.totalAttempts += 1;
63
+ existing.lastSuccess = Date.now();
64
+ // Accumulate flags — once set they stay set.
65
+ if (JS_METHODS.has(result.method) || result.hadJavascript) {
66
+ existing.requiresJs = true;
67
+ }
68
+ if (ANTIBOT_METHODS.has(result.method) || result.wasBlocked) {
69
+ existing.hasAntibot = true;
70
+ }
71
+ memoryCache.set(domain, existing);
72
+ }
73
+ else {
74
+ // Brand-new entry
75
+ const entry = {
76
+ domain,
77
+ bestMethod: result.method,
78
+ avgResponseMs: result.responseMs,
79
+ successCount: 1,
80
+ lastSuccess: Date.now(),
81
+ avgQuality: result.quality,
82
+ requiresJs: JS_METHODS.has(result.method) || result.hadJavascript,
83
+ hasAntibot: ANTIBOT_METHODS.has(result.method) || result.wasBlocked,
84
+ totalAttempts: 1,
85
+ };
86
+ memoryCache.set(domain, entry);
87
+ }
88
+ }
89
+ /**
90
+ * Get the recommended method for a domain based on past experience.
91
+ * Returns null if no history exists.
92
+ */
93
+ export function getRecommendedMethod(url) {
94
+ const domain = normaliseDomain(url);
95
+ const entry = memoryCache.get(domain);
96
+ if (!entry)
97
+ return null;
98
+ // Confidence ramp: 1 → 0.3, 5+ → 0.8, 10+ → 0.95
99
+ let confidence;
100
+ if (entry.successCount >= 10) {
101
+ confidence = 0.95;
102
+ }
103
+ else if (entry.successCount >= 5) {
104
+ // Linear interpolation between 0.8 and 0.95 for 5..9
105
+ confidence = 0.8 + ((entry.successCount - 5) / 5) * 0.15;
106
+ }
107
+ else if (entry.successCount >= 2) {
108
+ // Linear interpolation between 0.3 and 0.8 for 1..4
109
+ confidence = 0.3 + ((entry.successCount - 1) / 4) * 0.5;
110
+ }
111
+ else {
112
+ confidence = 0.3;
113
+ }
114
+ return {
115
+ method: entry.bestMethod,
116
+ confidence,
117
+ avgResponseMs: entry.avgResponseMs,
118
+ requiresJs: entry.requiresJs,
119
+ };
120
+ }
121
+ /**
122
+ * Get full domain memory entry.
123
+ */
124
+ export function getDomainMemory(domain) {
125
+ // Accept both raw domain and full URL.
126
+ const key = normaliseDomain(domain);
127
+ return memoryCache.get(key) ?? null;
128
+ }
129
+ /**
130
+ * Get stats about the domain memory cache.
131
+ */
132
+ export function getDomainMemoryStats() {
133
+ const entries = [];
134
+ // LRUCache v11 supports for..of iteration
135
+ for (const [, value] of memoryCache.entries()) {
136
+ if (value)
137
+ entries.push(value);
138
+ }
139
+ // Sort by successCount descending, take top 20
140
+ entries.sort((a, b) => b.successCount - a.successCount);
141
+ const topDomains = entries.slice(0, 20).map((e) => ({
142
+ domain: e.domain,
143
+ bestMethod: e.bestMethod,
144
+ successCount: e.successCount,
145
+ }));
146
+ return {
147
+ totalDomains: memoryCache.size,
148
+ topDomains,
149
+ };
150
+ }
151
+ // ---------------------------------------------------------------------------
152
+ // Postgres sync stubs (wire later)
153
+ // ---------------------------------------------------------------------------
154
+ /** Persist current in-memory cache to Postgres. */
155
+ export async function syncToPostgres() {
156
+ // TODO: INSERT/UPSERT all entries from memoryCache into a
157
+ // `domain_memory` table keyed on `domain`.
158
+ }
159
+ /** Load domain memory from Postgres into the in-memory cache on startup. */
160
+ export async function loadFromPostgres() {
161
+ // TODO: SELECT * FROM domain_memory and populate memoryCache
162
+ // with each row, respecting the LRU max-size limit.
163
+ }
@@ -0,0 +1,40 @@
1
+ /**
2
+ * Active domain verification — runtime TLS, HTTP header, and DNS signals.
3
+ *
4
+ * Runs during the fetch pipeline for sites that are NOT already in the known
5
+ * official/established lists. All network operations have a hard 3-second
6
+ * timeout and fail-open (any error → null for that section).
7
+ *
8
+ * Scoring adds bonus points (0–80) on top of the static source-credibility score.
9
+ */
10
/** Outcome of an active domain verification run. */
export interface DomainVerification {
    /** TLS certificate findings, or null when this section could not be determined. */
    tls: {
        valid: boolean;
        issuer: string;
        daysRemaining: number;
        /** NOTE(review): presumably "Extended Validation" — confirm against the implementation. */
        ev: boolean;
    } | null;
    /** HTTP response header findings. */
    headers: {
        hsts: boolean;
        csp: boolean;
        xFrameOptions: boolean;
        server: string;
        poweredBy: string | null;
    };
    /** DNS findings, or null when this section could not be determined. */
    dns: {
        hasMx: boolean;
        hasDmarc: boolean;
        hasSpf: boolean;
        nameservers: string[];
    } | null;
    /** Signals collected during verification. */
    signals: string[];
    /** Warnings collected during verification. */
    warnings: string[];
    /** Bonus points added on top of the static source-credibility score. */
    verificationScore: number;
}
34
/**
 * Perform active domain verification (TLS + HTTP headers + DNS).
 *
 * @param url Full URL to verify (e.g. "https://stripe.com")
 * @param existingHeaders Optional pre-fetched HTTP response headers (avoids a HEAD request)
 * @returns The collected TLS, header and DNS findings together with the
 *   resulting signals, warnings and verification score.
 */
export declare function verifyDomain(
    url: string,
    existingHeaders?: Record<string, string>,
): Promise<DomainVerification>;