@iflow-mcp/jakeliume-webpeel 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (547) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +313 -0
  3. package/dist/cache.d.ts +30 -0
  4. package/dist/cache.js +139 -0
  5. package/dist/cli/commands/auth.d.ts +5 -0
  6. package/dist/cli/commands/auth.js +411 -0
  7. package/dist/cli/commands/doctor.d.ts +37 -0
  8. package/dist/cli/commands/doctor.js +371 -0
  9. package/dist/cli/commands/fetch.d.ts +6 -0
  10. package/dist/cli/commands/fetch.js +1345 -0
  11. package/dist/cli/commands/guide.d.ts +2 -0
  12. package/dist/cli/commands/guide.js +183 -0
  13. package/dist/cli/commands/interact.d.ts +5 -0
  14. package/dist/cli/commands/interact.js +840 -0
  15. package/dist/cli/commands/jobs.d.ts +5 -0
  16. package/dist/cli/commands/jobs.js +997 -0
  17. package/dist/cli/commands/monitor.d.ts +12 -0
  18. package/dist/cli/commands/monitor.js +197 -0
  19. package/dist/cli/commands/observe.d.ts +12 -0
  20. package/dist/cli/commands/observe.js +158 -0
  21. package/dist/cli/commands/screenshot.d.ts +5 -0
  22. package/dist/cli/commands/screenshot.js +282 -0
  23. package/dist/cli/commands/search.d.ts +5 -0
  24. package/dist/cli/commands/search.js +1021 -0
  25. package/dist/cli/commands/setup.d.ts +13 -0
  26. package/dist/cli/commands/setup.js +244 -0
  27. package/dist/cli/commands/skill.d.ts +15 -0
  28. package/dist/cli/commands/skill.js +195 -0
  29. package/dist/cli/utils.d.ts +84 -0
  30. package/dist/cli/utils.js +806 -0
  31. package/dist/cli-auth.d.ts +75 -0
  32. package/dist/cli-auth.js +369 -0
  33. package/dist/cli.d.ts +17 -0
  34. package/dist/cli.js +99 -0
  35. package/dist/core/actions.d.ts +69 -0
  36. package/dist/core/actions.js +495 -0
  37. package/dist/core/agent.d.ts +98 -0
  38. package/dist/core/agent.js +558 -0
  39. package/dist/core/answer.d.ts +42 -0
  40. package/dist/core/answer.js +395 -0
  41. package/dist/core/application-tracker.d.ts +84 -0
  42. package/dist/core/application-tracker.js +184 -0
  43. package/dist/core/apply.d.ts +162 -0
  44. package/dist/core/apply.js +816 -0
  45. package/dist/core/auth-detection.d.ts +35 -0
  46. package/dist/core/auth-detection.js +358 -0
  47. package/dist/core/auto-extract.d.ts +82 -0
  48. package/dist/core/auto-extract.js +604 -0
  49. package/dist/core/auto-interact.d.ts +23 -0
  50. package/dist/core/auto-interact.js +246 -0
  51. package/dist/core/bm25-filter.d.ts +66 -0
  52. package/dist/core/bm25-filter.js +288 -0
  53. package/dist/core/branding.d.ts +54 -0
  54. package/dist/core/branding.js +234 -0
  55. package/dist/core/browser-fetch.d.ts +323 -0
  56. package/dist/core/browser-fetch.js +1600 -0
  57. package/dist/core/browser-pool.d.ts +91 -0
  58. package/dist/core/browser-pool.js +550 -0
  59. package/dist/core/budget.d.ts +42 -0
  60. package/dist/core/budget.js +324 -0
  61. package/dist/core/business-intel.d.ts +47 -0
  62. package/dist/core/business-intel.js +279 -0
  63. package/dist/core/cache.d.ts +13 -0
  64. package/dist/core/cache.js +121 -0
  65. package/dist/core/cf-worker-proxy.d.ts +32 -0
  66. package/dist/core/cf-worker-proxy.js +87 -0
  67. package/dist/core/challenge-detection.d.ts +26 -0
  68. package/dist/core/challenge-detection.js +468 -0
  69. package/dist/core/change-tracking.d.ts +75 -0
  70. package/dist/core/change-tracking.js +276 -0
  71. package/dist/core/chunker.d.ts +46 -0
  72. package/dist/core/chunker.js +249 -0
  73. package/dist/core/chunking.d.ts +42 -0
  74. package/dist/core/chunking.js +181 -0
  75. package/dist/core/circuit-breaker.d.ts +44 -0
  76. package/dist/core/circuit-breaker.js +85 -0
  77. package/dist/core/content-pruner.d.ts +47 -0
  78. package/dist/core/content-pruner.js +425 -0
  79. package/dist/core/cookie-cache.d.ts +60 -0
  80. package/dist/core/cookie-cache.js +163 -0
  81. package/dist/core/crawl-checkpoint.d.ts +54 -0
  82. package/dist/core/crawl-checkpoint.js +104 -0
  83. package/dist/core/crawler.d.ts +84 -0
  84. package/dist/core/crawler.js +349 -0
  85. package/dist/core/cross-verify.d.ts +27 -0
  86. package/dist/core/cross-verify.js +93 -0
  87. package/dist/core/deep-fetch.d.ts +74 -0
  88. package/dist/core/deep-fetch.js +405 -0
  89. package/dist/core/deep-research.d.ts +141 -0
  90. package/dist/core/deep-research.js +972 -0
  91. package/dist/core/design-analysis.d.ts +70 -0
  92. package/dist/core/design-analysis.js +490 -0
  93. package/dist/core/design-compare.d.ts +38 -0
  94. package/dist/core/design-compare.js +264 -0
  95. package/dist/core/diff.d.ts +61 -0
  96. package/dist/core/diff.js +289 -0
  97. package/dist/core/dns-cache.d.ts +20 -0
  98. package/dist/core/dns-cache.js +198 -0
  99. package/dist/core/documents.d.ts +23 -0
  100. package/dist/core/documents.js +123 -0
  101. package/dist/core/domain-memory.d.ts +66 -0
  102. package/dist/core/domain-memory.js +163 -0
  103. package/dist/core/domain-verify.d.ts +40 -0
  104. package/dist/core/domain-verify.js +379 -0
  105. package/dist/core/engine-ranker.d.ts +112 -0
  106. package/dist/core/engine-ranker.js +395 -0
  107. package/dist/core/extract-inline.d.ts +38 -0
  108. package/dist/core/extract-inline.js +215 -0
  109. package/dist/core/extract-listings.d.ts +38 -0
  110. package/dist/core/extract-listings.js +461 -0
  111. package/dist/core/extract.d.ts +9 -0
  112. package/dist/core/extract.js +139 -0
  113. package/dist/core/fetch-cache.d.ts +57 -0
  114. package/dist/core/fetch-cache.js +95 -0
  115. package/dist/core/fetcher.d.ts +13 -0
  116. package/dist/core/fetcher.js +12 -0
  117. package/dist/core/google-cache.d.ts +29 -0
  118. package/dist/core/google-cache.js +180 -0
  119. package/dist/core/google-serp-parser.d.ts +82 -0
  120. package/dist/core/google-serp-parser.js +287 -0
  121. package/dist/core/hotel-search.d.ts +122 -0
  122. package/dist/core/hotel-search.js +382 -0
  123. package/dist/core/http-fetch.d.ts +72 -0
  124. package/dist/core/http-fetch.js +820 -0
  125. package/dist/core/human.d.ts +175 -0
  126. package/dist/core/human.js +680 -0
  127. package/dist/core/image-caption.d.ts +44 -0
  128. package/dist/core/image-caption.js +271 -0
  129. package/dist/core/jobs.d.ts +75 -0
  130. package/dist/core/jobs.js +634 -0
  131. package/dist/core/json-ld.d.ts +15 -0
  132. package/dist/core/json-ld.js +617 -0
  133. package/dist/core/language-detect.d.ts +18 -0
  134. package/dist/core/language-detect.js +135 -0
  135. package/dist/core/links.d.ts +10 -0
  136. package/dist/core/links.js +44 -0
  137. package/dist/core/llm-extract.d.ts +71 -0
  138. package/dist/core/llm-extract.js +507 -0
  139. package/dist/core/llm-provider.d.ts +100 -0
  140. package/dist/core/llm-provider.js +702 -0
  141. package/dist/core/local-search.d.ts +60 -0
  142. package/dist/core/local-search.js +308 -0
  143. package/dist/core/logger.d.ts +28 -0
  144. package/dist/core/logger.js +104 -0
  145. package/dist/core/map.d.ts +33 -0
  146. package/dist/core/map.js +127 -0
  147. package/dist/core/markdown.d.ts +92 -0
  148. package/dist/core/markdown.js +809 -0
  149. package/dist/core/metadata.d.ts +34 -0
  150. package/dist/core/metadata.js +422 -0
  151. package/dist/core/observe.d.ts +113 -0
  152. package/dist/core/observe.js +395 -0
  153. package/dist/core/ocr.d.ts +12 -0
  154. package/dist/core/ocr.js +33 -0
  155. package/dist/core/paginate.d.ts +31 -0
  156. package/dist/core/paginate.js +106 -0
  157. package/dist/core/pdf.d.ts +8 -0
  158. package/dist/core/pdf.js +25 -0
  159. package/dist/core/peel-tls.d.ts +25 -0
  160. package/dist/core/peel-tls.js +220 -0
  161. package/dist/core/pipeline.d.ts +132 -0
  162. package/dist/core/pipeline.js +1666 -0
  163. package/dist/core/profiles.d.ts +61 -0
  164. package/dist/core/profiles.js +350 -0
  165. package/dist/core/prompt-guard.d.ts +30 -0
  166. package/dist/core/prompt-guard.js +119 -0
  167. package/dist/core/proxy-config.d.ts +90 -0
  168. package/dist/core/proxy-config.js +172 -0
  169. package/dist/core/quick-answer.d.ts +53 -0
  170. package/dist/core/quick-answer.js +833 -0
  171. package/dist/core/rate-governor.d.ts +80 -0
  172. package/dist/core/rate-governor.js +238 -0
  173. package/dist/core/readability.d.ts +57 -0
  174. package/dist/core/readability.js +533 -0
  175. package/dist/core/research.d.ts +66 -0
  176. package/dist/core/research.js +270 -0
  177. package/dist/core/retry.d.ts +60 -0
  178. package/dist/core/retry.js +119 -0
  179. package/dist/core/safe-browsing.d.ts +30 -0
  180. package/dist/core/safe-browsing.js +206 -0
  181. package/dist/core/schema-extraction.d.ts +66 -0
  182. package/dist/core/schema-extraction.js +352 -0
  183. package/dist/core/schema-postprocess.d.ts +32 -0
  184. package/dist/core/schema-postprocess.js +469 -0
  185. package/dist/core/schema-templates.d.ts +19 -0
  186. package/dist/core/schema-templates.js +143 -0
  187. package/dist/core/screenshot.d.ts +224 -0
  188. package/dist/core/screenshot.js +207 -0
  189. package/dist/core/search-engines.d.ts +25 -0
  190. package/dist/core/search-engines.js +182 -0
  191. package/dist/core/search-provider.d.ts +243 -0
  192. package/dist/core/search-provider.js +1629 -0
  193. package/dist/core/searxng-provider.d.ts +35 -0
  194. package/dist/core/searxng-provider.js +105 -0
  195. package/dist/core/selective-evidence.d.ts +151 -0
  196. package/dist/core/selective-evidence.js +389 -0
  197. package/dist/core/site-search.d.ts +44 -0
  198. package/dist/core/site-search.js +252 -0
  199. package/dist/core/sitemap.d.ts +23 -0
  200. package/dist/core/sitemap.js +105 -0
  201. package/dist/core/source-credibility.d.ts +29 -0
  202. package/dist/core/source-credibility.js +584 -0
  203. package/dist/core/source-scoring.d.ts +166 -0
  204. package/dist/core/source-scoring.js +396 -0
  205. package/dist/core/stemmer.d.ts +38 -0
  206. package/dist/core/stemmer.js +509 -0
  207. package/dist/core/strategies.d.ts +104 -0
  208. package/dist/core/strategies.js +1044 -0
  209. package/dist/core/strategy-hooks.d.ts +145 -0
  210. package/dist/core/strategy-hooks.js +74 -0
  211. package/dist/core/structured-extract.d.ts +43 -0
  212. package/dist/core/structured-extract.js +550 -0
  213. package/dist/core/summarize.d.ts +17 -0
  214. package/dist/core/summarize.js +78 -0
  215. package/dist/core/synonyms.d.ts +42 -0
  216. package/dist/core/synonyms.js +184 -0
  217. package/dist/core/system-monitor.d.ts +61 -0
  218. package/dist/core/system-monitor.js +133 -0
  219. package/dist/core/table-format.d.ts +30 -0
  220. package/dist/core/table-format.js +146 -0
  221. package/dist/core/threat-feeds.d.ts +23 -0
  222. package/dist/core/threat-feeds.js +104 -0
  223. package/dist/core/timing.d.ts +21 -0
  224. package/dist/core/timing.js +33 -0
  225. package/dist/core/transcript-export.d.ts +47 -0
  226. package/dist/core/transcript-export.js +107 -0
  227. package/dist/core/user-agents.d.ts +82 -0
  228. package/dist/core/user-agents.js +239 -0
  229. package/dist/core/vertical-search.d.ts +54 -0
  230. package/dist/core/vertical-search.js +158 -0
  231. package/dist/core/watch-manager.d.ts +175 -0
  232. package/dist/core/watch-manager.js +416 -0
  233. package/dist/core/watch.d.ts +101 -0
  234. package/dist/core/watch.js +389 -0
  235. package/dist/core/youtube.d.ts +130 -0
  236. package/dist/core/youtube.js +1175 -0
  237. package/dist/ee/challenge-re-export.d.ts +1 -0
  238. package/dist/ee/challenge-re-export.js +1 -0
  239. package/dist/ee/challenge-solver.d.ts +72 -0
  240. package/dist/ee/challenge-solver.js +720 -0
  241. package/dist/ee/domain-extractors.d.ts +8 -0
  242. package/dist/ee/domain-extractors.js +8 -0
  243. package/dist/ee/domain-intel.d.ts +16 -0
  244. package/dist/ee/domain-intel.js +133 -0
  245. package/dist/ee/extractors/allrecipes.d.ts +2 -0
  246. package/dist/ee/extractors/allrecipes.js +120 -0
  247. package/dist/ee/extractors/amazon.d.ts +2 -0
  248. package/dist/ee/extractors/amazon.js +78 -0
  249. package/dist/ee/extractors/arxiv.d.ts +2 -0
  250. package/dist/ee/extractors/arxiv.js +137 -0
  251. package/dist/ee/extractors/bestbuy.d.ts +2 -0
  252. package/dist/ee/extractors/bestbuy.js +78 -0
  253. package/dist/ee/extractors/carscom.d.ts +2 -0
  254. package/dist/ee/extractors/carscom.js +121 -0
  255. package/dist/ee/extractors/coingecko.d.ts +2 -0
  256. package/dist/ee/extractors/coingecko.js +134 -0
  257. package/dist/ee/extractors/craigslist.d.ts +2 -0
  258. package/dist/ee/extractors/craigslist.js +92 -0
  259. package/dist/ee/extractors/devto.d.ts +2 -0
  260. package/dist/ee/extractors/devto.js +135 -0
  261. package/dist/ee/extractors/ebay.d.ts +2 -0
  262. package/dist/ee/extractors/ebay.js +90 -0
  263. package/dist/ee/extractors/espn.d.ts +2 -0
  264. package/dist/ee/extractors/espn.js +260 -0
  265. package/dist/ee/extractors/etsy.d.ts +2 -0
  266. package/dist/ee/extractors/etsy.js +52 -0
  267. package/dist/ee/extractors/facebook.d.ts +2 -0
  268. package/dist/ee/extractors/facebook.js +46 -0
  269. package/dist/ee/extractors/github.d.ts +2 -0
  270. package/dist/ee/extractors/github.js +196 -0
  271. package/dist/ee/extractors/google-flights.d.ts +2 -0
  272. package/dist/ee/extractors/google-flights.js +176 -0
  273. package/dist/ee/extractors/hackernews.d.ts +2 -0
  274. package/dist/ee/extractors/hackernews.js +147 -0
  275. package/dist/ee/extractors/imdb.d.ts +2 -0
  276. package/dist/ee/extractors/imdb.js +172 -0
  277. package/dist/ee/extractors/index.d.ts +26 -0
  278. package/dist/ee/extractors/index.js +247 -0
  279. package/dist/ee/extractors/instagram.d.ts +2 -0
  280. package/dist/ee/extractors/instagram.js +102 -0
  281. package/dist/ee/extractors/kalshi.d.ts +2 -0
  282. package/dist/ee/extractors/kalshi.js +121 -0
  283. package/dist/ee/extractors/kayak-cars.d.ts +2 -0
  284. package/dist/ee/extractors/kayak-cars.js +270 -0
  285. package/dist/ee/extractors/linkedin.d.ts +2 -0
  286. package/dist/ee/extractors/linkedin.js +113 -0
  287. package/dist/ee/extractors/medium.d.ts +2 -0
  288. package/dist/ee/extractors/medium.js +130 -0
  289. package/dist/ee/extractors/news.d.ts +4 -0
  290. package/dist/ee/extractors/news.js +173 -0
  291. package/dist/ee/extractors/npm.d.ts +2 -0
  292. package/dist/ee/extractors/npm.js +86 -0
  293. package/dist/ee/extractors/pdf.d.ts +2 -0
  294. package/dist/ee/extractors/pdf.js +108 -0
  295. package/dist/ee/extractors/pinterest.d.ts +2 -0
  296. package/dist/ee/extractors/pinterest.js +34 -0
  297. package/dist/ee/extractors/polymarket.d.ts +2 -0
  298. package/dist/ee/extractors/polymarket.js +358 -0
  299. package/dist/ee/extractors/producthunt.d.ts +2 -0
  300. package/dist/ee/extractors/producthunt.js +88 -0
  301. package/dist/ee/extractors/pubmed.d.ts +2 -0
  302. package/dist/ee/extractors/pubmed.js +162 -0
  303. package/dist/ee/extractors/pypi.d.ts +2 -0
  304. package/dist/ee/extractors/pypi.js +80 -0
  305. package/dist/ee/extractors/reddit.d.ts +2 -0
  306. package/dist/ee/extractors/reddit.js +438 -0
  307. package/dist/ee/extractors/redfin.d.ts +2 -0
  308. package/dist/ee/extractors/redfin.js +156 -0
  309. package/dist/ee/extractors/semanticscholar.d.ts +2 -0
  310. package/dist/ee/extractors/semanticscholar.js +131 -0
  311. package/dist/ee/extractors/shared.d.ts +12 -0
  312. package/dist/ee/extractors/shared.js +76 -0
  313. package/dist/ee/extractors/soundcloud.d.ts +2 -0
  314. package/dist/ee/extractors/soundcloud.js +34 -0
  315. package/dist/ee/extractors/sportsbetting.d.ts +2 -0
  316. package/dist/ee/extractors/sportsbetting.js +37 -0
  317. package/dist/ee/extractors/spotify.d.ts +2 -0
  318. package/dist/ee/extractors/spotify.js +34 -0
  319. package/dist/ee/extractors/stackoverflow.d.ts +2 -0
  320. package/dist/ee/extractors/stackoverflow.js +61 -0
  321. package/dist/ee/extractors/substack.d.ts +2 -0
  322. package/dist/ee/extractors/substack.js +115 -0
  323. package/dist/ee/extractors/substackroot.d.ts +2 -0
  324. package/dist/ee/extractors/substackroot.js +46 -0
  325. package/dist/ee/extractors/tiktok.d.ts +2 -0
  326. package/dist/ee/extractors/tiktok.js +29 -0
  327. package/dist/ee/extractors/tradingview.d.ts +2 -0
  328. package/dist/ee/extractors/tradingview.js +182 -0
  329. package/dist/ee/extractors/twitch.d.ts +2 -0
  330. package/dist/ee/extractors/twitch.js +36 -0
  331. package/dist/ee/extractors/twitter.d.ts +2 -0
  332. package/dist/ee/extractors/twitter.js +327 -0
  333. package/dist/ee/extractors/types.d.ts +14 -0
  334. package/dist/ee/extractors/types.js +1 -0
  335. package/dist/ee/extractors/walmart.d.ts +2 -0
  336. package/dist/ee/extractors/walmart.js +50 -0
  337. package/dist/ee/extractors/weather.d.ts +2 -0
  338. package/dist/ee/extractors/weather.js +133 -0
  339. package/dist/ee/extractors/wikipedia.d.ts +4 -0
  340. package/dist/ee/extractors/wikipedia.js +235 -0
  341. package/dist/ee/extractors/yelp.d.ts +2 -0
  342. package/dist/ee/extractors/yelp.js +216 -0
  343. package/dist/ee/extractors/youtube.d.ts +2 -0
  344. package/dist/ee/extractors/youtube.js +189 -0
  345. package/dist/ee/extractors/zillow.d.ts +54 -0
  346. package/dist/ee/extractors/zillow.js +247 -0
  347. package/dist/ee/extractors-re-export.d.ts +1 -0
  348. package/dist/ee/extractors-re-export.js +1 -0
  349. package/dist/ee/premium-hooks.d.ts +20 -0
  350. package/dist/ee/premium-hooks.js +50 -0
  351. package/dist/ee/spa-detection.d.ts +2 -0
  352. package/dist/ee/spa-detection.js +2 -0
  353. package/dist/ee/stability.d.ts +4 -0
  354. package/dist/ee/stability.js +29 -0
  355. package/dist/ee/swr-cache.d.ts +14 -0
  356. package/dist/ee/swr-cache.js +34 -0
  357. package/dist/index.d.ts +143 -0
  358. package/dist/index.js +291 -0
  359. package/dist/integrations/index.d.ts +2 -0
  360. package/dist/integrations/index.js +2 -0
  361. package/dist/integrations/langchain.d.ts +64 -0
  362. package/dist/integrations/langchain.js +115 -0
  363. package/dist/integrations/llamaindex.d.ts +50 -0
  364. package/dist/integrations/llamaindex.js +91 -0
  365. package/dist/mcp/handlers/act.d.ts +5 -0
  366. package/dist/mcp/handlers/act.js +34 -0
  367. package/dist/mcp/handlers/definitions.d.ts +6 -0
  368. package/dist/mcp/handlers/definitions.js +395 -0
  369. package/dist/mcp/handlers/extract.d.ts +7 -0
  370. package/dist/mcp/handlers/extract.js +135 -0
  371. package/dist/mcp/handlers/fetch.d.ts +6 -0
  372. package/dist/mcp/handlers/fetch.js +98 -0
  373. package/dist/mcp/handlers/find.d.ts +5 -0
  374. package/dist/mcp/handlers/find.js +137 -0
  375. package/dist/mcp/handlers/index.d.ts +13 -0
  376. package/dist/mcp/handlers/index.js +63 -0
  377. package/dist/mcp/handlers/legacy.d.ts +25 -0
  378. package/dist/mcp/handlers/legacy.js +450 -0
  379. package/dist/mcp/handlers/meta.d.ts +6 -0
  380. package/dist/mcp/handlers/meta.js +40 -0
  381. package/dist/mcp/handlers/monitor.d.ts +5 -0
  382. package/dist/mcp/handlers/monitor.js +41 -0
  383. package/dist/mcp/handlers/observe.d.ts +8 -0
  384. package/dist/mcp/handlers/observe.js +37 -0
  385. package/dist/mcp/handlers/read.d.ts +6 -0
  386. package/dist/mcp/handlers/read.js +78 -0
  387. package/dist/mcp/handlers/see.d.ts +5 -0
  388. package/dist/mcp/handlers/see.js +75 -0
  389. package/dist/mcp/handlers/types.d.ts +29 -0
  390. package/dist/mcp/handlers/types.js +28 -0
  391. package/dist/mcp/server.d.ts +7 -0
  392. package/dist/mcp/server.js +108 -0
  393. package/dist/mcp/smart-router.d.ts +23 -0
  394. package/dist/mcp/smart-router.js +178 -0
  395. package/dist/server/app.d.ts +14 -0
  396. package/dist/server/app.js +632 -0
  397. package/dist/server/auth-store.d.ts +28 -0
  398. package/dist/server/auth-store.js +88 -0
  399. package/dist/server/bull-queues.d.ts +60 -0
  400. package/dist/server/bull-queues.js +90 -0
  401. package/dist/server/email-service.d.ts +55 -0
  402. package/dist/server/email-service.js +291 -0
  403. package/dist/server/job-queue.d.ts +100 -0
  404. package/dist/server/job-queue.js +145 -0
  405. package/dist/server/logger.d.ts +10 -0
  406. package/dist/server/logger.js +37 -0
  407. package/dist/server/middleware/audit-log.d.ts +14 -0
  408. package/dist/server/middleware/audit-log.js +73 -0
  409. package/dist/server/middleware/auth.d.ts +35 -0
  410. package/dist/server/middleware/auth.js +225 -0
  411. package/dist/server/middleware/rate-limit.d.ts +50 -0
  412. package/dist/server/middleware/rate-limit.js +270 -0
  413. package/dist/server/middleware/scope-guard.d.ts +25 -0
  414. package/dist/server/middleware/scope-guard.js +45 -0
  415. package/dist/server/middleware/url-validator.d.ts +15 -0
  416. package/dist/server/middleware/url-validator.js +201 -0
  417. package/dist/server/openapi.yaml +6418 -0
  418. package/dist/server/pg-auth-store.d.ts +146 -0
  419. package/dist/server/pg-auth-store.js +576 -0
  420. package/dist/server/pg-job-queue.d.ts +59 -0
  421. package/dist/server/pg-job-queue.js +375 -0
  422. package/dist/server/routes/activity.d.ts +6 -0
  423. package/dist/server/routes/activity.js +79 -0
  424. package/dist/server/routes/admin-active.d.ts +7 -0
  425. package/dist/server/routes/admin-active.js +120 -0
  426. package/dist/server/routes/admin-stats.d.ts +7 -0
  427. package/dist/server/routes/admin-stats.js +176 -0
  428. package/dist/server/routes/agent.d.ts +24 -0
  429. package/dist/server/routes/agent.js +480 -0
  430. package/dist/server/routes/answer.d.ts +5 -0
  431. package/dist/server/routes/answer.js +125 -0
  432. package/dist/server/routes/ask.d.ts +28 -0
  433. package/dist/server/routes/ask.js +295 -0
  434. package/dist/server/routes/batch.d.ts +6 -0
  435. package/dist/server/routes/batch.js +493 -0
  436. package/dist/server/routes/cache-warm.d.ts +25 -0
  437. package/dist/server/routes/cache-warm.js +212 -0
  438. package/dist/server/routes/cli-usage.d.ts +6 -0
  439. package/dist/server/routes/cli-usage.js +127 -0
  440. package/dist/server/routes/compat.d.ts +23 -0
  441. package/dist/server/routes/compat.js +652 -0
  442. package/dist/server/routes/crawl.d.ts +13 -0
  443. package/dist/server/routes/crawl.js +287 -0
  444. package/dist/server/routes/deep-fetch.d.ts +8 -0
  445. package/dist/server/routes/deep-fetch.js +57 -0
  446. package/dist/server/routes/deep-research.d.ts +11 -0
  447. package/dist/server/routes/deep-research.js +232 -0
  448. package/dist/server/routes/demo.d.ts +24 -0
  449. package/dist/server/routes/demo.js +517 -0
  450. package/dist/server/routes/do.d.ts +8 -0
  451. package/dist/server/routes/do.js +72 -0
  452. package/dist/server/routes/extract.d.ts +14 -0
  453. package/dist/server/routes/extract.js +325 -0
  454. package/dist/server/routes/feed.d.ts +15 -0
  455. package/dist/server/routes/feed.js +311 -0
  456. package/dist/server/routes/fetch-queue.d.ts +13 -0
  457. package/dist/server/routes/fetch-queue.js +357 -0
  458. package/dist/server/routes/fetch.d.ts +7 -0
  459. package/dist/server/routes/fetch.js +1274 -0
  460. package/dist/server/routes/go.d.ts +14 -0
  461. package/dist/server/routes/go.js +81 -0
  462. package/dist/server/routes/health.d.ts +11 -0
  463. package/dist/server/routes/health.js +141 -0
  464. package/dist/server/routes/jobs.d.ts +7 -0
  465. package/dist/server/routes/jobs.js +574 -0
  466. package/dist/server/routes/map.d.ts +11 -0
  467. package/dist/server/routes/map.js +116 -0
  468. package/dist/server/routes/mcp.d.ts +14 -0
  469. package/dist/server/routes/mcp.js +197 -0
  470. package/dist/server/routes/metrics.d.ts +37 -0
  471. package/dist/server/routes/metrics.js +149 -0
  472. package/dist/server/routes/oauth.d.ts +9 -0
  473. package/dist/server/routes/oauth.js +396 -0
  474. package/dist/server/routes/playground.d.ts +17 -0
  475. package/dist/server/routes/playground.js +283 -0
  476. package/dist/server/routes/reader.d.ts +18 -0
  477. package/dist/server/routes/reader.js +192 -0
  478. package/dist/server/routes/research.d.ts +14 -0
  479. package/dist/server/routes/research.js +482 -0
  480. package/dist/server/routes/screenshot.d.ts +22 -0
  481. package/dist/server/routes/screenshot.js +820 -0
  482. package/dist/server/routes/search.d.ts +6 -0
  483. package/dist/server/routes/search.js +874 -0
  484. package/dist/server/routes/session.d.ts +17 -0
  485. package/dist/server/routes/session.js +548 -0
  486. package/dist/server/routes/share.d.ts +18 -0
  487. package/dist/server/routes/share.js +462 -0
  488. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  489. package/dist/server/routes/smart-search/handlers/cars.js +102 -0
  490. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  491. package/dist/server/routes/smart-search/handlers/flights.js +72 -0
  492. package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
  493. package/dist/server/routes/smart-search/handlers/general.js +717 -0
  494. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  495. package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
  496. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  497. package/dist/server/routes/smart-search/handlers/products.js +1309 -0
  498. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  499. package/dist/server/routes/smart-search/handlers/rental.js +154 -0
  500. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  501. package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
  502. package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
  503. package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
  504. package/dist/server/routes/smart-search/index.d.ts +19 -0
  505. package/dist/server/routes/smart-search/index.js +546 -0
  506. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  507. package/dist/server/routes/smart-search/intent.js +264 -0
  508. package/dist/server/routes/smart-search/llm.d.ts +16 -0
  509. package/dist/server/routes/smart-search/llm.js +70 -0
  510. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  511. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  512. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  513. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  514. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  515. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  516. package/dist/server/routes/smart-search/types.d.ts +81 -0
  517. package/dist/server/routes/smart-search/types.js +1 -0
  518. package/dist/server/routes/smart-search/utils.d.ts +20 -0
  519. package/dist/server/routes/smart-search/utils.js +146 -0
  520. package/dist/server/routes/stats.d.ts +6 -0
  521. package/dist/server/routes/stats.js +71 -0
  522. package/dist/server/routes/stripe.d.ts +15 -0
  523. package/dist/server/routes/stripe.js +296 -0
  524. package/dist/server/routes/transcript-export.d.ts +10 -0
  525. package/dist/server/routes/transcript-export.js +178 -0
  526. package/dist/server/routes/usage.d.ts +9 -0
  527. package/dist/server/routes/usage.js +279 -0
  528. package/dist/server/routes/users.d.ts +8 -0
  529. package/dist/server/routes/users.js +1867 -0
  530. package/dist/server/routes/watch.d.ts +15 -0
  531. package/dist/server/routes/watch.js +309 -0
  532. package/dist/server/routes/webhooks.d.ts +26 -0
  533. package/dist/server/routes/webhooks.js +170 -0
  534. package/dist/server/routes/youtube.d.ts +6 -0
  535. package/dist/server/routes/youtube.js +130 -0
  536. package/dist/server/sentry.d.ts +14 -0
  537. package/dist/server/sentry.js +104 -0
  538. package/dist/server/types.d.ts +15 -0
  539. package/dist/server/types.js +7 -0
  540. package/dist/server/utils/response.d.ts +44 -0
  541. package/dist/server/utils/response.js +69 -0
  542. package/dist/server/utils/sse.d.ts +22 -0
  543. package/dist/server/utils/sse.js +38 -0
  544. package/dist/types.d.ts +552 -0
  545. package/dist/types.js +39 -0
  546. package/llms.txt +105 -0
  547. package/package.json +189 -0
@@ -0,0 +1,176 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Google Flights extractor
3
+ // ---------------------------------------------------------------------------
4
/**
 * Airlines this extractor recognises in Google Flights result text, mapped to
 * the airline homepage used for the "Book on …" link. Homepages are used
 * because deep links don't work without session/cookies.
 * Key order is the order in which airline names are tried against a line.
 */
const GOOGLE_FLIGHTS_AIRLINE_SITES = {
    'United': 'https://www.united.com',
    'Delta': 'https://www.delta.com',
    'American': 'https://www.aa.com',
    'JetBlue': 'https://www.jetblue.com',
    'Spirit': 'https://www.spirit.com',
    'Frontier': 'https://www.flyfrontier.com',
    'Southwest': 'https://www.southwest.com',
    'Breeze': 'https://www.flybreeze.com',
    'Alaska': 'https://www.alaskaair.com',
    'Hawaiian': 'https://www.hawaiianairlines.com',
    'Sun Country': 'https://www.suncountry.com',
    'Avelo': 'https://www.aveloair.com',
};
/** How many lines after a departure time are scanned for that flight's details. */
const GOOGLE_FLIGHTS_SCAN_WINDOW = 45;
/**
 * Replacement callback for numeric character references (`&#8211;`, `&#x2013;`).
 * Decodes the reference instead of deleting it, because the parser depends on
 * characters such as the en dash (arrival marker) and `$` (price marker).
 * Out-of-range code points are dropped.
 */
function decodeGoogleFlightsNumericEntity(_match, ref) {
    const isHex = ref[0] === 'x' || ref[0] === 'X';
    const codePoint = isHex ? parseInt(ref.slice(1), 16) : parseInt(ref, 10);
    return codePoint > 0 && codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : '';
}
/**
 * Reduce raw HTML to newline-separated text: drop scripts/styles, turn every
 * tag into a line break, and decode the handful of entities that matter.
 */
function googleFlightsHtmlToText(html) {
    return html
        .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
        .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
        .replace(/<[^>]+>/g, '\n')
        .replace(/&lt;/g, '<')
        .replace(/&gt;/g, '>')
        .replace(/&#(x[0-9a-f]+|\d+);/gi, decodeGoogleFlightsNumericEntity)
        // `&amp;` is decoded last so that `&amp;lt;` yields the literal text `&lt;`, not `<`.
        .replace(/&amp;/g, '&')
        .replace(/\n{2,}/g, '\n');
}
/**
 * Booking link for a flight: the airline homepage when known, otherwise a
 * Google Flights search for the airline, route and date.
 */
function getGoogleFlightsBookingUrl(airline, from, to, dateStr) {
    const known = GOOGLE_FLIGHTS_AIRLINE_SITES[airline];
    if (known)
        return known;
    const search = `${airline} flights ${from.toUpperCase()} to ${to.toUpperCase()} ${dateStr}`;
    return `https://www.google.com/travel/flights?q=${encodeURIComponent(search)}`;
}
/**
 * Extract flight listings from a Google Flights results page.
 *
 * Google Flights is a SPA. `_html` is usually readability-processed markdown,
 * one field per line, e.g.:
 *
 *     - 7:15 PM
 *     7:15 PM on Sat, Apr 4
 *     – 10:29 PM
 *     United
 *     3 hr 14 min
 *     EWR
 *     ...
 *     $188
 *
 * Raw HTML is also accepted and is flattened to the same line-oriented form first.
 *
 * @param _html Page content: markdown text or raw HTML.
 * @param url   Page URL; must contain `/travel/flights`. Its `q` parameter is used as the route label.
 * @returns `null` when the URL is not a flights page, the input is not a string, or no
 *          complete flight (departure, arrival, airline and price) is found; otherwise an object
 *          with `domain`, `type`, `structured` (flights sorted by ascending price) and
 *          `cleanContent` (a markdown summary).
 */
export async function googleFlightsExtractor(_html, url) {
    if (typeof url !== 'string' || !url.includes('/travel/flights'))
        return null;
    if (typeof _html !== 'string')
        return null;
    // Case-insensitive so `<!doctype html>` / `<HTML>` are recognised as HTML too.
    const text = /<!doctype|<html/i.test(_html) ? googleFlightsHtmlToText(_html) : _html;
    const lines = text.split('\n').map(l => l.trim()).filter(Boolean);
    const airlineNames = Object.keys(GOOGLE_FLIGHTS_AIRLINE_SITES);
    const flights = [];
    for (let i = 0; i < lines.length; i++) {
        // A bare time (optionally bulleted with "- ") opens a new flight record.
        const departMatch = lines[i].match(/^(?:-\s+)?(\d{1,2}:\d{2}\s*[AP]M)$/);
        if (!departMatch)
            continue;
        const departTime = departMatch[1];
        let departDate = '', arriveTime = '', airline = '', duration = '';
        let fromAirport = '', toAirport = '', stops = '', bags = '';
        let price = 0;
        const scanEnd = Math.min(i + GOOGLE_FLIGHTS_SCAN_WINDOW, lines.length);
        for (let j = i + 1; j < scanEnd; j++) {
            const l = lines[j];
            // Date ("… on Sat, Apr 4") — the first such line is taken as the departure date.
            const dateM = l.match(/on\s+(\w+,\s+\w+\s+\d+)/);
            if (dateM && !departDate) {
                departDate = dateM[1];
                continue;
            }
            // Arrival time: either "– 10:29 PM" or a later "10:29 PM on Sat, Apr 4" line.
            const arrM = l.match(/^[–\-–—]\s*(\d{1,2}:\d{2}\s*[AP]M)$/) || l.match(/^(\d{1,2}:\d{2}\s*[AP]M)\s+on\s/);
            if (arrM && !arriveTime) {
                arriveTime = arrM[1];
                continue;
            }
            // Airline
            if (!airline) {
                const found = airlineNames.find(a => l === a || l.startsWith(a + 'Operated') || l.startsWith(a + ' '));
                if (found) {
                    airline = found;
                    continue;
                }
            }
            // Duration
            if (!duration && /^\d+\s+hr\s+\d+\s+min$/.test(l)) {
                duration = l;
                continue;
            }
            // Airport codes: first three-letter code is the origin, next distinct one the destination.
            if (/^[A-Z]{3}$/.test(l)) {
                if (!fromAirport) {
                    fromAirport = l;
                    continue;
                }
                if (!toAirport && l !== fromAirport) {
                    toAirport = l;
                    continue;
                }
            }
            // Stops
            if (!stops && (l === 'Nonstop' || /^\d+\s+stop/.test(l))) {
                stops = l;
                continue;
            }
            // Bags
            if (l.includes('carry-on bag') && !bags) {
                bags = l.includes('not included') ? 'Carry-on NOT included (extra fee)' : 'Carry-on included';
                continue;
            }
            // Price — the first price line closes the record.
            const priceM = l.match(/^\$(\d[\d,]*)$/);
            if (priceM && !price) {
                // Strip every thousands separator (a string pattern would remove only the first).
                price = parseInt(priceM[1].replace(/,/g, ''), 10);
                break;
            }
        }
        if (arriveTime && airline && price) {
            flights.push({ departTime, arriveTime, departDate, airline, duration, fromAirport, toAirport, stops: stops || 'Unknown', price, priceStr: `$${price}`, bags });
        }
    }
    // Deduplicate on departure time + airline + price.
    const seen = new Set();
    const unique = flights.filter(f => {
        const key = `${f.departTime}-${f.airline}-${f.price}`;
        if (seen.has(key))
            return false;
        seen.add(key);
        return true;
    });
    if (unique.length === 0)
        return null;
    unique.sort((a, b) => a.price - b.price);
    // Route label from the URL's `q` parameter. A base is supplied so a relative URL
    // (which already passed the guard above) does not throw; any parse failure just
    // leaves the label empty.
    let rawQuery = '';
    try {
        rawQuery = new URL(url, 'https://www.google.com').searchParams.get('q') || '';
    }
    catch {
        rawQuery = '';
    }
    const query = rawQuery.replace(/Flights?\s+(from\s+)?/i, '').replace(/\s+one\s+way/i, '').trim();
    const md = [
        `# ✈️ Flights — ${query || 'Search Results'}`,
        '',
        `*${unique.length} flights found · Source: [Google Flights](${url})*`,
        `*Prices include taxes + fees for 1 adult. Book directly via airline.*`,
        '',
    ];
    unique.forEach((f, idx) => {
        const bookingUrl = getGoogleFlightsBookingUrl(f.airline, f.fromAirport, f.toAirport, f.departDate);
        md.push(`## ${idx + 1}. ${f.airline} — ${f.priceStr}`);
        md.push(`🕐 Depart **${f.departTime}** → Arrive **${f.arriveTime}**${f.departDate ? ` · ${f.departDate}` : ''}`);
        md.push(`🛫 ${f.fromAirport} → ${f.toAirport} · ${f.duration} · ${f.stops}`);
        if (f.bags)
            md.push(`🧳 ${f.bags}`);
        md.push(`🔍 [See price on Google Flights](${url})`);
        md.push(`🛒 [Book on ${f.airline}](${bookingUrl})`);
        md.push('');
    });
    md.push('---');
    md.push(`📌 *All prices verified via [Google Flights](${url}). Click "See price" to confirm, then book directly with the airline.*`);
    return {
        domain: 'google.com/travel/flights',
        type: 'flights',
        structured: { flights: unique, route: query, source: 'Google Flights', sourceUrl: url },
        cleanContent: md.join('\n'),
    };
}
@@ -0,0 +1,2 @@
1
+ import type { DomainExtractResult } from './types.js';
2
/**
 * Domain extractor for Hacker News URLs.
 *
 * The implementation does not read the page HTML; it queries the Hacker News
 * Firebase API (`hacker-news.firebaseio.com/v0`) based on the URL's path and
 * `id` query parameter, covering item pages, the front page and user pages.
 *
 * @param _html - Page HTML; unused by the implementation.
 * @param url - Absolute Hacker News URL to extract data for.
 * @returns The extraction result, or `null` when the URL shape is not handled
 *          or the API returns no data.
 */
+ export declare function hackerNewsExtractor(_html: string, url: string): Promise<DomainExtractResult | null>;
@@ -0,0 +1,147 @@
1
+ import { stripHtml, unixToIso, fetchJson } from './shared.js';
2
+ async function fetchHNComment(id, depth) {
3
+ if (depth < 0)
4
+ return null;
5
+ try {
6
+ const data = await fetchJson(`https://hacker-news.firebaseio.com/v0/item/${id}.json`);
7
+ if (!data || data.deleted || data.dead)
8
+ return null;
9
+ const text = stripHtml(data.text || '');
10
+ if (!text)
11
+ return null;
12
+ let replies = [];
13
+ if (depth > 0 && Array.isArray(data.kids) && data.kids.length > 0) {
14
+ const replyResults = await Promise.all(data.kids.slice(0, 5).map((kid) => fetchHNComment(kid, depth - 1)));
15
+ replies = replyResults.filter(Boolean);
16
+ }
17
+ return {
18
+ author: data.by || '[deleted]',
19
+ text,
20
+ time: unixToIso(data.time),
21
+ replies,
22
+ };
23
+ }
24
+ catch {
25
+ return null;
26
+ }
27
+ }
28
+ export async function hackerNewsExtractor(_html, url) {
29
+ const urlObj = new URL(url);
30
+ const path = urlObj.pathname;
31
+ const domain = 'news.ycombinator.com';
32
+ // Story: ?id=12345 or /item?id=12345
33
+ const itemId = urlObj.searchParams.get('id');
34
+ if (itemId && (path === '/' || path === '/item' || path === '')) {
35
+ const storyData = await fetchJson(`https://hacker-news.firebaseio.com/v0/item/${itemId}.json`);
36
+ if (!storyData)
37
+ return null;
38
+ // Comment items — fetch parent story for context
39
+ if (storyData.type === 'comment') {
40
+ const parentId = storyData.parent;
41
+ let parentTitle = '';
42
+ if (parentId) {
43
+ try {
44
+ const parentData = await fetchJson(`https://hacker-news.firebaseio.com/v0/item/${parentId}.json`);
45
+ parentTitle = parentData?.title || '';
46
+ // Walk up to root story if parent is also a comment
47
+ if (!parentTitle && parentData?.parent) {
48
+ const rootData = await fetchJson(`https://hacker-news.firebaseio.com/v0/item/${parentData.parent}.json`);
49
+ parentTitle = rootData?.title || '';
50
+ }
51
+ }
52
+ catch { /* non-fatal */ }
53
+ }
54
+ const text = storyData.text ? stripHtml(storyData.text) : '';
55
+ const titleStr = parentTitle ? `Comment on: ${parentTitle}` : 'HN Comment';
56
+ const cleanContent = `## 🟠 ${titleStr}\n\n**Author:** ${storyData.by || '[deleted]'} | **Posted:** ${unixToIso(storyData.time)}\n\n${text}`;
57
+ return { domain: 'news.ycombinator.com', type: 'comment', structured: { title: titleStr, author: storyData.by, text }, cleanContent };
58
+ }
59
+ const type = storyData.type === 'story' ? 'story' :
60
+ storyData.type === 'ask' ? 'ask_hn' :
61
+ storyData.type === 'show' ? 'show_hn' :
62
+ storyData.type === 'job' ? 'job' : 'story';
63
+ // Fetch top 15 comments (top-level), 2 levels deep
64
+ const commentIds = Array.isArray(storyData.kids) ? storyData.kids.slice(0, 15) : [];
65
+ const commentResults = await Promise.all(commentIds.map((id) => fetchHNComment(id, 2)));
66
+ const comments = commentResults.filter(Boolean);
67
+ const structured = {
68
+ id: storyData.id,
69
+ title: storyData.title || '',
70
+ author: storyData.by || '[deleted]',
71
+ score: storyData.score ?? 0,
72
+ url: storyData.url || `https://news.ycombinator.com/item?id=${storyData.id}`,
73
+ commentCount: storyData.descendants ?? 0,
74
+ created: unixToIso(storyData.time),
75
+ text: storyData.text ? stripHtml(storyData.text) : null,
76
+ comments,
77
+ };
78
+ const commentsMd = comments.slice(0, 10).map(c => {
79
+ const repliesMd = c.replies.slice(0, 3).map(r => ` > **${r.author}**: ${r.text.slice(0, 200)}`).join('\n');
80
+ return `**${c.author}** (${c.time})\n${c.text.slice(0, 300)}${repliesMd ? '\n' + repliesMd : ''}`;
81
+ }).join('\n\n---\n\n');
82
+ const bodySection = structured.text ? `\n\n${structured.text.slice(0, 500)}` : '';
83
+ const cleanContent = `## 🟠 Hacker News: ${structured.title}
84
+
85
+ **Author:** ${structured.author} | **Score:** ${structured.score} | **Comments:** ${structured.commentCount}
86
+ **Posted:** ${structured.created}
87
+ ${structured.url !== `https://news.ycombinator.com/item?id=${structured.id}` ? `**Link:** ${structured.url}` : ''}${bodySection}
88
+
89
+ ---
90
+
91
+ ### Top Comments
92
+
93
+ ${commentsMd || '*No comments found.*'}`;
94
+ return { domain, type, structured, cleanContent };
95
+ }
96
+ // Front page / /news — fetch top stories
97
+ if (path === '/' || path === '/news' || path === '') {
98
+ const topIds = await fetchJson('https://hacker-news.firebaseio.com/v0/topstories.json');
99
+ if (!Array.isArray(topIds))
100
+ return null;
101
+ const top30 = topIds.slice(0, 30);
102
+ const storyResults = await Promise.all(top30.map((id) => fetchJson(`https://hacker-news.firebaseio.com/v0/item/${id}.json`).catch(() => null)));
103
+ const stories = storyResults
104
+ .filter((s) => s && s.title)
105
+ .map((s) => ({
106
+ id: s.id,
107
+ title: s.title,
108
+ author: s.by || '[deleted]',
109
+ score: s.score ?? 0,
110
+ commentCount: s.descendants ?? 0,
111
+ url: s.url || `https://news.ycombinator.com/item?id=${s.id}`,
112
+ hnUrl: `https://news.ycombinator.com/item?id=${s.id}`,
113
+ domain: s.url ? (() => { try {
114
+ return new URL(s.url).hostname.replace(/^www\./, '');
115
+ }
116
+ catch {
117
+ return '';
118
+ } })() : '',
119
+ }));
120
+ const structured = { title: 'Hacker News — Front Page', stories };
121
+ // Compact format: title (domain) | score pts | N comments
122
+ const cleanContent = `## 🟠 Hacker News — Front Page
123
+
124
+ ${stories.map((s, i) => `${i + 1}. **${s.title}**${s.domain ? ` (${s.domain})` : ''} — ↑${s.score} · 💬${s.commentCount}`).join('\n')}`;
125
+ return { domain, type: 'frontpage', structured, cleanContent };
126
+ }
127
+ // User page: ?id=username
128
+ const userId = urlObj.searchParams.get('id');
129
+ if (path === '/user' && userId) {
130
+ const userData = await fetchJson(`https://hacker-news.firebaseio.com/v0/user/${userId}.json`);
131
+ if (!userData)
132
+ return null;
133
+ const structured = {
134
+ id: userData.id,
135
+ karma: userData.karma ?? 0,
136
+ about: userData.about ? stripHtml(userData.about) : '',
137
+ created: unixToIso(userData.created),
138
+ submitted: (userData.submitted || []).length,
139
+ };
140
+ const cleanContent = `## 🟠 HN User: ${structured.id}
141
+
142
+ **Karma:** ${structured.karma} | **Member since:** ${structured.created}
143
+ ${structured.about ? '\n' + structured.about : ''}`;
144
+ return { domain, type: 'user', structured, cleanContent };
145
+ }
146
+ return null;
147
+ }
@@ -0,0 +1,2 @@
1
+ import type { DomainExtractResult } from './types.js';
2
/**
 * Domain extractor for IMDb title pages.
 *
 * The implementation parses `html` with cheerio, preferring the page's JSON-LD
 * block (`Movie`, `TVSeries` or `TVEpisode`) and falling back to HTML selectors
 * for title, rating, genres, credits, cast, keywords and awards.
 *
 * @param html - Raw HTML of the IMDb page.
 * @param url - URL of the page; echoed in the result and used to find the
 *              title id (`tt…`) for the awards-link fallback.
 * @returns The extraction result, or `null` when no title can be determined or
 *          an error occurs during extraction.
 */
+ export declare function imdbExtractor(html: string, url: string): Promise<DomainExtractResult | null>;
@@ -0,0 +1,172 @@
1
+ import { tryParseJson } from './shared.js';
2
+ // ---------------------------------------------------------------------------
3
+ // 16. IMDB extractor
4
+ // ---------------------------------------------------------------------------
5
+ export async function imdbExtractor(html, url) {
6
+ try {
7
+ const { load } = await import('cheerio');
8
+ const $ = load(html);
9
+ // IMDB uses JSON-LD richly
10
+ let jsonLd = null;
11
+ $('script[type="application/ld+json"]').each((_, el) => {
12
+ if (jsonLd)
13
+ return;
14
+ const raw = $(el).html() || '';
15
+ const parsed = tryParseJson(raw);
16
+ if (parsed?.['@type'] === 'Movie' || parsed?.['@type'] === 'TVSeries' || parsed?.['@type'] === 'TVEpisode') {
17
+ jsonLd = parsed;
18
+ }
19
+ });
20
+ const title = jsonLd?.name ||
21
+ $('meta[property="og:title"]').attr('content')?.replace(/ - IMDb$/, '') ||
22
+ $('h1[data-testid="hero__pageTitle"] span').first().text().trim() || '';
23
+ if (!title)
24
+ return null;
25
+ const description = jsonLd?.description ||
26
+ $('meta[property="og:description"]').attr('content') ||
27
+ $('p[data-testid="plot"]').text().trim() || '';
28
+ const year = jsonLd?.datePublished?.substring(0, 4) ||
29
+ $('a[href*="releaseinfo"]').first().text().trim() || '';
30
+ const ratingValue = jsonLd?.aggregateRating?.ratingValue ||
31
+ $('[data-testid="hero-rating-bar__aggregate-rating__score"] span').first().text().trim() || '';
32
+ const ratingCount = jsonLd?.aggregateRating?.ratingCount || '';
33
+ const contentType = jsonLd?.['@type'] || 'Movie';
34
+ // Genres
35
+ const genres = jsonLd?.genre
36
+ ? (Array.isArray(jsonLd.genre) ? jsonLd.genre : [jsonLd.genre])
37
+ : [];
38
+ if (!genres.length) {
39
+ $('[data-testid="genres"] a, a[href*="/search/title?genres"]').each((_, el) => {
40
+ const g = $(el).text().trim();
41
+ if (g && !genres.includes(g))
42
+ genres.push(g);
43
+ });
44
+ }
45
+ // Director
46
+ const director = jsonLd?.director
47
+ ? (Array.isArray(jsonLd.director)
48
+ ? jsonLd.director.map((d) => d.name || d).join(', ')
49
+ : jsonLd.director?.name || String(jsonLd.director))
50
+ : $('a[href*="/name/"][class*="ipc-metadata-list-item__list-content-item"]').first().text().trim() || '';
51
+ // Cast — parse HTML first for actor+character pairs, then fall back to JSON-LD
52
+ const castPairs = [];
53
+ // IMDB new UI: each title-cast-item contains actor link + character link
54
+ $('[data-testid="title-cast-item"]').each((_, el) => {
55
+ const actorEl = $(el).find('a[href*="/name/nm"]').first();
56
+ const charEl = $(el).find('[data-testid="title-cast-item__character"]').first();
57
+ const actor = actorEl.text().trim();
58
+ // Character name may span multiple elements; clean whitespace
59
+ const character = charEl.text().trim().replace(/\s+/g, ' ').replace(/^\.\.\.$/, '');
60
+ if (actor && actor.length > 1) {
61
+ castPairs.push({ actor, character: character || '' });
62
+ }
63
+ });
64
+ // Fall back to classic cast list (older IMDB page versions)
65
+ const castFromHtml = [];
66
+ if (!castPairs.length) {
67
+ $('.cast_list td.itemprop a').each((_, el) => {
68
+ const name = $(el).text().trim();
69
+ if (name && name.length > 1 && !castFromHtml.includes(name))
70
+ castFromHtml.push(name);
71
+ });
72
+ }
73
+ // JSON-LD actors as final fallback
74
+ const castFromLd = jsonLd?.actor
75
+ ? (Array.isArray(jsonLd.actor) ? jsonLd.actor : [jsonLd.actor])
76
+ .map((a) => a.name || a)
77
+ : [];
78
+ // Build final cast list: with characters if available (top 10), otherwise names only
79
+ const cast = castPairs.length > 0
80
+ ? castPairs.slice(0, 10).map(({ actor, character }) => character ? `${actor} as ${character}` : actor)
81
+ : [...new Set([...castFromLd, ...castFromHtml])].slice(0, 10);
82
+ // Runtime
83
+ const runtime = jsonLd?.duration
84
+ ? (() => {
85
+ const m = String(jsonLd.duration).match(/PT(?:(\d+)H)?(?:(\d+)M)?/);
86
+ if (m)
87
+ return [m[1] ? `${m[1]}h` : '', m[2] ? `${m[2]}m` : ''].filter(Boolean).join(' ');
88
+ return String(jsonLd.duration);
89
+ })()
90
+ : '';
91
+ // Full plot/storyline — try to get the longer version from HTML
92
+ const fullPlot = $('[data-testid="storyline-plot-summary"] span, [data-testid="plot-xl"] span, span[data-testid="plot-l"], #titleStoryLine p, .plot_summary .summary_text').first().text().trim() || description;
93
+ // Additional details: Writers, Keywords, Awards
94
+ const writers = [];
95
+ $('[data-testid="title-pc-wide-screen"] li[data-testid="title-pc-principal-credit"]:nth-child(2) a, .credit_summary_item:contains("Writer") a').each((_, el) => {
96
+ const name = $(el).text().trim();
97
+ if (name && !writers.includes(name))
98
+ writers.push(name);
99
+ });
100
+ // Keywords — try HTML first, fall back to JSON-LD keywords
101
+ let keywords = [];
102
+ $('[data-testid="storyline-plot-keywords"] a, .see-more.inline.canwrap span a, a[href*="keyword"]').each((_, el) => {
103
+ const kw = $(el).text().trim();
104
+ if (kw && kw.length < 30 && !keywords.includes(kw))
105
+ keywords.push(kw);
106
+ });
107
+ // Fall back to JSON-LD keywords if HTML didn't yield any
108
+ if (!keywords.length && jsonLd?.keywords) {
109
+ keywords = (typeof jsonLd.keywords === 'string'
110
+ ? jsonLd.keywords.split(',')
111
+ : Array.isArray(jsonLd.keywords) ? jsonLd.keywords : []).map((k) => k.trim()).filter(Boolean);
112
+ }
113
+ // Writers — also try JSON-LD creator field
114
+ if (!writers.length && jsonLd?.creator) {
115
+ const creators = Array.isArray(jsonLd.creator) ? jsonLd.creator : [jsonLd.creator];
116
+ for (const c of creators) {
117
+ const name = c?.name || (typeof c === 'string' ? c : '');
118
+ if (name && !writers.includes(name))
119
+ writers.push(name);
120
+ }
121
+ }
122
+ // Awards / accolades — try hero accolades chip, then any awards-related link text
123
+ let awardsSummary = '';
124
+ // IMDB new UI: awards accolades chip in the hero section
125
+ const accoladesEl = $('[data-testid="awards-accolades"]');
126
+ if (accoladesEl.length) {
127
+ awardsSummary = accoladesEl.text().trim().replace(/\s+/g, ' ');
128
+ }
129
+ // Fallback: look for per-title awards link (href contains the title ID /tt\d+/awards)
130
+ if (!awardsSummary) {
131
+ const titleMatch = url.match(/\/(tt\d+)/);
132
+ const titleId = titleMatch ? titleMatch[1] : '';
133
+ if (titleId) {
134
+ $(`a[href*="${titleId}"][href*="awards"]`).each((_, el) => {
135
+ const text = $(el).text().trim().replace(/\s+/g, ' ');
136
+ if (text && text.length > 3 && text.length < 200) {
137
+ awardsSummary = text;
138
+ return false; // break
139
+ }
140
+ });
141
+ }
142
+ }
143
+ // Fallback: JSON-LD award field
144
+ if (!awardsSummary && jsonLd?.award) {
145
+ awardsSummary = typeof jsonLd.award === 'string' ? jsonLd.award : '';
146
+ }
147
+ // Content rating & release date from JSON-LD
148
+ const contentRating = jsonLd?.contentRating || '';
149
+ const datePublished = jsonLd?.datePublished || '';
150
+ const structured = {
151
+ title, year, contentType, description: fullPlot, ratingValue, ratingCount,
152
+ genres, director, writers, cast, runtime, keywords, contentRating, datePublished, awardsSummary, url,
153
+ };
154
+ const ratingLine = ratingValue ? `⭐ ${ratingValue}/10${ratingCount ? ` (${Number(ratingCount).toLocaleString()} votes)` : ''}` : '';
155
+ const genreLine = genres.length ? genres.join(', ') : '';
156
+ const directorLine = director ? `**Director:** ${director}` : '';
157
+ const writersLine = writers.length ? `**Writers:** ${writers.slice(0, 5).join(', ')}` : '';
158
+ const castLine = cast.length ? `**Cast:** ${cast.join(', ')}` : '';
159
+ const runtimeLine = runtime ? `**Runtime:** ${runtime}` : '';
160
+ const ratedLine = contentRating ? `**Rated:** ${contentRating}` : '';
161
+ const releaseLine = datePublished ? `**Released:** ${datePublished}` : '';
162
+ const keywordsLine = keywords.length ? `\n**Keywords:** ${keywords.slice(0, 10).join(', ')}` : '';
163
+ const awardsLine = awardsSummary ? `**Awards:** ${awardsSummary}` : '';
164
+ const metaParts = [ratingLine, genreLine, runtimeLine, year ? `**Year:** ${year}` : ''].filter(Boolean).join(' | ');
165
+ const detailParts = [directorLine, writersLine, castLine, ratedLine, releaseLine, awardsLine].filter(Boolean).join('\n');
166
+ const cleanContent = `# 🎬 ${title}\n\n${metaParts}\n\n${detailParts}${keywordsLine}\n\n## Plot\n\n${fullPlot}`;
167
+ return { domain: 'imdb.com', type: contentType === 'TVSeries' ? 'tv_show' : 'movie', structured, cleanContent };
168
+ }
169
+ catch {
170
+ return null;
171
+ }
172
+ }
@@ -0,0 +1,26 @@
1
+ /**
2
+ * Domain extractor registry — imports all individual extractors and
3
+ * provides the getDomainExtractor / extractDomainData public API.
4
+ *
5
+ * This file is the entry point for the split extractor architecture.
6
+ * The original domain-extractors.ts re-exports from here for backward compat.
7
+ */
8
+ export type { DomainExtractResult, DomainExtractor } from './types.js';
9
+ import type { DomainExtractor, DomainExtractResult } from './types.js';
10
+ /**
11
+ * Returns the domain extractor for a URL, or null if none matches.
+ *
+ * @param url - URL to match against the registered extractors.
+ * @returns The matching extractor, or `null` when no extractor handles the URL.
12
+ */
13
+ export declare function getDomainExtractor(url: string): DomainExtractor | null;
14
+ /**
15
+ * Returns true if a domain extractor exists for the given URL.
+ *
+ * @param url - URL to check.
+ * @returns `true` when an extractor exists for the URL, otherwise `false`.
16
+ */
17
+ export declare function hasDomainExtractor(url: string): boolean;
18
+ /** Clear the extractor response cache (used in tests). Takes no arguments and returns nothing. */
19
+ export declare function clearExtractorCache(): void;
20
+ /** Inject a Redis client for shared cross-pod caching. NOTE(review): `redis` is typed `any`, so the client interface expected here is not visible from this declaration — confirm against the implementation. */
21
+ export declare function setExtractorRedis(redis: any): void;
22
+ /**
23
+ * Convenience: run the extractor for the URL (if one exists).
24
+ * Wraps _extractDomainDataImpl with a two-tier cache (in-memory + Redis).
+ *
+ * @param html - Page HTML handed to the extractor.
+ * @param url - URL used to select the extractor.
+ * @returns The extraction result, or `null` (presumably when no extractor
+ * matches or extraction yields nothing — confirm against the implementation).
25
+ */
26
+ export declare function extractDomainData(html: string, url: string): Promise<DomainExtractResult | null>;