@iflow-mcp/jakeliume-webpeel 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (547) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +313 -0
  3. package/dist/cache.d.ts +30 -0
  4. package/dist/cache.js +139 -0
  5. package/dist/cli/commands/auth.d.ts +5 -0
  6. package/dist/cli/commands/auth.js +411 -0
  7. package/dist/cli/commands/doctor.d.ts +37 -0
  8. package/dist/cli/commands/doctor.js +371 -0
  9. package/dist/cli/commands/fetch.d.ts +6 -0
  10. package/dist/cli/commands/fetch.js +1345 -0
  11. package/dist/cli/commands/guide.d.ts +2 -0
  12. package/dist/cli/commands/guide.js +183 -0
  13. package/dist/cli/commands/interact.d.ts +5 -0
  14. package/dist/cli/commands/interact.js +840 -0
  15. package/dist/cli/commands/jobs.d.ts +5 -0
  16. package/dist/cli/commands/jobs.js +997 -0
  17. package/dist/cli/commands/monitor.d.ts +12 -0
  18. package/dist/cli/commands/monitor.js +197 -0
  19. package/dist/cli/commands/observe.d.ts +12 -0
  20. package/dist/cli/commands/observe.js +158 -0
  21. package/dist/cli/commands/screenshot.d.ts +5 -0
  22. package/dist/cli/commands/screenshot.js +282 -0
  23. package/dist/cli/commands/search.d.ts +5 -0
  24. package/dist/cli/commands/search.js +1021 -0
  25. package/dist/cli/commands/setup.d.ts +13 -0
  26. package/dist/cli/commands/setup.js +244 -0
  27. package/dist/cli/commands/skill.d.ts +15 -0
  28. package/dist/cli/commands/skill.js +195 -0
  29. package/dist/cli/utils.d.ts +84 -0
  30. package/dist/cli/utils.js +806 -0
  31. package/dist/cli-auth.d.ts +75 -0
  32. package/dist/cli-auth.js +369 -0
  33. package/dist/cli.d.ts +17 -0
  34. package/dist/cli.js +99 -0
  35. package/dist/core/actions.d.ts +69 -0
  36. package/dist/core/actions.js +495 -0
  37. package/dist/core/agent.d.ts +98 -0
  38. package/dist/core/agent.js +558 -0
  39. package/dist/core/answer.d.ts +42 -0
  40. package/dist/core/answer.js +395 -0
  41. package/dist/core/application-tracker.d.ts +84 -0
  42. package/dist/core/application-tracker.js +184 -0
  43. package/dist/core/apply.d.ts +162 -0
  44. package/dist/core/apply.js +816 -0
  45. package/dist/core/auth-detection.d.ts +35 -0
  46. package/dist/core/auth-detection.js +358 -0
  47. package/dist/core/auto-extract.d.ts +82 -0
  48. package/dist/core/auto-extract.js +604 -0
  49. package/dist/core/auto-interact.d.ts +23 -0
  50. package/dist/core/auto-interact.js +246 -0
  51. package/dist/core/bm25-filter.d.ts +66 -0
  52. package/dist/core/bm25-filter.js +288 -0
  53. package/dist/core/branding.d.ts +54 -0
  54. package/dist/core/branding.js +234 -0
  55. package/dist/core/browser-fetch.d.ts +323 -0
  56. package/dist/core/browser-fetch.js +1600 -0
  57. package/dist/core/browser-pool.d.ts +91 -0
  58. package/dist/core/browser-pool.js +550 -0
  59. package/dist/core/budget.d.ts +42 -0
  60. package/dist/core/budget.js +324 -0
  61. package/dist/core/business-intel.d.ts +47 -0
  62. package/dist/core/business-intel.js +279 -0
  63. package/dist/core/cache.d.ts +13 -0
  64. package/dist/core/cache.js +121 -0
  65. package/dist/core/cf-worker-proxy.d.ts +32 -0
  66. package/dist/core/cf-worker-proxy.js +87 -0
  67. package/dist/core/challenge-detection.d.ts +26 -0
  68. package/dist/core/challenge-detection.js +468 -0
  69. package/dist/core/change-tracking.d.ts +75 -0
  70. package/dist/core/change-tracking.js +276 -0
  71. package/dist/core/chunker.d.ts +46 -0
  72. package/dist/core/chunker.js +249 -0
  73. package/dist/core/chunking.d.ts +42 -0
  74. package/dist/core/chunking.js +181 -0
  75. package/dist/core/circuit-breaker.d.ts +44 -0
  76. package/dist/core/circuit-breaker.js +85 -0
  77. package/dist/core/content-pruner.d.ts +47 -0
  78. package/dist/core/content-pruner.js +425 -0
  79. package/dist/core/cookie-cache.d.ts +60 -0
  80. package/dist/core/cookie-cache.js +163 -0
  81. package/dist/core/crawl-checkpoint.d.ts +54 -0
  82. package/dist/core/crawl-checkpoint.js +104 -0
  83. package/dist/core/crawler.d.ts +84 -0
  84. package/dist/core/crawler.js +349 -0
  85. package/dist/core/cross-verify.d.ts +27 -0
  86. package/dist/core/cross-verify.js +93 -0
  87. package/dist/core/deep-fetch.d.ts +74 -0
  88. package/dist/core/deep-fetch.js +405 -0
  89. package/dist/core/deep-research.d.ts +141 -0
  90. package/dist/core/deep-research.js +972 -0
  91. package/dist/core/design-analysis.d.ts +70 -0
  92. package/dist/core/design-analysis.js +490 -0
  93. package/dist/core/design-compare.d.ts +38 -0
  94. package/dist/core/design-compare.js +264 -0
  95. package/dist/core/diff.d.ts +61 -0
  96. package/dist/core/diff.js +289 -0
  97. package/dist/core/dns-cache.d.ts +20 -0
  98. package/dist/core/dns-cache.js +198 -0
  99. package/dist/core/documents.d.ts +23 -0
  100. package/dist/core/documents.js +123 -0
  101. package/dist/core/domain-memory.d.ts +66 -0
  102. package/dist/core/domain-memory.js +163 -0
  103. package/dist/core/domain-verify.d.ts +40 -0
  104. package/dist/core/domain-verify.js +379 -0
  105. package/dist/core/engine-ranker.d.ts +112 -0
  106. package/dist/core/engine-ranker.js +395 -0
  107. package/dist/core/extract-inline.d.ts +38 -0
  108. package/dist/core/extract-inline.js +215 -0
  109. package/dist/core/extract-listings.d.ts +38 -0
  110. package/dist/core/extract-listings.js +461 -0
  111. package/dist/core/extract.d.ts +9 -0
  112. package/dist/core/extract.js +139 -0
  113. package/dist/core/fetch-cache.d.ts +57 -0
  114. package/dist/core/fetch-cache.js +95 -0
  115. package/dist/core/fetcher.d.ts +13 -0
  116. package/dist/core/fetcher.js +12 -0
  117. package/dist/core/google-cache.d.ts +29 -0
  118. package/dist/core/google-cache.js +180 -0
  119. package/dist/core/google-serp-parser.d.ts +82 -0
  120. package/dist/core/google-serp-parser.js +287 -0
  121. package/dist/core/hotel-search.d.ts +122 -0
  122. package/dist/core/hotel-search.js +382 -0
  123. package/dist/core/http-fetch.d.ts +72 -0
  124. package/dist/core/http-fetch.js +820 -0
  125. package/dist/core/human.d.ts +175 -0
  126. package/dist/core/human.js +680 -0
  127. package/dist/core/image-caption.d.ts +44 -0
  128. package/dist/core/image-caption.js +271 -0
  129. package/dist/core/jobs.d.ts +75 -0
  130. package/dist/core/jobs.js +634 -0
  131. package/dist/core/json-ld.d.ts +15 -0
  132. package/dist/core/json-ld.js +617 -0
  133. package/dist/core/language-detect.d.ts +18 -0
  134. package/dist/core/language-detect.js +135 -0
  135. package/dist/core/links.d.ts +10 -0
  136. package/dist/core/links.js +44 -0
  137. package/dist/core/llm-extract.d.ts +71 -0
  138. package/dist/core/llm-extract.js +507 -0
  139. package/dist/core/llm-provider.d.ts +100 -0
  140. package/dist/core/llm-provider.js +702 -0
  141. package/dist/core/local-search.d.ts +60 -0
  142. package/dist/core/local-search.js +308 -0
  143. package/dist/core/logger.d.ts +28 -0
  144. package/dist/core/logger.js +104 -0
  145. package/dist/core/map.d.ts +33 -0
  146. package/dist/core/map.js +127 -0
  147. package/dist/core/markdown.d.ts +92 -0
  148. package/dist/core/markdown.js +809 -0
  149. package/dist/core/metadata.d.ts +34 -0
  150. package/dist/core/metadata.js +422 -0
  151. package/dist/core/observe.d.ts +113 -0
  152. package/dist/core/observe.js +395 -0
  153. package/dist/core/ocr.d.ts +12 -0
  154. package/dist/core/ocr.js +33 -0
  155. package/dist/core/paginate.d.ts +31 -0
  156. package/dist/core/paginate.js +106 -0
  157. package/dist/core/pdf.d.ts +8 -0
  158. package/dist/core/pdf.js +25 -0
  159. package/dist/core/peel-tls.d.ts +25 -0
  160. package/dist/core/peel-tls.js +220 -0
  161. package/dist/core/pipeline.d.ts +132 -0
  162. package/dist/core/pipeline.js +1666 -0
  163. package/dist/core/profiles.d.ts +61 -0
  164. package/dist/core/profiles.js +350 -0
  165. package/dist/core/prompt-guard.d.ts +30 -0
  166. package/dist/core/prompt-guard.js +119 -0
  167. package/dist/core/proxy-config.d.ts +90 -0
  168. package/dist/core/proxy-config.js +172 -0
  169. package/dist/core/quick-answer.d.ts +53 -0
  170. package/dist/core/quick-answer.js +833 -0
  171. package/dist/core/rate-governor.d.ts +80 -0
  172. package/dist/core/rate-governor.js +238 -0
  173. package/dist/core/readability.d.ts +57 -0
  174. package/dist/core/readability.js +533 -0
  175. package/dist/core/research.d.ts +66 -0
  176. package/dist/core/research.js +270 -0
  177. package/dist/core/retry.d.ts +60 -0
  178. package/dist/core/retry.js +119 -0
  179. package/dist/core/safe-browsing.d.ts +30 -0
  180. package/dist/core/safe-browsing.js +206 -0
  181. package/dist/core/schema-extraction.d.ts +66 -0
  182. package/dist/core/schema-extraction.js +352 -0
  183. package/dist/core/schema-postprocess.d.ts +32 -0
  184. package/dist/core/schema-postprocess.js +469 -0
  185. package/dist/core/schema-templates.d.ts +19 -0
  186. package/dist/core/schema-templates.js +143 -0
  187. package/dist/core/screenshot.d.ts +224 -0
  188. package/dist/core/screenshot.js +207 -0
  189. package/dist/core/search-engines.d.ts +25 -0
  190. package/dist/core/search-engines.js +182 -0
  191. package/dist/core/search-provider.d.ts +243 -0
  192. package/dist/core/search-provider.js +1629 -0
  193. package/dist/core/searxng-provider.d.ts +35 -0
  194. package/dist/core/searxng-provider.js +105 -0
  195. package/dist/core/selective-evidence.d.ts +151 -0
  196. package/dist/core/selective-evidence.js +389 -0
  197. package/dist/core/site-search.d.ts +44 -0
  198. package/dist/core/site-search.js +252 -0
  199. package/dist/core/sitemap.d.ts +23 -0
  200. package/dist/core/sitemap.js +105 -0
  201. package/dist/core/source-credibility.d.ts +29 -0
  202. package/dist/core/source-credibility.js +584 -0
  203. package/dist/core/source-scoring.d.ts +166 -0
  204. package/dist/core/source-scoring.js +396 -0
  205. package/dist/core/stemmer.d.ts +38 -0
  206. package/dist/core/stemmer.js +509 -0
  207. package/dist/core/strategies.d.ts +104 -0
  208. package/dist/core/strategies.js +1044 -0
  209. package/dist/core/strategy-hooks.d.ts +145 -0
  210. package/dist/core/strategy-hooks.js +74 -0
  211. package/dist/core/structured-extract.d.ts +43 -0
  212. package/dist/core/structured-extract.js +550 -0
  213. package/dist/core/summarize.d.ts +17 -0
  214. package/dist/core/summarize.js +78 -0
  215. package/dist/core/synonyms.d.ts +42 -0
  216. package/dist/core/synonyms.js +184 -0
  217. package/dist/core/system-monitor.d.ts +61 -0
  218. package/dist/core/system-monitor.js +133 -0
  219. package/dist/core/table-format.d.ts +30 -0
  220. package/dist/core/table-format.js +146 -0
  221. package/dist/core/threat-feeds.d.ts +23 -0
  222. package/dist/core/threat-feeds.js +104 -0
  223. package/dist/core/timing.d.ts +21 -0
  224. package/dist/core/timing.js +33 -0
  225. package/dist/core/transcript-export.d.ts +47 -0
  226. package/dist/core/transcript-export.js +107 -0
  227. package/dist/core/user-agents.d.ts +82 -0
  228. package/dist/core/user-agents.js +239 -0
  229. package/dist/core/vertical-search.d.ts +54 -0
  230. package/dist/core/vertical-search.js +158 -0
  231. package/dist/core/watch-manager.d.ts +175 -0
  232. package/dist/core/watch-manager.js +416 -0
  233. package/dist/core/watch.d.ts +101 -0
  234. package/dist/core/watch.js +389 -0
  235. package/dist/core/youtube.d.ts +130 -0
  236. package/dist/core/youtube.js +1175 -0
  237. package/dist/ee/challenge-re-export.d.ts +1 -0
  238. package/dist/ee/challenge-re-export.js +1 -0
  239. package/dist/ee/challenge-solver.d.ts +72 -0
  240. package/dist/ee/challenge-solver.js +720 -0
  241. package/dist/ee/domain-extractors.d.ts +8 -0
  242. package/dist/ee/domain-extractors.js +8 -0
  243. package/dist/ee/domain-intel.d.ts +16 -0
  244. package/dist/ee/domain-intel.js +133 -0
  245. package/dist/ee/extractors/allrecipes.d.ts +2 -0
  246. package/dist/ee/extractors/allrecipes.js +120 -0
  247. package/dist/ee/extractors/amazon.d.ts +2 -0
  248. package/dist/ee/extractors/amazon.js +78 -0
  249. package/dist/ee/extractors/arxiv.d.ts +2 -0
  250. package/dist/ee/extractors/arxiv.js +137 -0
  251. package/dist/ee/extractors/bestbuy.d.ts +2 -0
  252. package/dist/ee/extractors/bestbuy.js +78 -0
  253. package/dist/ee/extractors/carscom.d.ts +2 -0
  254. package/dist/ee/extractors/carscom.js +121 -0
  255. package/dist/ee/extractors/coingecko.d.ts +2 -0
  256. package/dist/ee/extractors/coingecko.js +134 -0
  257. package/dist/ee/extractors/craigslist.d.ts +2 -0
  258. package/dist/ee/extractors/craigslist.js +92 -0
  259. package/dist/ee/extractors/devto.d.ts +2 -0
  260. package/dist/ee/extractors/devto.js +135 -0
  261. package/dist/ee/extractors/ebay.d.ts +2 -0
  262. package/dist/ee/extractors/ebay.js +90 -0
  263. package/dist/ee/extractors/espn.d.ts +2 -0
  264. package/dist/ee/extractors/espn.js +260 -0
  265. package/dist/ee/extractors/etsy.d.ts +2 -0
  266. package/dist/ee/extractors/etsy.js +52 -0
  267. package/dist/ee/extractors/facebook.d.ts +2 -0
  268. package/dist/ee/extractors/facebook.js +46 -0
  269. package/dist/ee/extractors/github.d.ts +2 -0
  270. package/dist/ee/extractors/github.js +196 -0
  271. package/dist/ee/extractors/google-flights.d.ts +2 -0
  272. package/dist/ee/extractors/google-flights.js +176 -0
  273. package/dist/ee/extractors/hackernews.d.ts +2 -0
  274. package/dist/ee/extractors/hackernews.js +147 -0
  275. package/dist/ee/extractors/imdb.d.ts +2 -0
  276. package/dist/ee/extractors/imdb.js +172 -0
  277. package/dist/ee/extractors/index.d.ts +26 -0
  278. package/dist/ee/extractors/index.js +247 -0
  279. package/dist/ee/extractors/instagram.d.ts +2 -0
  280. package/dist/ee/extractors/instagram.js +102 -0
  281. package/dist/ee/extractors/kalshi.d.ts +2 -0
  282. package/dist/ee/extractors/kalshi.js +121 -0
  283. package/dist/ee/extractors/kayak-cars.d.ts +2 -0
  284. package/dist/ee/extractors/kayak-cars.js +270 -0
  285. package/dist/ee/extractors/linkedin.d.ts +2 -0
  286. package/dist/ee/extractors/linkedin.js +113 -0
  287. package/dist/ee/extractors/medium.d.ts +2 -0
  288. package/dist/ee/extractors/medium.js +130 -0
  289. package/dist/ee/extractors/news.d.ts +4 -0
  290. package/dist/ee/extractors/news.js +173 -0
  291. package/dist/ee/extractors/npm.d.ts +2 -0
  292. package/dist/ee/extractors/npm.js +86 -0
  293. package/dist/ee/extractors/pdf.d.ts +2 -0
  294. package/dist/ee/extractors/pdf.js +108 -0
  295. package/dist/ee/extractors/pinterest.d.ts +2 -0
  296. package/dist/ee/extractors/pinterest.js +34 -0
  297. package/dist/ee/extractors/polymarket.d.ts +2 -0
  298. package/dist/ee/extractors/polymarket.js +358 -0
  299. package/dist/ee/extractors/producthunt.d.ts +2 -0
  300. package/dist/ee/extractors/producthunt.js +88 -0
  301. package/dist/ee/extractors/pubmed.d.ts +2 -0
  302. package/dist/ee/extractors/pubmed.js +162 -0
  303. package/dist/ee/extractors/pypi.d.ts +2 -0
  304. package/dist/ee/extractors/pypi.js +80 -0
  305. package/dist/ee/extractors/reddit.d.ts +2 -0
  306. package/dist/ee/extractors/reddit.js +438 -0
  307. package/dist/ee/extractors/redfin.d.ts +2 -0
  308. package/dist/ee/extractors/redfin.js +156 -0
  309. package/dist/ee/extractors/semanticscholar.d.ts +2 -0
  310. package/dist/ee/extractors/semanticscholar.js +131 -0
  311. package/dist/ee/extractors/shared.d.ts +12 -0
  312. package/dist/ee/extractors/shared.js +76 -0
  313. package/dist/ee/extractors/soundcloud.d.ts +2 -0
  314. package/dist/ee/extractors/soundcloud.js +34 -0
  315. package/dist/ee/extractors/sportsbetting.d.ts +2 -0
  316. package/dist/ee/extractors/sportsbetting.js +37 -0
  317. package/dist/ee/extractors/spotify.d.ts +2 -0
  318. package/dist/ee/extractors/spotify.js +34 -0
  319. package/dist/ee/extractors/stackoverflow.d.ts +2 -0
  320. package/dist/ee/extractors/stackoverflow.js +61 -0
  321. package/dist/ee/extractors/substack.d.ts +2 -0
  322. package/dist/ee/extractors/substack.js +115 -0
  323. package/dist/ee/extractors/substackroot.d.ts +2 -0
  324. package/dist/ee/extractors/substackroot.js +46 -0
  325. package/dist/ee/extractors/tiktok.d.ts +2 -0
  326. package/dist/ee/extractors/tiktok.js +29 -0
  327. package/dist/ee/extractors/tradingview.d.ts +2 -0
  328. package/dist/ee/extractors/tradingview.js +182 -0
  329. package/dist/ee/extractors/twitch.d.ts +2 -0
  330. package/dist/ee/extractors/twitch.js +36 -0
  331. package/dist/ee/extractors/twitter.d.ts +2 -0
  332. package/dist/ee/extractors/twitter.js +327 -0
  333. package/dist/ee/extractors/types.d.ts +14 -0
  334. package/dist/ee/extractors/types.js +1 -0
  335. package/dist/ee/extractors/walmart.d.ts +2 -0
  336. package/dist/ee/extractors/walmart.js +50 -0
  337. package/dist/ee/extractors/weather.d.ts +2 -0
  338. package/dist/ee/extractors/weather.js +133 -0
  339. package/dist/ee/extractors/wikipedia.d.ts +4 -0
  340. package/dist/ee/extractors/wikipedia.js +235 -0
  341. package/dist/ee/extractors/yelp.d.ts +2 -0
  342. package/dist/ee/extractors/yelp.js +216 -0
  343. package/dist/ee/extractors/youtube.d.ts +2 -0
  344. package/dist/ee/extractors/youtube.js +189 -0
  345. package/dist/ee/extractors/zillow.d.ts +54 -0
  346. package/dist/ee/extractors/zillow.js +247 -0
  347. package/dist/ee/extractors-re-export.d.ts +1 -0
  348. package/dist/ee/extractors-re-export.js +1 -0
  349. package/dist/ee/premium-hooks.d.ts +20 -0
  350. package/dist/ee/premium-hooks.js +50 -0
  351. package/dist/ee/spa-detection.d.ts +2 -0
  352. package/dist/ee/spa-detection.js +2 -0
  353. package/dist/ee/stability.d.ts +4 -0
  354. package/dist/ee/stability.js +29 -0
  355. package/dist/ee/swr-cache.d.ts +14 -0
  356. package/dist/ee/swr-cache.js +34 -0
  357. package/dist/index.d.ts +143 -0
  358. package/dist/index.js +291 -0
  359. package/dist/integrations/index.d.ts +2 -0
  360. package/dist/integrations/index.js +2 -0
  361. package/dist/integrations/langchain.d.ts +64 -0
  362. package/dist/integrations/langchain.js +115 -0
  363. package/dist/integrations/llamaindex.d.ts +50 -0
  364. package/dist/integrations/llamaindex.js +91 -0
  365. package/dist/mcp/handlers/act.d.ts +5 -0
  366. package/dist/mcp/handlers/act.js +34 -0
  367. package/dist/mcp/handlers/definitions.d.ts +6 -0
  368. package/dist/mcp/handlers/definitions.js +395 -0
  369. package/dist/mcp/handlers/extract.d.ts +7 -0
  370. package/dist/mcp/handlers/extract.js +135 -0
  371. package/dist/mcp/handlers/fetch.d.ts +6 -0
  372. package/dist/mcp/handlers/fetch.js +98 -0
  373. package/dist/mcp/handlers/find.d.ts +5 -0
  374. package/dist/mcp/handlers/find.js +137 -0
  375. package/dist/mcp/handlers/index.d.ts +13 -0
  376. package/dist/mcp/handlers/index.js +63 -0
  377. package/dist/mcp/handlers/legacy.d.ts +25 -0
  378. package/dist/mcp/handlers/legacy.js +450 -0
  379. package/dist/mcp/handlers/meta.d.ts +6 -0
  380. package/dist/mcp/handlers/meta.js +40 -0
  381. package/dist/mcp/handlers/monitor.d.ts +5 -0
  382. package/dist/mcp/handlers/monitor.js +41 -0
  383. package/dist/mcp/handlers/observe.d.ts +8 -0
  384. package/dist/mcp/handlers/observe.js +37 -0
  385. package/dist/mcp/handlers/read.d.ts +6 -0
  386. package/dist/mcp/handlers/read.js +78 -0
  387. package/dist/mcp/handlers/see.d.ts +5 -0
  388. package/dist/mcp/handlers/see.js +75 -0
  389. package/dist/mcp/handlers/types.d.ts +29 -0
  390. package/dist/mcp/handlers/types.js +28 -0
  391. package/dist/mcp/server.d.ts +7 -0
  392. package/dist/mcp/server.js +108 -0
  393. package/dist/mcp/smart-router.d.ts +23 -0
  394. package/dist/mcp/smart-router.js +178 -0
  395. package/dist/server/app.d.ts +14 -0
  396. package/dist/server/app.js +632 -0
  397. package/dist/server/auth-store.d.ts +28 -0
  398. package/dist/server/auth-store.js +88 -0
  399. package/dist/server/bull-queues.d.ts +60 -0
  400. package/dist/server/bull-queues.js +90 -0
  401. package/dist/server/email-service.d.ts +55 -0
  402. package/dist/server/email-service.js +291 -0
  403. package/dist/server/job-queue.d.ts +100 -0
  404. package/dist/server/job-queue.js +145 -0
  405. package/dist/server/logger.d.ts +10 -0
  406. package/dist/server/logger.js +37 -0
  407. package/dist/server/middleware/audit-log.d.ts +14 -0
  408. package/dist/server/middleware/audit-log.js +73 -0
  409. package/dist/server/middleware/auth.d.ts +35 -0
  410. package/dist/server/middleware/auth.js +225 -0
  411. package/dist/server/middleware/rate-limit.d.ts +50 -0
  412. package/dist/server/middleware/rate-limit.js +270 -0
  413. package/dist/server/middleware/scope-guard.d.ts +25 -0
  414. package/dist/server/middleware/scope-guard.js +45 -0
  415. package/dist/server/middleware/url-validator.d.ts +15 -0
  416. package/dist/server/middleware/url-validator.js +201 -0
  417. package/dist/server/openapi.yaml +6418 -0
  418. package/dist/server/pg-auth-store.d.ts +146 -0
  419. package/dist/server/pg-auth-store.js +576 -0
  420. package/dist/server/pg-job-queue.d.ts +59 -0
  421. package/dist/server/pg-job-queue.js +375 -0
  422. package/dist/server/routes/activity.d.ts +6 -0
  423. package/dist/server/routes/activity.js +79 -0
  424. package/dist/server/routes/admin-active.d.ts +7 -0
  425. package/dist/server/routes/admin-active.js +120 -0
  426. package/dist/server/routes/admin-stats.d.ts +7 -0
  427. package/dist/server/routes/admin-stats.js +176 -0
  428. package/dist/server/routes/agent.d.ts +24 -0
  429. package/dist/server/routes/agent.js +480 -0
  430. package/dist/server/routes/answer.d.ts +5 -0
  431. package/dist/server/routes/answer.js +125 -0
  432. package/dist/server/routes/ask.d.ts +28 -0
  433. package/dist/server/routes/ask.js +295 -0
  434. package/dist/server/routes/batch.d.ts +6 -0
  435. package/dist/server/routes/batch.js +493 -0
  436. package/dist/server/routes/cache-warm.d.ts +25 -0
  437. package/dist/server/routes/cache-warm.js +212 -0
  438. package/dist/server/routes/cli-usage.d.ts +6 -0
  439. package/dist/server/routes/cli-usage.js +127 -0
  440. package/dist/server/routes/compat.d.ts +23 -0
  441. package/dist/server/routes/compat.js +652 -0
  442. package/dist/server/routes/crawl.d.ts +13 -0
  443. package/dist/server/routes/crawl.js +287 -0
  444. package/dist/server/routes/deep-fetch.d.ts +8 -0
  445. package/dist/server/routes/deep-fetch.js +57 -0
  446. package/dist/server/routes/deep-research.d.ts +11 -0
  447. package/dist/server/routes/deep-research.js +232 -0
  448. package/dist/server/routes/demo.d.ts +24 -0
  449. package/dist/server/routes/demo.js +517 -0
  450. package/dist/server/routes/do.d.ts +8 -0
  451. package/dist/server/routes/do.js +72 -0
  452. package/dist/server/routes/extract.d.ts +14 -0
  453. package/dist/server/routes/extract.js +325 -0
  454. package/dist/server/routes/feed.d.ts +15 -0
  455. package/dist/server/routes/feed.js +311 -0
  456. package/dist/server/routes/fetch-queue.d.ts +13 -0
  457. package/dist/server/routes/fetch-queue.js +357 -0
  458. package/dist/server/routes/fetch.d.ts +7 -0
  459. package/dist/server/routes/fetch.js +1274 -0
  460. package/dist/server/routes/go.d.ts +14 -0
  461. package/dist/server/routes/go.js +81 -0
  462. package/dist/server/routes/health.d.ts +11 -0
  463. package/dist/server/routes/health.js +141 -0
  464. package/dist/server/routes/jobs.d.ts +7 -0
  465. package/dist/server/routes/jobs.js +574 -0
  466. package/dist/server/routes/map.d.ts +11 -0
  467. package/dist/server/routes/map.js +116 -0
  468. package/dist/server/routes/mcp.d.ts +14 -0
  469. package/dist/server/routes/mcp.js +197 -0
  470. package/dist/server/routes/metrics.d.ts +37 -0
  471. package/dist/server/routes/metrics.js +149 -0
  472. package/dist/server/routes/oauth.d.ts +9 -0
  473. package/dist/server/routes/oauth.js +396 -0
  474. package/dist/server/routes/playground.d.ts +17 -0
  475. package/dist/server/routes/playground.js +283 -0
  476. package/dist/server/routes/reader.d.ts +18 -0
  477. package/dist/server/routes/reader.js +192 -0
  478. package/dist/server/routes/research.d.ts +14 -0
  479. package/dist/server/routes/research.js +482 -0
  480. package/dist/server/routes/screenshot.d.ts +22 -0
  481. package/dist/server/routes/screenshot.js +820 -0
  482. package/dist/server/routes/search.d.ts +6 -0
  483. package/dist/server/routes/search.js +874 -0
  484. package/dist/server/routes/session.d.ts +17 -0
  485. package/dist/server/routes/session.js +548 -0
  486. package/dist/server/routes/share.d.ts +18 -0
  487. package/dist/server/routes/share.js +462 -0
  488. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  489. package/dist/server/routes/smart-search/handlers/cars.js +102 -0
  490. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  491. package/dist/server/routes/smart-search/handlers/flights.js +72 -0
  492. package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
  493. package/dist/server/routes/smart-search/handlers/general.js +717 -0
  494. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  495. package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
  496. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  497. package/dist/server/routes/smart-search/handlers/products.js +1309 -0
  498. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  499. package/dist/server/routes/smart-search/handlers/rental.js +154 -0
  500. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  501. package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
  502. package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
  503. package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
  504. package/dist/server/routes/smart-search/index.d.ts +19 -0
  505. package/dist/server/routes/smart-search/index.js +546 -0
  506. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  507. package/dist/server/routes/smart-search/intent.js +264 -0
  508. package/dist/server/routes/smart-search/llm.d.ts +16 -0
  509. package/dist/server/routes/smart-search/llm.js +70 -0
  510. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  511. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  512. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  513. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  514. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  515. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  516. package/dist/server/routes/smart-search/types.d.ts +81 -0
  517. package/dist/server/routes/smart-search/types.js +1 -0
  518. package/dist/server/routes/smart-search/utils.d.ts +20 -0
  519. package/dist/server/routes/smart-search/utils.js +146 -0
  520. package/dist/server/routes/stats.d.ts +6 -0
  521. package/dist/server/routes/stats.js +71 -0
  522. package/dist/server/routes/stripe.d.ts +15 -0
  523. package/dist/server/routes/stripe.js +296 -0
  524. package/dist/server/routes/transcript-export.d.ts +10 -0
  525. package/dist/server/routes/transcript-export.js +178 -0
  526. package/dist/server/routes/usage.d.ts +9 -0
  527. package/dist/server/routes/usage.js +279 -0
  528. package/dist/server/routes/users.d.ts +8 -0
  529. package/dist/server/routes/users.js +1867 -0
  530. package/dist/server/routes/watch.d.ts +15 -0
  531. package/dist/server/routes/watch.js +309 -0
  532. package/dist/server/routes/webhooks.d.ts +26 -0
  533. package/dist/server/routes/webhooks.js +170 -0
  534. package/dist/server/routes/youtube.d.ts +6 -0
  535. package/dist/server/routes/youtube.js +130 -0
  536. package/dist/server/sentry.d.ts +14 -0
  537. package/dist/server/sentry.js +104 -0
  538. package/dist/server/types.d.ts +15 -0
  539. package/dist/server/types.js +7 -0
  540. package/dist/server/utils/response.d.ts +44 -0
  541. package/dist/server/utils/response.js +69 -0
  542. package/dist/server/utils/sse.d.ts +22 -0
  543. package/dist/server/utils/sse.js +38 -0
  544. package/dist/types.d.ts +552 -0
  545. package/dist/types.js +39 -0
  546. package/llms.txt +105 -0
  547. package/package.json +189 -0
@@ -0,0 +1,1021 @@
1
+ /**
2
+ * Search commands: search, sites, batch, crawl, map
3
+ */
4
+ import ora from 'ora';
5
+ import { readFileSync } from 'fs';
6
+ import { peel, peelBatch, cleanup } from '../../index.js';
7
+ import { checkUsage, showUsageFooter, loadConfig } from '../../cli-auth.js';
8
+ import { writeStdout, formatListingsCsv } from '../utils.js';
9
/**
 * Parse a date range string like "Mar29-Apr4" into an array of date strings.
 * Returns ["Mar 29", "Mar 30", ..., "Apr 4"]
 *
 * The two dates are written as a three-letter month followed by a one- or
 * two-digit day, separated by "-", "–" or "to" (whitespace is optional).
 * Month names are matched case-insensitively.
 *
 * The range carries no year, so the current calendar year is assumed for the
 * start date. When the end month comes before the start month
 * (e.g. "Dec30-Jan2") the range is treated as crossing into the next year
 * instead of being rejected as empty.
 *
 * @param {string} range - Date range text, e.g. "Mar29-Apr4" or "Jun 1 to Jun 3".
 * @returns {string[]} One "Mon D" label per day, both ends inclusive. Empty when
 *   the text does not match, a month name is unknown, or the end date falls
 *   before the start date within the same month.
 */
function parseDateRange(range) {
    // NOTE: `[-–to]+` is a character class, so it accepts any run of "-", "–",
    // "t" and "o" as the separator; that is what makes "Mar 29 to Apr 4" match.
    const match = range.match(/(\w{3})\s*(\d{1,2})\s*[-–to]+\s*(\w{3})\s*(\d{1,2})/i);
    if (!match)
        return [];
    const months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
    // Zero-based month index for a month abbreviation, or -1 when unknown.
    const monthIndexOf = (name) => {
        const key = name.toLowerCase().slice(0, 3);
        return months.findIndex(m => m.toLowerCase() === key);
    };
    const startMonthIdx = monthIndexOf(match[1]);
    const endMonthIdx = monthIndexOf(match[3]);
    if (startMonthIdx === -1 || endMonthIdx === -1)
        return [];
    const startDay = Number.parseInt(match[2], 10);
    const endDay = Number.parseInt(match[4], 10);
    const year = new Date().getFullYear();
    // An end month earlier than the start month means the range wraps past
    // December, so the end date belongs to the following year.
    const endYear = endMonthIdx < startMonthIdx ? year + 1 : year;
    // Anchor both ends at local noon: a DST transition at midnight can otherwise
    // shift the loop cursor by an hour and drop the final day from the result.
    const start = new Date(year, startMonthIdx, startDay, 12);
    const end = new Date(endYear, endMonthIdx, endDay, 12);
    const dates = [];
    for (let d = new Date(start); d <= end; d.setDate(d.getDate() + 1)) {
        dates.push(`${months[d.getMonth()]} ${d.getDate()}`);
    }
    return dates;
}
34
+ export function registerSearchCommands(program) {
35
+ // ── search command ────────────────────────────────────────────────────────
36
+ program
37
+ .command('search <query>')
38
+ .description('Search the web (DuckDuckGo by default, or use --site for site-specific search)')
39
+ .option('-n, --count <n>', 'Number of results (1-10)', '5')
40
+ .option('--top <n>', 'Limit results (alias for --count)')
41
+ .option('--provider <provider>', 'Search provider: duckduckgo (default), brave, google, baidu, yandex, naver, yahoo_japan')
42
+ .option('--search-api-key <key>', 'API key for the search provider (or env WEBPEEL_BRAVE_API_KEY)')
43
+ .option('--site <site>', 'Search a specific site (e.g. ebay, amazon, github). Run "webpeel sites" for full list.')
44
+ .option('--json', 'Output as JSON')
45
+ .option('--urls-only', 'Output only URLs, one per line (pipe-friendly)')
46
+ .option('--table', 'Output site-search results as a formatted table (requires --site)')
47
+ .option('--csv', 'Output site-search results as CSV (requires --site)')
48
+ .option('--budget <n>', 'Token budget for site-search result content', parseInt)
49
+ .option('-s, --silent', 'Silent mode')
50
+ .option('--proxy <url>', 'Proxy URL for requests (http://host:port, socks5://user:pass@host:port)')
51
+ .option('--fetch', 'Also fetch and include content from each result URL')
52
+ .option('--local', 'Local business search via Google Places / Yelp (requires API key)')
53
+ .option('--location <location>', 'Location for local search (e.g. "Shibuya, Tokyo", "35.6595,139.7004")')
54
+ .option('--language <lang>', 'Language code for local search results (e.g. "ja", "fr")')
55
+ .option('--country <code>', 'ISO 3166-1 alpha-2 country code for local search (e.g. "JP", "FR")')
56
+ .option('--agent', 'Agent mode: sets --json, --silent, and --budget 4000 (override with --budget N)')
57
+ .action(async (query, options) => {
58
+ // --agent sets sensible defaults for AI agents; explicit flags override
59
+ if (options.agent) {
60
+ if (!options.json)
61
+ options.json = true;
62
+ if (!options.silent)
63
+ options.silent = true;
64
+ if (options.budget === undefined)
65
+ options.budget = 4000;
66
+ }
67
+ const isJson = options.json;
68
+ const isSilent = options.silent;
69
+ // --top overrides --count when both are provided
70
+ const count = parseInt(options.top ?? options.count) || 5;
71
+ // Check usage quota
72
+ const usageCheck = await checkUsage();
73
+ if (!usageCheck.allowed) {
74
+ console.error(usageCheck.message);
75
+ process.exit(1);
76
+ }
77
+ // ── --site: site-specific structured search ───────────────────────────
78
+ if (options.site) {
79
+ const spinner = isSilent ? null : ora(`Searching ${options.site}...`).start();
80
+ try {
81
+ const { buildSiteSearchUrl } = await import('../../core/site-search.js');
82
+ const siteResult = buildSiteSearchUrl(options.site, query);
83
+ // Fetch the raw HTML (needed for listing extraction)
84
+ const htmlResult = await peel(siteResult.url, {
85
+ format: 'html',
86
+ timeout: 30000,
87
+ proxy: options.proxy,
88
+ });
89
+ if (spinner) {
90
+ spinner.succeed(`Fetched ${siteResult.site} in ${htmlResult.elapsed}ms`);
91
+ }
92
+ // Extract listings from the HTML
93
+ const { extractListings } = await import('../../core/extract-listings.js');
94
+ let listings = extractListings(htmlResult.content, siteResult.url);
95
+ // Apply budget if requested
96
+ if (options.budget && options.budget > 0 && listings.length > 0) {
97
+ const { budgetListings } = await import('../../core/budget.js');
98
+ const { maxItems } = budgetListings(listings.length, options.budget);
99
+ listings = listings.slice(0, maxItems);
100
+ }
101
+ // Show usage footer
102
+ if (usageCheck.usageInfo && !isSilent) {
103
+ showUsageFooter(usageCheck.usageInfo, usageCheck.isAnonymous || false, false);
104
+ }
105
+ // Output
106
+ if (options.csv) {
107
+ const rows = listings.map(item => {
108
+ const row = {};
109
+ for (const [k, v] of Object.entries(item)) {
110
+ if (v !== undefined)
111
+ row[k] = v;
112
+ }
113
+ return row;
114
+ });
115
+ await writeStdout(formatListingsCsv(rows));
116
+ }
117
+ else if (options.table) {
118
+ const { formatTable } = await import('../../core/table-format.js');
119
+ const rows = listings.map(item => {
120
+ const row = {};
121
+ for (const [k, v] of Object.entries(item)) {
122
+ if (v !== undefined)
123
+ row[k] = v;
124
+ }
125
+ return row;
126
+ });
127
+ await writeStdout(formatTable(rows) + '\n');
128
+ }
129
+ else if (isJson) {
130
+ const envelope = {
131
+ site: siteResult.site,
132
+ query: siteResult.query,
133
+ url: siteResult.url,
134
+ count: listings.length,
135
+ items: listings,
136
+ elapsed: htmlResult.elapsed,
137
+ };
138
+ await writeStdout(JSON.stringify(envelope, null, 2) + '\n');
139
+ }
140
+ else {
141
+ if (listings.length === 0) {
142
+ await writeStdout('No listings found.\n');
143
+ }
144
+ else {
145
+ await writeStdout(`Found ${listings.length} listings on ${siteResult.site}:\n\n`);
146
+ for (const [i, item] of listings.entries()) {
147
+ const pricePart = item.price ? ` — ${item.price}` : '';
148
+ process.stdout.write(`${i + 1}. ${item.title}${pricePart}\n`);
149
+ if (item.link)
150
+ process.stdout.write(` ${item.link}\n`);
151
+ process.stdout.write('\n');
152
+ }
153
+ }
154
+ }
155
+ await cleanup();
156
+ process.exit(0);
157
+ }
158
+ catch (error) {
159
+ if (spinner)
160
+ spinner.fail('Site search failed');
161
+ if (error instanceof Error) {
162
+ console.error(`\nError: ${error.message}`);
163
+ }
164
+ else {
165
+ console.error('\nError: Unknown error occurred');
166
+ }
167
+ await cleanup();
168
+ process.exit(1);
169
+ }
170
+ }
171
+ // ── --local: local business search via Google Places / Yelp ─────────
172
+ if (options.local) {
173
+ const spinner = isSilent ? null : ora('Searching local businesses...').start();
174
+ try {
175
+ const { localSearch } = await import('../../core/local-search.js');
176
+ const localResults = await localSearch({
177
+ query,
178
+ location: options.location,
179
+ language: options.language,
180
+ country: options.country,
181
+ limit: count,
182
+ });
183
+ if (spinner)
184
+ spinner.succeed(`Found ${localResults.results.length} results (${localResults.source})`);
185
+ if (isJson) {
186
+ await writeStdout(JSON.stringify(localResults, null, 2) + '\n');
187
+ }
188
+ else {
189
+ if (localResults.results.length === 0) {
190
+ await writeStdout('No local results found.\n');
191
+ }
192
+ else {
193
+ await writeStdout(`\n📍 Local results for "${query}"${localResults.location ? ` near ${localResults.location}` : ''}\n`);
194
+ await writeStdout(`Source: ${localResults.source}\n\n`);
195
+ for (const [i, r] of localResults.results.entries()) {
196
+ const rating = r.rating ? `⭐${r.rating}` : '';
197
+ const reviews = r.reviewCount ? `(${r.reviewCount.toLocaleString()})` : '';
198
+ const price = r.priceLevel !== undefined ? ` · ${'$'.repeat(Math.max(1, r.priceLevel))}` : '';
199
+ const open = r.isOpen === true ? ' · 🟢 Open' : r.isOpen === false ? ' · 🔴 Closed' : '';
200
+ await writeStdout(`${i + 1}. ${r.name} ${rating} ${reviews}${price}${open}\n`);
201
+ if (r.address)
202
+ await writeStdout(` ${r.address}\n`);
203
+ if (r.googleMapsUrl)
204
+ await writeStdout(` ${r.googleMapsUrl}\n`);
205
+ await writeStdout('\n');
206
+ }
207
+ }
208
+ }
209
+ process.exit(0);
210
+ }
211
+ catch (err) {
212
+ if (spinner)
213
+ spinner.fail('Local search failed');
214
+ console.error(`Error: ${err instanceof Error ? err.message : 'Unknown error'}`);
215
+ console.error('Hint: Set GOOGLE_PLACES_API_KEY or YELP_API_KEY environment variable for local search.');
216
+ process.exit(1);
217
+ }
218
+ }
219
+ const spinner = isSilent ? null : ora('Searching...').start();
220
+ try {
221
+ // Route search through the WebPeel API when a key is configured
222
+ const searchCfg = loadConfig();
223
+ const searchApiKey = searchCfg.apiKey || process.env.WEBPEEL_API_KEY;
224
+ const searchApiUrl = process.env.WEBPEEL_API_URL || 'https://api.webpeel.dev';
225
+ if (!searchApiKey) {
226
+ if (spinner)
227
+ spinner.fail('Authentication required');
228
+ console.error('No API key configured. Run: webpeel auth <your-key>');
229
+ console.error('Get a free key at: https://app.webpeel.dev/keys');
230
+ process.exit(2);
231
+ }
232
+ const searchParams = new URLSearchParams({ q: query });
233
+ searchParams.set('limit', String(Math.min(Math.max(count, 1), 10)));
234
+ if (options.budget)
235
+ searchParams.set('budget', String(options.budget));
236
+ if (options.provider)
237
+ searchParams.set('provider', options.provider);
238
+ if (options.searchApiKey)
239
+ searchParams.set('searchApiKey', options.searchApiKey);
240
+ const searchRes = await fetch(`${searchApiUrl}/v1/search?${searchParams}`, {
241
+ headers: { Authorization: `Bearer ${searchApiKey}` },
242
+ signal: AbortSignal.timeout(30000),
243
+ });
244
+ if (searchRes.status === 401) {
245
+ if (spinner)
246
+ spinner.fail('Authentication failed');
247
+ console.error('API key invalid or expired. Run: webpeel auth <new-key>');
248
+ process.exit(1);
249
+ }
250
+ if (searchRes.status === 429) {
251
+ if (spinner)
252
+ spinner.fail('Rate limited');
253
+ console.error('Rate limit exceeded. Check your plan at https://app.webpeel.dev/billing');
254
+ process.exit(1);
255
+ }
256
+ if (!searchRes.ok) {
257
+ const body = await searchRes.text().catch(() => '');
258
+ throw new Error(`Search API error ${searchRes.status}: ${body.slice(0, 200)}`);
259
+ }
260
+ const searchData = await searchRes.json();
261
+ // API returns { success: true, data: { web: [...] } } or { results: [...] }
262
+ let results = searchData.data?.web || searchData.data?.results || searchData.results || [];
263
+ // Client-side ad filtering: remove DuckDuckGo ads that slip through the server
264
+ results = results.filter(r => {
265
+ // Filter DDG-internal URLs
266
+ try {
267
+ const parsed = new URL(r.url);
268
+ if (parsed.hostname === 'duckduckgo.com')
269
+ return false;
270
+ if (parsed.searchParams.has('ad_domain') ||
271
+ parsed.searchParams.has('ad_provider') ||
272
+ parsed.searchParams.has('ad_type'))
273
+ return false;
274
+ }
275
+ catch {
276
+ return false;
277
+ }
278
+ // Filter ad snippets
279
+ if (r.snippet && (r.snippet.includes('Ad ·') ||
280
+ r.snippet.includes('Ad Viewing ads is privacy protected by DuckDuckGo') ||
281
+ r.snippet.toLowerCase().startsWith('ad ·')))
282
+ return false;
283
+ return true;
284
+ });
285
+ if (spinner) {
286
+ spinner.succeed(`Found ${results.length} results`);
287
+ }
288
+ // --fetch: fetch content from each result
289
+ if (options.fetch && results.length > 0) {
290
+ const fetchCfg = loadConfig();
291
+ const fetchApiKey = fetchCfg.apiKey || process.env.WEBPEEL_API_KEY;
292
+ const fetchApiUrl = process.env.WEBPEEL_API_URL || 'https://api.webpeel.dev';
293
+ if (fetchApiKey) {
294
+ const fetchSpinner = isSilent ? null : ora(`Fetching content from ${results.length} results...`).start();
295
+ await Promise.all(results.map(async (result) => {
296
+ try {
297
+ const fetchParams = new URLSearchParams({ url: result.url });
298
+ if (options.budget)
299
+ fetchParams.set('budget', String(options.budget || 2000));
300
+ const fetchRes = await fetch(`${fetchApiUrl}/v1/fetch?${fetchParams}`, {
301
+ headers: { Authorization: `Bearer ${fetchApiKey}` },
302
+ signal: AbortSignal.timeout(20000),
303
+ });
304
+ if (fetchRes.ok) {
305
+ const fetchData = await fetchRes.json();
306
+ result.content = fetchData.content || fetchData.data?.content || '';
307
+ }
308
+ }
309
+ catch { /* skip on error */ }
310
+ }));
311
+ if (fetchSpinner)
312
+ fetchSpinner.succeed('Content fetched');
313
+ }
314
+ else if (!isSilent) {
315
+ console.error('Warning: --fetch requires API key (run: webpeel auth <key>)');
316
+ }
317
+ }
318
+ // Show usage footer for free/anonymous users
319
+ if (usageCheck.usageInfo && !isSilent) {
320
+ showUsageFooter(usageCheck.usageInfo, usageCheck.isAnonymous || false, false);
321
+ }
322
+ if (options.urlsOnly) {
323
+ // Pipe-friendly: one URL per line
324
+ for (const result of results) {
325
+ await writeStdout(result.url + '\n');
326
+ }
327
+ }
328
+ else if (isJson) {
329
+ const jsonStr = JSON.stringify({ query, results, count: results.length }, null, 2);
330
+ await writeStdout(jsonStr + '\n');
331
+ }
332
+ else {
333
+ // Human-readable numbered results
334
+ if (results.length === 0) {
335
+ await writeStdout('No results found.\n');
336
+ }
337
+ else {
338
+ await writeStdout(`\n`);
339
+ for (const [i, result] of results.entries()) {
340
+ await writeStdout(`${i + 1}. ${result.title}\n`);
341
+ await writeStdout(` ${result.url}\n`);
342
+ if (result.snippet) {
343
+ await writeStdout(` ${result.snippet}\n`);
344
+ }
345
+ if (result.content) {
346
+ const preview = result.content.slice(0, 500);
347
+ await writeStdout(`\n --- Content ---\n${preview}${result.content.length > 500 ? '\n [...]' : ''}\n`);
348
+ }
349
+ await writeStdout('\n');
350
+ }
351
+ }
352
+ }
353
+ process.exit(0);
354
+ }
355
+ catch (error) {
356
+ if (spinner) {
357
+ spinner.fail('Search failed');
358
+ }
359
+ if (error instanceof Error) {
360
+ console.error(`\nError: ${error.message}`);
361
+ const msg = error.message.toLowerCase();
362
+ if (msg.includes('brave') && msg.includes('api key')) {
363
+ console.error('\n💡 Hint: Set your Brave API key: webpeel config set braveApiKey YOUR_KEY');
364
+ console.error(' Or use free DuckDuckGo search (default, no key needed).');
365
+ }
366
+ else if (msg.includes('timeout') || msg.includes('timed out')) {
367
+ console.error('\n💡 Hint: Search timed out. Try a more specific query or try again.');
368
+ }
369
+ }
370
+ else {
371
+ console.error('\nError: Unknown error occurred');
372
+ }
373
+ process.exit(1);
374
+ }
375
+ });
376
+ // ── sites command — list all supported site templates ────────────────────
377
+ program
378
+ .command('sites')
379
+ .description('List all sites supported by "webpeel search --site <site>"')
380
+ .option('--json', 'Output as JSON')
381
+ .option('--category <cat>', 'Filter by category (shopping, social, tech, jobs, general, real-estate, food)')
382
+ .action(async (options) => {
383
+ const { listSites } = await import('../../core/site-search.js');
384
+ let sites = listSites();
385
+ if (options.category) {
386
+ sites = sites.filter(s => s.category === options.category);
387
+ }
388
+ if (options.json) {
389
+ await writeStdout(JSON.stringify(sites, null, 2) + '\n');
390
+ process.exit(0);
391
+ }
392
+ // Group by category for pretty output
393
+ const byCategory = new Map();
394
+ for (const site of sites) {
395
+ if (!byCategory.has(site.category))
396
+ byCategory.set(site.category, []);
397
+ byCategory.get(site.category).push(site);
398
+ }
399
+ const categoryOrder = ['shopping', 'general', 'social', 'tech', 'jobs', 'real-estate', 'food'];
400
+ const sortedCategories = categoryOrder.filter(c => byCategory.has(c));
401
+ console.log('\nWebPeel Site-Aware Search — supported sites\n');
402
+ console.log('Usage: webpeel search --site <id> "<query>"\n');
403
+ for (const cat of sortedCategories) {
404
+ const catSites = byCategory.get(cat);
405
+ const label = cat.charAt(0).toUpperCase() + cat.slice(1);
406
+ console.log(` ${label}:`);
407
+ for (const s of catSites) {
408
+ console.log(` ${s.id.padEnd(16)} ${s.name}`);
409
+ }
410
+ console.log('');
411
+ }
412
+ process.exit(0);
413
+ });
414
+ // ── batch command ─────────────────────────────────────────────────────────
415
+ program
416
+ .command('batch [file]')
417
+ .description('Fetch multiple URLs from file or stdin pipe')
418
+ .option('-c, --concurrency <n>', 'Max concurrent fetches (default: 3)', '3')
419
+ .option('-o, --output <dir>', 'Output directory (one file per URL)')
420
+ .option('--json', 'Output as JSON array')
421
+ .option('-s, --silent', 'Silent mode')
422
+ .option('-r, --render', 'Use headless browser')
423
+ .option('--selector <css>', 'CSS selector to extract')
424
+ .action(async (file, options) => {
425
+ const isJson = options.json;
426
+ const isSilent = options.silent;
427
+ const shouldRender = options.render;
428
+ const selector = options.selector;
429
+ // Check usage quota
430
+ const usageCheck = await checkUsage();
431
+ if (!usageCheck.allowed) {
432
+ console.error(usageCheck.message);
433
+ process.exit(1);
434
+ }
435
+ const spinner = isSilent ? null : ora('Loading URLs...').start();
436
+ try {
437
+ // Read URLs from file or stdin
438
+ let urls;
439
+ if (file) {
440
+ // Read from file
441
+ try {
442
+ const content = readFileSync(file, 'utf-8');
443
+ urls = content.split('\n')
444
+ .map(line => line.trim())
445
+ .filter(line => line && !line.startsWith('#'));
446
+ }
447
+ catch (error) {
448
+ throw new Error(`Failed to read file: ${file}`);
449
+ }
450
+ }
451
+ else if (!process.stdin.isTTY) {
452
+ // Read from stdin pipe
453
+ const chunks = [];
454
+ for await (const chunk of process.stdin) {
455
+ chunks.push(chunk);
456
+ }
457
+ const content = Buffer.concat(chunks).toString('utf-8');
458
+ urls = content.split('\n')
459
+ .map(line => line.trim())
460
+ .filter(line => line && !line.startsWith('#'));
461
+ }
462
+ else {
463
+ throw new Error('Provide a file path or pipe URLs via stdin.\n Example: cat urls.txt | webpeel batch');
464
+ }
465
+ if (urls.length === 0) {
466
+ throw new Error('No URLs found in file');
467
+ }
468
+ if (spinner) {
469
+ spinner.text = `Fetching ${urls.length} URLs (concurrency: ${options.concurrency})...`;
470
+ }
471
+ // Batch fetch
472
+ const results = await peelBatch(urls, {
473
+ concurrency: parseInt(options.concurrency) || 3,
474
+ render: shouldRender,
475
+ selector: selector,
476
+ });
477
+ if (spinner) {
478
+ const successCount = results.filter(r => 'content' in r).length;
479
+ spinner.succeed(`Completed: ${successCount}/${urls.length} successful`);
480
+ }
481
+ // Show usage footer for free/anonymous users
482
+ if (usageCheck.usageInfo && !isSilent) {
483
+ showUsageFooter(usageCheck.usageInfo, usageCheck.isAnonymous || false, false);
484
+ }
485
+ // Output results
486
+ if (isJson) {
487
+ const jsonStr = JSON.stringify(results, null, 2);
488
+ await new Promise((resolve, reject) => {
489
+ process.stdout.write(jsonStr + '\n', (err) => {
490
+ if (err)
491
+ reject(err);
492
+ else
493
+ resolve();
494
+ });
495
+ });
496
+ }
497
+ else if (options.output) {
498
+ const { writeFileSync, mkdirSync } = await import('fs');
499
+ const { join } = await import('path');
500
+ // Create output directory
501
+ mkdirSync(options.output, { recursive: true });
502
+ results.forEach((result, i) => {
503
+ const urlObj = new URL(urls[i]);
504
+ const filename = `${i + 1}_${urlObj.hostname.replace(/[^a-z0-9]/gi, '_')}.md`;
505
+ const filepath = join(options.output, filename);
506
+ if ('content' in result) {
507
+ writeFileSync(filepath, result.content);
508
+ }
509
+ else {
510
+ writeFileSync(filepath, `Error: ${result.error}`);
511
+ }
512
+ });
513
+ if (!isSilent) {
514
+ console.log(`\nResults saved to: ${options.output}`);
515
+ }
516
+ }
517
+ else {
518
+ // Print results to stdout
519
+ results.forEach((result, i) => {
520
+ console.log(`\n=== ${urls[i]} ===\n`);
521
+ if ('content' in result) {
522
+ console.log(result.content.slice(0, 500) + '...');
523
+ }
524
+ else {
525
+ console.log(`Error: ${result.error}`);
526
+ }
527
+ });
528
+ }
529
+ await cleanup();
530
+ process.exit(0);
531
+ }
532
+ catch (error) {
533
+ if (spinner) {
534
+ spinner.fail('Batch fetch failed');
535
+ }
536
+ if (error instanceof Error) {
537
+ console.error(`\nError: ${error.message}`);
538
+ }
539
+ else {
540
+ console.error('\nError: Unknown error occurred');
541
+ }
542
+ await cleanup();
543
+ process.exit(1);
544
+ }
545
+ });
546
+ // ── crawl command ─────────────────────────────────────────────────────────
547
+ program
548
+ .command('crawl <url>')
549
+ .description('Crawl a website starting from a URL')
550
+ .option('--max-pages <number>', 'Maximum number of pages to crawl (default: 10, max: 100)', (v) => parseInt(v, 10), 10)
551
+ .option('--max-depth <number>', 'Maximum depth to crawl (default: 2, max: 5)', (v) => parseInt(v, 10), 2)
552
+ .option('--allowed-domains <domains...>', 'Only crawl these domains (default: same as starting URL)')
553
+ .option('--exclude <patterns...>', 'Exclude URLs matching these regex patterns')
554
+ .option('--ignore-robots', 'Ignore robots.txt (default: respect robots.txt)')
555
+ .option('--rate-limit <ms>', 'Rate limit between requests in ms (default: 500)', (v) => parseInt(v, 10), 500)
556
+ .option('-r, --render', 'Use headless browser for all pages')
557
+ .option('--stealth', 'Use stealth mode for all pages')
558
+ .option('-s, --silent', 'Silent mode (no spinner)')
559
+ .option('--json', 'Output as JSON')
560
+ .option('--resume', 'Resume an interrupted crawl from its last checkpoint')
561
+ .action(async (url, options) => {
562
+ // Check usage quota
563
+ const usageCheck = await checkUsage();
564
+ if (!usageCheck.allowed) {
565
+ console.error(usageCheck.message);
566
+ process.exit(1);
567
+ }
568
+ const { crawl } = await import('../../core/crawler.js');
569
+ const spinner = options.silent ? null : ora('Crawling...').start();
570
+ try {
571
+ const results = await crawl(url, {
572
+ maxPages: options.maxPages,
573
+ maxDepth: options.maxDepth,
574
+ allowedDomains: options.allowedDomains,
575
+ excludePatterns: options.exclude,
576
+ respectRobotsTxt: !options.ignoreRobots,
577
+ rateLimitMs: options.rateLimit,
578
+ render: options.render || false,
579
+ stealth: options.stealth || false,
580
+ resume: options.resume || false,
581
+ });
582
+ if (spinner) {
583
+ spinner.succeed(`Crawled ${results.length} pages`);
584
+ }
585
+ // Show usage footer for free/anonymous users
586
+ if (usageCheck.usageInfo && !options.silent) {
587
+ showUsageFooter(usageCheck.usageInfo, usageCheck.isAnonymous || false, options.stealth || false);
588
+ }
589
+ if (options.json) {
590
+ const totalTokens = results.reduce((sum, r) => sum + (r.tokens ?? 0), 0);
591
+ const pages = results.map(r => ({
592
+ url: r.url,
593
+ title: r.title,
594
+ tokens: r.tokens ?? 0,
595
+ content: r.markdown,
596
+ depth: r.depth,
597
+ parent: r.parent,
598
+ links: r.links,
599
+ elapsed: r.elapsed,
600
+ ...(r.error ? { error: r.error } : {}),
601
+ ...(r.fingerprint ? { fingerprint: r.fingerprint } : {}),
602
+ }));
603
+ console.log(JSON.stringify({ pages, totalPages: results.length, totalTokens }, null, 2));
604
+ }
605
+ else {
606
+ results.forEach((result, i) => {
607
+ console.log(`\n${'='.repeat(60)}`);
608
+ console.log(`[${i + 1}/${results.length}] ${result.title}`);
609
+ console.log(`URL: ${result.url}`);
610
+ console.log(`Depth: ${result.depth}${result.parent ? ` (from: ${result.parent})` : ''}`);
611
+ console.log(`Links found: ${result.links.length}`);
612
+ console.log(`Elapsed: ${result.elapsed}ms`);
613
+ if (result.error) {
614
+ console.log(`ERROR: ${result.error}`);
615
+ }
616
+ else {
617
+ console.log(`\n${result.markdown.slice(0, 500)}${result.markdown.length > 500 ? '...' : ''}`);
618
+ }
619
+ });
620
+ }
621
+ await cleanup();
622
+ process.exit(0);
623
+ }
624
+ catch (error) {
625
+ if (spinner) {
626
+ spinner.fail('Crawl failed');
627
+ }
628
+ if (error instanceof Error) {
629
+ console.error(`\nError: ${error.message}`);
630
+ }
631
+ else {
632
+ console.error('\nError: Unknown error occurred');
633
+ }
634
+ await cleanup();
635
+ process.exit(1);
636
+ }
637
+ });
638
+ // ── map command ───────────────────────────────────────────────────────────
639
+ program
640
+ .command('map <url>')
641
+ .description('Discover all URLs on a domain (sitemap + crawl)')
642
+ .option('--no-sitemap', 'Skip sitemap.xml discovery')
643
+ .option('--no-crawl', 'Skip homepage crawl')
644
+ .option('--max <n>', 'Maximum URLs to discover (default: 5000)', (v) => parseInt(v, 10), 5000)
645
+ .option('--include <patterns...>', 'Include only URLs matching these regex patterns')
646
+ .option('--exclude <patterns...>', 'Exclude URLs matching these regex patterns')
647
+ .option('--json', 'Output as JSON')
648
+ .option('-s, --silent', 'Silent mode')
649
+ .action(async (url, options) => {
650
+ const { mapDomain } = await import('../../core/map.js');
651
+ const spinner = options.silent ? null : ora('Discovering URLs...').start();
652
+ try {
653
+ const result = await mapDomain(url, {
654
+ useSitemap: options.sitemap !== false,
655
+ crawlHomepage: options.crawl !== false,
656
+ maxUrls: options.max,
657
+ includePatterns: options.include,
658
+ excludePatterns: options.exclude,
659
+ });
660
+ if (spinner)
661
+ spinner.succeed(`Found ${result.total} URLs in ${result.elapsed}ms`);
662
+ if (options.json) {
663
+ console.log(JSON.stringify(result, null, 2));
664
+ }
665
+ else {
666
+ for (const u of result.urls) {
667
+ console.log(u);
668
+ }
669
+ if (!options.silent) {
670
+ console.error(`\nTotal: ${result.total} URLs`);
671
+ if (result.sitemapUrls.length > 0) {
672
+ console.error(`Sitemaps used: ${result.sitemapUrls.join(', ')}`);
673
+ }
674
+ }
675
+ }
676
+ process.exit(0);
677
+ }
678
+ catch (error) {
679
+ if (spinner)
680
+ spinner.fail('URL discovery failed');
681
+ console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
682
+ process.exit(1);
683
+ }
684
+ });
685
+ // ── flights command ───────────────────────────────────────────────────────
686
+ program
687
+ .command('flights <query>')
688
+ .description('Search for flights (via Google Flights) — e.g. "NYC to Fort Myers Apr 4"')
689
+ .option('--one-way', 'One-way flight (default)')
690
+ .option('--round-trip', 'Round-trip flight')
691
+ .option('-n, --count <n>', 'Max flights to show', '10')
692
+ .option('--dates <range>', 'Compare prices across date range (e.g., "Mar29-Apr4")')
693
+ .option('--json', 'Output as JSON')
694
+ .option('-s, --silent', 'Silent mode')
695
+ .action(async (query, options) => {
696
+ // ── --dates: compare cheapest flight across a date range ──────────────
697
+ if (options.dates) {
698
+ const dates = parseDateRange(options.dates);
699
+ if (dates.length === 0) {
700
+ console.error('Could not parse date range. Format: "Mar29-Apr4"');
701
+ process.exit(1);
702
+ }
703
+ const spinner = options.silent ? null : ora(`Comparing flights across ${dates.length} dates...`).start();
704
+ const tripType = options.roundTrip ? '' : ' one way';
705
+ const rows = [];
706
+ for (const date of dates) {
707
+ if (spinner)
708
+ spinner.text = `Fetching flights for ${date}...`;
709
+ try {
710
+ const dateQuery = `Flights from ${query} ${date}${tripType}`;
711
+ const encoded = encodeURIComponent(dateQuery);
712
+ const url = `https://www.google.com/travel/flights?q=${encoded}`;
713
+ const result = await peel(url, { render: true, timeout: 30000 });
714
+ // Try to extract cheapest flight from structured data or content
715
+ let price = null;
716
+ let airline = null;
717
+ let time = null;
718
+ const flights = result.domainData?.structured?.flights || [];
719
+ if (flights.length > 0) {
720
+ const cheapest = flights.reduce((a, b) => {
721
+ const ap = parseFloat(String(a.price || '').replace(/[^0-9.]/g, '')) || Infinity;
722
+ const bp = parseFloat(String(b.price || '').replace(/[^0-9.]/g, '')) || Infinity;
723
+ return ap <= bp ? a : b;
724
+ });
725
+ price = cheapest.priceStr || (cheapest.price ? `$${cheapest.price}` : null);
726
+ airline = cheapest.airline || cheapest.carrier || null;
727
+ time = cheapest.departTime && cheapest.arriveTime
728
+ ? `${cheapest.departTime} → ${cheapest.arriveTime}`
729
+ : (cheapest.time || cheapest.departure || null);
730
+ }
731
+ else {
732
+ // Extract from markdown content — look for price patterns
733
+ const priceMatch = result.content.match(/\$(\d+)/);
734
+ if (priceMatch)
735
+ price = `$${priceMatch[1]}`;
736
+ const airlineMatch = result.content.match(/\b(American|Delta|United|Southwest|Spirit|JetBlue|Alaska|Frontier|Allegiant|Sun Country)\b/i);
737
+ if (airlineMatch)
738
+ airline = airlineMatch[1];
739
+ const timeMatch = result.content.match(/(\d{1,2}:\d{2}\s*(?:AM|PM))\s*[–—→]\s*(\d{1,2}:\d{2}\s*(?:AM|PM))/i);
740
+ if (timeMatch)
741
+ time = `${timeMatch[1]} → ${timeMatch[2]}`;
742
+ }
743
+ const priceNum = price ? parseFloat(price.replace(/[^0-9.]/g, '')) || Infinity : Infinity;
744
+ rows.push({ date, price, airline, time, priceNum });
745
+ }
746
+ catch {
747
+ rows.push({ date, price: null, airline: null, time: null, priceNum: Infinity });
748
+ }
749
+ }
750
+ if (spinner)
751
+ spinner.succeed(`Compared ${rows.length} dates`);
752
+ if (options.json) {
753
+ console.log(JSON.stringify({ query, dateRange: options.dates, rows }, null, 2));
754
+ }
755
+ else {
756
+ // Find best price
757
+ const best = rows.reduce((a, b) => a.priceNum <= b.priceNum ? a : b);
758
+ console.log(`\n# ✈️ Flight Price Comparison — ${query}\n`);
759
+ console.log('| Date | Airline | Time | Price |');
760
+ console.log('|------|---------|------|-------|');
761
+ for (const row of rows) {
762
+ const star = row.priceNum === best.priceNum ? ' ⭐' : '';
763
+ const priceStr = row.price ? `${row.price}${star}` : 'N/A';
764
+ const airlineStr = row.airline || 'Unknown';
765
+ const timeStr = row.time || '—';
766
+ console.log(`| ${row.date} | ${airlineStr} | ${timeStr} | ${priceStr} |`);
767
+ }
768
+ if (best.price) {
769
+ console.log(`\n⭐ Best price: ${best.date} — ${best.airline || 'Unknown'} ${best.price}`);
770
+ }
771
+ }
772
+ await cleanup();
773
+ process.exit(0);
774
+ }
775
+ // ── Single date (default) ─────────────────────────────────────────────
776
+ const tripType = options.roundTrip ? '' : ' one way';
777
+ const encoded = encodeURIComponent(`Flights from ${query}${tripType}`);
778
+ const url = `https://www.google.com/travel/flights?q=${encoded}`;
779
+ const spinner = options.silent ? null : ora(`Searching flights: ${query}...`).start();
780
+ try {
781
+ // render is forced automatically by SPA auto-detect, but be explicit here
782
+ const result = await peel(url, { render: true, timeout: 30000 });
783
+ if (spinner)
784
+ spinner.succeed('Flights loaded');
785
+ if (options.json) {
786
+ console.log(JSON.stringify({
787
+ query,
788
+ url,
789
+ flights: result.domainData?.structured?.flights || [],
790
+ source: 'Google Flights',
791
+ content: result.content,
792
+ tokens: result.tokens,
793
+ }, null, 2));
794
+ }
795
+ else {
796
+ console.log(result.content);
797
+ }
798
+ await cleanup();
799
+ process.exit(0);
800
+ }
801
+ catch (error) {
802
+ if (spinner)
803
+ spinner.fail('Flight search failed');
804
+ console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
805
+ await cleanup();
806
+ process.exit(1);
807
+ }
808
+ });
809
// ── rental command ────────────────────────────────────────────────────────
// `rental <query>` (alias `car-rental`): turns a free-form query such as
// "Punta Gorda FL Apr 1-3" into a Kayak car-search URL, fetches it through
// peel() with `render: true`, and prints either the raw content or, with
// --json, an envelope containing the parsed location/dates, URL and content.
// The handler always terminates the process: exit code 0 on success, 1 on error.
program
    .command('rental <query>')
    .alias('car-rental')
    .description('Search for car rentals via Kayak — e.g. "Punta Gorda FL Apr 1-3"')
    .option('--json', 'Output as JSON')
    .option('-s, --silent', 'Silent mode')
    .action(async (query, options) => {
    const months = {
        jan: '01', feb: '02', mar: '03', apr: '04', may: '05', jun: '06',
        jul: '07', aug: '08', sep: '09', oct: '10', nov: '11', dec: '12',
    };
    // Parse location: strip everything from the first "<month> <day>" token onward.
    const location = query.replace(/\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\w*\s+\d+.*/i, '').trim();
    // Whitespace runs become '-' in the URL path segment.
    const encodedLocation = encodeURIComponent(location.replace(/\s+/g, '-'));
    // Fallback window (Apr 1–3) applies when no date range can be parsed from the query.
    let startMonth = '04';
    let startDay = '01';
    let endMonth = '04';
    let endDay = '03';
    // Parse dates: "Apr 1-3", "Apr 1 – 3" or "Apr 1 to Apr 3".
    // The separator is an alternation (dash variants or the word "to"); a
    // character class like [-–to] would also accept stray "t"/"o" characters.
    const rangeMatch = query.match(/\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\w*\s+(\d+)\s*(?:[-–—]|to)\s*(?:(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\w*\s+)?(\d+)/i);
    if (rangeMatch) {
        startMonth = months[rangeMatch[1].toLowerCase().slice(0, 3)];
        startDay = rangeMatch[2].padStart(2, '0');
        // When the end month is omitted ("Apr 1-3") it is the same as the start month.
        endMonth = rangeMatch[3] ? months[rangeMatch[3].toLowerCase().slice(0, 3)] : startMonth;
        endDay = rangeMatch[4].padStart(2, '0');
    }
    // Resolve years. The query carries no year, so start from the current one
    // and roll forward where that would otherwise produce an impossible range.
    const now = new Date();
    const year = now.getFullYear();
    const today = `${year}-${String(now.getMonth() + 1).padStart(2, '0')}-${String(now.getDate()).padStart(2, '0')}`;
    // Zero-padded ISO dates compare correctly as strings; a pickup day that has
    // already passed this year is taken to mean the same day next year.
    const pickupYear = `${year}-${startMonth}-${startDay}` < today ? year + 1 : year;
    // A range such as "Dec 30 - Jan 2" crosses New Year: the return falls in the following year.
    const returnYear = endMonth < startMonth ? pickupYear + 1 : pickupYear;
    const pickupDate = `${pickupYear}-${startMonth}-${startDay}`;
    const returnDate = `${returnYear}-${endMonth}-${endDay}`;
    const searchUrl = `https://www.kayak.com/cars/${encodedLocation}/${pickupDate}/${returnDate}?sort=price_a`;
    // ora is loaded lazily; no spinner is created in silent mode.
    const spinner = options.silent ? null : (await import('ora')).default(`Searching car rentals: ${query}...`).start();
    try {
        const result = await peel(searchUrl, { render: true, timeout: 40000 });
        if (spinner)
            spinner.succeed('Car rentals loaded');
        if (options.json) {
            console.log(JSON.stringify({
                query,
                location,
                pickupDate,
                returnDate,
                url: searchUrl,
                content: result.content,
                tokens: result.tokens,
            }, null, 2));
        }
        else {
            console.log(result.content);
        }
        await cleanup();
        process.exit(0);
    }
    catch (error) {
        if (spinner)
            spinner.fail('Car rental search failed');
        console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
        await cleanup();
        process.exit(1);
    }
});
868
// ── cars command ──────────────────────────────────────────────────────────
// `cars <query>`: builds a Cars.com search-results URL (keyword, ZIP, radius and
// optional price bounds, sorted by list price), fetches it through peel(), and
// prints either the raw content or, with --json, an envelope echoing the
// filters, the URL and the content.
// The handler always terminates the process: exit code 0 on success, 1 on error.
program
    .command('cars <query>')
    .description('Search for cars to buy via Cars.com — e.g. "Honda Civic"')
    .option('--zip <zip>', 'ZIP code for local search', '10001')
    .option('--distance <miles>', 'Max distance in miles', '30')
    .option('--max-price <price>', 'Maximum listing price')
    .option('--min-price <price>', 'Minimum listing price')
    .option('--json', 'Output as JSON')
    .option('-s, --silent', 'Silent mode')
    .action(async (query, options) => {
    // The `||` fallbacks mirror the option defaults and also cover empty-string values.
    const zip = options.zip || '10001';
    const distance = options.distance || '30';
    // Empty string means "no bound"; the corresponding URL parameter is then omitted.
    const maxPrice = options.maxPrice || '';
    const minPrice = options.minPrice || '';
    const params = new URLSearchParams({
        keyword: query,
        sort: 'list_price',
        stock_type: 'all',
        zip,
        maximum_distance: distance,
    });
    if (maxPrice)
        params.set('list_price_max', maxPrice);
    if (minPrice)
        params.set('list_price_min', minPrice);
    const url = `https://www.cars.com/shopping/results/?${params.toString()}`;
    // ora is loaded lazily; no spinner is created in silent mode.
    const spinner = options.silent ? null : (await import('ora')).default(`Searching cars: ${query}...`).start();
    try {
        const result = await peel(url, { timeout: 25000 });
        if (spinner)
            spinner.succeed('Cars loaded');
        if (options.json) {
            console.log(JSON.stringify({
                query,
                zip,
                distance,
                maxPrice,
                // Report the lower bound too, so the JSON reflects every filter sent in the URL.
                minPrice,
                url,
                content: result.content,
                tokens: result.tokens,
            }, null, 2));
        }
        else {
            console.log(result.content);
        }
        await cleanup();
        process.exit(0);
    }
    catch (error) {
        if (spinner)
            spinner.fail('Car search failed');
        console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
        await cleanup();
        process.exit(1);
    }
});
925
// ── extractors command ────────────────────────────────────────────────────
// `extractors` (alias `list-extractors`): prints the built-in catalogue of
// domain extractors, either as a JSON array (--json) or as a plain-text list
// grouped by category in order of first appearance.
program
    .command('extractors')
    .alias('list-extractors')
    .description('List all supported domain extractors')
    .option('--json', 'Output as JSON')
    .action((options) => {
    // Catalogue rows are [domain, category, description]; they are expanded
    // into objects below with the keys in that same order.
    const catalogue = [
        // Social
        ['twitter.com / x.com', 'Social', 'Tweets, threads, profiles'],
        ['reddit.com', 'Social', 'Subreddits, posts, comments'],
        ['instagram.com', 'Social', 'Photos, reels, profiles'],
        ['tiktok.com', 'Social', 'Video metadata, captions'],
        ['pinterest.com', 'Social', 'Pins, boards'],
        ['linkedin.com', 'Social', 'Profiles, job listings'],
        ['facebook.com', 'Social', 'Marketplace listings'],
        // Video / Audio
        ['youtube.com', 'Video', 'Transcripts, metadata, comments'],
        ['twitch.tv', 'Video', 'Streams, clips, channel info'],
        ['soundcloud.com', 'Audio', 'Tracks, playlists'],
        ['open.spotify.com', 'Audio', 'Tracks, albums, playlists'],
        // Tech / Dev
        ['github.com', 'Dev', 'Repos, issues, PRs, code'],
        ['stackoverflow.com', 'Dev', 'Questions, answers'],
        ['npmjs.com', 'Dev', 'Package metadata, readme'],
        ['pypi.org', 'Dev', 'Package metadata, readme'],
        ['dev.to', 'Dev', 'Articles, comments'],
        // News / Articles
        ['news.ycombinator.com', 'News', 'HN posts, comments, Ask/Show HN'],
        ['medium.com', 'Articles', 'Articles, publications'],
        ['substack.com / *.substack.com', 'Articles', 'Newsletters, posts'],
        ['nytimes.com', 'News', 'Articles, headlines'],
        ['bbc.com', 'News', 'Articles, headlines'],
        ['cnn.com', 'News', 'Articles, headlines'],
        // Shopping / E-commerce
        ['amazon.com', 'Shopping', 'Products, prices, reviews'],
        ['bestbuy.com', 'Shopping', 'Products, prices, specs'],
        ['walmart.com', 'Shopping', 'Products, prices'],
        ['ebay.com', 'Shopping', 'Listings, prices'],
        ['etsy.com', 'Shopping', 'Handmade listings'],
        // Local / Real Estate / Automotive
        ['yelp.com', 'Local', 'Business info, reviews (needs YELP_API_KEY)'],
        ['craigslist.org', 'Local', 'Listings, classifieds'],
        ['zillow.com', 'Real Estate', 'Property listings, estimates'],
        ['redfin.com', 'Real Estate', 'Property listings, prices'],
        ['cars.com', 'Automotive', 'Car listings, prices'],
        // Knowledge / Academic
        ['en.wikipedia.org', 'Knowledge', 'Articles, structured data'],
        ['arxiv.org', 'Academic', 'Papers, abstracts, metadata'],
        ['semanticscholar.org', 'Academic', 'Papers, citations'],
        ['pubmed.ncbi.nlm.nih.gov', 'Academic', 'Medical papers, abstracts'],
        ['imdb.com', 'Knowledge', 'Movies, TV shows, cast'],
        ['allrecipes.com', 'Knowledge', 'Recipes, ingredients, steps'],
        // Finance / Markets
        ['polymarket.com', 'Finance', 'Prediction markets'],
        ['kalshi.com', 'Finance', 'Prediction markets'],
        ['tradingview.com', 'Finance', 'Charts, indicators, ideas'],
        ['coingecko.com', 'Finance', 'Crypto prices, market data'],
        ['coinmarketcap.com', 'Finance', 'Crypto prices, market data'],
        // Sports / Betting
        ['espn.com', 'Sports', 'Scores, stats, news'],
        ['draftkings.com', 'Betting', 'Odds, lines'],
        ['fanduel.com', 'Betting', 'Odds, lines'],
        ['betmgm.com', 'Betting', 'Odds, lines'],
        // Product launches
        ['producthunt.com', 'Tech', 'Product launches, upvotes'],
        // Documents
        ['*.pdf URLs', 'Documents', 'PDF text extraction'],
        // Weather
        ['weather.com', 'Weather', 'Forecasts, conditions'],
        ['accuweather.com', 'Weather', 'Forecasts, conditions'],
        ['api.open-meteo.com', 'Weather', 'Free weather API'],
    ];
    const extractors = catalogue.map(([domain, category, description]) => ({ domain, category, description }));
    // Machine-readable mode: dump the flat list and stop.
    if (options.json) {
        console.log(JSON.stringify(extractors, null, 2));
        return;
    }
    // Bucket entries per category; a Map keeps categories in first-seen order.
    const grouped = new Map();
    extractors.forEach((entry) => {
        const bucket = grouped.get(entry.category);
        if (bucket) {
            bucket.push(entry);
        }
        else {
            grouped.set(entry.category, [entry]);
        }
    });
    // Domains are right-padded to a fixed width so descriptions line up.
    const DOMAIN_WIDTH = 35;
    console.log(`\n🔌 WebPeel Domain Extractors (${extractors.length} total)\n`);
    grouped.forEach((entries, categoryName) => {
        console.log(` ${categoryName}`);
        entries.forEach(({ domain, description }) => {
            console.log(` ${domain.padEnd(DOMAIN_WIDTH)} ${description}`);
        });
        console.log('');
    });
    console.log(' Run `webpeel <url>` to use these automatically based on the URL.');
});
1021
+ }