webpeel 0.19.4 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (544)
  1. package/README.md +2 -2
  2. package/dist/cache.d.ts +0 -1
  3. package/dist/cache.js +0 -1
  4. package/dist/cli/commands/auth.d.ts +5 -0
  5. package/dist/cli/commands/auth.js +476 -0
  6. package/dist/cli/commands/fetch.d.ts +6 -0
  7. package/dist/cli/commands/fetch.js +1015 -0
  8. package/dist/cli/commands/interact.d.ts +5 -0
  9. package/dist/cli/commands/interact.js +839 -0
  10. package/dist/cli/commands/jobs.d.ts +5 -0
  11. package/dist/cli/commands/jobs.js +997 -0
  12. package/dist/cli/commands/screenshot.d.ts +5 -0
  13. package/dist/cli/commands/screenshot.js +273 -0
  14. package/dist/cli/commands/search.d.ts +5 -0
  15. package/dist/cli/commands/search.js +524 -0
  16. package/dist/cli/utils.d.ts +84 -0
  17. package/dist/cli/utils.js +686 -0
  18. package/dist/cli-auth.d.ts +0 -1
  19. package/dist/cli-auth.js +0 -1
  20. package/dist/cli.d.ts +7 -6
  21. package/dist/cli.js +35 -4698
  22. package/dist/core/actions.d.ts +0 -1
  23. package/dist/core/actions.js +0 -1
  24. package/dist/core/agent.d.ts +0 -1
  25. package/dist/core/agent.js +9 -12
  26. package/dist/core/answer.d.ts +0 -1
  27. package/dist/core/answer.js +0 -1
  28. package/dist/core/application-tracker.d.ts +0 -1
  29. package/dist/core/application-tracker.js +0 -1
  30. package/dist/core/apply.d.ts +0 -1
  31. package/dist/core/apply.js +0 -1
  32. package/dist/core/auto-extract.d.ts +0 -1
  33. package/dist/core/auto-extract.js +0 -1
  34. package/dist/core/auto-interact.d.ts +0 -1
  35. package/dist/core/auto-interact.js +0 -1
  36. package/dist/core/bm25-filter.d.ts +0 -1
  37. package/dist/core/bm25-filter.js +0 -1
  38. package/dist/core/branding.d.ts +0 -1
  39. package/dist/core/branding.js +0 -1
  40. package/dist/core/browser-fetch.d.ts +0 -1
  41. package/dist/core/browser-fetch.js +17 -10
  42. package/dist/core/browser-pool.d.ts +0 -1
  43. package/dist/core/browser-pool.js +0 -1
  44. package/dist/core/budget.d.ts +0 -1
  45. package/dist/core/budget.js +0 -1
  46. package/dist/core/cache.d.ts +0 -1
  47. package/dist/core/cache.js +0 -1
  48. package/dist/core/cf-worker-proxy.d.ts +0 -1
  49. package/dist/core/cf-worker-proxy.js +0 -1
  50. package/dist/core/challenge-detection.d.ts +0 -1
  51. package/dist/core/challenge-detection.js +0 -1
  52. package/dist/core/change-tracking.d.ts +0 -1
  53. package/dist/core/change-tracking.js +0 -1
  54. package/dist/core/chunker.d.ts +0 -1
  55. package/dist/core/chunker.js +0 -1
  56. package/dist/core/chunking.d.ts +0 -1
  57. package/dist/core/chunking.js +0 -1
  58. package/dist/core/cloak-fetch.d.ts +0 -1
  59. package/dist/core/cloak-fetch.js +0 -1
  60. package/dist/core/content-pruner.d.ts +0 -1
  61. package/dist/core/content-pruner.js +0 -1
  62. package/dist/core/crawl-checkpoint.d.ts +0 -1
  63. package/dist/core/crawl-checkpoint.js +0 -1
  64. package/dist/core/crawler.d.ts +0 -1
  65. package/dist/core/crawler.js +6 -5
  66. package/dist/core/cycle-fetch.d.ts +0 -1
  67. package/dist/core/cycle-fetch.js +0 -1
  68. package/dist/core/deep-fetch.d.ts +0 -1
  69. package/dist/core/deep-fetch.js +0 -1
  70. package/dist/core/design-analysis.d.ts +0 -1
  71. package/dist/core/design-analysis.js +0 -1
  72. package/dist/core/design-compare.d.ts +0 -1
  73. package/dist/core/design-compare.js +0 -1
  74. package/dist/core/diff.d.ts +0 -1
  75. package/dist/core/diff.js +0 -1
  76. package/dist/core/dns-cache.d.ts +0 -1
  77. package/dist/core/dns-cache.js +0 -1
  78. package/dist/core/documents.d.ts +0 -1
  79. package/dist/core/documents.js +0 -1
  80. package/dist/core/domain-extractors.d.ts +0 -1
  81. package/dist/core/domain-extractors.js +0 -1
  82. package/dist/core/extract-inline.d.ts +0 -1
  83. package/dist/core/extract-inline.js +0 -1
  84. package/dist/core/extract-listings.d.ts +0 -1
  85. package/dist/core/extract-listings.js +0 -1
  86. package/dist/core/extract.d.ts +0 -1
  87. package/dist/core/extract.js +0 -1
  88. package/dist/core/fetcher.d.ts +0 -1
  89. package/dist/core/fetcher.js +0 -1
  90. package/dist/core/google-cache.d.ts +0 -1
  91. package/dist/core/google-cache.js +0 -1
  92. package/dist/core/hotel-search.d.ts +0 -1
  93. package/dist/core/hotel-search.js +0 -1
  94. package/dist/core/http-fetch.d.ts +0 -1
  95. package/dist/core/http-fetch.js +5 -7
  96. package/dist/core/human.d.ts +0 -1
  97. package/dist/core/human.js +0 -1
  98. package/dist/core/jobs.d.ts +0 -1
  99. package/dist/core/jobs.js +0 -1
  100. package/dist/core/json-ld.d.ts +0 -1
  101. package/dist/core/json-ld.js +0 -1
  102. package/dist/core/llm-extract.d.ts +0 -1
  103. package/dist/core/llm-extract.js +0 -1
  104. package/dist/core/logger.d.ts +17 -0
  105. package/dist/core/logger.js +44 -0
  106. package/dist/core/map.d.ts +0 -1
  107. package/dist/core/map.js +0 -1
  108. package/dist/core/markdown.d.ts +0 -1
  109. package/dist/core/markdown.js +0 -1
  110. package/dist/core/metadata.d.ts +0 -1
  111. package/dist/core/metadata.js +0 -1
  112. package/dist/core/paginate.d.ts +0 -1
  113. package/dist/core/paginate.js +0 -1
  114. package/dist/core/pdf.d.ts +0 -1
  115. package/dist/core/pdf.js +0 -1
  116. package/dist/core/peel-tls.d.ts +0 -1
  117. package/dist/core/peel-tls.js +0 -1
  118. package/dist/core/pipeline.d.ts +0 -1
  119. package/dist/core/pipeline.js +22 -25
  120. package/dist/core/profiles.d.ts +0 -1
  121. package/dist/core/profiles.js +0 -1
  122. package/dist/core/quick-answer.d.ts +0 -1
  123. package/dist/core/quick-answer.js +0 -1
  124. package/dist/core/rate-governor.d.ts +0 -1
  125. package/dist/core/rate-governor.js +0 -1
  126. package/dist/core/readability.d.ts +0 -1
  127. package/dist/core/readability.js +0 -1
  128. package/dist/core/research.d.ts +0 -1
  129. package/dist/core/research.js +0 -1
  130. package/dist/core/schema-extraction.d.ts +0 -1
  131. package/dist/core/schema-extraction.js +0 -1
  132. package/dist/core/schema-postprocess.d.ts +0 -1
  133. package/dist/core/schema-postprocess.js +0 -1
  134. package/dist/core/schema-templates.d.ts +0 -1
  135. package/dist/core/schema-templates.js +0 -1
  136. package/dist/core/screenshot.d.ts +0 -1
  137. package/dist/core/screenshot.js +0 -1
  138. package/dist/core/search-fallback.d.ts +0 -1
  139. package/dist/core/search-fallback.js +0 -1
  140. package/dist/core/search-provider.d.ts +0 -1
  141. package/dist/core/search-provider.js +18 -21
  142. package/dist/core/site-search.d.ts +0 -1
  143. package/dist/core/site-search.js +0 -1
  144. package/dist/core/sitemap.d.ts +0 -1
  145. package/dist/core/sitemap.js +0 -1
  146. package/dist/core/stealth-patches.d.ts +0 -1
  147. package/dist/core/stealth-patches.js +0 -1
  148. package/dist/core/stemmer.d.ts +0 -1
  149. package/dist/core/stemmer.js +0 -1
  150. package/dist/core/strategies.d.ts +6 -1
  151. package/dist/core/strategies.js +29 -41
  152. package/dist/core/strategy-hooks.d.ts +0 -1
  153. package/dist/core/strategy-hooks.js +0 -1
  154. package/dist/core/summarize.d.ts +0 -1
  155. package/dist/core/summarize.js +0 -1
  156. package/dist/core/synonyms.d.ts +0 -1
  157. package/dist/core/synonyms.js +0 -1
  158. package/dist/core/table-format.d.ts +0 -1
  159. package/dist/core/table-format.js +0 -1
  160. package/dist/core/timing.d.ts +0 -1
  161. package/dist/core/timing.js +0 -1
  162. package/dist/core/user-agents.d.ts +0 -1
  163. package/dist/core/user-agents.js +0 -1
  164. package/dist/core/watch-manager.d.ts +0 -1
  165. package/dist/core/watch-manager.js +0 -1
  166. package/dist/core/watch.d.ts +0 -1
  167. package/dist/core/watch.js +0 -1
  168. package/dist/core/youtube.d.ts +0 -1
  169. package/dist/core/youtube.js +0 -1
  170. package/dist/index.d.ts +8 -3
  171. package/dist/index.js +27 -3
  172. package/dist/integrations/index.d.ts +0 -1
  173. package/dist/integrations/index.js +0 -1
  174. package/dist/integrations/langchain.d.ts +0 -1
  175. package/dist/integrations/langchain.js +0 -1
  176. package/dist/integrations/llamaindex.d.ts +0 -1
  177. package/dist/integrations/llamaindex.js +0 -1
  178. package/dist/mcp/handlers/act.d.ts +5 -0
  179. package/dist/mcp/handlers/act.js +34 -0
  180. package/dist/mcp/handlers/definitions.d.ts +6 -0
  181. package/dist/mcp/handlers/definitions.js +266 -0
  182. package/dist/mcp/handlers/extract.d.ts +6 -0
  183. package/dist/mcp/handlers/extract.js +102 -0
  184. package/dist/mcp/handlers/fetch.d.ts +6 -0
  185. package/dist/mcp/handlers/fetch.js +98 -0
  186. package/dist/mcp/handlers/find.d.ts +5 -0
  187. package/dist/mcp/handlers/find.js +137 -0
  188. package/dist/mcp/handlers/index.d.ts +13 -0
  189. package/dist/mcp/handlers/index.js +61 -0
  190. package/dist/mcp/handlers/legacy.d.ts +25 -0
  191. package/dist/mcp/handlers/legacy.js +450 -0
  192. package/dist/mcp/handlers/meta.d.ts +6 -0
  193. package/dist/mcp/handlers/meta.js +31 -0
  194. package/dist/mcp/handlers/monitor.d.ts +5 -0
  195. package/dist/mcp/handlers/monitor.js +41 -0
  196. package/dist/mcp/handlers/read.d.ts +6 -0
  197. package/dist/mcp/handlers/read.js +63 -0
  198. package/dist/mcp/handlers/see.d.ts +5 -0
  199. package/dist/mcp/handlers/see.js +75 -0
  200. package/dist/mcp/handlers/types.d.ts +29 -0
  201. package/dist/mcp/handlers/types.js +28 -0
  202. package/dist/mcp/server.d.ts +3 -4
  203. package/dist/mcp/server.js +35 -1101
  204. package/dist/mcp/smart-router.d.ts +0 -1
  205. package/dist/mcp/smart-router.js +3 -1
  206. package/dist/types.d.ts +6 -1
  207. package/dist/types.js +0 -1
  208. package/package.json +3 -13
  209. package/dist/cache.d.ts.map +0 -1
  210. package/dist/cache.js.map +0 -1
  211. package/dist/cli-auth.d.ts.map +0 -1
  212. package/dist/cli-auth.js.map +0 -1
  213. package/dist/cli.bundle.cjs +0 -159248
  214. package/dist/cli.d.ts.map +0 -1
  215. package/dist/cli.js.map +0 -1
  216. package/dist/core/actions.d.ts.map +0 -1
  217. package/dist/core/actions.js.map +0 -1
  218. package/dist/core/agent.d.ts.map +0 -1
  219. package/dist/core/agent.js.map +0 -1
  220. package/dist/core/answer.d.ts.map +0 -1
  221. package/dist/core/answer.js.map +0 -1
  222. package/dist/core/application-tracker.d.ts.map +0 -1
  223. package/dist/core/application-tracker.js.map +0 -1
  224. package/dist/core/apply.d.ts.map +0 -1
  225. package/dist/core/apply.js.map +0 -1
  226. package/dist/core/auto-extract.d.ts.map +0 -1
  227. package/dist/core/auto-extract.js.map +0 -1
  228. package/dist/core/auto-interact.d.ts.map +0 -1
  229. package/dist/core/auto-interact.js.map +0 -1
  230. package/dist/core/bm25-filter.d.ts.map +0 -1
  231. package/dist/core/bm25-filter.js.map +0 -1
  232. package/dist/core/branding.d.ts.map +0 -1
  233. package/dist/core/branding.js.map +0 -1
  234. package/dist/core/browser-fetch.d.ts.map +0 -1
  235. package/dist/core/browser-fetch.js.map +0 -1
  236. package/dist/core/browser-pool.d.ts.map +0 -1
  237. package/dist/core/browser-pool.js.map +0 -1
  238. package/dist/core/budget.d.ts.map +0 -1
  239. package/dist/core/budget.js.map +0 -1
  240. package/dist/core/cache.d.ts.map +0 -1
  241. package/dist/core/cache.js.map +0 -1
  242. package/dist/core/cf-worker-proxy.d.ts.map +0 -1
  243. package/dist/core/cf-worker-proxy.js.map +0 -1
  244. package/dist/core/challenge-detection.d.ts.map +0 -1
  245. package/dist/core/challenge-detection.js.map +0 -1
  246. package/dist/core/change-tracking.d.ts.map +0 -1
  247. package/dist/core/change-tracking.js.map +0 -1
  248. package/dist/core/chunker.d.ts.map +0 -1
  249. package/dist/core/chunker.js.map +0 -1
  250. package/dist/core/chunking.d.ts.map +0 -1
  251. package/dist/core/chunking.js.map +0 -1
  252. package/dist/core/cloak-fetch.d.ts.map +0 -1
  253. package/dist/core/cloak-fetch.js.map +0 -1
  254. package/dist/core/content-pruner.d.ts.map +0 -1
  255. package/dist/core/content-pruner.js.map +0 -1
  256. package/dist/core/crawl-checkpoint.d.ts.map +0 -1
  257. package/dist/core/crawl-checkpoint.js.map +0 -1
  258. package/dist/core/crawler.d.ts.map +0 -1
  259. package/dist/core/crawler.js.map +0 -1
  260. package/dist/core/cycle-fetch.d.ts.map +0 -1
  261. package/dist/core/cycle-fetch.js.map +0 -1
  262. package/dist/core/deep-fetch.d.ts.map +0 -1
  263. package/dist/core/deep-fetch.js.map +0 -1
  264. package/dist/core/design-analysis.d.ts.map +0 -1
  265. package/dist/core/design-analysis.js.map +0 -1
  266. package/dist/core/design-compare.d.ts.map +0 -1
  267. package/dist/core/design-compare.js.map +0 -1
  268. package/dist/core/diff.d.ts.map +0 -1
  269. package/dist/core/diff.js.map +0 -1
  270. package/dist/core/dns-cache.d.ts.map +0 -1
  271. package/dist/core/dns-cache.js.map +0 -1
  272. package/dist/core/documents.d.ts.map +0 -1
  273. package/dist/core/documents.js.map +0 -1
  274. package/dist/core/domain-extractors.d.ts.map +0 -1
  275. package/dist/core/domain-extractors.js.map +0 -1
  276. package/dist/core/extract-inline.d.ts.map +0 -1
  277. package/dist/core/extract-inline.js.map +0 -1
  278. package/dist/core/extract-listings.d.ts.map +0 -1
  279. package/dist/core/extract-listings.js.map +0 -1
  280. package/dist/core/extract.d.ts.map +0 -1
  281. package/dist/core/extract.js.map +0 -1
  282. package/dist/core/fetcher.d.ts.map +0 -1
  283. package/dist/core/fetcher.js.map +0 -1
  284. package/dist/core/google-cache.d.ts.map +0 -1
  285. package/dist/core/google-cache.js.map +0 -1
  286. package/dist/core/hotel-search.d.ts.map +0 -1
  287. package/dist/core/hotel-search.js.map +0 -1
  288. package/dist/core/http-fetch.d.ts.map +0 -1
  289. package/dist/core/http-fetch.js.map +0 -1
  290. package/dist/core/human.d.ts.map +0 -1
  291. package/dist/core/human.js.map +0 -1
  292. package/dist/core/jobs.d.ts.map +0 -1
  293. package/dist/core/jobs.js.map +0 -1
  294. package/dist/core/json-ld.d.ts.map +0 -1
  295. package/dist/core/json-ld.js.map +0 -1
  296. package/dist/core/llm-extract.d.ts.map +0 -1
  297. package/dist/core/llm-extract.js.map +0 -1
  298. package/dist/core/map.d.ts.map +0 -1
  299. package/dist/core/map.js.map +0 -1
  300. package/dist/core/markdown.d.ts.map +0 -1
  301. package/dist/core/markdown.js.map +0 -1
  302. package/dist/core/metadata.d.ts.map +0 -1
  303. package/dist/core/metadata.js.map +0 -1
  304. package/dist/core/paginate.d.ts.map +0 -1
  305. package/dist/core/paginate.js.map +0 -1
  306. package/dist/core/pdf.d.ts.map +0 -1
  307. package/dist/core/pdf.js.map +0 -1
  308. package/dist/core/peel-tls.d.ts.map +0 -1
  309. package/dist/core/peel-tls.js.map +0 -1
  310. package/dist/core/pipeline.d.ts.map +0 -1
  311. package/dist/core/pipeline.js.map +0 -1
  312. package/dist/core/profiles.d.ts.map +0 -1
  313. package/dist/core/profiles.js.map +0 -1
  314. package/dist/core/quick-answer.d.ts.map +0 -1
  315. package/dist/core/quick-answer.js.map +0 -1
  316. package/dist/core/rate-governor.d.ts.map +0 -1
  317. package/dist/core/rate-governor.js.map +0 -1
  318. package/dist/core/readability.d.ts.map +0 -1
  319. package/dist/core/readability.js.map +0 -1
  320. package/dist/core/research.d.ts.map +0 -1
  321. package/dist/core/research.js.map +0 -1
  322. package/dist/core/schema-extraction.d.ts.map +0 -1
  323. package/dist/core/schema-extraction.js.map +0 -1
  324. package/dist/core/schema-postprocess.d.ts.map +0 -1
  325. package/dist/core/schema-postprocess.js.map +0 -1
  326. package/dist/core/schema-templates.d.ts.map +0 -1
  327. package/dist/core/schema-templates.js.map +0 -1
  328. package/dist/core/screenshot.d.ts.map +0 -1
  329. package/dist/core/screenshot.js.map +0 -1
  330. package/dist/core/search-fallback.d.ts.map +0 -1
  331. package/dist/core/search-fallback.js.map +0 -1
  332. package/dist/core/search-provider.d.ts.map +0 -1
  333. package/dist/core/search-provider.js.map +0 -1
  334. package/dist/core/site-search.d.ts.map +0 -1
  335. package/dist/core/site-search.js.map +0 -1
  336. package/dist/core/sitemap.d.ts.map +0 -1
  337. package/dist/core/sitemap.js.map +0 -1
  338. package/dist/core/stealth-patches.d.ts.map +0 -1
  339. package/dist/core/stealth-patches.js.map +0 -1
  340. package/dist/core/stemmer.d.ts.map +0 -1
  341. package/dist/core/stemmer.js.map +0 -1
  342. package/dist/core/strategies.d.ts.map +0 -1
  343. package/dist/core/strategies.js.map +0 -1
  344. package/dist/core/strategy-hooks.d.ts.map +0 -1
  345. package/dist/core/strategy-hooks.js.map +0 -1
  346. package/dist/core/summarize.d.ts.map +0 -1
  347. package/dist/core/summarize.js.map +0 -1
  348. package/dist/core/synonyms.d.ts.map +0 -1
  349. package/dist/core/synonyms.js.map +0 -1
  350. package/dist/core/table-format.d.ts.map +0 -1
  351. package/dist/core/table-format.js.map +0 -1
  352. package/dist/core/timing.d.ts.map +0 -1
  353. package/dist/core/timing.js.map +0 -1
  354. package/dist/core/user-agents.d.ts.map +0 -1
  355. package/dist/core/user-agents.js.map +0 -1
  356. package/dist/core/watch-manager.d.ts.map +0 -1
  357. package/dist/core/watch-manager.js.map +0 -1
  358. package/dist/core/watch.d.ts.map +0 -1
  359. package/dist/core/watch.js.map +0 -1
  360. package/dist/core/youtube.d.ts.map +0 -1
  361. package/dist/core/youtube.js.map +0 -1
  362. package/dist/index.d.ts.map +0 -1
  363. package/dist/index.js.map +0 -1
  364. package/dist/integrations/index.d.ts.map +0 -1
  365. package/dist/integrations/index.js.map +0 -1
  366. package/dist/integrations/langchain.d.ts.map +0 -1
  367. package/dist/integrations/langchain.js.map +0 -1
  368. package/dist/integrations/llamaindex.d.ts.map +0 -1
  369. package/dist/integrations/llamaindex.js.map +0 -1
  370. package/dist/mcp/server.d.ts.map +0 -1
  371. package/dist/mcp/server.js.map +0 -1
  372. package/dist/mcp/smart-router.d.ts.map +0 -1
  373. package/dist/mcp/smart-router.js.map +0 -1
  374. package/dist/server/app.d.ts +0 -15
  375. package/dist/server/app.d.ts.map +0 -1
  376. package/dist/server/app.js +0 -350
  377. package/dist/server/app.js.map +0 -1
  378. package/dist/server/auth-store.d.ts +0 -28
  379. package/dist/server/auth-store.d.ts.map +0 -1
  380. package/dist/server/auth-store.js +0 -89
  381. package/dist/server/auth-store.js.map +0 -1
  382. package/dist/server/email-service.d.ts +0 -22
  383. package/dist/server/email-service.d.ts.map +0 -1
  384. package/dist/server/email-service.js +0 -80
  385. package/dist/server/email-service.js.map +0 -1
  386. package/dist/server/job-queue.d.ts +0 -93
  387. package/dist/server/job-queue.d.ts.map +0 -1
  388. package/dist/server/job-queue.js +0 -146
  389. package/dist/server/job-queue.js.map +0 -1
  390. package/dist/server/logger.d.ts +0 -11
  391. package/dist/server/logger.d.ts.map +0 -1
  392. package/dist/server/logger.js +0 -38
  393. package/dist/server/logger.js.map +0 -1
  394. package/dist/server/middleware/auth.d.ts +0 -29
  395. package/dist/server/middleware/auth.d.ts.map +0 -1
  396. package/dist/server/middleware/auth.js +0 -222
  397. package/dist/server/middleware/auth.js.map +0 -1
  398. package/dist/server/middleware/rate-limit.d.ts +0 -25
  399. package/dist/server/middleware/rate-limit.d.ts.map +0 -1
  400. package/dist/server/middleware/rate-limit.js +0 -168
  401. package/dist/server/middleware/rate-limit.js.map +0 -1
  402. package/dist/server/middleware/url-validator.d.ts +0 -16
  403. package/dist/server/middleware/url-validator.d.ts.map +0 -1
  404. package/dist/server/middleware/url-validator.js +0 -187
  405. package/dist/server/middleware/url-validator.js.map +0 -1
  406. package/dist/server/openapi.yaml +0 -4944
  407. package/dist/server/pg-auth-store.d.ts +0 -133
  408. package/dist/server/pg-auth-store.d.ts.map +0 -1
  409. package/dist/server/pg-auth-store.js +0 -473
  410. package/dist/server/pg-auth-store.js.map +0 -1
  411. package/dist/server/pg-job-queue.d.ts +0 -60
  412. package/dist/server/pg-job-queue.d.ts.map +0 -1
  413. package/dist/server/pg-job-queue.js +0 -365
  414. package/dist/server/pg-job-queue.js.map +0 -1
  415. package/dist/server/premium/domain-intel.d.ts +0 -17
  416. package/dist/server/premium/domain-intel.d.ts.map +0 -1
  417. package/dist/server/premium/domain-intel.js +0 -134
  418. package/dist/server/premium/domain-intel.js.map +0 -1
  419. package/dist/server/premium/index.d.ts +0 -18
  420. package/dist/server/premium/index.d.ts.map +0 -1
  421. package/dist/server/premium/index.js +0 -36
  422. package/dist/server/premium/index.js.map +0 -1
  423. package/dist/server/premium/swr-cache.d.ts +0 -15
  424. package/dist/server/premium/swr-cache.d.ts.map +0 -1
  425. package/dist/server/premium/swr-cache.js +0 -35
  426. package/dist/server/premium/swr-cache.js.map +0 -1
  427. package/dist/server/routes/activity.d.ts +0 -7
  428. package/dist/server/routes/activity.d.ts.map +0 -1
  429. package/dist/server/routes/activity.js +0 -68
  430. package/dist/server/routes/activity.js.map +0 -1
  431. package/dist/server/routes/agent.d.ts +0 -16
  432. package/dist/server/routes/agent.d.ts.map +0 -1
  433. package/dist/server/routes/agent.js +0 -247
  434. package/dist/server/routes/agent.js.map +0 -1
  435. package/dist/server/routes/answer.d.ts +0 -6
  436. package/dist/server/routes/answer.d.ts.map +0 -1
  437. package/dist/server/routes/answer.js +0 -133
  438. package/dist/server/routes/answer.js.map +0 -1
  439. package/dist/server/routes/ask.d.ts +0 -23
  440. package/dist/server/routes/ask.d.ts.map +0 -1
  441. package/dist/server/routes/ask.js +0 -119
  442. package/dist/server/routes/ask.js.map +0 -1
  443. package/dist/server/routes/batch.d.ts +0 -7
  444. package/dist/server/routes/batch.d.ts.map +0 -1
  445. package/dist/server/routes/batch.js +0 -412
  446. package/dist/server/routes/batch.js.map +0 -1
  447. package/dist/server/routes/cli-usage.d.ts +0 -7
  448. package/dist/server/routes/cli-usage.d.ts.map +0 -1
  449. package/dist/server/routes/cli-usage.js +0 -121
  450. package/dist/server/routes/cli-usage.js.map +0 -1
  451. package/dist/server/routes/compat.d.ts +0 -24
  452. package/dist/server/routes/compat.d.ts.map +0 -1
  453. package/dist/server/routes/compat.js +0 -653
  454. package/dist/server/routes/compat.js.map +0 -1
  455. package/dist/server/routes/deep-fetch.d.ts +0 -9
  456. package/dist/server/routes/deep-fetch.d.ts.map +0 -1
  457. package/dist/server/routes/deep-fetch.js +0 -50
  458. package/dist/server/routes/deep-fetch.js.map +0 -1
  459. package/dist/server/routes/demo.d.ts +0 -25
  460. package/dist/server/routes/demo.d.ts.map +0 -1
  461. package/dist/server/routes/demo.js +0 -434
  462. package/dist/server/routes/demo.js.map +0 -1
  463. package/dist/server/routes/extract.d.ts +0 -9
  464. package/dist/server/routes/extract.d.ts.map +0 -1
  465. package/dist/server/routes/extract.js +0 -150
  466. package/dist/server/routes/extract.js.map +0 -1
  467. package/dist/server/routes/fetch.d.ts +0 -8
  468. package/dist/server/routes/fetch.d.ts.map +0 -1
  469. package/dist/server/routes/fetch.js +0 -988
  470. package/dist/server/routes/fetch.js.map +0 -1
  471. package/dist/server/routes/health.d.ts +0 -8
  472. package/dist/server/routes/health.d.ts.map +0 -1
  473. package/dist/server/routes/health.js +0 -20
  474. package/dist/server/routes/health.js.map +0 -1
  475. package/dist/server/routes/jobs.d.ts +0 -8
  476. package/dist/server/routes/jobs.d.ts.map +0 -1
  477. package/dist/server/routes/jobs.js +0 -487
  478. package/dist/server/routes/jobs.js.map +0 -1
  479. package/dist/server/routes/mcp.d.ts +0 -18
  480. package/dist/server/routes/mcp.d.ts.map +0 -1
  481. package/dist/server/routes/mcp.js +0 -1260
  482. package/dist/server/routes/mcp.js.map +0 -1
  483. package/dist/server/routes/oauth.d.ts +0 -10
  484. package/dist/server/routes/oauth.d.ts.map +0 -1
  485. package/dist/server/routes/oauth.js +0 -334
  486. package/dist/server/routes/oauth.js.map +0 -1
  487. package/dist/server/routes/quick-answer.d.ts +0 -9
  488. package/dist/server/routes/quick-answer.d.ts.map +0 -1
  489. package/dist/server/routes/quick-answer.js +0 -93
  490. package/dist/server/routes/quick-answer.js.map +0 -1
  491. package/dist/server/routes/screenshot.d.ts +0 -23
  492. package/dist/server/routes/screenshot.d.ts.map +0 -1
  493. package/dist/server/routes/screenshot.js +0 -819
  494. package/dist/server/routes/screenshot.js.map +0 -1
  495. package/dist/server/routes/search.d.ts +0 -7
  496. package/dist/server/routes/search.d.ts.map +0 -1
  497. package/dist/server/routes/search.js +0 -312
  498. package/dist/server/routes/search.js.map +0 -1
  499. package/dist/server/routes/session.d.ts +0 -16
  500. package/dist/server/routes/session.d.ts.map +0 -1
  501. package/dist/server/routes/session.js +0 -278
  502. package/dist/server/routes/session.js.map +0 -1
  503. package/dist/server/routes/stats.d.ts +0 -7
  504. package/dist/server/routes/stats.d.ts.map +0 -1
  505. package/dist/server/routes/stats.js +0 -65
  506. package/dist/server/routes/stats.js.map +0 -1
  507. package/dist/server/routes/stripe.d.ts +0 -16
  508. package/dist/server/routes/stripe.d.ts.map +0 -1
  509. package/dist/server/routes/stripe.js +0 -283
  510. package/dist/server/routes/stripe.js.map +0 -1
  511. package/dist/server/routes/users.d.ts +0 -9
  512. package/dist/server/routes/users.d.ts.map +0 -1
  513. package/dist/server/routes/users.js +0 -1211
  514. package/dist/server/routes/users.js.map +0 -1
  515. package/dist/server/routes/watch.d.ts +0 -16
  516. package/dist/server/routes/watch.d.ts.map +0 -1
  517. package/dist/server/routes/watch.js +0 -257
  518. package/dist/server/routes/watch.js.map +0 -1
  519. package/dist/server/routes/webhooks.d.ts +0 -16
  520. package/dist/server/routes/webhooks.d.ts.map +0 -1
  521. package/dist/server/routes/webhooks.js +0 -74
  522. package/dist/server/routes/webhooks.js.map +0 -1
  523. package/dist/server/routes/youtube.d.ts +0 -7
  524. package/dist/server/routes/youtube.d.ts.map +0 -1
  525. package/dist/server/routes/youtube.js +0 -93
  526. package/dist/server/routes/youtube.js.map +0 -1
  527. package/dist/server/sentry.d.ts +0 -14
  528. package/dist/server/sentry.d.ts.map +0 -1
  529. package/dist/server/sentry.js +0 -39
  530. package/dist/server/sentry.js.map +0 -1
  531. package/dist/server/types.d.ts +0 -16
  532. package/dist/server/types.d.ts.map +0 -1
  533. package/dist/server/types.js +0 -8
  534. package/dist/server/types.js.map +0 -1
  535. package/dist/server/utils/response.d.ts +0 -45
  536. package/dist/server/utils/response.d.ts.map +0 -1
  537. package/dist/server/utils/response.js +0 -70
  538. package/dist/server/utils/response.js.map +0 -1
  539. package/dist/server/utils/sse.d.ts +0 -23
  540. package/dist/server/utils/sse.d.ts.map +0 -1
  541. package/dist/server/utils/sse.js +0 -39
  542. package/dist/server/utils/sse.js.map +0 -1
  543. package/dist/types.d.ts.map +0 -1
  544. package/dist/types.js.map +0 -1
package/dist/cli.js CHANGED
@@ -1,4729 +1,66 @@
1
1
  #!/usr/bin/env node
2
2
  /**
3
- * WebPeel CLI
3
+ * WebPeel CLI — Entry point
4
+ *
5
+ * Registers all command groups and starts the Commander program.
6
+ * The heavy implementation lives in src/cli/commands/*.ts
4
7
  *
5
8
  * Usage:
6
9
  * npx webpeel <url> - Fetch and convert to markdown
7
10
  * npx webpeel <url> --json - Output as JSON
8
- * npx webpeel <url> --html - Output raw HTML
9
11
  * npx webpeel <url> --render - Force browser mode
10
- * npx webpeel <url> --wait 5000 - Wait 5s for JS to load
11
12
  * npx webpeel search "query" - DuckDuckGo search
12
- * npx webpeel serve - Start API server (future)
13
- * npx webpeel mcp - Start MCP server (future)
13
+ * npx webpeel mcp - Start MCP server
14
+ * npx webpeel --help - Condensed help
15
+ * npx webpeel --help-all - Full option reference
14
16
  */
15
17
  import { Command } from 'commander';
16
- import ora from 'ora';
17
- import { writeFileSync, readFileSync, existsSync } from 'fs';
18
- import { getProfilePath, loadStorageState, touchProfile, listProfiles, deleteProfile, createProfile } from './core/profiles.js';
19
- import { peel, peelBatch, cleanup } from './index.js';
20
- import { checkUsage, showUsageFooter, handleLogin, handleLogout, handleUsage, loadConfig, saveConfig } from './cli-auth.js';
21
- import { getCache, setCache, parseTTL, clearCache, cacheStats } from './cache.js';
22
- import { estimateTokens } from './core/markdown.js';
23
- import { distillToBudget, budgetListings } from './core/budget.js';
24
- import { SCHEMA_TEMPLATES, getSchemaTemplate, listSchemaTemplates } from './core/schema-templates.js';
25
- // Intercept verb-first syntax before Commander parses
18
+ import { VERB_ALIASES, cliVersion, checkForUpdates, buildCommanderHelp, buildCondensedHelp, } from './cli/utils.js';
19
+ import { registerFetchCommands } from './cli/commands/fetch.js';
20
+ import { registerSearchCommands } from './cli/commands/search.js';
21
+ import { registerInteractCommands } from './cli/commands/interact.js';
22
+ import { registerAuthCommands } from './cli/commands/auth.js';
23
+ import { registerScreenshotCommands } from './cli/commands/screenshot.js';
24
+ import { registerJobsCommands } from './cli/commands/jobs.js';
25
+ // ── Verb alias intercept (before Commander parses) ────────────────────────────
26
26
  // "webpeel fetch <url>" → "webpeel <url>"
27
- // Note: 'read' is intentionally excluded — it's a registered subcommand with its own behavior.
28
- const VERB_ALIASES = new Set(['fetch', 'get', 'scrape', 'peel']);
27
+ // Note: 'read' is intentionally excluded — it's a registered subcommand.
29
28
  if (process.argv.length >= 3 && VERB_ALIASES.has(process.argv[2]?.toLowerCase())) {
30
- // Remove the verb, shift URL to its position
31
29
  process.argv.splice(2, 1);
32
30
  }
33
- const program = new Command();
34
- // Read version from package.json dynamically
35
- import { fileURLToPath } from 'url';
36
- import { dirname, resolve } from 'path';
37
- let cliVersion = '0.0.0';
38
- try {
39
- const __dirname = dirname(fileURLToPath(import.meta.url));
40
- const pkgPath = resolve(__dirname, '..', 'package.json');
41
- const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8'));
42
- cliVersion = pkg.version;
43
- }
44
- catch { /* fallback */ }
45
- program
46
- .name('webpeel')
47
- .description('Fast web fetcher for AI agents')
48
- .version(cliVersion)
49
- .enablePositionalOptions();
50
- // Check for updates (non-blocking, runs in background)
51
- async function checkForUpdates() {
52
- try {
53
- const res = await fetch('https://registry.npmjs.org/webpeel/latest', {
54
- signal: AbortSignal.timeout(2000),
55
- });
56
- if (!res.ok)
57
- return;
58
- const data = await res.json();
59
- const latest = data.version;
60
- if (latest && latest !== cliVersion && cliVersion !== '0.0.0') {
61
- console.error(`\n💡 WebPeel v${latest} available (you have v${cliVersion}). Update: npm i -g webpeel@latest\n`);
62
- }
63
- }
64
- catch { /* silently ignore — don't slow down the user */ }
65
- }
66
- // Fire and forget — don't await, don't block
67
- void checkForUpdates();
68
- /**
69
- * Parse action strings into PageAction array
70
- * Formats:
71
- * click:.selector — click an element
72
- * type:.selector=text — type text into an input
73
- * fill:.selector=text — fill an input (replaces existing value)
74
- * scroll:down:500 — scroll direction + amount
75
- * scroll:bottom — scroll to bottom (legacy)
76
- * scroll:top — scroll to top (legacy)
77
- * wait:2000 — wait N ms
78
- * press:Enter — press a keyboard key
79
- * hover:.selector — hover over an element
80
- * waitFor:.selector — wait for a selector to appear
81
- * select:.selector=value — select dropdown option
82
- * screenshot — take a screenshot
83
- */
84
- function parseActions(actionStrings) {
85
- return actionStrings.map(str => {
86
- const [type, ...rest] = str.split(':');
87
- const value = rest.join(':');
88
- switch (type) {
89
- case 'wait':
90
- return { type: 'wait', ms: parseInt(value) || 1000 };
91
- case 'click':
92
- return { type: 'click', selector: value };
93
- case 'scroll': {
94
- // scroll:down:500 or scroll:bottom or scroll:500 or scroll:0,1500
95
- const parts = value.split(':');
96
- const dir = parts[0];
97
- // Handle scroll:x,y format (e.g., scroll:0,1500)
98
- if (dir && dir.includes(',')) {
99
- const [x, y] = dir.split(',').map(Number);
100
- if (!isNaN(x) && !isNaN(y)) {
101
- return { type: 'scroll', to: { x, y } };
102
- }
103
- }
104
- if (dir === 'top' || dir === 'bottom') {
105
- return { type: 'scroll', to: dir };
106
- }
107
- if (dir === 'down' || dir === 'up' || dir === 'left' || dir === 'right') {
108
- const amount = parseInt(parts[1] || '500', 10);
109
- return { type: 'scroll', direction: dir, amount };
110
- }
111
- // Bare number: absolute position
112
- const num = parseInt(dir, 10);
113
- if (!isNaN(num)) {
114
- return { type: 'scroll', to: num };
115
- }
116
- // Default: scroll to bottom
117
- return { type: 'scroll', to: 'bottom' };
118
- }
119
- case 'type': {
120
- const [sel, ...text] = value.split('=');
121
- return { type: 'type', selector: sel, value: text.join('=') };
122
- }
123
- case 'fill': {
124
- const [sel, ...text] = value.split('=');
125
- return { type: 'fill', selector: sel, value: text.join('=') };
126
- }
127
- case 'select': {
128
- const [sel, ...vals] = value.split('=');
129
- return { type: 'select', selector: sel, value: vals.join('=') };
130
- }
131
- case 'press':
132
- return { type: 'press', key: value };
133
- case 'hover':
134
- return { type: 'hover', selector: value };
135
- case 'waitFor':
136
- return { type: 'waitForSelector', selector: value };
137
- case 'wait-for':
138
- return { type: 'waitForSelector', selector: value, timeout: 10000 };
139
- case 'screenshot':
140
- return { type: 'screenshot' };
141
- default:
142
- throw new Error(`Unknown action type: ${type}`);
143
- }
144
- });
145
- }
146
- /**
147
- * Format an error with actionable suggestions based on error type
148
- */
149
- function formatError(error, _url, options) {
150
- const msg = error.message || String(error);
151
- const lines = [`\x1b[31m✖ ${msg}\x1b[0m`];
152
- if (msg.includes('net::ERR_') || msg.includes('ECONNREFUSED') || msg.includes('ENOTFOUND')) {
153
- lines.push('\x1b[33m💡 Check the URL is correct and the site is accessible.\x1b[0m');
154
- }
155
- else if (msg.includes('timeout') || msg.includes('Timeout') || msg.includes('Navigation timeout')) {
156
- lines.push('\x1b[33m💡 Try increasing timeout: --timeout 60000\x1b[0m');
157
- if (!options.render) {
158
- lines.push('\x1b[33m💡 Site may need browser rendering: --render\x1b[0m');
159
- }
160
- }
161
- else if (msg.includes('blocked') || msg.includes('403') || msg.includes('Access Denied') || msg.includes('challenge')) {
162
- if (!options.stealth) {
163
- lines.push('\x1b[33m💡 Try stealth mode to bypass bot detection: --stealth\x1b[0m');
164
- }
165
- lines.push('\x1b[33m💡 Try a different user agent: --ua "Mozilla/5.0..."\x1b[0m');
166
- }
167
- else if (msg.includes('empty') || msg.includes('no content') || msg.includes('0 tokens')) {
168
- if (!options.render) {
169
- lines.push('\x1b[33m💡 Page may be JavaScript-rendered. Try: --render\x1b[0m');
170
- }
171
- else if (!options.stealth) {
172
- lines.push('\x1b[33m💡 Content may be behind bot detection. Try: --stealth\x1b[0m');
173
- }
174
- lines.push('\x1b[33m💡 Try waiting longer for content: --wait 5000\x1b[0m');
175
- }
176
- else if (msg.includes('captcha') || msg.includes('CAPTCHA') || msg.includes('Captcha')) {
177
- lines.push('\x1b[33m💡 This site requires CAPTCHA solving. Try a browser profile: --profile mysite --headed\x1b[0m');
178
- }
179
- else if (msg.includes('rate limit') || msg.includes('429')) {
180
- lines.push('\x1b[33m💡 Rate limited. Wait a moment and try again, or use --proxy.\x1b[0m');
181
- }
182
- else if (msg.toLowerCase().includes('enotfound') || msg.toLowerCase().includes('getaddrinfo')) {
183
- lines.push('\x1b[33m💡 Could not resolve hostname. Check the URL is correct.\x1b[0m');
184
- }
185
- else if (msg.toLowerCase().includes('certificate') || msg.toLowerCase().includes('ssl') || msg.toLowerCase().includes('tls')) {
186
- lines.push('\x1b[33m💡 SSL/TLS error. The site may have an invalid certificate.\x1b[0m');
187
- }
188
- else if (msg.toLowerCase().includes('usage') || msg.toLowerCase().includes('quota') || msg.toLowerCase().includes('limit')) {
189
- lines.push('\x1b[33m💡 Run `webpeel usage` to check your quota, or `webpeel login` to authenticate.\x1b[0m');
190
- }
191
- return lines.join('\n');
192
- }
193
- program
194
- .argument('[url]', 'URL to fetch')
195
- .option('-r, --render', 'Use headless browser (for JS-heavy sites)')
196
- .option('--stealth', 'Use stealth mode to bypass bot detection (auto-enables --render)')
197
- .option('--cloaked', 'Use CloakBrowser stealth (requires: npm install cloakbrowser)')
198
- .option('--tls', 'Use PeelTLS TLS fingerprint spoofing (built-in, no install needed)')
199
- .option('--cycle', 'Use PeelTLS TLS fingerprint spoofing (alias for --tls)', false)
200
- .option('--proxy <url>', 'Proxy URL for requests (http://host:port, socks5://user:pass@host:port)')
201
- .option('--proxies <urls>', 'Comma-separated list of proxy URLs for rotation (tried in order on failure)', (val) => val.split(',').map((s) => s.trim()).filter(Boolean))
202
- .option('-w, --wait <ms>', 'Wait time after page load (ms)', parseInt)
203
- .option('--html', 'Output raw HTML instead of markdown')
204
- .option('--text', 'Output plain text instead of markdown')
205
- .option('--clean', 'Clean output — article content only, no links or metadata (alias for --readable with URL-stripped markdown)')
206
- .option('--json', 'Output as JSON')
207
- .option('-t, --timeout <ms>', 'Request timeout (ms)', (v) => parseInt(v, 10), 30000)
208
- .option('--ua <agent>', 'Custom user agent')
209
- .option('-s, --silent', 'Silent mode (no spinner)')
210
- .option('--screenshot [path]', 'Take a screenshot (optionally save to file path)')
211
- .option('--full-page', 'Full-page screenshot (use with --screenshot)')
212
- .option('--selector <css>', 'CSS selector to extract (e.g., "article", ".content")')
213
- .option('--exclude <selectors...>', 'CSS selectors to exclude (e.g., ".sidebar" ".ads")')
214
- .option('--include-tags <tags>', 'Comma-separated HTML tags/selectors to include (e.g., "main,article,.content")')
215
- .option('--exclude-tags <tags>', 'Comma-separated HTML tags/selectors to exclude (e.g., "nav,footer,aside")')
216
- .option('--only-main-content', 'Shortcut for --include-tags main,article')
217
- .option('--full-content', 'Return full page content (disable automatic content density pruning)')
218
- .option('--readable', 'Reader mode — extract only the main article content, strip all noise (like browser Reader Mode)')
219
- .option('--full-nav', 'Keep full navigation/content (disable auto-readability when piped or in agent mode)')
220
- .option('--focus <query>', 'Query-focused filtering — only return content relevant to this query (BM25 ranking)')
221
- .option('--chunk', 'Split content into RAG-ready chunks')
222
- .option('--chunk-size <tokens>', 'Max tokens per chunk (default: 512)', parseInt)
223
- .option('--chunk-overlap <tokens>', 'Overlap tokens between chunks (default: 50)', parseInt)
224
- .option('--chunk-strategy <strategy>', 'Chunking strategy: section (default), paragraph, fixed')
225
- .option('-H, --header <header...>', 'Custom headers (e.g., "Authorization: Bearer token")')
226
- .option('--cookie <cookie...>', 'Cookies to set (e.g., "session=abc123")')
227
- .option('--cache <ttl>', 'Cache results locally (e.g., "5m", "1h", "1d") — default: 5m')
228
- .option('--no-cache', 'Disable automatic caching for this request')
229
- .option('--links', 'Output only the links found on the page')
230
- .option('--images', 'Output image URLs from the page')
231
- .option('--meta', 'Output only the page metadata (title, description, author, etc.)')
232
- .option('--raw', 'Return full page without smart content extraction')
233
- .option('--full', 'Alias for --raw — full page content, no budget')
234
- .option('--lite', 'Lite mode — minimal processing, maximum speed (skip pruning, budget, metadata)')
235
- .option('--action <actions...>', 'Page actions before scraping (e.g., "click:.btn" "wait:2000" "scroll:bottom")')
236
- .option('--extract <json>', 'Extract structured data using CSS selectors (JSON object of field:selector pairs)')
237
- .option('--llm-extract [instruction]', 'Extract structured data using LLM (optional instruction, e.g. "extract hotel names and prices")')
238
- .option('--extract-schema <schema>', 'JSON schema for structured extraction (requires LLM key). Pass inline JSON or @file.json')
239
- .option('--llm-key <key>', 'LLM API key for AI features (or use OPENAI_API_KEY env var)')
240
- .option('--llm-model <model>', 'LLM model to use (default: gpt-4o-mini)')
241
- .option('--llm-base-url <url>', 'LLM API base URL (default: https://api.openai.com/v1)')
242
- .option('--summary', 'Generate AI summary of content (requires --llm-key or OPENAI_API_KEY)')
243
- .option('--location <country>', 'ISO country code for geo-targeting (e.g., "US", "DE", "JP")')
244
- .option('--language <lang>', 'Language preference (e.g., "en", "de", "ja")')
245
- .option('--max-tokens <n>', 'Maximum token count for output (truncate if exceeded)', parseInt)
246
- .option('--budget <n>', 'Smart token budget — distill content to fit within N tokens (heuristic, no LLM key needed)', parseInt)
247
- .option('--extract-all', 'Auto-detect and extract repeated listing items (e.g., search results)')
248
- .option('--schema <name>', 'Force a specific extraction schema by name or domain (e.g., "booking.com", "amazon")')
249
- .option('--list-schemas', 'List all available extraction schemas and their supported domains')
250
- .option('--scroll-extract [count]', 'Scroll page N times to load lazy content (bare flag = smart auto-scroll until stable), then extract (implies --render)', (v) => parseInt(v, 10))
251
- .option('--scroll-extract-timeout <ms>', 'Total timeout in ms for auto-scroll (default: 30000, only used with bare --scroll-extract)', parseInt)
252
- .option('--csv', 'Output extraction results as CSV')
253
- .option('--table', 'Output extraction results as a formatted table')
254
- .option('--pages <n>', 'Follow pagination "Next" links for N pages (max 10)', (v) => parseInt(v, 10))
255
- .option('--profile <path>', 'Use a persistent browser profile directory (cookies/sessions survive between calls)')
256
- .option('--headed', 'Run browser in headed (visible) mode — useful for profile setup and debugging')
257
- .option('-q, --question <q>', 'Ask a question about the page content (BM25-powered, no LLM key needed)')
258
- .option('--agent', 'Agent mode: sets --json, --silent, --extract-all, and --budget 4000 (override with --budget N)')
259
- .option('--device <type>', 'Device emulation: desktop (default), mobile, tablet (auto-enables --render)')
260
- .option('--viewport <WxH>', 'Browser viewport size (e.g., "1920x1080") (auto-enables --render)', (val) => {
261
- const [w, h] = val.split('x').map(Number);
262
- return { width: w, height: h };
263
- })
264
- .option('--wait-until <event>', 'Page load event: domcontentloaded, networkidle, load, commit (auto-enables --render)')
265
- .option('--wait-selector <css>', 'Wait for CSS selector before extracting (auto-enables --render)')
266
- .option('--block-resources <types>', 'Block resource types, comma-separated: image,stylesheet,font,media,script (auto-enables --render)')
267
- .option('--format <type>', 'Output format: markdown (default), text, html, json');
268
- // ─── Help System ─────────────────────────────────────────────────────────────
269
- // Detect --help-all early, before Commander parses argv.
31
+ // ── --help-all detection (must happen before Commander parses) ────────────────
270
32
  const isHelpAll = process.argv.slice(2).some(a => a === '--help-all');
271
33
  if (isHelpAll) {
272
- // Translate --help-all → --help so Commander generates its standard output.
273
34
  const idx = process.argv.indexOf('--help-all');
274
35
  if (idx !== -1)
275
36
  process.argv[idx] = '--help';
276
37
  }
277
- // ANSI helpers (fall back gracefully when colors are disabled).
278
- const NO_COLOR = process.env.NO_COLOR !== undefined || !process.stdout.isTTY;
279
- const bold = (s) => NO_COLOR ? s : `\x1b[1m${s}\x1b[0m`;
280
- const dim = (s) => NO_COLOR ? s : `\x1b[2m${s}\x1b[0m`;
281
- const cyan = (s) => NO_COLOR ? s : `\x1b[36m${s}\x1b[0m`;
282
- /**
283
- * Reconstruct the standard Commander help layout for --help-all and subcommands.
284
- * This mirrors Commander's own default formatHelp() so subcommand help keeps working.
285
- */
286
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
287
- function buildCommanderHelp(cmd, helper) {
288
- const termWidth = helper.padWidth(cmd, helper);
289
- const helpWidth = helper.helpWidth ?? 80;
290
- const pad = ' ';
291
- const formatItem = (term, description) => {
292
- if (description) {
293
- const full = `${term.padEnd(termWidth + 2)}${description}`;
294
- return helper.wrap(full, helpWidth - pad.length, termWidth + 2);
295
- }
296
- return term;
297
- };
298
- const formatList = (items) => items.join('\n').replace(/^/gm, pad);
299
- let out = [`Usage: ${helper.commandUsage(cmd)}`, ''];
300
- const desc = helper.commandDescription(cmd);
301
- if (desc.length > 0) {
302
- out = out.concat([helper.wrap(desc, helpWidth, 0), '']);
303
- }
304
- // Arguments
305
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
306
- const args = helper.visibleArguments(cmd).map(a => formatItem(helper.argumentTerm(a), helper.argumentDescription(a)));
307
- if (args.length > 0)
308
- out = out.concat(['Arguments:', formatList(args), '']);
309
- // Options
310
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
311
- const opts = helper.visibleOptions(cmd).map(o => formatItem(helper.optionTerm(o), helper.optionDescription(o)));
312
- if (opts.length > 0)
313
- out = out.concat(['Options:', formatList(opts), '']);
314
- // Subcommands
315
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
316
- const cmds = helper.visibleCommands(cmd).map(c => formatItem(helper.subcommandTerm(c), helper.subcommandDescription(c)));
317
- if (cmds.length > 0)
318
- out = out.concat(['Commands:', formatList(cmds), '']);
319
- // Append grouped option sections only on root command (--help-all)
320
- if (cmd.parent === null) {
321
- out = out.concat([`
322
- Output Formats:
323
- --json JSON output with full metadata
324
- --html Raw HTML output
325
- --text Plain text output
326
- --csv / --table Tabular output for extractions
327
- -s, --silent No spinner or progress output
328
-
329
- Content Control:
330
- --readable Reader mode — clean article content only
331
- --budget <n> Smart token budget (no LLM key needed)
332
- --focus <query> BM25 query-focused filtering
333
- --selector <css> Extract specific CSS selector
334
- --only-main-content Just main/article content
335
- --full-content Disable content pruning
336
- -q, --question <q> Ask a question about the content
337
-
338
- Rendering:
339
- -r, --render Browser rendering for JS-heavy sites
340
- --stealth Stealth mode for bot-protected sites
341
- --profile <path> Persistent browser profile
342
- --headed Visible browser (for debugging)
343
- --action <actions> Browser automation (click, type, scroll...)
344
-
345
- Extraction:
346
- --extract <json> CSS selector extraction
347
- --extract-all Auto-detect listing items
348
- --schema <name> Named extraction schema
349
- --llm-extract [inst] LLM-powered extraction (BYOK)
350
-
351
- Examples:
352
- $ webpeel "https://example.com" Basic fetch
353
- $ webpeel "https://youtube.com/watch?v=..." --json YouTube transcript
354
- $ webpeel "https://openai.com/pricing" -q "GPT-4 cost?" Quick answer
355
- $ webpeel "https://nytimes.com/article" --readable Reader mode
356
- $ webpeel search "best restaurants in NYC" Web search
357
- $ webpeel hotels "Manhattan" --checkin tomorrow Hotel search
358
-
359
- Agent Integration:
360
- $ webpeel mcp Start MCP server
361
- $ cat urls.txt | webpeel batch Batch from stdin
362
- $ webpeel pipe "https://example.com" | jq .content Pipe-friendly JSON
363
- $ webpeel "https://site.com" --json --silent Same as pipe
364
- $ curl https://webpeel.dev/llms.txt AI-readable docs
365
- `]);
366
- }
367
- return out.join('\n');
368
- }
369
- /**
370
- * Condensed, Anthropic-style help for the root command (default --help).
371
- */
372
- function buildCondensedHelp() {
373
- const v = cliVersion;
374
- return [
375
- '',
376
- ` ${bold('◆ WebPeel')} ${dim(`v${v}`)}`,
377
- ` ${dim('The web data platform for AI agents')}`,
378
- '',
379
- ` ${bold('Usage:')} webpeel [url] [options]`,
380
- ` webpeel <command> [options]`,
381
- '',
382
- ` ${bold('Examples:')}`,
383
- ` webpeel https://example.com ${dim('Clean content (reader mode)')}`,
384
- ` webpeel read https://example.com ${dim('Explicit reader mode')}`,
385
- ` webpeel screenshot https://example.com ${dim('Screenshot any page')}`,
386
- ` webpeel ask https://news.com "summary" ${dim('Ask about any page')}`,
387
- ` webpeel search "webpeel vs jina" ${dim('Web search')}`,
388
- ` echo "url" | webpeel ${dim('Pipe mode (auto JSON)')}`,
389
- '',
390
- ` ${bold('Commands:')}`,
391
- ` fetch (default) Fetch a URL as clean markdown`,
392
- ` read <url> Reader mode (article content only)`,
393
- ` screenshot <url> Take a screenshot`,
394
- ` ask <url> <question> Ask about any page`,
395
- ` search <query> Search the web (DuckDuckGo + sources)`,
396
- ` crawl <url> Crawl a website`,
397
- ` mcp Start MCP server for AI tools`,
398
- ` ${dim('... (use --help-all for all 25+ commands)')}`,
399
- '',
400
- ` ${bold('Common Options:')}`,
401
- ` -r, --render Browser rendering (JS-heavy sites)`,
402
- ` --stealth Stealth mode (anti-bot bypass)`,
403
- ` --raw Full page (disable auto reader mode)`,
404
- ` --full Full page, no budget limit`,
405
- ` --json JSON output with metadata`,
406
- ` --budget: 4000)`,
407
- ` -q, --question <q> Ask about the content`,
408
- ` -s, --silent No spinner output`,
409
- '',
410
- ` Use ${cyan("'webpeel <command> --help'")} for command-specific options.`,
411
- ` Use ${cyan("'webpeel --help-all'")} for the full option reference.`,
412
- '',
413
- ` Docs: ${cyan('https://webpeel.dev/docs')}`,
414
- '',
415
- ].join('\n');
416
- }
38
+ // ── Program setup ─────────────────────────────────────────────────────────────
39
+ const program = new Command();
40
+ program
41
+ .name('webpeel')
42
+ .description('Fast web fetcher for AI agents')
43
+ .version(cliVersion)
44
+ .enablePositionalOptions();
45
+ // ── Help formatting ───────────────────────────────────────────────────────────
417
46
  program.configureHelp({
418
47
  sortSubcommands: true,
419
48
  showGlobalOptions: false,
420
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
421
49
  formatHelp: (cmd, helper) => {
422
- // Subcommands always get standard Commander help.
423
- // Root command with --help-all also gets standard full help.
424
50
  if (cmd.parent !== null || isHelpAll) {
425
51
  return buildCommanderHelp(cmd, helper);
426
52
  }
427
- // Root command default: beautiful condensed help.
428
53
  return buildCondensedHelp();
429
54
  },
430
55
  });
431
- // ============================================================
432
- // API-based fetch (routes through WebPeel API, no local Playwright)
433
- // ============================================================
434
- async function fetchViaApi(url, options, apiKey, apiUrl) {
435
- // --format is a CLI output flag; API format is always the content extraction format
436
- const apiFormat = (['text', 'html', 'markdown', 'md'].includes((options.format || '').toLowerCase()))
437
- ? (options.format.toLowerCase() === 'md' ? 'markdown' : options.format.toLowerCase())
438
- : (options.html ? 'html' : options.text ? 'text' : 'markdown');
439
- const params = new URLSearchParams({ url, format: apiFormat });
440
- if (options.render)
441
- params.set('render', 'true');
442
- if (options.stealth)
443
- params.set('stealth', 'true');
444
- if (options.wait)
445
- params.set('wait', String(options.wait));
446
- if (options.selector)
447
- params.set('selector', options.selector);
448
- if (options.readable)
449
- params.set('readable', 'true');
450
- if (options.summary)
451
- params.set('summary', 'true');
452
- if (options.budget)
453
- params.set('budget', String(options.budget));
454
- if (options.question)
455
- params.set('question', options.question);
456
- const res = await fetch(`${apiUrl}/v1/fetch?${params}`, {
457
- headers: { Authorization: `Bearer ${apiKey}` },
458
- signal: AbortSignal.timeout(60000),
459
- });
460
- if (res.status === 401) {
461
- throw Object.assign(new Error('API key invalid or expired. Run: webpeel auth <new-key>'), { code: 'AUTH_FAILED' });
462
- }
463
- if (res.status === 429) {
464
- throw Object.assign(new Error('Rate limit exceeded. Check your plan at https://app.webpeel.dev/billing'), { code: 'RATE_LIMITED' });
465
- }
466
- if (!res.ok) {
467
- const body = await res.text().catch(() => '');
468
- throw new Error(`API error ${res.status}: ${body.slice(0, 200)}`);
469
- }
470
- const data = await res.json();
471
- // Map API response to PeelResult shape that the CLI already handles
472
- return {
473
- url: data.url || url,
474
- title: data.metadata?.title || data.title || '',
475
- content: data.content || '',
476
- method: data.method || 'simple',
477
- tokens: data.tokenCount || data.tokens || 0,
478
- elapsed: data.fetchTimeMs || data.elapsed || 0,
479
- tokenSavingsPercent: data.tokenSavingsPercent,
480
- rawTokenEstimate: data.rawTokenEstimate,
481
- metadata: data.metadata || {},
482
- links: data.links || [],
483
- answer: data.answer,
484
- summary: data.summary,
485
- format: options.format || 'markdown',
486
- };
487
- }
488
- // Main fetch handler — shared with the `pipe` subcommand
489
- async function runFetch(url, options) {
490
- // Handle --format flag: maps to existing boolean flags
491
- if (options.format) {
492
- const fmt = options.format.toLowerCase();
493
- if (fmt === 'text')
494
- options.text = true;
495
- else if (fmt === 'html')
496
- options.html = true;
497
- else if (fmt === 'json')
498
- options.json = true;
499
- else if (fmt === 'markdown' || fmt === 'md') { /* default, do nothing */ }
500
- else {
501
- console.error(`Unknown format: ${options.format}. Use: text, markdown, html, or json`);
502
- process.exit(1);
503
- }
504
- }
505
- // Smart defaults: when piped (not a TTY), default to silent JSON + budget
506
- // BUT respect explicit --format flag (user chose the output format)
507
- const isPiped = !process.stdout.isTTY;
508
- const hasExplicitFormat = options.format && ['text', 'html', 'markdown', 'md'].includes(options.format.toLowerCase());
509
- if (isPiped && !options.html && !options.text && !hasExplicitFormat) {
510
- if (!options.json)
511
- options.json = true;
512
- if (!options.silent)
513
- options.silent = true;
514
- // Auto-enable readability for AI consumers — clean content by default
515
- if (!options.readable && !options.fullNav) {
516
- options.readable = true;
517
- }
518
- // Auto token budget for piped mode (AI consumers want concise content)
519
- if (options.budget === undefined && !options.fullContent && !options.raw && !options.full) {
520
- options.budget = 4000;
521
- }
522
- }
523
- // --full alias: sets raw + fullContent
524
- if (options.full) {
525
- options.raw = true;
526
- options.fullContent = true;
527
- }
528
- // Smart defaults for terminal (interactive) mode
529
- const isTerminal = process.stdout.isTTY && !isPiped;
530
- if (isTerminal && !options.raw && !options.html && !options.text) {
531
- // Auto-readable: clean content by default (like browser Reader Mode)
532
- if (!options.readable && !options.fullNav && !options.selector) {
533
- options.readable = true;
534
- }
535
- // Default token budget: don't flood the terminal with 20K tokens
536
- if (options.budget === undefined && !options.fullContent && !options.raw) {
537
- options.budget = 4000;
538
- }
539
- }
540
- // --agent sets sensible defaults for AI agents; explicit flags override
541
- if (options.agent) {
542
- if (!options.json)
543
- options.json = true;
544
- if (!options.silent)
545
- options.silent = true;
546
- if (!options.extractAll)
547
- options.extractAll = true;
548
- if (options.budget === undefined)
549
- options.budget = 4000;
550
- // Agent mode = clean content by default
551
- if (!options.readable && !options.fullNav) {
552
- options.readable = true;
553
- }
554
- }
555
- const isJson = options.json;
556
- // --- --list-schemas: print all available schemas and exit ---
557
- if (options.listSchemas) {
558
- const { loadBundledSchemas } = await import('./core/schema-extraction.js');
559
- const schemas = loadBundledSchemas();
560
- if (isJson) {
561
- await writeStdout(JSON.stringify(schemas.map(s => ({
562
- name: s.name,
563
- version: s.version,
564
- domains: s.domains,
565
- urlPatterns: s.urlPatterns,
566
- })), null, 2) + '\n');
567
- }
568
- else {
569
- console.log(`\nAvailable extraction schemas (${schemas.length}):\n`);
570
- for (const s of schemas) {
571
- console.log(` ${s.name} (v${s.version})`);
572
- console.log(` Domains: ${s.domains.join(', ')}`);
573
- if (s.urlPatterns && s.urlPatterns.length > 0) {
574
- console.log(` URL patterns: ${s.urlPatterns.join(', ')}`);
575
- }
576
- console.log('');
577
- }
578
- }
579
- process.exit(0);
580
- }
581
- // --- #5: Concise error for missing URL (no help dump) ---
582
- if (!url || url.trim() === '') {
583
- if (isJson) {
584
- await writeStdout(JSON.stringify({ success: false, error: { type: 'invalid_request', message: 'URL is required' } }) + '\n');
585
- }
586
- else {
587
- console.error('Error: URL is required');
588
- console.error('Usage: webpeel <url> [options]');
589
- console.error('Run "webpeel --help" for full usage.');
590
- }
591
- process.exit(1);
592
- }
593
- // --- #6: Helper to output JSON errors and exit ---
594
- function exitWithJsonError(message, code) {
595
- if (isJson) {
596
- process.stdout.write(JSON.stringify({
597
- success: false,
598
- error: { type: code.toLowerCase(), message },
599
- }) + '\n');
600
- }
601
- else {
602
- console.error(`Error: ${message}`);
603
- }
604
- process.exit(1);
605
- }
606
- // SECURITY: Enhanced URL validation
607
- if (url.length > 2048) {
608
- exitWithJsonError('URL too long (max 2048 characters)', 'INVALID_URL');
609
- }
610
- // Check for control characters
611
- if (/[\x00-\x1F\x7F]/.test(url)) {
612
- exitWithJsonError('URL contains invalid control characters', 'INVALID_URL');
613
- }
614
- // Validate URL format
615
- try {
616
- const parsed = new URL(url);
617
- if (!['http:', 'https:'].includes(parsed.protocol)) {
618
- exitWithJsonError('Only HTTP and HTTPS protocols are allowed', 'INVALID_URL');
619
- }
620
- }
621
- catch {
622
- // Check if it looks like a command/verb the user typed by mistake
623
- const commonVerbs = ['fetch', 'get', 'scrape', 'read', 'download', 'curl', 'wget', 'peel'];
624
- if (commonVerbs.includes(url.toLowerCase())) {
625
- exitWithJsonError(`Did you mean: webpeel "${program.args[1] || '<url>'}"?\nThe URL goes directly after webpeel — no verb needed.\nExample: webpeel "https://example.com" --json`, 'INVALID_URL');
626
- }
627
- else {
628
- exitWithJsonError(`Invalid URL: "${url}"\nMake sure to include the protocol (https://)\nExample: webpeel "https://${url}" --json`, 'INVALID_URL');
629
- }
630
- }
631
- const useStealth = options.stealth || false;
632
- // Check usage quota
633
- const usageCheck = await checkUsage();
634
- if (!usageCheck.allowed) {
635
- if (isJson) {
636
- await writeStdout(JSON.stringify({ success: false, error: { type: 'rate_limited', message: usageCheck.message } }) + '\n');
637
- process.exit(1);
638
- }
639
- console.error(usageCheck.message);
640
- process.exit(1);
641
- }
642
- // Check cache first (before spinner/network)
643
- // Default: 5m TTL for all CLI fetches unless --no-cache is set
644
- let cacheTtlMs;
645
- const cacheDisabled = options.cache === false; // --no-cache sets options.cache to false
646
- const explicitTtl = typeof options.cache === 'string' ? options.cache : undefined;
647
- if (!cacheDisabled) {
648
- const ttlStr = explicitTtl || '5m';
649
- try {
650
- cacheTtlMs = parseTTL(ttlStr);
651
- }
652
- catch (e) {
653
- exitWithJsonError(e.message, 'FETCH_FAILED');
654
- }
655
- const cacheOptions = {
656
- render: options.render,
657
- stealth: options.stealth,
658
- selector: options.selector,
659
- format: options.html ? 'html' : options.text ? 'text' : options.clean ? 'clean' : 'markdown',
660
- budget: null, // Budget excluded from cache key — cache stores full content
661
- readable: options.readable || false,
662
- };
663
- const cachedResult = getCache(url, cacheOptions);
664
- if (cachedResult) {
665
- if (!options.silent) {
666
- console.error(`\x1b[36m⚡ Cache hit\x1b[0m (TTL: ${ttlStr})`);
667
- }
668
- // Apply budget to cached content (cache stores full, budget is post-process)
669
- if (options.budget && options.budget > 0 && cachedResult.content) {
670
- const { distillToBudget } = await import('./core/budget.js');
671
- const fmt = options.text ? 'text' : 'markdown';
672
- cachedResult.content = distillToBudget(cachedResult.content, options.budget, fmt);
673
- cachedResult.tokens = Math.ceil(cachedResult.content.length / 4);
674
- }
675
- // LLM extraction from cached content
676
- if (options.llmExtract || options.extractSchema) {
677
- const { extractWithLLM } = await import('./core/llm-extract.js');
678
- const llmCfgCached = loadConfig();
679
- const llmApiKeyCached = options.llmKey || llmCfgCached.llm?.apiKey || process.env.OPENAI_API_KEY;
680
- if (!llmApiKeyCached) {
681
- console.error('Error: LLM extraction requires an API key.\nSet OPENAI_API_KEY environment variable or use --llm-key <key>');
682
- process.exit(1);
683
- }
684
- const llmModelCached = options.llmModel || llmCfgCached.llm?.model || process.env.WEBPEEL_LLM_MODEL || 'gpt-4o-mini';
685
- const llmBaseUrlCached = options.llmBaseUrl || llmCfgCached.llm?.baseUrl || process.env.WEBPEEL_LLM_BASE_URL || 'https://api.openai.com/v1';
686
- const llmInstructionCached = typeof options.llmExtract === 'string' ? options.llmExtract : undefined;
687
- // Parse schema if provided
688
- let llmSchemaCached;
689
- if (options.extractSchema) {
690
- let schemaStr = options.extractSchema;
691
- if (schemaStr.startsWith('@')) {
692
- schemaStr = readFileSync(schemaStr.slice(1), 'utf-8');
693
- }
694
- try {
695
- llmSchemaCached = JSON.parse(schemaStr);
696
- }
697
- catch {
698
- console.error('Error: --extract-schema must be valid JSON or a valid @file.json path');
699
- process.exit(1);
700
- }
701
- }
702
- const llmResultCached = await extractWithLLM({
703
- content: cachedResult.content,
704
- instruction: llmInstructionCached,
705
- schema: llmSchemaCached,
706
- apiKey: llmApiKeyCached,
707
- model: llmModelCached,
708
- baseUrl: llmBaseUrlCached,
709
- });
710
- await writeStdout(JSON.stringify(llmResultCached.items, null, 2) + '\n');
711
- if (!options.silent) {
712
- const { input, output } = llmResultCached.tokensUsed;
713
- const costStr = llmResultCached.cost !== undefined ? ` | Est. cost: $${llmResultCached.cost.toFixed(6)}` : '';
714
- console.error(`\n🤖 LLM extraction: ${llmResultCached.items.length} items | ${input} input + ${output} output tokens${costStr} | model: ${llmResultCached.model}`);
715
- }
716
- process.exit(0);
717
- }
718
- // --- LLM-free Quick Answer (also on cached content) ---
719
- if (options.question && cachedResult.content) {
720
- const { quickAnswer } = await import('./core/quick-answer.js');
721
- const qa = quickAnswer({
722
- question: options.question,
723
- content: cachedResult.content,
724
- url: cachedResult.url,
725
- });
726
- cachedResult.quickAnswer = qa;
727
- if (!isJson) {
728
- const conf = (qa.confidence * 100).toFixed(0);
729
- await writeStdout(`\n\x1b[36m📋 ${qa.question}\x1b[0m\n\n`);
730
- if (qa.answer) {
731
- await writeStdout(`\x1b[32m💡 Answer (${conf}% confidence):\x1b[0m\n${qa.answer}\n`);
732
- }
733
- else {
734
- await writeStdout(`\x1b[33m💡 No relevant answer found (${conf}% confidence)\x1b[0m\n`);
735
- }
736
- if (qa.passages && qa.passages.length > 1) {
737
- await writeStdout(`\n\x1b[33m📝 Supporting evidence:\x1b[0m\n`);
738
- for (const p of qa.passages.slice(1, 4)) {
739
- await writeStdout(` • [${(p.score * 100).toFixed(0)}%] ${p.text.substring(0, 200)}${p.text.length > 200 ? '...' : ''}\n`);
740
- }
741
- }
742
- await writeStdout('\n');
743
- await cleanup();
744
- process.exit(0);
745
- }
746
- }
747
- // --- BM25 Schema Template Extraction (cached path) ---
748
- if (options.schema && cachedResult.content) {
749
- const { getSchemaTemplate: getSchTmplCached } = await import('./core/schema-templates.js');
750
- const schTemplateCached = getSchTmplCached(options.schema);
751
- if (schTemplateCached) {
752
- const { quickAnswer: qaCached } = await import('./core/quick-answer.js');
753
- const { smartExtractSchemaFields: smartExtractCached } = await import('./core/schema-postprocess.js');
754
- const extractedCached = smartExtractCached(cachedResult.content, schTemplateCached.fields, qaCached, {
755
- pageTitle: cachedResult.title,
756
- pageUrl: cachedResult.url,
757
- metadata: cachedResult.metadata,
758
- });
759
- cachedResult.extracted = extractedCached;
760
- }
761
- }
762
- await outputResult(cachedResult, options, { cached: true });
763
- process.exit(0);
764
- }
765
- }
766
- const spinner = options.silent ? null : ora('Fetching...').start();
767
- try {
768
- // Validate options
769
- if (options.wait && (options.wait < 0 || options.wait > 60000)) {
770
- throw Object.assign(new Error('Wait time must be between 0 and 60000ms'), { _code: 'FETCH_FAILED' });
771
- }
772
- // Parse custom headers
773
- let headers;
774
- if (options.header && options.header.length > 0) {
775
- headers = {};
776
- for (const header of options.header) {
777
- const colonIndex = header.indexOf(':');
778
- if (colonIndex === -1) {
779
- throw Object.assign(new Error(`Invalid header format: ${header}. Expected "Key: Value"`), { _code: 'FETCH_FAILED' });
780
- }
781
- const key = header.slice(0, colonIndex).trim();
782
- const value = header.slice(colonIndex + 1).trim();
783
- headers[key] = value;
784
- }
785
- }
786
- // Parse actions
787
- let actions;
788
- if (options.action && options.action.length > 0) {
789
- try {
790
- actions = parseActions(options.action);
791
- }
792
- catch (e) {
793
- throw Object.assign(new Error(e.message), { _code: 'FETCH_FAILED' });
794
- }
795
- }
796
- // --extract-schema auto-enables JSON output
797
- if (options.extractSchema) {
798
- options.json = true;
799
- }
800
- // Parse extract
801
- let extract;
802
- if (options.llmExtract || options.extractSchema) {
803
- // LLM-based extraction is handled post-fetch (after peel returns markdown).
804
- // Early-validate that an API key is available so we fail fast.
805
- const llmCfg = loadConfig();
806
- const llmApiKey = options.llmKey || llmCfg.llm?.apiKey || process.env.OPENAI_API_KEY;
807
- if (!llmApiKey) {
808
- throw Object.assign(new Error('LLM extraction requires an API key.\n' +
809
- 'Set OPENAI_API_KEY environment variable or use --llm-key <key>'), { _code: 'FETCH_FAILED' });
810
- }
811
- // Do NOT set extract here — peel runs normally, LLM extraction happens below.
812
- }
813
- else if (options.extract) {
814
- // CSS-based extraction
815
- try {
816
- extract = { selectors: JSON.parse(options.extract) };
817
- }
818
- catch {
819
- throw Object.assign(new Error('--extract must be valid JSON (e.g., \'{"title": "h1", "price": ".price"}\')'), { _code: 'FETCH_FAILED' });
820
- }
821
- }
822
- // Validate maxTokens
823
- if (options.maxTokens !== undefined) {
824
- if (isNaN(options.maxTokens) || options.maxTokens < 100) {
825
- throw Object.assign(new Error('--max-tokens must be at least 100'), { _code: 'FETCH_FAILED' });
826
- }
827
- }
828
- // Parse include-tags and exclude-tags
829
- let includeTags;
830
- let excludeTags;
831
- if (options.onlyMainContent) {
832
- includeTags = ['main', 'article'];
833
- }
834
- else if (options.includeTags) {
835
- includeTags = options.includeTags.split(',').map((t) => t.trim());
836
- }
837
- if (options.excludeTags) {
838
- excludeTags = options.excludeTags.split(',').map((t) => t.trim());
839
- }
840
- // Build location options
841
- let locationOptions;
842
- if (options.location || options.language) {
843
- locationOptions = {};
844
- if (options.location) {
845
- locationOptions.country = options.location;
846
- }
847
- if (options.language) {
848
- locationOptions.languages = [options.language];
849
- }
850
- }
851
- // ── Resolve --profile: name → path + storage state ─────────────────
852
- let resolvedProfileDir;
853
- let resolvedStorageState;
854
- let resolvedProfileName;
855
- if (options.profile) {
856
- const profilePath = getProfilePath(options.profile);
857
- if (profilePath) {
858
- // It's a named profile in ~/.webpeel/profiles/
859
- resolvedProfileDir = profilePath;
860
- resolvedStorageState = loadStorageState(options.profile) ?? undefined;
861
- resolvedProfileName = options.profile;
862
- }
863
- else if (existsSync(options.profile)) {
864
- // It's a raw directory path (backward compat)
865
- resolvedProfileDir = options.profile;
866
- }
867
- else {
868
- exitWithJsonError(`Profile "${options.profile}" not found. Run "webpeel profile list" to see available profiles.`, 'PROFILE_NOT_FOUND');
869
- }
870
- }
871
- // Build peel options
872
- // --stealth auto-enables --render (stealth requires browser)
873
- // --action auto-enables --render (actions require browser)
874
- // --scroll-extract implies --render (needs browser)
875
- //
876
- // Bare --scroll-extract (no number) → smart autoScroll (detects stable height)
877
- // --scroll-extract N (with number) → legacy fixed N scrolls via actions
878
- const scrollExtractRaw = options.scrollExtract;
879
- const isAutoScroll = scrollExtractRaw !== undefined && typeof scrollExtractRaw !== 'number';
880
- const scrollExtractCount = isAutoScroll
881
- ? 0
882
- : (scrollExtractRaw !== undefined ? scrollExtractRaw : 0);
883
- const useRender = options.render || options.stealth || (actions && actions.length > 0) || scrollExtractCount > 0 || isAutoScroll
884
- || (options.device && options.device !== 'desktop')
885
- || !!options.viewport
886
- || !!options.waitUntil
887
- || !!options.waitSelector
888
- || !!options.blockResources
889
- || !!options.screenshot // Auto-enable render for screenshot (needs browser)
890
- || false;
891
- // Inject scroll actions when --scroll-extract N (fixed count) is used
892
- if (scrollExtractCount > 0) {
893
- const scrollActions = [];
894
- for (let i = 0; i < scrollExtractCount; i++) {
895
- scrollActions.push({ type: 'scroll', to: 'bottom' });
896
- scrollActions.push({ type: 'wait', ms: 1500 });
897
- }
898
- actions = actions ? [...actions, ...scrollActions] : scrollActions;
899
- }
900
- const peelOptions = {
901
- render: useRender,
902
- stealth: options.stealth || false,
903
- wait: options.wait || 0,
904
- timeout: options.timeout,
905
- userAgent: options.ua,
906
- screenshot: options.screenshot !== undefined,
907
- screenshotFullPage: options.fullPage || false,
908
- selector: options.selector,
909
- exclude: options.exclude,
910
- includeTags,
911
- excludeTags,
912
- headers,
913
- cookies: options.cookie,
914
- raw: options.raw || false,
915
- lite: options.lite || false,
916
- actions,
917
- maxTokens: options.maxTokens,
918
- // Note: budget is applied AFTER caching (so cache stores full content)
919
- // We pass it to peel() for programmatic API compatibility, but the CLI
920
- // also applies it post-fetch (see below) to ensure cache stores full result.
921
- extract,
922
- images: options.images || false,
923
- location: locationOptions,
924
- profileDir: resolvedProfileDir,
925
- headed: options.headed || false,
926
- storageState: resolvedStorageState,
927
- proxy: options.proxy,
928
- proxies: options.proxies,
929
- fullPage: options.fullContent || false,
930
- readable: options.readable || false,
931
- // Smart auto-scroll (bare --scroll-extract flag)
932
- autoScroll: isAutoScroll
933
- ? { timeout: options.scrollExtractTimeout }
934
- : undefined,
935
- device: options.device,
936
- viewportWidth: options.viewport ? options.viewport.width : undefined,
937
- viewportHeight: options.viewport ? options.viewport.height : undefined,
938
- waitUntil: options.waitUntil,
939
- waitSelector: options.waitSelector,
940
- blockResources: options.blockResources ? options.blockResources.split(',').map((s) => s.trim()) : undefined,
941
- cloaked: options.cloaked ? true : undefined,
942
- cycle: options.cycle ? true : undefined,
943
- tls: (options.tls || options.cycle) ? true : undefined,
944
- };
945
- if (options.cloaked) {
946
- peelOptions.render = true; // CloakBrowser is a browser
947
- }
948
- // Add chunk option if requested
949
- if (options.chunk) {
950
- peelOptions.chunk = {
951
- maxTokens: options.chunkSize || 512,
952
- overlap: options.chunkOverlap || 50,
953
- strategy: options.chunkStrategy || 'section',
954
- };
955
- }
956
- // Add summary option if requested
957
- if (options.summary) {
958
- const llmApiKey = options.llmKey || process.env.OPENAI_API_KEY;
959
- if (!llmApiKey) {
960
- throw Object.assign(new Error('--summary requires --llm-key or OPENAI_API_KEY environment variable'), { _code: 'FETCH_FAILED' });
961
- }
962
- peelOptions.summary = true;
963
- peelOptions.llm = {
964
- apiKey: llmApiKey,
965
- model: process.env.WEBPEEL_LLM_MODEL || 'gpt-4o-mini',
966
- baseUrl: process.env.WEBPEEL_LLM_BASE_URL || 'https://api.openai.com/v1',
967
- };
968
- }
969
- // Determine format
970
- if (options.html) {
971
- peelOptions.format = 'html';
972
- }
973
- else if (options.text) {
974
- peelOptions.format = 'text';
975
- }
976
- else if (options.clean) {
977
- peelOptions.format = 'clean';
978
- // --clean implies readable mode (article content only, no navs/footers)
979
- peelOptions.readable = true;
980
- }
981
- else {
982
- peelOptions.format = 'markdown';
983
- }
984
- // Fetch the page — route through API if key is configured, otherwise require auth
985
- const fetchCfg = loadConfig();
986
- const fetchApiKey = fetchCfg.apiKey || process.env.WEBPEEL_API_KEY;
987
- const fetchApiUrl = process.env.WEBPEEL_API_URL || 'https://api.webpeel.dev';
988
- let result;
989
- if (fetchApiKey) {
990
- // Use the WebPeel API — no local Playwright needed
991
- result = await fetchViaApi(url, peelOptions, fetchApiKey, fetchApiUrl);
992
- }
993
- else {
994
- // No API key — show helpful message instead of trying local mode
995
- if (spinner)
996
- spinner.fail('Authentication required');
997
- console.error('No API key configured. Run: webpeel auth <your-key>');
998
- console.error('Get a free key at: https://app.webpeel.dev/keys');
999
- await cleanup();
1000
- process.exit(2);
1001
- }
1002
- // Update lastUsed timestamp for named profiles
1003
- if (resolvedProfileName) {
1004
- touchProfile(resolvedProfileName);
1005
- }
1006
- if (spinner) {
1007
- const domainTag = result.domainData
1008
- ? ` [${result.domainData.domain}:${result.domainData.type}]`
1009
- : '';
1010
- spinner.succeed(`Fetched in ${result.elapsed}ms using ${result.method} method${domainTag}`);
1011
- }
1012
- // Show metadata header
1013
- const pageTitle = result.metadata?.title || result.title;
1014
- if (!options.silent && !options.json && pageTitle) {
1015
- const parts = [];
1016
- if (result.metadata?.author)
1017
- parts.push(`by ${result.metadata.author}`);
1018
- if (result.readability?.readingTime)
1019
- parts.push(result.readability.readingTime);
1020
- if (result.tokens)
1021
- parts.push(`${result.tokens.toLocaleString()} tokens`);
1022
- const subtitle = parts.length ? ` · ${parts.join(' · ')}` : '';
1023
- console.error(`\x1b[36m📄 ${pageTitle}${subtitle}\x1b[0m`);
1024
- }
1025
- // Show usage footer for free/anonymous users
1026
- if (usageCheck.usageInfo && !options.silent) {
1027
- showUsageFooter(usageCheck.usageInfo, usageCheck.isAnonymous || false, useStealth);
1028
- }
1029
- // Handle screenshot saving
1030
- if (options.screenshot && result.screenshot) {
1031
- const screenshotPath = typeof options.screenshot === 'string'
1032
- ? options.screenshot
1033
- : 'screenshot.png';
1034
- const screenshotBuffer = Buffer.from(result.screenshot, 'base64');
1035
- writeFileSync(screenshotPath, screenshotBuffer);
1036
- if (!options.silent) {
1037
- console.error(`Screenshot saved to: ${screenshotPath}`);
1038
- }
1039
- // Remove screenshot from JSON output if saving to file
1040
- if (typeof options.screenshot === 'string') {
1041
- delete result.screenshot;
1042
- }
1043
- }
1044
- // Store full result in cache (before budget distillation so cache is reusable)
1045
- if (cacheTtlMs && !cacheDisabled) {
1046
- setCache(url, result, cacheTtlMs, {
1047
- render: options.render,
1048
- stealth: useStealth,
1049
- selector: options.selector,
1050
- format: peelOptions.format,
1051
- budget: null, // Budget excluded — cache stores full content, budget applied post-cache
1052
- readable: options.readable || false,
1053
- });
1054
- }
1055
- // Apply smart budget distillation AFTER caching (cache always stores full content)
1056
- // When --agent is set, always apply budget even with --extract-all (listings will be budgeted
1057
- // separately, but if no listings are found the content itself still needs trimming).
1058
- const skipBudgetForExtract = (options.extractAll || options.scrollExtract !== undefined) && !options.agent;
1059
- let contentTruncated = false;
1060
- if (options.budget && options.budget > 0 && !skipBudgetForExtract) {
1061
- const budgetFormat = peelOptions.format === 'text' ? 'text' : 'markdown';
1062
- const distilled = distillToBudget(result.content, options.budget, budgetFormat);
1063
- if (distilled !== result.content) {
1064
- contentTruncated = true;
1065
- result.content = distilled;
1066
- result.tokens = estimateTokens(distilled);
1067
- }
1068
- }
1069
- // --- BM25 Query-Focused Filtering ---
1070
- if (options.focus && result.content) {
1071
- const { filterByRelevance } = await import('./core/bm25-filter.js');
1072
- const focusResult = filterByRelevance(result.content, { query: options.focus });
1073
- result.content = focusResult.content;
1074
- result.tokens = estimateTokens(focusResult.content);
1075
- if (isJson) {
1076
- result.focusQuery = options.focus;
1077
- result.focusReduction = focusResult.reductionPercent;
1078
- }
1079
- }
1080
- // --- LLM-free Quick Answer ---
1081
- if (options.question && result.content) {
1082
- const { quickAnswer } = await import('./core/quick-answer.js');
1083
- const qa = quickAnswer({
1084
- question: options.question,
1085
- content: result.content,
1086
- url: result.url,
1087
- });
1088
- result.quickAnswer = qa;
1089
- if (!isJson) {
1090
- // Display answer prominently in human-readable mode
1091
- const conf = (qa.confidence * 100).toFixed(0);
1092
- await writeStdout(`\n\x1b[36m📋 ${qa.question}\x1b[0m\n\n`);
1093
- if (qa.answer) {
1094
- await writeStdout(`\x1b[32m💡 Answer (${conf}% confidence):\x1b[0m\n${qa.answer}\n`);
1095
- }
1096
- else {
1097
- await writeStdout(`\x1b[33m💡 No relevant answer found (${conf}% confidence)\x1b[0m\n`);
1098
- }
1099
- if (qa.passages && qa.passages.length > 1) {
1100
- await writeStdout(`\n\x1b[33m📝 Supporting evidence:\x1b[0m\n`);
1101
- for (const p of qa.passages.slice(1, 4)) {
1102
- await writeStdout(` • [${(p.score * 100).toFixed(0)}%] ${p.text.substring(0, 200)}${p.text.length > 200 ? '...' : ''}\n`);
1103
- }
1104
- }
1105
- await writeStdout('\n');
1106
- await cleanup();
1107
- process.exit(0);
1108
- }
1109
- }
1110
- // --- RAG Chunking output (chunks come from pipeline via peelOptions.chunk) ---
1111
- if (result.chunks && result.chunks.length > 0 && !isJson) {
1112
- console.log(`\n${'─'.repeat(60)}`);
1113
- console.log(`📦 ${result.chunks.length} chunks (${options.chunkStrategy || 'section'} strategy)\n`);
1114
- for (const chunk of result.chunks) {
1115
- const sectionLabel = chunk.section ? ` [${chunk.section}]` : '';
1116
- console.log(`── Chunk ${chunk.index + 1}${sectionLabel} (${chunk.tokenCount} tokens, ${chunk.wordCount} words) ──`);
1117
- console.log(chunk.text.substring(0, 200) + (chunk.text.length > 200 ? '...' : ''));
1118
- console.log('');
1119
- }
1120
- }
1121
- // --- #4: Content quality warning ---
1122
- const isHtmlContent = result.contentType ? result.contentType.toLowerCase().includes('html') : true;
1123
- const isRedirect = false; // peel() follows redirects — final result is always 200
1124
- if (result.tokens < 20 && !useRender && isHtmlContent && !isRedirect) {
1125
- const warningMsg = `Low content detected (${result.tokens} tokens). Try: webpeel ${url} --render`;
1126
- if (isJson) {
1127
- result.warning = warningMsg;
1128
- }
1129
- else {
1130
- console.error(`⚠ ${warningMsg}`);
1131
- }
1132
- }
1133
- // --- LLM-based extraction (post-peel) ---
1134
- if (options.llmExtract || options.extractSchema) {
1135
- const { extractWithLLM } = await import('./core/llm-extract.js');
1136
- const llmCfg = loadConfig();
1137
- const llmApiKey = options.llmKey || llmCfg.llm?.apiKey || process.env.OPENAI_API_KEY;
1138
- const llmModel = options.llmModel || llmCfg.llm?.model || process.env.WEBPEEL_LLM_MODEL || 'gpt-4o-mini';
1139
- const llmBaseUrl = options.llmBaseUrl || llmCfg.llm?.baseUrl || process.env.WEBPEEL_LLM_BASE_URL || 'https://api.openai.com/v1';
1140
- const llmInstruction = typeof options.llmExtract === 'string' ? options.llmExtract : undefined;
1141
- // Parse --extract-schema if provided
1142
- let llmSchema;
1143
- if (options.extractSchema) {
1144
- let schemaStr = options.extractSchema;
1145
- if (schemaStr.startsWith('@')) {
1146
- schemaStr = readFileSync(schemaStr.slice(1), 'utf-8');
1147
- }
1148
- try {
1149
- llmSchema = JSON.parse(schemaStr);
1150
- }
1151
- catch {
1152
- exitWithJsonError('--extract-schema must be valid JSON or a valid @file.json path', 'FETCH_FAILED');
1153
- }
1154
- }
1155
- const llmResult = await extractWithLLM({
1156
- content: result.content,
1157
- instruction: llmInstruction,
1158
- schema: llmSchema,
1159
- apiKey: llmApiKey,
1160
- model: llmModel,
1161
- baseUrl: llmBaseUrl,
1162
- });
1163
- // Output structured items as JSON
1164
- await writeStdout(JSON.stringify(llmResult.items, null, 2) + '\n');
1165
- // Show token usage and estimated cost
1166
- if (!options.silent) {
1167
- const { input, output } = llmResult.tokensUsed;
1168
- const costStr = llmResult.cost !== undefined
1169
- ? ` | Est. cost: $${llmResult.cost.toFixed(6)}`
1170
- : '';
1171
- console.error(`\n🤖 LLM extraction: ${llmResult.items.length} items | ${input} input + ${output} output tokens${costStr} | model: ${llmResult.model}`);
1172
- }
1173
- await cleanup();
1174
- process.exit(0);
1175
- }
1176
- // --- Extract-all / pagination / output formatting ---
1177
- const wantsExtractAll = options.extractAll || options.scrollExtract !== undefined;
1178
- const pagesCount = Math.min(Math.max(options.pages || 1, 1), 10);
1179
- if (wantsExtractAll) {
1180
- const { extractListings } = await import('./core/extract-listings.js');
1181
- const { findNextPageUrl } = await import('./core/paginate.js');
1182
- const { findSchemaForUrl, extractWithSchema, loadBundledSchemas } = await import('./core/schema-extraction.js');
1183
- // Resolve which schema to use (explicit --schema flag or auto-detect)
1184
- let activeSchema = null;
1185
- if (options.schema) {
1186
- // Find schema by name or domain match
1187
- const schemaQuery = options.schema.toLowerCase();
1188
- const allSchemas = loadBundledSchemas();
1189
- activeSchema = allSchemas.find(s => s.name.toLowerCase().includes(schemaQuery) ||
1190
- s.domains.some(d => d.toLowerCase().includes(schemaQuery))) ?? null;
1191
- if (!activeSchema && !options.silent) {
1192
- console.error(`Warning: No schema found for "${options.schema}", falling back to auto-detection`);
1193
- }
1194
- }
1195
- else {
1196
- // Auto-detect from URL
1197
- activeSchema = findSchemaForUrl(result.url || url);
1198
- }
1199
- // We need the raw HTML for extraction. Re-fetch with format=html if needed.
1200
- let allListings = [];
1201
- // Fetch HTML for extraction
1202
- const htmlResult = peelOptions.format === 'html'
1203
- ? result
1204
- : await peel(url, { ...peelOptions, format: 'html', maxTokens: undefined });
1205
- // Try schema extraction first, fall back to generic
1206
- if (activeSchema) {
1207
- const schemaListings = extractWithSchema(htmlResult.content, activeSchema, result.url);
1208
- if (schemaListings.length > 0) {
1209
- allListings.push(...schemaListings);
1210
- }
1211
- else {
1212
- // Schema returned nothing — fall back to generic
1213
- allListings.push(...extractListings(htmlResult.content, result.url));
1214
- }
1215
- }
1216
- else {
1217
- allListings.push(...extractListings(htmlResult.content, result.url));
1218
- }
1219
- // Pagination: follow "Next" links
1220
- if (pagesCount > 1) {
1221
- let currentHtml = htmlResult.content;
1222
- let currentUrl = result.url;
1223
- for (let page = 1; page < pagesCount; page++) {
1224
- const nextUrl = findNextPageUrl(currentHtml, currentUrl);
1225
- if (!nextUrl)
1226
- break;
1227
- try {
1228
- const nextResult = await peel(nextUrl, { ...peelOptions, format: 'html', maxTokens: undefined });
1229
- let pageListings;
1230
- if (activeSchema) {
1231
- const schemaPage = extractWithSchema(nextResult.content, activeSchema, nextResult.url);
1232
- pageListings = schemaPage.length > 0
1233
- ? schemaPage
1234
- : extractListings(nextResult.content, nextResult.url);
1235
- }
1236
- else {
1237
- pageListings = extractListings(nextResult.content, nextResult.url);
1238
- }
1239
- allListings.push(...pageListings);
1240
- currentHtml = nextResult.content;
1241
- currentUrl = nextResult.url;
1242
- }
1243
- catch {
1244
- break; // Stop paginating on error
1245
- }
1246
- }
1247
- }
1248
- // Apply budget to listings if requested
1249
- let listingsTruncated = false;
1250
- let totalAvailableListings;
1251
- if (options.budget && options.budget > 0 && allListings.length > 0) {
1252
- const { maxItems, truncated, totalAvailable } = budgetListings(allListings.length, options.budget);
1253
- if (truncated) {
1254
- listingsTruncated = true;
1255
- totalAvailableListings = totalAvailable;
1256
- allListings = allListings.slice(0, maxItems);
1257
- }
1258
- }
1259
- // Output based on format flags
1260
- if (options.csv) {
1261
- const csvOutput = formatListingsCsv(allListings);
1262
- await writeStdout(csvOutput);
1263
- }
1264
- else if (options.table) {
1265
- const { formatTable } = await import('./core/table-format.js');
1266
- const tableRows = allListings.map(item => {
1267
- const row = {};
1268
- for (const [k, v] of Object.entries(item)) {
1269
- if (v !== undefined)
1270
- row[k] = v;
1271
- }
1272
- return row;
1273
- });
1274
- await writeStdout(formatTable(tableRows) + '\n');
1275
- }
1276
- else if (isJson) {
1277
- // Use unified envelope for JSON output
1278
- const structured = allListings;
1279
- const envelope = buildEnvelope(result, {
1280
- cached: false,
1281
- structured,
1282
- truncated: listingsTruncated || undefined,
1283
- totalAvailable: totalAvailableListings,
1284
- });
1285
- // Also include legacy fields for backward compat
1286
- envelope.listings = allListings;
1287
- envelope.count = allListings.length;
1288
- await writeStdout(JSON.stringify(envelope, null, 2) + '\n');
1289
- }
1290
- else {
1291
- // Formatted text output
1292
- if (allListings.length === 0) {
1293
- await writeStdout('No listings found.\n');
1294
- }
1295
- else {
1296
- const truncNote = listingsTruncated && totalAvailableListings
1297
- ? ` (${totalAvailableListings} total — budget limited to ${allListings.length})`
1298
- : '';
1299
- await writeStdout(`Found ${allListings.length} listings${truncNote}:\n\n`);
1300
- allListings.forEach((item, i) => {
1301
- const pricePart = item.price ? ` — ${item.price}` : '';
1302
- const line = `${i + 1}. ${item.title}${pricePart}\n`;
1303
- process.stdout.write(line);
1304
- if (item.link) {
1305
- process.stdout.write(` ${item.link}\n`);
1306
- }
1307
- process.stdout.write('\n');
1308
- });
1309
- }
1310
- }
1311
- }
1312
- else if (options.csv || options.table) {
1313
- // CSV / table output for --extract (CSS selector extraction)
1314
- if (result.extracted) {
1315
- const rows = normaliseExtractedToRows(result.extracted);
1316
- if (options.csv) {
1317
- await writeStdout(formatListingsCsv(rows));
1318
- }
1319
- else {
1320
- const { formatTable } = await import('./core/table-format.js');
1321
- await writeStdout(formatTable(rows) + '\n');
1322
- }
1323
- }
1324
- else {
1325
- console.error('--csv / --table require --extract-all or --extract to produce structured data.');
1326
- }
1327
- }
1328
- else {
1329
- // --- BM25 Schema Template Extraction (no LLM needed) ---
1330
- if (options.schema && result.content) {
1331
- const { getSchemaTemplate: getSchTmpl } = await import('./core/schema-templates.js');
1332
- const schTemplate = getSchTmpl(options.schema);
1333
- if (schTemplate) {
1334
- const { quickAnswer: qa } = await import('./core/quick-answer.js');
1335
- const { smartExtractSchemaFields } = await import('./core/schema-postprocess.js');
1336
- const extracted = smartExtractSchemaFields(result.content, schTemplate.fields, qa, {
1337
- pageTitle: result.title,
1338
- pageUrl: result.url,
1339
- metadata: result.metadata,
1340
- });
1341
- result.extracted = extracted;
1342
- }
1343
- }
1344
- // Output results (default path)
1345
- await outputResult(result, options, {
1346
- cached: false,
1347
- truncated: contentTruncated || undefined,
1348
- });
1349
- }
1350
- // Clean up and exit
1351
- await cleanup();
1352
- process.exit(0);
1353
- }
1354
- catch (error) {
1355
- if (spinner) {
1356
- spinner.fail('Failed to fetch');
1357
- }
1358
- // --- #6: Consistent JSON error output ---
1359
- if (isJson) {
1360
- const errMsg = error instanceof Error ? error.message : 'Unknown error';
1361
- const errCode = classifyErrorCode(error);
1362
- await writeStdout(JSON.stringify({ success: false, error: { type: errCode.toLowerCase(), message: errMsg } }) + '\n');
1363
- await cleanup();
1364
- process.exit(1);
1365
- }
1366
- if (error instanceof Error) {
1367
- console.error('\n' + formatError(error, url || '', options));
1368
- }
1369
- else {
1370
- console.error('\x1b[31m✖ Unknown error occurred\x1b[0m');
1371
- }
1372
- await cleanup();
1373
- process.exit(1);
1374
- }
1375
- }
1376
// Default action: a bare `webpeel <url>` invocation is delegated to runFetch.
program.action(async function handleDefaultFetch(url, options) {
    await runFetch(url, options);
});
1380
// Read subcommand (explicit readable mode).
// Delegates to runFetch with `readable: true`. The token budget defaults to 4000
// and is overridden by a valid, positive `--budget <n>`.
program
    .command('read <url>')
    .description('Read a page in clean reader mode (like browser Reader View)')
    .option('--json', 'Output as JSON')
    .option('-s, --silent', 'Silent mode')
    // Commander calls the parser as (value, previous); passing bare `parseInt`
    // would hand `previous` to it as the radix, so wrap it with an explicit base 10.
    .option('--budget <n>', 'Token budget (default: 4000)', (value) => Number.parseInt(value, 10))
    .option('--focus <query>', 'Focus on content relevant to this query')
    .action(async (url, opts) => {
        // Previously `budget: 4000` was spread after `...opts`, which silently
        // discarded the user's --budget. Fall back only when it is missing/invalid.
        const budget = Number.isFinite(opts.budget) && opts.budget > 0 ? opts.budget : 4000;
        await runFetch(url, {
            ...opts,
            readable: true,
            budget,
        });
    });
1395
// Ask subcommand (question mode): forwards the question to runFetch and
// forces readable mode on top of whatever flags the user passed.
program
    .command('ask <url> <question>')
    .description('Ask a question about any page')
    .option('--json', 'Output as JSON')
    .option('-s, --silent', 'Silent mode')
    .action(async (url, question, opts) => {
        const fetchOptions = { ...opts, question, readable: true };
        await runFetch(url, fetchOptions);
    });
1408
// Search command.
// Two modes:
//   * --site <id>: builds a site-specific search URL, fetches its HTML with peel()
//     and prints the extracted listings (CSV, table, JSON or plain text).
//   * default: calls the WebPeel search API (requires an API key) and prints
//     the results as URLs only, JSON or plain text.
// Every path ends the process via process.exit().
program
    .command('search <query>')
    .description('Search the web (DuckDuckGo by default, or use --site for site-specific search)')
    .option('-n, --count <n>', 'Number of results (1-10)', '5')
    .option('--top <n>', 'Limit results (alias for --count)')
    .option('--provider <provider>', 'Search provider: duckduckgo (default) or brave')
    .option('--search-api-key <key>', 'API key for the search provider (or env WEBPEEL_BRAVE_API_KEY)')
    .option('--site <site>', 'Search a specific site (e.g. ebay, amazon, github). Run "webpeel sites" for full list.')
    .option('--json', 'Output as JSON')
    .option('--urls-only', 'Output only URLs, one per line (pipe-friendly)')
    .option('--table', 'Output site-search results as a formatted table (requires --site)')
    .option('--csv', 'Output site-search results as CSV (requires --site)')
    // Explicit radix: Commander passes (value, previous) to the parser.
    .option('--budget <n>', 'Token budget for site-search result content', (value) => Number.parseInt(value, 10))
    .option('-s, --silent', 'Silent mode')
    .option('--proxy <url>', 'Proxy URL for requests (http://host:port, socks5://user:pass@host:port)')
    .option('--agent', 'Agent mode: sets --json, --silent, and --budget 4000 (override with --budget N)')
    .action(async (query, options) => {
        // --agent sets sensible defaults for AI agents; explicit flags override
        if (options.agent) {
            options.json = true;
            options.silent = true;
            if (options.budget === undefined) {
                options.budget = 4000;
            }
        }
        const isJson = options.json;
        const isSilent = options.silent;
        // --top overrides --count when both are provided
        const count = Number.parseInt(String(options.top ?? options.count), 10) || 5;
        // Drops undefined-valued fields so CSV/table formatters only see real columns.
        const toRows = (items) => items.map((item) => Object.fromEntries(Object.entries(item).filter(([, value]) => value !== undefined)));
        // Check usage quota
        const usageCheck = await checkUsage();
        if (!usageCheck.allowed) {
            console.error(usageCheck.message);
            process.exit(1);
        }
        // ── --site: site-specific structured search ───────────────────────────
        if (options.site) {
            const spinner = isSilent ? null : ora(`Searching ${options.site}...`).start();
            try {
                const { buildSiteSearchUrl } = await import('./core/site-search.js');
                const siteResult = buildSiteSearchUrl(options.site, query);
                // Fetch the raw HTML (needed for listing extraction)
                const htmlResult = await peel(siteResult.url, {
                    format: 'html',
                    timeout: 30000,
                    proxy: options.proxy,
                });
                if (spinner) {
                    spinner.succeed(`Fetched ${siteResult.site} in ${htmlResult.elapsed}ms`);
                }
                // Extract listings from the HTML
                const { extractListings } = await import('./core/extract-listings.js');
                let listings = extractListings(htmlResult.content, siteResult.url);
                // Apply budget if requested
                if (options.budget && options.budget > 0 && listings.length > 0) {
                    const { budgetListings } = await import('./core/budget.js');
                    const { maxItems } = budgetListings(listings.length, options.budget);
                    listings = listings.slice(0, maxItems);
                }
                // Show usage footer
                if (usageCheck.usageInfo && !isSilent) {
                    showUsageFooter(usageCheck.usageInfo, usageCheck.isAnonymous || false, false);
                }
                // Output: --csv wins over --table, which wins over --json
                if (options.csv) {
                    await writeStdout(formatListingsCsv(toRows(listings)));
                }
                else if (options.table) {
                    const { formatTable } = await import('./core/table-format.js');
                    await writeStdout(formatTable(toRows(listings)) + '\n');
                }
                else if (isJson) {
                    const envelope = {
                        site: siteResult.site,
                        query: siteResult.query,
                        url: siteResult.url,
                        count: listings.length,
                        items: listings,
                        elapsed: htmlResult.elapsed,
                    };
                    await writeStdout(JSON.stringify(envelope, null, 2) + '\n');
                }
                else if (listings.length === 0) {
                    await writeStdout('No listings found.\n');
                }
                else {
                    await writeStdout(`Found ${listings.length} listings on ${siteResult.site}:\n\n`);
                    for (const [i, item] of listings.entries()) {
                        const pricePart = item.price ? ` — ${item.price}` : '';
                        process.stdout.write(`${i + 1}. ${item.title}${pricePart}\n`);
                        if (item.link) {
                            process.stdout.write(` ${item.link}\n`);
                        }
                        process.stdout.write('\n');
                    }
                }
                await cleanup();
                process.exit(0);
            }
            catch (error) {
                if (spinner) {
                    spinner.fail('Site search failed');
                }
                if (error instanceof Error) {
                    console.error(`\nError: ${error.message}`);
                }
                else {
                    console.error('\nError: Unknown error occurred');
                }
                await cleanup();
                process.exit(1);
            }
        }
        const spinner = isSilent ? null : ora('Searching...').start();
        try {
            // Route search through the WebPeel API when a key is configured
            const searchCfg = loadConfig();
            const searchApiKey = searchCfg.apiKey || process.env.WEBPEEL_API_KEY;
            const searchApiUrl = process.env.WEBPEEL_API_URL || 'https://api.webpeel.dev';
            if (!searchApiKey) {
                if (spinner) {
                    spinner.fail('Authentication required');
                }
                console.error('No API key configured. Run: webpeel auth <your-key>');
                console.error('Get a free key at: https://app.webpeel.dev/keys');
                process.exit(2);
            }
            const searchParams = new URLSearchParams({ q: query });
            // Clamp the requested result count to the 1-10 range advertised by --count.
            searchParams.set('limit', String(Math.min(Math.max(count, 1), 10)));
            if (options.budget) {
                searchParams.set('budget', String(options.budget));
            }
            const searchRes = await fetch(`${searchApiUrl}/v1/search?${searchParams}`, {
                headers: { Authorization: `Bearer ${searchApiKey}` },
                signal: AbortSignal.timeout(30000),
            });
            if (searchRes.status === 401) {
                if (spinner) {
                    spinner.fail('Authentication failed');
                }
                console.error('API key invalid or expired. Run: webpeel auth <new-key>');
                process.exit(1);
            }
            if (searchRes.status === 429) {
                if (spinner) {
                    spinner.fail('Rate limited');
                }
                console.error('Rate limit exceeded. Check your plan at https://app.webpeel.dev/billing');
                process.exit(1);
            }
            if (!searchRes.ok) {
                const body = await searchRes.text().catch(() => '');
                throw new Error(`Search API error ${searchRes.status}: ${body.slice(0, 200)}`);
            }
            const searchData = await searchRes.json();
            // API returns { success: true, data: { web: [...] } } or { results: [...] }
            const results = searchData.data?.web || searchData.data?.results || searchData.results || [];
            if (spinner) {
                spinner.succeed(`Found ${results.length} results`);
            }
            // Show usage footer for free/anonymous users
            if (usageCheck.usageInfo && !isSilent) {
                showUsageFooter(usageCheck.usageInfo, usageCheck.isAnonymous || false, false);
            }
            if (options.urlsOnly) {
                // Pipe-friendly: one URL per line. Entries without a URL are skipped
                // so downstream tools never receive the literal string "undefined".
                for (const result of results) {
                    if (result.url) {
                        await writeStdout(result.url + '\n');
                    }
                }
            }
            else if (isJson) {
                const jsonStr = JSON.stringify({ query, results, count: results.length }, null, 2);
                await writeStdout(jsonStr + '\n');
            }
            else {
                for (const result of results) {
                    console.log(`\n${result.title}`);
                    console.log(result.url);
                    // Snippets are optional; don't print "undefined" when absent.
                    if (result.snippet) {
                        console.log(result.snippet);
                    }
                }
            }
            process.exit(0);
        }
        catch (error) {
            if (spinner) {
                spinner.fail('Search failed');
            }
            if (error instanceof Error) {
                console.error(`\nError: ${error.message}`);
                const msg = error.message.toLowerCase();
                if (msg.includes('brave') && msg.includes('api key')) {
                    console.error('\n💡 Hint: Set your Brave API key: webpeel config set braveApiKey YOUR_KEY');
                    console.error(' Or use free DuckDuckGo search (default, no key needed).');
                }
                else if (msg.includes('timeout') || msg.includes('timed out')) {
                    console.error('\n💡 Hint: Search timed out. Try a more specific query or try again.');
                }
            }
            else {
                console.error('\nError: Unknown error occurred');
            }
            process.exit(1);
        }
    });
1626
// Sites command — list all supported site templates.
// With --json the (optionally category-filtered) list is dumped as JSON;
// otherwise sites are printed grouped by category.
program
    .command('sites')
    .description('List all sites supported by "webpeel search --site <site>"')
    .option('--json', 'Output as JSON')
    .option('--category <cat>', 'Filter by category (shopping, social, tech, jobs, general, real-estate, food)')
    .action(async (options) => {
        const { listSites } = await import('./core/site-search.js');
        let sites = listSites();
        if (options.category) {
            sites = sites.filter((site) => site.category === options.category);
        }
        if (options.json) {
            await writeStdout(JSON.stringify(sites, null, 2) + '\n');
            process.exit(0);
        }
        // Group by category for pretty output
        const byCategory = new Map();
        for (const site of sites) {
            const bucket = byCategory.get(site.category);
            if (bucket) {
                bucket.push(site);
            }
            else {
                byCategory.set(site.category, [site]);
            }
        }
        const categoryOrder = ['shopping', 'general', 'social', 'tech', 'jobs', 'real-estate', 'food'];
        // Known categories first, in their fixed order; any category missing from the
        // list is appended afterwards instead of being silently left out of the output.
        const knownCategories = categoryOrder.filter((cat) => byCategory.has(cat));
        const extraCategories = [...byCategory.keys()].filter((cat) => !categoryOrder.includes(cat));
        const sortedCategories = [...knownCategories, ...extraCategories];
        console.log('\nWebPeel Site-Aware Search — supported sites\n');
        console.log('Usage: webpeel search --site <id> "<query>"\n');
        for (const cat of sortedCategories) {
            const catSites = byCategory.get(cat);
            // Guard against a site without a string category so charAt() cannot throw.
            const catName = typeof cat === 'string' && cat ? cat : 'other';
            const label = catName.charAt(0).toUpperCase() + catName.slice(1);
            console.log(` ${label}:`);
            for (const s of catSites) {
                console.log(` ${s.id.padEnd(16)} ${s.name}`);
            }
            console.log('');
        }
        process.exit(0);
    });
1664
// Batch command.
// Reads a URL list (one per line, `#` comments and blank lines ignored) from a
// file or from piped stdin, fetches them with peelBatch() and emits the results
// as a JSON array, as one file per URL (--output), or as short previews on stdout.
program
    .command('batch [file]')
    .description('Fetch multiple URLs from file or stdin pipe')
    .option('-c, --concurrency <n>', 'Max concurrent fetches (default: 3)', '3')
    .option('-o, --output <dir>', 'Output directory (one file per URL)')
    .option('--json', 'Output as JSON array')
    .option('-s, --silent', 'Silent mode')
    .option('-r, --render', 'Use headless browser')
    .option('--selector <css>', 'CSS selector to extract')
    .action(async (file, options) => {
        const isJson = options.json;
        const isSilent = options.silent;
        const shouldRender = options.render;
        const selector = options.selector;
        const concurrency = Number.parseInt(options.concurrency, 10) || 3;
        // Shared by the file and stdin paths: trim, drop blanks and `#` comments.
        const parseUrlLines = (text) => text
            .split('\n')
            .map((line) => line.trim())
            .filter((line) => line && !line.startsWith('#'));
        // Filesystem-safe label for a URL. Falls back to the raw line when it does
        // not parse, so one malformed entry cannot abort writing the other results.
        const hostLabel = (rawUrl) => {
            let host;
            try {
                host = new URL(rawUrl).hostname;
            }
            catch {
                host = String(rawUrl).slice(0, 100);
            }
            return host.replace(/[^a-z0-9]/gi, '_');
        };
        // Check usage quota
        const usageCheck = await checkUsage();
        if (!usageCheck.allowed) {
            console.error(usageCheck.message);
            process.exit(1);
        }
        const spinner = isSilent ? null : ora('Loading URLs...').start();
        try {
            // Read URLs from file or stdin
            let urls;
            if (file) {
                try {
                    urls = parseUrlLines(readFileSync(file, 'utf-8'));
                }
                catch (error) {
                    // Keep the underlying fs error reachable for debugging.
                    throw new Error(`Failed to read file: ${file}`, { cause: error });
                }
            }
            else if (!process.stdin.isTTY) {
                // Read from stdin pipe
                const chunks = [];
                for await (const chunk of process.stdin) {
                    chunks.push(chunk);
                }
                urls = parseUrlLines(Buffer.concat(chunks).toString('utf-8'));
            }
            else {
                throw new Error('Provide a file path or pipe URLs via stdin.\n Example: cat urls.txt | webpeel batch');
            }
            if (urls.length === 0) {
                throw new Error(file ? 'No URLs found in file' : 'No URLs found on stdin');
            }
            if (spinner) {
                spinner.text = `Fetching ${urls.length} URLs (concurrency: ${concurrency})...`;
            }
            // Batch fetch
            const results = await peelBatch(urls, {
                concurrency,
                render: shouldRender,
                selector: selector,
            });
            if (spinner) {
                const successCount = results.filter((r) => 'content' in r).length;
                spinner.succeed(`Completed: ${successCount}/${urls.length} successful`);
            }
            // Show usage footer for free/anonymous users
            if (usageCheck.usageInfo && !isSilent) {
                showUsageFooter(usageCheck.usageInfo, usageCheck.isAnonymous || false, false);
            }
            // Output results
            if (isJson) {
                // Same awaited stdout helper the other commands in this file use.
                await writeStdout(JSON.stringify(results, null, 2) + '\n');
            }
            else if (options.output) {
                const { writeFileSync, mkdirSync } = await import('fs');
                const { join } = await import('path');
                // Create output directory
                mkdirSync(options.output, { recursive: true });
                results.forEach((result, i) => {
                    const filename = `${i + 1}_${hostLabel(urls[i])}.md`;
                    const filepath = join(options.output, filename);
                    if ('content' in result) {
                        writeFileSync(filepath, result.content);
                    }
                    else {
                        writeFileSync(filepath, `Error: ${result.error}`);
                    }
                });
                if (!isSilent) {
                    console.log(`\nResults saved to: ${options.output}`);
                }
            }
            else {
                // Print a short preview of each result to stdout
                results.forEach((result, i) => {
                    console.log(`\n=== ${urls[i]} ===\n`);
                    if ('content' in result) {
                        // Only mark the preview as truncated when it actually was.
                        const preview = result.content.length > 500
                            ? `${result.content.slice(0, 500)}...`
                            : result.content;
                        console.log(preview);
                    }
                    else {
                        console.log(`Error: ${result.error}`);
                    }
                });
            }
            await cleanup();
            process.exit(0);
        }
        catch (error) {
            if (spinner) {
                spinner.fail('Batch fetch failed');
            }
            if (error instanceof Error) {
                console.error(`\nError: ${error.message}`);
            }
            else {
                console.error('\nError: Unknown error occurred');
            }
            await cleanup();
            process.exit(1);
        }
    });
1796
// Crawl command: checks the usage quota, runs crawl() from ./core/crawler.js
// with the CLI flags, then prints the pages as JSON or as per-page summaries.
program
    .command('crawl <url>')
    .description('Crawl a website starting from a URL')
    .option('--max-pages <number>', 'Maximum number of pages to crawl (default: 10, max: 100)', (v) => parseInt(v, 10), 10)
    .option('--max-depth <number>', 'Maximum depth to crawl (default: 2, max: 5)', (v) => parseInt(v, 10), 2)
    .option('--allowed-domains <domains...>', 'Only crawl these domains (default: same as starting URL)')
    .option('--exclude <patterns...>', 'Exclude URLs matching these regex patterns')
    .option('--ignore-robots', 'Ignore robots.txt (default: respect robots.txt)')
    .option('--rate-limit <ms>', 'Rate limit between requests in ms (default: 1000)', (v) => parseInt(v, 10), 1000)
    .option('-r, --render', 'Use headless browser for all pages')
    .option('--stealth', 'Use stealth mode for all pages')
    .option('-s, --silent', 'Silent mode (no spinner)')
    .option('--json', 'Output as JSON')
    .option('--resume', 'Resume an interrupted crawl from its last checkpoint')
    .action(async (url, options) => {
        // Refuse to start when the usage quota is exhausted.
        const usageCheck = await checkUsage();
        if (!usageCheck.allowed) {
            console.error(usageCheck.message);
            process.exit(1);
        }
        const { crawl } = await import('./core/crawler.js');
        const spinner = options.silent ? null : ora('Crawling...').start();
        try {
            const crawlOptions = {
                maxPages: options.maxPages,
                maxDepth: options.maxDepth,
                allowedDomains: options.allowedDomains,
                excludePatterns: options.exclude,
                respectRobotsTxt: !options.ignoreRobots,
                rateLimitMs: options.rateLimit,
                render: options.render || false,
                stealth: options.stealth || false,
                resume: options.resume || false,
            };
            const results = await crawl(url, crawlOptions);
            spinner?.succeed(`Crawled ${results.length} pages`);
            // Usage footer for free/anonymous users (suppressed by --silent).
            if (usageCheck.usageInfo && !options.silent) {
                showUsageFooter(usageCheck.usageInfo, usageCheck.isAnonymous || false, options.stealth || false);
            }
            if (options.json) {
                console.log(JSON.stringify({ pages: results, count: results.length }, null, 2));
            }
            else {
                const total = results.length;
                for (const [index, page] of results.entries()) {
                    const origin = page.parent ? ` (from: ${page.parent})` : '';
                    console.log(`\n${'='.repeat(60)}`);
                    console.log(`[${index + 1}/${total}] ${page.title}`);
                    console.log(`URL: ${page.url}`);
                    console.log(`Depth: ${page.depth}${origin}`);
                    console.log(`Links found: ${page.links.length}`);
                    console.log(`Elapsed: ${page.elapsed}ms`);
                    if (page.error) {
                        console.log(`ERROR: ${page.error}`);
                        continue;
                    }
                    // Preview at most 500 characters; add an ellipsis only when cut.
                    const preview = page.markdown.slice(0, 500);
                    const ellipsis = page.markdown.length > 500 ? '...' : '';
                    console.log(`\n${preview}${ellipsis}`);
                }
            }
            await cleanup();
            process.exit(0);
        }
        catch (error) {
            spinner?.fail('Crawl failed');
            const detail = error instanceof Error ? error.message : 'Unknown error occurred';
            console.error(`\nError: ${detail}`);
            await cleanup();
            process.exit(1);
        }
    });
1874
// Map command: runs mapDomain() from ./core/map.js and prints the discovered
// URLs (one per line on stdout, summary on stderr) or the full result as JSON.
program
    .command('map <url>')
    .description('Discover all URLs on a domain (sitemap + crawl)')
    .option('--no-sitemap', 'Skip sitemap.xml discovery')
    .option('--no-crawl', 'Skip homepage crawl')
    .option('--max <n>', 'Maximum URLs to discover (default: 5000)', (v) => parseInt(v, 10), 5000)
    .option('--include <patterns...>', 'Include only URLs matching these regex patterns')
    .option('--exclude <patterns...>', 'Exclude URLs matching these regex patterns')
    .option('--json', 'Output as JSON')
    .option('-s, --silent', 'Silent mode')
    .action(async (url, options) => {
        const { mapDomain } = await import('./core/map.js');
        const spinner = options.silent ? null : ora('Discovering URLs...').start();
        try {
            const mapOptions = {
                useSitemap: options.sitemap !== false,
                crawlHomepage: options.crawl !== false,
                maxUrls: options.max,
                includePatterns: options.include,
                excludePatterns: options.exclude,
            };
            const result = await mapDomain(url, mapOptions);
            spinner?.succeed(`Found ${result.total} URLs in ${result.elapsed}ms`);
            if (options.json) {
                console.log(JSON.stringify(result, null, 2));
                process.exit(0);
            }
            // URLs go to stdout so the list stays pipeable; the summary goes to stderr.
            for (const discovered of result.urls) {
                console.log(discovered);
            }
            if (!options.silent) {
                console.error(`\nTotal: ${result.total} URLs`);
                if (result.sitemapUrls.length > 0) {
                    console.error(`Sitemaps used: ${result.sitemapUrls.join(', ')}`);
                }
            }
            process.exit(0);
        }
        catch (error) {
            spinner?.fail('URL discovery failed');
            const detail = error instanceof Error ? error.message : 'Unknown error';
            console.error(`Error: ${detail}`);
            process.exit(1);
        }
    });
1920
// Watch command - monitor a URL for changes / assertion failures.
// Validates the URL, interval and assertions up front (exiting with code 1 on
// any invalid input), then hands control to watch() from ./core/watch.js.
program
    .command('watch <url>')
    .description('Monitor a URL for changes and assertion failures')
    .option('--interval <duration>', 'Check interval (e.g. 30s, 5m, 1h)', '5m')
    .option('--assert <condition...>', 'Assertion(s) to check (e.g. "status=200" "body.health=ok")')
    .option('--webhook <url>', 'POST this URL on assertion failure or content change')
    .option('-t, --timeout <ms>', 'Per-request timeout in ms', (v) => parseInt(v, 10), 10000)
    .option('--max-checks <n>', 'Stop after N checks (default: unlimited)', (v) => parseInt(v, 10))
    .option('--json', 'Output each check as NDJSON to stdout')
    .option('-s, --silent', 'Only output on failures/changes')
    .option('-r, --render', 'Use browser rendering for checks')
    .action(async (url, options) => {
        const { watch: runWatch, parseDuration, parseAssertion } = await import('./core/watch.js');
        const allowedProtocols = ['http:', 'https:'];
        // Validate URL
        try {
            const { protocol } = new URL(url);
            if (!allowedProtocols.includes(protocol)) {
                console.error('Error: Only HTTP and HTTPS protocols are allowed');
                process.exit(1);
            }
        }
        catch {
            console.error(`Error: Invalid URL format: ${url}`);
            process.exit(1);
        }
        // Parse interval
        let intervalMs;
        try {
            intervalMs = parseDuration(options.interval);
        }
        catch (e) {
            console.error(`Error: ${e.message}`);
            process.exit(1);
        }
        // Parse assertions; the first invalid expression aborts the command.
        const assertions = [];
        for (const expr of Array.isArray(options.assert) ? options.assert : []) {
            try {
                assertions.push(parseAssertion(expr));
            }
            catch (e) {
                console.error(`Error: ${e.message}`);
                process.exit(1);
            }
        }
        // Startup banner on stderr, skipped in JSON and silent modes.
        const showBanner = !(options.json || options.silent);
        if (showBanner) {
            const assertLabel = assertions.length > 0 ? ` with ${assertions.length} assertion(s)` : '';
            process.stderr.write(`Watching ${url} every ${options.interval}${assertLabel}. Press Ctrl+C to stop.\n`);
        }
        try {
            await runWatch({
                url,
                intervalMs,
                assertions,
                webhookUrl: options.webhook,
                timeout: options.timeout,
                maxChecks: options.maxChecks,
                render: options.render || false,
                json: options.json || false,
                silent: options.silent || false,
            });
        }
        catch (error) {
            const detail = error instanceof Error ? error.message : 'Unknown error';
            console.error(`Error: ${detail}`);
            process.exit(1);
        }
        process.exit(0);
    });
1995
// Diff command - semantic diff against last snapshot.
// Validates the URL, calls diffUrl() from ./core/diff.js and prints the result
// either as JSON or as a human-readable change list.
// NOTE(review): --last and --against are declared but this handler never forwards
// them to diffUrl(); --against now prints a warning instead of being silently ignored.
program
    .command('diff <url>')
    .description('Show semantic diff between current content and the last tracked snapshot')
    .option('--last', 'Compare against last tracked snapshot (default)')
    .option('--against <snapshot-url>', 'Compare against the snapshot stored for a different URL')
    .option('--fields <fields>', 'For JSON responses: only diff these fields (comma-separated dot-notation)')
    .option('--json', 'Output diff as JSON')
    .option('-r, --render', 'Use browser rendering')
    .option('-t, --timeout <ms>', 'Request timeout in ms', (v) => parseInt(v, 10), 30000)
    .option('-s, --silent', 'Silent mode (no spinner)')
    .action(async (url, options) => {
        const isJson = options.json;
        // Validate URL
        try {
            const parsed = new URL(url);
            if (!['http:', 'https:'].includes(parsed.protocol)) {
                if (isJson) {
                    await writeStdout(JSON.stringify({ success: false, error: { type: 'invalid_url', message: 'Only HTTP and HTTPS protocols are allowed' } }) + '\n');
                }
                else {
                    console.error('Error: Only HTTP and HTTPS protocols are allowed');
                }
                process.exit(1);
            }
        }
        catch {
            if (isJson) {
                await writeStdout(JSON.stringify({ success: false, error: { type: 'invalid_url', message: `Invalid URL format: ${url}` } }) + '\n');
            }
            else {
                console.error(`Error: Invalid URL format: ${url}`);
            }
            process.exit(1);
        }
        if (options.against && !options.silent) {
            // stderr keeps --json output on stdout machine-readable.
            console.error('Warning: --against is not applied by this command; comparing against the last snapshot of the given URL.');
        }
        const spinner = options.silent ? null : ora('Fetching and diffing...').start();
        try {
            const { diffUrl } = await import('./core/diff.js');
            const fields = options.fields
                ? options.fields.split(',').map((f) => f.trim()).filter(Boolean)
                : undefined;
            const result = await diffUrl(url, {
                render: options.render || false,
                timeout: options.timeout,
                fields,
            });
            if (spinner) {
                // Was "Diff completed in CHANGED" / "in no change": a leftover "in <elapsed>" template.
                spinner.succeed(`Diff completed: ${result.changed ? 'changes detected' : 'no change'}`);
            }
            if (isJson) {
                await writeStdout(JSON.stringify(result, null, 2) + '\n');
            }
            else {
                // Human-readable output
                const ago = result.previousTimestamp
                    ? formatRelativeTime(new Date(result.previousTimestamp))
                    : 'unknown';
                console.log(`\nComparing ${result.url} (now vs ${ago})\n`);
                if (!result.changed) {
                    console.log(' No changes detected.');
                }
                else {
                    for (const change of result.changes) {
                        const label = change.field ?? change.path ?? '(unknown)';
                        if (change.type === 'modified') {
                            console.log(` Modified: ${label} ${change.before} → ${change.after}`);
                        }
                        else if (change.type === 'added') {
                            console.log(` Added: ${label} ${change.after}`);
                        }
                        else if (change.type === 'removed') {
                            console.log(` Removed: ${label} ${change.before}`);
                        }
                    }
                }
                console.log(`\nSummary: ${result.summary}`);
            }
            await cleanup();
            process.exit(0);
        }
        catch (error) {
            if (spinner) {
                spinner.fail('Diff failed');
            }
            const message = error instanceof Error ? error.message : 'Unknown error';
            if (isJson) {
                // `success: false` is additive so consumers can detect failure the same
                // way as for the validation errors above; `error`/`code` are unchanged.
                await writeStdout(JSON.stringify({
                    success: false,
                    error: message,
                    code: 'FETCH_FAILED',
                }) + '\n');
            }
            else {
                console.error(`Error: ${message}`);
            }
            await cleanup();
            process.exit(1);
        }
    });
2091
// auth command — set and verify API key in one step.
// With a key argument: saves it, then verifies it against `${apiUrl}/v1/usage`.
// Without one: re-verifies the key already stored in the config.
// Exit codes: 0 verified, 2 missing/invalid key; unverifiable keys (non-401 error
// status or network failure) stay saved and the handler returns normally.
program
    .command('auth [key]')
    .description('Set and verify your WebPeel API key')
    .option('--json', 'Output as JSON')
    .action(async (key, opts) => {
        const config = loadConfig();
        // Remembered so a rejected new key does not wipe a previously stored one.
        const previousKey = config.apiKey;
        let apiKey = key;
        // If no key provided, show current auth status (or error if not set)
        if (!apiKey) {
            if (!previousKey) {
                if (opts.json) {
                    console.log(JSON.stringify({ authenticated: false, error: 'No API key set. Run: webpeel auth <key>' }));
                }
                else {
                    console.error('No API key set. Run: webpeel auth <your-key>');
                    console.error('Get a free key at: https://app.webpeel.dev/keys');
                }
                process.exit(2);
            }
            // Fall through to verify current key
            apiKey = previousKey;
        }
        // Save the key first
        config.apiKey = apiKey;
        saveConfig(config);
        // Verify by calling the API
        const apiUrl = (process.env.WEBPEEL_API_URL || 'https://api.webpeel.dev');
        try {
            const res = await fetch(`${apiUrl}/v1/usage`, {
                headers: { Authorization: `Bearer ${apiKey}` },
                signal: AbortSignal.timeout(8000),
            });
            if (res.status === 401) {
                if (opts.json) {
                    console.log(JSON.stringify({ authenticated: false, error: 'Invalid API key' }));
                }
                else {
                    console.error('❌ Invalid API key. Get a valid key at: https://app.webpeel.dev/keys');
                }
                // Revert the key save: restore the earlier key when a *different* new key
                // was rejected; clear it when the stored key itself is the invalid one.
                config.apiKey = previousKey && previousKey !== apiKey ? previousKey : undefined;
                saveConfig(config);
                process.exit(2);
            }
            if (res.ok) {
                const data = await res.json();
                // The usage payload is read defensively across several field spellings.
                const plan = data.tier || (typeof data.plan === 'string' ? data.plan : data.plan?.tier) || 'free';
                const used = data.used ?? data.totalRequests ?? data.weekly?.used ?? 0;
                const limit = data.limit ?? data.weeklyLimit ?? data.weekly?.limit ?? 500;
                const remaining = limit - used;
                // Only a short prefix of the key is ever echoed back.
                const keyPrefix = apiKey.slice(0, 12) + '...';
                if (opts.json) {
                    console.log(JSON.stringify({
                        authenticated: true,
                        plan,
                        used,
                        limit,
                        remaining,
                        keyPrefix,
                    }));
                }
                else {
                    console.log(`✅ API key verified`);
                    console.log(` Plan: ${plan}`);
                    console.log(` Usage: ${used} / ${limit} this week (${remaining} remaining)`);
                    console.log(` Key: ${keyPrefix}`);
                }
                process.exit(0);
            }
            // Non-200 non-401 — still save key but warn
            if (opts.json) {
                console.log(JSON.stringify({ authenticated: 'unknown', warning: `API returned ${res.status}` }));
            }
            else {
                console.log(`⚠️ Key saved but couldn't verify (API returned ${res.status})`);
            }
        }
        catch (e) {
            // A thrown value is not guaranteed to be an Error instance.
            const detail = e instanceof Error ? e.message : String(e);
            if (opts.json) {
                console.log(JSON.stringify({ authenticated: 'unknown', warning: 'Network error', error: detail }));
            }
            else {
                console.log(`⚠️ Key saved but couldn't verify (network error: ${detail})`);
            }
        }
    });
2177
// status command — check auth status and API health.
// Queries `${apiUrl}/health` (best effort) and `${apiUrl}/v1/usage` in parallel
// and reports plan/usage. Exit codes: 2 when no key is configured or the key is
// rejected (401), 1 when the API cannot be reached or usage cannot be verified.
program
    .command('status')
    .description('Check authentication status and API usage')
    .option('--json', 'Output as JSON')
    .action(async (opts) => {
        const config = loadConfig();
        // Same key resolution as the search and doctor commands: config first, then env.
        const key = config.apiKey || process.env.WEBPEEL_API_KEY;
        if (!key) {
            if (opts.json) {
                console.log(JSON.stringify({ authenticated: false, error: 'No API key configured' }));
            }
            else {
                console.error('Not authenticated. Run: webpeel auth <your-key>');
                console.error('Get a free key at: https://app.webpeel.dev/keys');
            }
            process.exit(2);
        }
        const apiUrl = (process.env.WEBPEEL_API_URL || 'https://api.webpeel.dev');
        try {
            const [healthRes, usageRes] = await Promise.all([
                // A failed health probe must not fail the whole command; treat it as offline.
                fetch(`${apiUrl}/health`, { signal: AbortSignal.timeout(5000) }).catch(() => null),
                fetch(`${apiUrl}/v1/usage`, {
                    headers: { Authorization: `Bearer ${key}` },
                    signal: AbortSignal.timeout(8000),
                }),
            ]);
            const apiOnline = healthRes?.ok ?? false;
            if (usageRes.status === 401) {
                if (opts.json) {
                    console.log(JSON.stringify({ authenticated: false, apiOnline, error: 'API key is invalid or expired' }));
                }
                else {
                    console.error('❌ API key is invalid. Run: webpeel auth <new-key>');
                }
                process.exit(2);
            }
            if (!usageRes.ok) {
                // Previously any other error status was reported as "Authenticated" with
                // invented defaults (free plan, 0 / 500). Report it as unverified instead,
                // using the same `authenticated: 'unknown'` shape as the auth command.
                if (opts.json) {
                    console.log(JSON.stringify({ authenticated: 'unknown', apiOnline, warning: `API returned ${usageRes.status}` }));
                }
                else {
                    console.error(`⚠️ Could not verify API key (API returned ${usageRes.status})`);
                }
                process.exit(1);
            }
            const usage = await usageRes.json();
            // The usage payload is read defensively across several field spellings.
            const plan = usage?.tier || (typeof usage?.plan === 'string' ? usage?.plan : usage?.plan?.tier) || 'free';
            const used = usage?.used ?? usage?.totalRequests ?? usage?.weekly?.used ?? 0;
            const limit = usage?.limit ?? usage?.weeklyLimit ?? usage?.weekly?.limit ?? 500;
            const remaining = limit - used;
            if (opts.json) {
                console.log(JSON.stringify({
                    authenticated: true,
                    apiOnline,
                    plan,
                    used,
                    limit,
                    remaining,
                    keyPrefix: key.slice(0, 12) + '...',
                }));
            }
            else {
                console.log(`✅ Authenticated`);
                console.log(` API: ${apiOnline ? '🟢 online' : '🔴 offline'}`);
                console.log(` Plan: ${plan}`);
                console.log(` Usage: ${used} / ${limit} this week (${remaining} remaining)`);
                console.log(` Key: ${key.slice(0, 12)}...`);
            }
        }
        catch (e) {
            // A thrown value is not guaranteed to be an Error instance.
            const detail = e instanceof Error ? e.message : String(e);
            if (opts.json) {
                console.log(JSON.stringify({ authenticated: 'unknown', error: detail }));
            }
            else {
                console.error(`❌ Could not reach API: ${detail}`);
            }
            process.exit(1);
        }
    });
2248
// doctor command — run the installation diagnostics in order: configuration,
// API health, API key validity, a plain fetch, and a YouTube fetch.
//
// Health, key and fetch failures count as failed checks; the command then
// exits 1 instead of announcing that everything is ready. The YouTube probe
// stays advisory (warning only).
program
    .command('doctor')
    .description('Diagnose WebPeel installation (API key, connectivity, fetch test)')
    .action(async () => {
        const cfg = loadConfig();
        const apiKey = cfg.apiKey || process.env.WEBPEEL_API_KEY;
        const apiUrl = process.env.WEBPEEL_API_URL || 'https://api.webpeel.dev';
        console.log('WebPeel Doctor\n');
        console.log(`Version: ${cliVersion}`);
        console.log(`API URL: ${apiUrl}`);
        console.log(`API Key: ${apiKey ? apiKey.slice(0, 12) + '...' : '❌ Not configured'}`);
        if (!apiKey) {
            console.log('\n❌ No API key. Run: webpeel auth <your-key>');
            console.log(' Get a free key at: https://app.webpeel.dev/keys');
            process.exit(1);
        }
        const authHeaders = { Authorization: `Bearer ${apiKey}` };
        let failedChecks = 0;
        // Check API connectivity
        console.log('\nChecking API connectivity...');
        try {
            const healthRes = await fetch(`${apiUrl}/health`, { signal: AbortSignal.timeout(10000) });
            if (!healthRes.ok) {
                // A reachable but unhealthy API must not be shown as ✅.
                throw new Error(`HTTP ${healthRes.status}`);
            }
            const health = await healthRes.json();
            console.log(`API Health: ✅ ${health.status || 'ok'} (uptime: ${Math.round((health.uptime || 0) / 60)}min)`);
        }
        catch (err) {
            failedChecks += 1;
            console.log(`API Health: ❌ Cannot reach ${apiUrl} (${err.message})`);
        }
        // Check API key validity
        console.log('Checking API key...');
        try {
            const usageRes = await fetch(`${apiUrl}/v1/usage`, {
                headers: authHeaders,
                signal: AbortSignal.timeout(10000),
            });
            if (usageRes.ok) {
                const usage = await usageRes.json();
                const plan = usage?.tier || (typeof usage?.plan === 'string' ? usage?.plan : usage?.plan?.tier) || 'free';
                const used = usage?.used ?? usage?.totalRequests ?? usage?.weekly?.used ?? 0;
                const limit = usage?.limit ?? usage?.weeklyLimit ?? usage?.weekly?.limit ?? 500;
                console.log(`API Key: ✅ Valid (${plan} plan, ${used}/${limit} used this week)`);
            }
            else if (usageRes.status === 401) {
                failedChecks += 1;
                console.log('API Key: ❌ Invalid or expired. Run: webpeel auth <new-key>');
            }
            else {
                failedChecks += 1;
                console.log(`API Key: ⚠️ Unexpected response (${usageRes.status})`);
            }
        }
        catch (err) {
            failedChecks += 1;
            console.log(`API Key: ❌ Check failed (${err.message})`);
        }
        // Quick fetch test
        console.log('Testing fetch...');
        try {
            // Encode the target so it is a well-formed query value, matching
            // how the YouTube probe below builds its URL.
            const testRes = await fetch(`${apiUrl}/v1/fetch?url=${encodeURIComponent('https://example.com')}`, {
                headers: authHeaders,
                signal: AbortSignal.timeout(15000),
            });
            if (testRes.ok) {
                const data = await testRes.json();
                console.log(`Fetch Test: ✅ OK (${data.tokenCount || data.tokens || '?'} tokens, ${data.fetchTimeMs || data.elapsed || '?'}ms)`);
            }
            else {
                failedChecks += 1;
                console.log(`Fetch Test: ❌ Failed (${testRes.status})`);
            }
        }
        catch (err) {
            failedChecks += 1;
            console.log(`Fetch Test: ❌ Failed (${err.message})`);
        }
        // Check YouTube (advisory: problems are reported as warnings only)
        console.log('Testing YouTube...');
        try {
            const ytRes = await fetch(`${apiUrl}/v1/fetch?url=${encodeURIComponent('https://www.youtube.com/watch?v=dQw4w9WgXcQ')}`, {
                headers: authHeaders,
                signal: AbortSignal.timeout(15000),
            });
            if (ytRes.ok) {
                const data = await ytRes.json();
                const hasContent = (data.content || '').length > 100;
                console.log(`YouTube: ${hasContent ? '✅' : '⚠️'} ${hasContent ? `Content extracted (${data.tokenCount || data.tokens || '?'} tokens)` : 'Content limited'}`);
            }
            else {
                console.log(`YouTube: ⚠️ Response ${ytRes.status}`);
            }
        }
        catch (err) {
            console.log(`YouTube: ⚠️ ${err.message}`);
        }
        if (failedChecks > 0) {
            console.log(`\n❌ ${failedChecks} check(s) failed. See the details above.`);
            process.exit(1);
        }
        console.log('\n✅ WebPeel is ready to use!');
        console.log(' Try: webpeel "https://news.ycombinator.com" --json');
    });
2338
// login command — hands off to handleLogin(); exits 0 when it resolves and
// exits 1 (after printing the reason) when it rejects.
program
    .command('login')
    .description('Authenticate the CLI with your API key')
    .action(async () => {
        try {
            await handleLogin();
            process.exit(0);
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : 'Unknown error';
            console.error(`Error: ${reason}`);
            process.exit(1);
        }
    });
2351
// whoami command — print the saved API key (first 7 and last 4 characters
// only) and, when present, the capitalised plan tier.
program
    .command('whoami')
    .description('Show your current authentication status')
    .action(async () => {
        try {
            const { loadConfig: readConfig } = await import('./cli-auth.js');
            const settings = readConfig();
            if (settings.apiKey) {
                const head = settings.apiKey.slice(0, 7);
                const tail = settings.apiKey.slice(-4);
                console.log(`Logged in with API key: ${head + '...' + tail}`);
                if (settings.planTier) {
                    const [initial] = [settings.planTier.charAt(0)];
                    console.log(`Plan: ${initial.toUpperCase() + settings.planTier.slice(1)}`);
                }
                console.log(`Config: ~/.webpeel/config.json`);
            }
            else {
                console.log('Not logged in. Run `webpeel login` to authenticate.');
            }
            process.exit(0);
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : 'Unknown error';
            console.error(`Error: ${reason}`);
            process.exit(1);
        }
    });
2377
// logout command — calls handleLogout() synchronously; exits 0 on success and
// 1 (with the error message on stderr) if it throws.
program
    .command('logout')
    .description('Clear your saved credentials')
    .action(() => {
        try {
            handleLogout();
            process.exit(0);
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : 'Unknown error';
            console.error(`Error: ${reason}`);
            process.exit(1);
        }
    });
2390
// usage command — hands off to handleUsage(); exits 0 when it resolves and
// exits 1 (after printing the reason) when it rejects.
program
    .command('usage')
    .description('Show your current usage and quota')
    .action(async () => {
        try {
            await handleUsage();
            process.exit(0);
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : 'Unknown error';
            console.error(`Error: ${reason}`);
            process.exit(1);
        }
    });
2403
// serve command — lazily load the API server module and start it on the
// requested port. The port is validated first so an invalid value produces a
// clear CLI error instead of reaching startServer() as NaN or out of range.
program
    .command('serve')
    .description('Start API server')
    .option('-p, --port <port>', 'Port number', '3000')
    .action(async (options) => {
        const port = parseInt(options.port, 10);
        // 0 is allowed: it asks the OS to pick any free port.
        if (!Number.isInteger(port) || port < 0 || port > 65535) {
            console.error(`Error: invalid port "${options.port}". Expected an integer between 0 and 65535.`);
            process.exit(1);
        }
        const { startServer } = await import('./server/app.js');
        startServer({ port });
    });
2411
// mcp command — importing the MCP server module is the whole action; the
// handler does nothing else.
program
    .command('mcp')
    .description('Start MCP server for Claude Desktop / Cursor')
    .action(async function launchMcpServer() {
        await import('./mcp/server.js');
    });
2417
// Pipe command — always JSON, no UI (agent-friendly).
//
// Numeric options use an explicit base-10 parser. Commander calls an option
// parser as (value, previousValue); handing it `parseInt` directly makes the
// previous value act as the radix, which is only correct by accident.
program
    .command('pipe <url>')
    .description('Pipe-friendly fetch (always JSON, no UI). Alias for: webpeel <url> --json --silent')
    .option('-r, --render', 'Use headless browser')
    .option('--stealth', 'Stealth mode')
    .option('--budget <n>', 'Token budget', (value) => parseInt(value, 10))
    .option('--clean', 'Clean format for AI')
    .option('-q, --question <q>', 'Quick answer')
    .option('--proxy <url>', 'Proxy URL')
    .option('--timeout <ms>', 'Timeout in ms', (value) => parseInt(value, 10))
    .option('-s, --silent', 'Silent mode (always on for pipe, accepted for compatibility)')
    .action(async (url, opts) => {
        // Force JSON + silent — always, unconditionally
        opts.json = true;
        opts.silent = true;
        await runFetch(url, opts);
    });
2435
// Config command — webpeel config [get|set] [key] [value]
//
//   webpeel config            overview of all settings (same as `config list`)
//   webpeel config get <key>  print one value (secrets are masked)
//   webpeel config set <k> <v>  store one of the SETTABLE_KEYS
//   webpeel config <key>      legacy shorthand for `get`
//
// Keys may use dot-notation for nested values (e.g. llm.apiKey). Only own
// properties are ever read or written, so names such as `constructor` or
// `__proto__` are treated as unknown keys.
program
    .command('config')
    .description('View or update CLI configuration')
    .argument('[action]', '"list", "get <key>", "set <key> <value>", or omit for overview')
    .argument('[key]', 'Config key')
    .argument('[value]', 'Value to set')
    .action(async (action, key, value) => {
        const config = loadConfig();
        // Settable config keys (safe for user modification)
        // Supports dot-notation for nested keys (e.g., llm.apiKey)
        const SETTABLE_KEYS = {
            apiKey: 'WebPeel API key (tip: use `webpeel auth <key>` to set and verify in one step)',
            braveApiKey: 'Brave Search API key',
            'llm.apiKey': 'LLM API key for AI-powered extraction (OpenAI-compatible)',
            'llm.model': 'LLM model name (default: gpt-4o-mini)',
            'llm.baseUrl': 'LLM API base URL (default: https://api.openai.com/v1)',
        };
        /** Render a value for display, hiding the middle of secret values. */
        const maskSecret = (k, v) => {
            if (!v)
                return '(not set)';
            if (k === 'apiKey' || k === 'braveApiKey' || k === 'llm.apiKey') {
                const secret = String(v);
                // With 8 characters or fewer, "first 4 + last 4" would print
                // the entire secret, so show nothing of it.
                if (secret.length <= 8)
                    return '****';
                return secret.slice(0, 4) + '...' + secret.slice(-4);
            }
            return String(v);
        };
        /** Get a potentially nested value using dot-notation (e.g., "llm.apiKey") */
        function getNestedValue(obj, path) {
            const parts = path.split('.');
            let cur = obj;
            for (const part of parts) {
                if (cur == null || typeof cur !== 'object')
                    return undefined;
                // Own properties only: never surface inherited members.
                if (!Object.hasOwn(cur, part))
                    return undefined;
                cur = cur[part];
            }
            return cur;
        }
        /** Set a potentially nested value using dot-notation (e.g., "llm.apiKey") */
        function setNestedValue(obj, path, val) {
            const parts = path.split('.');
            let cur = obj;
            for (let i = 0; i < parts.length - 1; i++) {
                const part = parts[i];
                if (cur[part] == null || typeof cur[part] !== 'object')
                    cur[part] = {};
                cur = cur[part];
            }
            cur[parts[parts.length - 1]] = val;
        }
        /** Look a key up as a dotted path first, then as a literal top-level key. */
        const readValue = (lookup) => getNestedValue(config, lookup)
            ?? (Object.hasOwn(config, lookup) ? config[lookup] : undefined);
        if (!action || action === 'list') {
            // Show all config (also triggered by `webpeel config list`)
            console.log('WebPeel CLI Configuration');
            console.log(` Config file: ~/.webpeel/config.json`);
            console.log('');
            console.log(` apiKey: ${maskSecret('apiKey', config.apiKey)}`);
            console.log(` braveApiKey: ${maskSecret('braveApiKey', config.braveApiKey)}`);
            console.log(` planTier: ${config.planTier || 'free'}`);
            console.log(` anonymousUsage: ${config.anonymousUsage}`);
            console.log('');
            console.log(' LLM:');
            console.log(` llm.apiKey: ${maskSecret('llm.apiKey', config.llm?.apiKey)}`);
            console.log(` llm.model: ${config.llm?.model || '(not set, default: gpt-4o-mini)'}`);
            console.log(` llm.baseUrl: ${config.llm?.baseUrl || '(not set, default: https://api.openai.com/v1)'}`);
            const stats = cacheStats();
            console.log('');
            console.log(' Cache:');
            console.log(` entries: ${stats.entries}`);
            console.log(` size: ${(stats.sizeBytes / 1024).toFixed(1)} KB`);
            console.log(` dir: ${stats.dir}`);
            console.log('');
            console.log(' Settable keys: ' + Object.keys(SETTABLE_KEYS).join(', '));
            console.log(' Usage: webpeel config set <key> <value>');
            if (!config.apiKey) {
                console.log('');
                console.log(' Tip: Run `webpeel auth <your-key>` to set and verify your API key.');
                console.log(' Get a free key at: https://app.webpeel.dev/keys');
            }
            process.exit(0);
        }
        if (action === 'set') {
            if (!key) {
                console.error('Usage: webpeel config set <key> <value>');
                console.error('Settable keys: ' + Object.keys(SETTABLE_KEYS).join(', '));
                process.exit(1);
            }
            // `in` would also accept inherited names (constructor, toString, ...).
            if (!Object.hasOwn(SETTABLE_KEYS, key)) {
                console.error(`Cannot set "${key}". Settable keys: ${Object.keys(SETTABLE_KEYS).join(', ')}`);
                process.exit(1);
            }
            if (!value) {
                console.error(`Usage: webpeel config set ${key} <value>`);
                process.exit(1);
            }
            setNestedValue(config, key, value);
            saveConfig(config);
            console.log(`✓ ${key} saved`);
            process.exit(0);
        }
        if (action === 'get') {
            if (!key) {
                console.error('Usage: webpeel config get <key>');
                process.exit(1);
            }
            const val = readValue(key);
            if (val !== undefined) {
                console.log(maskSecret(key, String(val)));
            }
            else {
                console.error(`Unknown config key: ${key}`);
                process.exit(1);
            }
            process.exit(0);
        }
        // Legacy: `webpeel config <key>` — treat action as the key name
        const val = readValue(action);
        if (val !== undefined) {
            console.log(maskSecret(action, String(val)));
        }
        else {
            console.error(`Unknown config key or action: ${action}`);
            console.error('Usage: webpeel config [get|set] [key] [value]');
            process.exit(1);
        }
        process.exit(0);
    });
2557
// Cache management command.
//   stats — print entry count, size in KB and the cache directory
//   clear — clearCache(false), reported as removing expired entries
//   purge — clearCache(true), reported as removing every entry
// Any other action prints an error and exits 1.
program
    .command('cache')
    .description('Manage the local response cache')
    .argument('<action>', '"stats", "clear", or "purge" (clear expired / clear all)')
    .action(async (action) => {
        if (action === 'stats') {
            const info = cacheStats();
            const sizeKb = (info.sizeBytes / 1024).toFixed(1);
            console.log(`Cache: ${info.entries} entries, ${sizeKb} KB`);
            console.log(`Location: ${info.dir}`);
        }
        else if (action === 'clear') {
            const removed = clearCache(false);
            console.log(`Cleared ${removed} expired cache entries.`);
        }
        else if (action === 'purge') {
            const removed = clearCache(true);
            console.log(`Purged all ${removed} cache entries.`);
        }
        else {
            console.error('Unknown cache action. Use: stats, clear, or purge');
            process.exit(1);
        }
        process.exit(0);
    });
2586
// Brand command — fetch a page with a few CSS-selector extractions and print
// a JSON document combining them with colours and fonts derived from the
// page content.
program
    .command('brand <url>')
    .description('Extract branding and design system from a URL')
    .option('-s, --silent', 'Silent mode (no spinner)')
    .option('--json', 'Output as JSON (default)')
    .action(async (url, options) => {
        let spinner = null;
        if (!options.silent) {
            spinner = ora('Extracting branding...').start();
        }
        try {
            const selectors = {
                primaryColor: 'meta[name="theme-color"]',
                title: 'title',
                logo: 'img[class*="logo"], img[alt*="logo"]',
            };
            const page = await peel(url, { extract: { selectors } });
            if (spinner) {
                spinner.succeed(`Extracted branding in ${page.elapsed}ms`);
            }
            // Assemble the report from page metadata plus derived colours/fonts.
            const report = {
                url: page.url,
                title: page.title,
                colors: extractColors(page.content),
                fonts: extractFonts(page.content),
                extracted: page.extracted,
                metadata: page.metadata,
            };
            console.log(JSON.stringify(report, null, 2));
            await cleanup();
            process.exit(0);
        }
        catch (err) {
            if (spinner) {
                spinner.fail('Branding extraction failed');
            }
            const reason = err instanceof Error ? err.message : 'Unknown error';
            console.error(`Error: ${reason}`);
            await cleanup();
            process.exit(1);
        }
    });
2628
// Track command — fetch a URL with change tracking enabled and report its
// fingerprint and change status, as JSON or as plain text.
program
    .command('track <url>')
    .description('Track changes on a URL (saves snapshot for use with `webpeel diff`)')
    .option('-s, --silent', 'Silent mode (no spinner)')
    .option('--json', 'Output as JSON')
    .option('-r, --render', 'Use browser rendering')
    .action(async (url, options) => {
        let spinner = null;
        if (!options.silent) {
            spinner = ora('Fetching and tracking...').start();
        }
        try {
            // changeTracking: true saves the snapshot to ~/.webpeel/snapshots/ so
            // that a later `webpeel diff` has something to compare against.
            const page = await peel(url, {
                render: options.render || false,
                changeTracking: true,
            });
            if (spinner) {
                spinner.succeed(`Tracked in ${page.elapsed}ms`);
            }
            const tracking = page.changeTracking;
            const changeStatus = tracking?.changeStatus ?? 'new';
            const previousScrapeAt = tracking?.previousScrapeAt ?? null;
            if (options.json) {
                const payload = {
                    url: page.url,
                    title: page.title,
                    fingerprint: page.fingerprint,
                    tokens: page.tokens,
                    contentType: page.contentType,
                    changeStatus,
                    previousScrapeAt,
                    lastChecked: new Date().toISOString(),
                };
                await writeStdout(JSON.stringify(payload, null, 2) + '\n');
            }
            else {
                const summary = [
                    `URL: ${page.url}`,
                    `Title: ${page.title}`,
                    `Fingerprint: ${page.fingerprint}`,
                    `Tokens: ${page.tokens}`,
                    `Status: ${changeStatus}`,
                ];
                if (previousScrapeAt) {
                    summary.push(`Previous check: ${previousScrapeAt}`);
                }
                for (const line of summary) {
                    console.log(line);
                }
                console.log(`Last checked: ${new Date().toISOString()}`);
                console.log('\nSnapshot saved. Run `webpeel diff <url> --last` to compare future changes.');
            }
            await cleanup();
            process.exit(0);
        }
        catch (err) {
            if (spinner) {
                spinner.fail('Tracking failed');
            }
            const reason = err instanceof Error ? err.message : 'Unknown error';
            console.error(`Error: ${reason}`);
            await cleanup();
            process.exit(1);
        }
    });
2683
// Summarize command — AI-powered summary of a single URL. An LLM key is
// mandatory here (from --llm-key or OPENAI_API_KEY); without one the command
// exits 1 before doing any work.
program
    .command('summarize <url>')
    .description('Generate an AI-powered summary of a URL')
    .option('--llm-key <key>', 'LLM API key (or use OPENAI_API_KEY env var)')
    .option('--llm-model <model>', 'LLM model to use (default: gpt-4o-mini)')
    .option('--llm-base-url <url>', 'LLM API base URL (default: https://api.openai.com/v1)')
    .option('--prompt <prompt>', 'Custom summary prompt')
    .option('-s, --silent', 'Silent mode (no spinner)')
    .option('--json', 'Output as JSON')
    .action(async (url, options) => {
        const llmApiKey = options.llmKey || process.env.OPENAI_API_KEY;
        if (!llmApiKey) {
            console.error('Error: --llm-key or OPENAI_API_KEY environment variable is required');
            process.exit(1);
        }
        let spinner = null;
        if (!options.silent) {
            spinner = ora('Fetching and summarizing...').start();
        }
        try {
            const extract = {
                prompt: options.prompt || 'Summarize this webpage in 2-3 sentences.',
                llmApiKey,
                llmModel: options.llmModel || 'gpt-4o-mini',
                llmBaseUrl: options.llmBaseUrl || 'https://api.openai.com/v1',
            };
            const page = await peel(url, { extract });
            if (spinner) {
                spinner.succeed(`Summarized in ${page.elapsed}ms`);
            }
            if (!options.json) {
                console.log(`\n${page.title}\n`);
                console.log(page.extracted);
            }
            else {
                const payload = {
                    url: page.url,
                    title: page.title,
                    summary: page.extracted,
                };
                console.log(JSON.stringify(payload, null, 2));
            }
            await cleanup();
            process.exit(0);
        }
        catch (err) {
            if (spinner) {
                spinner.fail('Summary generation failed');
            }
            const reason = err instanceof Error ? err.message : 'Unknown error';
            console.error(`Error: ${reason}`);
            await cleanup();
            process.exit(1);
        }
    });
2734
// Agent command - autonomous web research.
//
// Two modes, chosen by whether an LLM key is available (--llm-key or
// OPENAI_API_KEY):
//   * LLM mode      — delegates the whole run to runAgent() from core/agent.js.
//   * LLM-free mode — finds URLs (from --urls, the configured search provider,
//                     or a DuckDuckGo HTML fallback), fetches each page and
//                     extracts answers with quickAnswer().
//
// --schema accepts either a known template name or a JSON document.
// Both modes call cleanup() and exit 0 on success / 1 on failure.
program
    .command('agent <prompt>')
    .description('Web research agent — LLM-free by default, add --llm-key for AI synthesis')
    .option('--llm-key <key>', 'LLM API key (or use OPENAI_API_KEY env var)')
    .option('--llm-model <model>', 'LLM model to use (default: gpt-4o-mini)')
    .option('--llm-base-url <url>', 'LLM API base URL')
    .option('--urls <urls>', 'Comma-separated starting URLs')
    .option('--max-pages <n>', 'Maximum pages to visit (default: 10)', '10')
    .option('--schema <json>', 'Schema template name (e.g. product, article) or JSON schema for structured output')
    .option('-s, --silent', 'Silent mode (no spinner)')
    .option('--json', 'Output as JSON')
    .action(async (prompt, options) => {
        const llmApiKey = options.llmKey || process.env.OPENAI_API_KEY;
        // Drop empty entries so "a,,b" or a trailing comma does not produce a
        // blank URL that can only fail to fetch.
        const urls = options.urls
            ? options.urls.split(',').map((u) => u.trim()).filter(Boolean)
            : undefined;
        // Parse schema (support templates)
        let schema;
        if (options.schema) {
            const template = getSchemaTemplate(options.schema);
            if (template) {
                schema = template.fields;
            }
            else {
                try {
                    schema = JSON.parse(options.schema);
                }
                catch {
                    console.error(`Error: --schema must be a template name (${listSchemaTemplates().join(', ')}) or valid JSON`);
                    process.exit(1);
                }
            }
        }
        if (llmApiKey) {
            // Full LLM agent mode
            const spinner = options.silent ? null : ora('Running agent research...').start();
            try {
                const { runAgent } = await import('./core/agent.js');
                const result = await runAgent({
                    prompt,
                    urls,
                    schema,
                    llmApiKey,
                    llmModel: options.llmModel,
                    llmApiBase: options.llmBaseUrl,
                    maxPages: parseInt(options.maxPages, 10),
                    onProgress: (progress) => {
                        if (spinner)
                            spinner.text = progress.message;
                    },
                });
                if (spinner)
                    spinner.succeed(`Agent finished: ${result.pagesVisited} pages`);
                if (options.json) {
                    console.log(JSON.stringify(result, null, 2));
                }
                else {
                    console.log(`\nSources (${result.sources.length}):`);
                    result.sources.forEach(s => console.log(` • ${s}`));
                    console.log(`\nResults:`);
                    console.log(JSON.stringify(result.data, null, 2));
                }
                await cleanup();
                process.exit(0);
            }
            catch (e) {
                if (spinner)
                    spinner.fail('Agent failed');
                console.error(e instanceof Error ? e.message : e);
                await cleanup();
                process.exit(1);
            }
        }
        else {
            // LLM-free mode: search + fetch + BM25 extraction
            const spinner = options.silent ? null : ora('Running LLM-free research...').start();
            try {
                const { quickAnswer } = await import('./core/quick-answer.js');
                // Step 1: Get URLs to process
                let targetUrls = urls || [];
                // If no URLs, search the web
                if (targetUrls.length === 0) {
                    if (spinner)
                        spinner.text = 'Searching the web...';
                    try {
                        const { getBestSearchProvider } = await import('./core/search-provider.js');
                        const { provider, apiKey: searchApiKey } = getBestSearchProvider();
                        const searchResults = await provider.searchWeb(prompt, {
                            count: Math.min(parseInt(options.maxPages, 10) || 5, 10),
                            apiKey: searchApiKey,
                        });
                        targetUrls = searchResults.map((r) => r.url);
                    }
                    catch {
                        // Fallback: try DuckDuckGo HTML
                        if (spinner)
                            spinner.text = 'Searching via DuckDuckGo...';
                        try {
                            const duckUrl = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(prompt)}`;
                            const searchResult = await peel(duckUrl, { budget: 4000 });
                            // Extract URLs from search results content
                            const urlMatches = searchResult.content.match(/https?:\/\/[^\s\)]+/g) || [];
                            targetUrls = urlMatches
                                .filter((u) => !u.includes('duckduckgo.com'))
                                .slice(0, parseInt(options.maxPages, 10) || 5);
                        }
                        catch {
                            // Deliberately best-effort: an empty targetUrls is
                            // reported just below.
                        }
                    }
                }
                if (targetUrls.length === 0) {
                    // Always say why we are exiting — in --silent mode there is
                    // no spinner to carry the message — and release resources
                    // like every other exit path of this command.
                    const message = 'No URLs found. Provide --urls or a more specific prompt.';
                    if (spinner)
                        spinner.fail(message);
                    else
                        console.error(message);
                    await cleanup();
                    process.exit(1);
                }
                if (spinner)
                    spinner.text = `Processing ${targetUrls.length} pages...`;
                // Step 2: Fetch and extract from each URL
                const results = [];
                for (const url of targetUrls) {
                    try {
                        if (spinner)
                            spinner.text = `Fetching: ${url.substring(0, 60)}...`;
                        const pageResult = await peel(url, { budget: 4000 });
                        let extracted = null;
                        let confidence = 0;
                        if (schema) {
                            // Extract each schema field using smartExtractSchemaFields
                            const { smartExtractSchemaFields: smartExtractResearch } = await import('./core/schema-postprocess.js');
                            extracted = smartExtractResearch(pageResult.content, schema, quickAnswer, {
                                pageTitle: pageResult.title,
                                pageUrl: url,
                                metadata: pageResult.metadata,
                            });
                            // One confidence estimate is enough: score only the
                            // first schema field.
                            const fieldQuestions = Object.values(schema);
                            if (fieldQuestions.length > 0) {
                                const question = fieldQuestions[0];
                                try {
                                    const qa = quickAnswer({ content: pageResult.content, question: typeof question === 'string' ? question : '' });
                                    confidence = Math.max(confidence, qa.confidence || 0);
                                }
                                catch { /* confidence stays at 0 */ }
                            }
                        }
                        else {
                            // Answer the prompt directly
                            try {
                                const qa = quickAnswer({ content: pageResult.content, question: prompt });
                                extracted = { answer: qa.answer || '' };
                                confidence = qa.confidence || 0;
                            }
                            catch {
                                extracted = null;
                            }
                        }
                        results.push({
                            url,
                            title: pageResult.metadata?.title || url,
                            extracted,
                            content: pageResult.content.substring(0, 500),
                            confidence,
                        });
                    }
                    catch (e) {
                        // Skip failed URLs; details are only shown when DEBUG is set.
                        if (process.env.DEBUG) {
                            console.debug('[webpeel]', `Failed to fetch ${url}:`, e instanceof Error ? e.message : e);
                        }
                    }
                }
                if (spinner)
                    spinner.succeed(`Processed ${results.length}/${targetUrls.length} pages (LLM-free)`);
                if (options.json) {
                    console.log(JSON.stringify({
                        mode: 'llm-free',
                        prompt,
                        schema: schema || null,
                        results,
                        sources: results.map(r => r.url),
                        pagesVisited: results.length,
                    }, null, 2));
                }
                else {
                    console.log(`\n📊 Results (${results.length} pages, LLM-free):\n`);
                    for (const r of results) {
                        console.log(`── ${r.title} ──`);
                        console.log(` ${r.url}`);
                        if (r.extracted) {
                            for (const [k, v] of Object.entries(r.extracted)) {
                                if (v)
                                    console.log(` ${k}: ${v}`);
                            }
                        }
                        console.log(` Confidence: ${(r.confidence * 100).toFixed(0)}%\n`);
                    }
                }
                await cleanup();
                process.exit(0);
            }
            catch (e) {
                if (spinner)
                    spinner.fail('Research failed');
                console.error(e instanceof Error ? e.message : e);
                await cleanup();
                process.exit(1);
            }
        }
    });
2943
// ── Jobs command group ─────────────────────────────────────────────────────
// `webpeel jobs <keywords>` is shorthand for `webpeel jobs search <keywords>`;
// invoked without keywords it prints the group's help and exits 0.
const jobsCmd = program
    .command('jobs')
    .description('Job board operations: search listings and auto-apply (LinkedIn, Indeed, Glassdoor, Upwork)')
    .argument('[keywords]', 'Search keywords — shorthand for "jobs search <keywords>"')
    .option('-l, --location <location>', 'Location filter')
    .option('-s, --source <source>', 'Job board: glassdoor, indeed, linkedin, or upwork (default: linkedin)', 'linkedin')
    .option('-n, --limit <number>', 'Max results (default: 25)', '25')
    .option('-d, --details <number>', 'Fetch full details for top N results (default: 0)', '0')
    .option('--json', 'Output raw JSON')
    .option('--timeout <ms>', 'Request timeout in ms (default: 30000)', '30000')
    .option('--silent', 'Silent mode (no spinner)')
    .action(async (keywords, options) => {
        const hasKeywords = Boolean(keywords);
        if (!hasKeywords) {
            // Nothing to search for: show usage instead.
            jobsCmd.help();
            process.exit(0);
        }
        // Same code path as the explicit `jobs search` subcommand.
        await runJobSearch(keywords, options);
    });
2964
// ── Shared job-search logic (used by both `jobs` default and `jobs search`) ───
/**
 * Search a job board and print the results, then terminate the process.
 *
 * Prints raw JSON when `options.json` is set; otherwise prints a summary
 * table followed by a detail section for every job that carries a
 * `description`. Always ends with process.exit (0 on success, 1 on error).
 *
 * @param {string} keywords - Search terms passed through to searchJobs().
 * @param {object} options - Parsed CLI options: location, source, limit,
 *   details, timeout, json, silent (numeric options arrive as strings).
 * @returns {Promise<void>}
 */
async function runJobSearch(keywords, options) {
    const spinner = options.silent ? null : ora('Searching jobs...').start();
    try {
        const { searchJobs } = await import('./core/jobs.js');
        const VALID_SOURCES = ['glassdoor', 'indeed', 'linkedin', 'upwork'];
        // Resolve the default first, then validate: validating the defaulted
        // value but returning the raw one yielded `undefined` when unset.
        const requestedSource = options.source ?? 'linkedin';
        const source = VALID_SOURCES.includes(requestedSource) ? requestedSource : 'linkedin';
        // Clamp limit to 1..100 and details to 0..limit; bad input falls back to defaults.
        const limit = Math.min(Math.max(parseInt(options.limit ?? '25', 10) || 25, 1), 100);
        const fetchDetails = Math.min(Math.max(parseInt(options.details ?? '0', 10) || 0, 0), limit);
        const timeout = parseInt(options.timeout ?? '30000', 10) || 30000;
        const result = await searchJobs({
            keywords,
            location: options.location,
            source,
            limit,
            fetchDetails,
            timeout,
        });
        if (spinner)
            spinner.stop();
        if (options.json) {
            await writeStdout(JSON.stringify(result, null, 2) + '\n');
            process.exit(0);
        }
        // "Nk+" must never overstate the count, so round down (1500 → "1k+").
        const totalLabel = result.totalFound >= 1000
            ? `${Math.floor(result.totalFound / 1000)}k+`
            : String(result.totalFound);
        const locationLabel = options.location ? ` in ${options.location}` : '';
        console.log(`\n🔍 Found ${totalLabel} ${keywords} jobs${locationLabel} (${result.source})\n`);
        if (result.jobs.length === 0) {
            console.log(' No jobs found.\n');
            process.exit(0);
        }
        const colNum = 3;
        const colTitle = 40;
        const colCompany = 18;
        const colLocation = 16;
        const colSalary = 14;
        const colPosted = 10;
        // Fit a cell to width w: truncate with an ellipsis or right-pad.
        // Missing fields render as empty cells rather than throwing.
        const pad = (s, w) => {
            const text = String(s ?? '');
            return text.length > w ? text.slice(0, w - 1) + '…' : text.padEnd(w);
        };
        const rpad = (s, w) => s.padStart(w);
        console.log(` ${rpad('#', colNum)} ${pad('Title', colTitle)} ${pad('Company', colCompany)} ${pad('Location', colLocation)} ${pad('Salary/Budget', colSalary)} ${pad('Posted', colPosted)}`);
        result.jobs.forEach((job, i) => {
            const titleStr = job.title + (job.remote ? ' 🏠' : '');
            // Use `budget` when a listing has no salary but carries that field.
            const salaryStr = job.salary ?? ('budget' in job ? job.budget : '') ?? '';
            console.log(` ${rpad(String(i + 1), colNum)} ${pad(titleStr, colTitle)} ${pad(job.company, colCompany)} ${pad(job.location, colLocation)} ${pad(salaryStr, colSalary)} ${pad(job.postedAt ?? '', colPosted)}`);
        });
        const timeSec = (result.timeTakenMs / 1000).toFixed(1);
        const detailsNote = fetchDetails > 0 ? ` | Details: ${result.detailsFetched} fetched` : '';
        console.log(`\nFetched ${result.jobs.length} jobs in ${timeSec}s${detailsNote}\n`);
        // Detail sections are printed only for jobs that have a description.
        const detailedJobs = result.jobs.filter((j) => 'description' in j);
        for (let i = 0; i < detailedJobs.length; i++) {
            const job = detailedJobs[i];
            console.log(`━━━ Job #${i + 1}: ${job.title} ━━━`);
            const metaParts = [`Company: ${job.company}`, `Location: ${job.location}`];
            if (job.salary)
                metaParts.push(`Salary: ${job.salary}`);
            console.log(metaParts.join(' | '));
            const typeParts = [];
            if (job.employmentType)
                typeParts.push(`Type: ${job.employmentType}`);
            if (job.experienceLevel)
                typeParts.push(`Level: ${job.experienceLevel}`);
            if (job.postedAt)
                typeParts.push(`Posted: ${job.postedAt}`);
            if (typeParts.length > 0)
                console.log(typeParts.join(' | '));
            if (job.description) {
                console.log(`\nDescription:\n ${job.description.slice(0, 500).replace(/\n/g, '\n ')}`);
            }
            if (job.requirements && job.requirements.length > 0) {
                console.log(`\nRequirements:`);
                job.requirements.forEach(r => console.log(` • ${r}`));
            }
            if (job.responsibilities && job.responsibilities.length > 0) {
                console.log(`\nResponsibilities:`);
                job.responsibilities.forEach(r => console.log(` • ${r}`));
            }
            if (job.benefits && job.benefits.length > 0) {
                console.log(`\nBenefits:`);
                job.benefits.forEach(b => console.log(` • ${b}`));
            }
            if (job.applyUrl) {
                console.log(`\nApply: ${job.applyUrl}`);
            }
            console.log('');
        }
        process.exit(0);
    }
    catch (error) {
        if (spinner)
            spinner.fail?.('Job search failed');
        console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
        process.exit(1);
    }
}
3062
- // jobs search <keywords> — explicit subcommand (same logic as default action)
3063
- jobsCmd
3064
- .command('search <keywords>')
3065
- .description('Search job boards for listings (LinkedIn, Indeed, Glassdoor, Upwork)')
3066
- .alias('s')
3067
- .option('-l, --location <location>', 'Location filter')
3068
- .option('-s, --source <source>', 'Job board: glassdoor, indeed, linkedin, or upwork (default: linkedin)', 'linkedin')
3069
- .option('-n, --limit <number>', 'Max results (default: 25)', '25')
3070
- .option('-d, --details <number>', 'Fetch full details for top N results (default: 0)', '0')
3071
- .option('--json', 'Output raw JSON')
3072
- .option('--timeout <ms>', 'Request timeout in ms (default: 30000)', '30000')
3073
- .option('--silent', 'Silent mode (no spinner)')
3074
- .action(async (keywords, options) => {
3075
- await runJobSearch(keywords, options);
3076
- });
3077
- // ── jobs apply <url> ─────────────────────────────────────────────────────────
3078
- // Stealth automated job application using human behavior simulation
3079
- jobsCmd
3080
- .command('apply <url>')
3081
- .description('Stealth automated job application using human behavior simulation')
3082
- .option('--profile <path>', 'Path to profile JSON file', `${process.env.HOME ?? '~'}/.webpeel/profile.json`)
3083
- .option('--resume <path>', 'Path to resume PDF (overrides profile.resumePath)')
3084
- .option('--mode <mode>', 'Submission mode: auto | review | dry-run (default: review)', 'review')
3085
- .option('--session-dir <path>', 'Browser session directory (preserves login cookies)')
3086
- .option('--llm-key <key>', 'LLM API key for custom question answers')
3087
- .option('--llm-provider <name>', 'LLM provider: openai | anthropic (default: openai)', 'openai')
3088
- .option('--daily-limit <n>', 'Max applications per day (default: 8)', '8')
3089
- .option('--no-warmup', 'Skip browsing warmup phase')
3090
- .option('--json', 'Output result as JSON')
3091
- .option('--silent', 'Minimal output')
3092
- .action(async (url, options) => {
3093
- const isSilent = options.silent;
3094
- const isJson = options.json;
3095
- const mode = (['auto', 'review', 'dry-run'].includes(options.mode)
3096
- ? options.mode
3097
- : 'review');
3098
- if (!isSilent) {
3099
- console.log(`\n🤖 WebPeel Auto-Apply — mode: ${mode}`);
3100
- console.log(` URL: ${url}\n`);
3101
- }
3102
- // Load profile
3103
- const profilePath = options.profile;
3104
- let profile;
3105
- try {
3106
- const raw = readFileSync(profilePath, 'utf-8');
3107
- profile = JSON.parse(raw);
3108
- }
3109
- catch {
3110
- console.error(`Error: Could not load profile from ${profilePath}`);
3111
- console.error(`Run "webpeel jobs apply-setup" to create a profile.`);
3112
- process.exit(1);
3113
- }
3114
- if (options.resume) {
3115
- profile.resumePath = options.resume;
3116
- }
3117
- const spinner = isSilent ? null : ora('Applying...').start();
3118
- try {
3119
- const { applyToJob } = await import('./core/apply.js');
3120
- const result = await applyToJob({
3121
- url,
3122
- profile,
3123
- mode,
3124
- sessionDir: options.sessionDir,
3125
- llmKey: options.llmKey,
3126
- llmProvider: options.llmProvider,
3127
- dailyLimit: parseInt(options.dailyLimit, 10) || 8,
3128
- warmup: options.warmup !== false,
3129
- onProgress: isSilent
3130
- ? undefined
3131
- : (event) => {
3132
- if (spinner)
3133
- spinner.text = `[${event.stage}] ${event.message}`;
3134
- else
3135
- console.log(` [${event.stage}] ${event.message}`);
3136
- },
3137
- });
3138
- if (spinner)
3139
- spinner.stop();
3140
- if (isJson) {
3141
- await writeStdout(JSON.stringify(result, null, 2) + '\n');
3142
- process.exit(result.error ? 1 : 0);
3143
- }
3144
- const statusIcon = result.submitted ? '✅' : result.error ? '❌' : '📋';
3145
- console.log(`\n${statusIcon} ${result.submitted
3146
- ? 'Application submitted!'
3147
- : result.error
3148
- ? `Error: ${result.error}`
3149
- : 'Application completed (not submitted)'}`);
3150
- if (result.job.title || result.job.company) {
3151
- console.log(` ${result.job.title}${result.job.company ? ` @ ${result.job.company}` : ''}`);
3152
- }
3153
- console.log(`\n Fields filled: ${result.fieldsFilled}`);
3154
- if (result.llmAnswers > 0)
3155
- console.log(` LLM answers: ${result.llmAnswers}`);
3156
- if (result.fieldsSkipped.length > 0)
3157
- console.log(` Skipped: ${result.fieldsSkipped.join(', ')}`);
3158
- if (result.warnings.length > 0 && !isSilent) {
3159
- console.log(`\n Warnings:`);
3160
- result.warnings.forEach(w => console.log(` ⚠️ ${w}`));
3161
- }
3162
- console.log(` Time: ${(result.elapsed / 1000).toFixed(1)}s\n`);
3163
- process.exit(result.error ? 1 : 0);
3164
- }
3165
- catch (error) {
3166
- if (spinner)
3167
- spinner.fail('Application failed');
3168
- console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
3169
- process.exit(1);
3170
- }
3171
- });
3172
- // ── jobs apply-setup ─────────────────────────────────────────────────────────
3173
- // Interactive wizard to create ~/.webpeel/profile.json
3174
- jobsCmd
3175
- .command('apply-setup')
3176
- .description('Interactive setup wizard — creates ~/.webpeel/profile.json')
3177
- .action(async () => {
3178
- const { createInterface } = await import('readline');
3179
- const rl = createInterface({ input: process.stdin, output: process.stdout });
3180
- const ask = (q) => new Promise(resolve => rl.question(q, ans => resolve(ans.trim())));
3181
- console.log('\n🤖 WebPeel Apply Setup — Create your applicant profile\n');
3182
- console.log('This creates ~/.webpeel/profile.json used by "webpeel jobs apply".\n');
3183
- try {
3184
- const name = await ask('Full name: ');
3185
- const email = await ask('Email address: ');
3186
- const phone = await ask('Phone number: ');
3187
- const linkedin = await ask('LinkedIn URL (optional, press Enter to skip): ');
3188
- const website = await ask('Portfolio/website URL (optional): ');
3189
- const location = await ask('City, State (e.g. San Francisco, CA): ');
3190
- const workAuth = await ask('Work authorization (e.g. US Citizen, Permanent Resident, H-1B, Need Sponsorship): ');
3191
- const yearsExp = await ask('Years of experience: ');
3192
- const currentTitle = await ask('Current/most recent job title: ');
3193
- const skills = await ask('Skills (comma-separated, e.g. TypeScript, React, Node.js): ');
3194
- const education = await ask('Education (e.g. B.S. Computer Science, MIT): ');
3195
- const resumePath = await ask('Path to resume PDF (e.g. /Users/you/resume.pdf): ');
3196
- const summary = await ask('Professional summary (1-3 sentences): ');
3197
- const salaryMin = await ask('Minimum desired salary (optional, e.g. 120000): ');
3198
- const salaryMax = await ask('Maximum desired salary (optional, e.g. 180000): ');
3199
- const relocate = await ask('Willing to relocate? (y/n): ');
3200
- const sponsorship = await ask('Need visa sponsorship? (y/n): ');
3201
- rl.close();
3202
- const profileData = {
3203
- name,
3204
- email,
3205
- phone,
3206
- ...(linkedin ? { linkedin } : {}),
3207
- ...(website ? { website } : {}),
3208
- location,
3209
- workAuthorization: workAuth,
3210
- yearsExperience: parseInt(yearsExp, 10) || 0,
3211
- currentTitle,
3212
- skills: skills.split(',').map(s => s.trim()).filter(Boolean),
3213
- education,
3214
- resumePath,
3215
- summary,
3216
- ...(salaryMin && salaryMax
3217
- ? { salaryRange: { min: parseInt(salaryMin, 10), max: parseInt(salaryMax, 10) } }
3218
- : {}),
3219
- willingToRelocate: relocate.toLowerCase().startsWith('y'),
3220
- needsSponsorship: sponsorship.toLowerCase().startsWith('y'),
3221
- };
3222
- const { mkdirSync: mk, writeFileSync: wf, existsSync: ex } = await import('fs');
3223
- const { join: j } = await import('path');
3224
- const { homedir: hd } = await import('os');
3225
- const webpeelDir = j(hd(), '.webpeel');
3226
- if (!ex(webpeelDir))
3227
- mk(webpeelDir, { recursive: true });
3228
- const profilePath = j(webpeelDir, 'profile.json');
3229
- wf(profilePath, JSON.stringify(profileData, null, 2), 'utf-8');
3230
- console.log(`\n✅ Profile saved to: ${profilePath}`);
3231
- console.log('\nNext steps:');
3232
- console.log(' 1. Apply to a job: webpeel jobs apply https://linkedin.com/jobs/view/...');
3233
- console.log(' (First run opens a browser — log in to LinkedIn, then the session is saved)\n');
3234
- }
3235
- catch (error) {
3236
- rl.close();
3237
- console.error(`\nError: ${error instanceof Error ? error.message : 'Unknown error'}`);
3238
- process.exit(1);
3239
- }
3240
- });
3241
- // ── jobs apply-history ───────────────────────────────────────────────────────
3242
- // View application history from ~/.webpeel/applications.json
3243
- jobsCmd
3244
- .command('apply-history')
3245
- .description('View application history from ~/.webpeel/applications.json')
3246
- .option('--json', 'Output as JSON')
3247
- .option('--limit <n>', 'Number of recent applications to show (default: 20)', '20')
3248
- .action(async (options) => {
3249
- const isJson = options.json;
3250
- const limit = parseInt(options.limit, 10) || 20;
3251
- try {
3252
- const { loadApplications } = await import('./core/apply.js');
3253
- const allApps = loadApplications();
3254
- const apps = allApps.slice().reverse().slice(0, limit);
3255
- if (isJson) {
3256
- await writeStdout(JSON.stringify(apps, null, 2) + '\n');
3257
- process.exit(0);
3258
- }
3259
- if (apps.length === 0) {
3260
- console.log('\nNo applications yet. Use "webpeel jobs apply <url>" to start.\n');
3261
- process.exit(0);
3262
- }
3263
- console.log(`\n📋 Application History (${apps.length} of ${allApps.length} total)\n`);
3264
- const colDate = 22;
3265
- const colStatus = 10;
3266
- const colTitle = 35;
3267
- const colCompany = 20;
3268
- const colMode = 8;
3269
- const pad = (s, w) => (s.length > w ? s.slice(0, w - 1) + '…' : s.padEnd(w));
3270
- console.log(` ${pad('Applied', colDate)} ${pad('Status', colStatus)} ${pad('Title', colTitle)} ${pad('Company', colCompany)} ${pad('Mode', colMode)}`);
3271
- console.log(` ${'-'.repeat(colDate)} ${'-'.repeat(colStatus)} ${'-'.repeat(colTitle)} ${'-'.repeat(colCompany)} ${'-'.repeat(colMode)}`);
3272
- for (const app of apps) {
3273
- const date = new Date(app.appliedAt).toLocaleString('en-US', {
3274
- month: 'short',
3275
- day: 'numeric',
3276
- year: 'numeric',
3277
- hour: '2-digit',
3278
- minute: '2-digit',
3279
- });
3280
- const statusEmoji = { applied: '📤', interview: '🎯', offer: '🎉', rejected: '❌', withdrawn: '🚫' }[app.status] ?? '';
3281
- console.log(` ${pad(date, colDate)} ${pad(`${statusEmoji} ${app.status}`, colStatus)} ${pad(app.title, colTitle)} ${pad(app.company, colCompany)} ${pad(app.mode, colMode)}`);
3282
- }
3283
- const today = new Date().toISOString().slice(0, 10);
3284
- const todayCount = allApps.filter(a => a.appliedAt.startsWith(today)).length;
3285
- console.log(`\n Today: ${todayCount} application(s)\n`);
3286
- process.exit(0);
3287
- }
3288
- catch (error) {
3289
- console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
3290
- process.exit(1);
3291
- }
3292
- });
3293
- // Queue command - list active async jobs (crawl, batch)
3294
- program
3295
- .command('queue')
3296
- .description('List active async jobs (crawl, batch)')
3297
- .option('--json', 'Output as JSON')
3298
- .action(async (options) => {
3299
- try {
3300
- const config = loadConfig();
3301
- if (!config.apiKey) {
3302
- console.error('Error: API key required. Run `webpeel login` first.');
3303
- process.exit(1);
3304
- }
3305
- const { fetch: undiciFetch } = await import('undici');
3306
- const response = await undiciFetch(`${process.env.WEBPEEL_API_URL || 'https://api.webpeel.dev'}/v1/jobs`, {
3307
- headers: {
3308
- 'Authorization': `Bearer ${config.apiKey}`,
3309
- },
3310
- });
3311
- if (!response.ok) {
3312
- throw new Error(`API error: HTTP ${response.status}`);
3313
- }
3314
- const data = await response.json();
3315
- const jobs = data.jobs || data;
3316
- if (options.json) {
3317
- console.log(JSON.stringify(data, null, 2));
3318
- }
3319
- else {
3320
- if (!Array.isArray(jobs) || jobs.length === 0) {
3321
- console.log('No active jobs.');
3322
- }
3323
- else {
3324
- console.log(`Active Jobs (${jobs.length}):\n`);
3325
- for (const job of jobs) {
3326
- console.log(`ID: ${job.id}`);
3327
- console.log(`Type: ${job.type}`);
3328
- console.log(`Status: ${job.status}`);
3329
- console.log(`URL: ${job.url}`);
3330
- console.log(`Created: ${job.createdAt}`);
3331
- console.log('---');
3332
- }
3333
- }
3334
- }
3335
- process.exit(0);
3336
- }
3337
- catch (error) {
3338
- console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
3339
- process.exit(1);
3340
- }
3341
- });
3342
- // Job command - get job status
3343
- program
3344
- .command('job <id>')
3345
- .description('Get status of a specific job')
3346
- .option('--json', 'Output as JSON')
3347
- .action(async (id, options) => {
3348
- try {
3349
- const config = loadConfig();
3350
- if (!config.apiKey) {
3351
- console.error('Error: API key required. Run `webpeel login` first.');
3352
- process.exit(1);
3353
- }
3354
- const { fetch: undiciFetch } = await import('undici');
3355
- const response = await undiciFetch(`${process.env.WEBPEEL_API_URL || 'https://api.webpeel.dev'}/v1/jobs/${id}`, {
3356
- headers: {
3357
- 'Authorization': `Bearer ${config.apiKey}`,
3358
- },
3359
- });
3360
- if (!response.ok) {
3361
- throw new Error(`API error: HTTP ${response.status}`);
3362
- }
3363
- const job = await response.json();
3364
- if (options.json) {
3365
- console.log(JSON.stringify(job, null, 2));
3366
- }
3367
- else {
3368
- console.log(`Job ID: ${job.id}`);
3369
- console.log(`Type: ${job.type}`);
3370
- console.log(`Status: ${job.status}`);
3371
- console.log(`URL: ${job.url}`);
3372
- console.log(`Created: ${job.createdAt}`);
3373
- if (job.completedAt) {
3374
- console.log(`Completed: ${job.completedAt}`);
3375
- }
3376
- if (job.error) {
3377
- console.log(`Error: ${job.error}`);
3378
- }
3379
- if (job.results) {
3380
- console.log(`\nResults: ${job.results.length} items`);
3381
- if (job.type === 'crawl' && job.results.length > 0) {
3382
- console.log('\nFirst 5 URLs:');
3383
- for (const result of job.results.slice(0, 5)) {
3384
- console.log(` - ${result.url}`);
3385
- }
3386
- }
3387
- }
3388
- }
3389
- process.exit(0);
3390
- }
3391
- catch (error) {
3392
- console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
3393
- process.exit(1);
3394
- }
3395
- });
3396
- // Answer command - search + fetch + LLM-generated answer
3397
- program
3398
- .command('answer <question>')
3399
- .description('Ask a question, search the web, and get an AI-generated answer with citations (BYOK)')
3400
- .option('--provider <provider>', 'Search provider: duckduckgo (default) or brave')
3401
- .option('--search-api-key <key>', 'Search provider API key (or env WEBPEEL_BRAVE_API_KEY)')
3402
- .option('--llm <provider>', 'LLM provider: openai, anthropic, or google (required)')
3403
- .option('--llm-api-key <key>', 'LLM API key (or env OPENAI_API_KEY / ANTHROPIC_API_KEY / GOOGLE_API_KEY)')
3404
- .option('--llm-model <model>', 'LLM model name (optional, uses provider default)')
3405
- .option('--max-sources <n>', 'Maximum sources to fetch (1-10, default 5)', '5')
3406
- .option('--json', 'Output as JSON')
3407
- .option('-s, --silent', 'Silent mode')
3408
- .action(async (question, options) => {
3409
- const spinner = options.silent ? null : ora('Thinking...').start();
3410
- try {
3411
- const { answerQuestion } = await import('./core/answer.js');
3412
- const config = loadConfig();
3413
- const llmProvider = options.llm;
3414
- if (!llmProvider || !['openai', 'anthropic', 'google'].includes(llmProvider)) {
3415
- console.error('Error: --llm is required (openai, anthropic, or google)');
3416
- process.exit(1);
3417
- }
3418
- const llmApiKey = options.llmApiKey
3419
- || process.env.OPENAI_API_KEY
3420
- || process.env.ANTHROPIC_API_KEY
3421
- || process.env.GOOGLE_API_KEY
3422
- || '';
3423
- if (!llmApiKey) {
3424
- console.error('Error: --llm-api-key is required (or set OPENAI_API_KEY / ANTHROPIC_API_KEY / GOOGLE_API_KEY)');
3425
- process.exit(1);
3426
- }
3427
- const searchProvider = (options.provider || 'duckduckgo');
3428
- const searchApiKey = options.searchApiKey
3429
- || process.env.WEBPEEL_BRAVE_API_KEY
3430
- || config.braveApiKey
3431
- || undefined;
3432
- const maxSources = Math.min(Math.max(parseInt(options.maxSources) || 5, 1), 10);
3433
- if (spinner)
3434
- spinner.text = 'Searching the web...';
3435
- const result = await answerQuestion({
3436
- question,
3437
- searchProvider,
3438
- searchApiKey,
3439
- llmProvider,
3440
- llmApiKey,
3441
- llmModel: options.llmModel,
3442
- maxSources,
3443
- stream: false,
3444
- });
3445
- if (spinner)
3446
- spinner.succeed('Done');
3447
- if (options.json) {
3448
- const jsonStr = JSON.stringify(result, null, 2);
3449
- await new Promise((resolve, reject) => {
3450
- process.stdout.write(jsonStr + '\n', (err) => {
3451
- if (err)
3452
- reject(err);
3453
- else
3454
- resolve();
3455
- });
3456
- });
3457
- }
3458
- else {
3459
- console.log(`\n${result.answer}`);
3460
- console.log(`\nSources:`);
3461
- result.citations.forEach((c, i) => {
3462
- console.log(` [${i + 1}] ${c.title}`);
3463
- console.log(` ${c.url}`);
3464
- });
3465
- console.log(`\nModel: ${result.llmModel} (${result.llmProvider})`);
3466
- }
3467
- await cleanup();
3468
- process.exit(0);
3469
- }
3470
- catch (error) {
3471
- if (spinner)
3472
- spinner.fail('Answer generation failed');
3473
- console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
3474
- await cleanup();
3475
- process.exit(1);
3476
- }
3477
- });
3478
- // Screenshot command
3479
- program
3480
- .command('screenshot <url>')
3481
- .alias('snap')
3482
- .description('Take a screenshot of a URL and save as PNG/JPEG')
3483
- .option('--full-page', 'Capture full page (not just viewport)')
3484
- .option('--width <px>', 'Viewport width in pixels (default: 1280)', parseInt)
3485
- .option('--height <px>', 'Viewport height in pixels (default: 720)', parseInt)
3486
- .option('--format <fmt>', 'Image format: png (default) or jpeg', 'png')
3487
- .option('--quality <n>', 'JPEG quality 1-100 (ignored for PNG)', parseInt)
3488
- .option('-w, --wait <ms>', 'Wait time after page load (ms)', parseInt)
3489
- .option('-t, --timeout <ms>', 'Request timeout (ms)', (v) => parseInt(v, 10), 30000)
3490
- .option('--stealth', 'Use stealth mode to bypass bot detection')
3491
- .option('--action <actions...>', 'Page actions before screenshot (e.g., "click:.btn" "wait:2000")')
3492
- .option('--scroll-through', 'Auto-scroll page before screenshot (triggers lazy content + scroll animations)')
3493
- .option('-o, --output <path>', 'Output file path (default: screenshot.png)')
3494
- .option('-s, --silent', 'Silent mode (no spinner)')
3495
- .option('--json', 'Output base64 JSON instead of binary file')
3496
- .action(async (url, options) => {
3497
- // Validate URL
3498
- try {
3499
- const parsed = new URL(url);
3500
- if (!['http:', 'https:'].includes(parsed.protocol)) {
3501
- console.error('Error: Only HTTP and HTTPS protocols are allowed');
3502
- process.exit(1);
3503
- }
3504
- }
3505
- catch {
3506
- console.error(`Error: Invalid URL format: ${url}`);
3507
- process.exit(1);
3508
- }
3509
- // Check usage quota
3510
- const usageCheck = await checkUsage();
3511
- if (!usageCheck.allowed) {
3512
- console.error(usageCheck.message);
3513
- process.exit(1);
3514
- }
3515
- const spinner = options.silent ? null : ora('Taking screenshot...').start();
3516
- try {
3517
- // Validate format
3518
- const format = options.format?.toLowerCase();
3519
- if (format && !['png', 'jpeg', 'jpg'].includes(format)) {
3520
- console.error('Error: --format must be png, jpeg, or jpg');
3521
- process.exit(1);
3522
- }
3523
- // Parse actions
3524
- let actions;
3525
- if (options.action && options.action.length > 0) {
3526
- try {
3527
- actions = parseActions(options.action);
3528
- }
3529
- catch (e) {
3530
- console.error(`Error: ${e.message}`);
3531
- process.exit(1);
3532
- }
3533
- }
3534
- const { takeScreenshot } = await import('./core/screenshot.js');
3535
- const result = await takeScreenshot(url, {
3536
- fullPage: options.fullPage || false,
3537
- width: options.width,
3538
- height: options.height,
3539
- format: format || 'png',
3540
- quality: options.quality,
3541
- waitFor: options.wait,
3542
- timeout: options.timeout,
3543
- stealth: options.stealth || false,
3544
- actions,
3545
- scrollThrough: options.scrollThrough || false,
3546
- });
3547
- if (spinner) {
3548
- spinner.succeed(`Screenshot taken (${result.format})`);
3549
- }
3550
- // Show usage footer for free/anonymous users
3551
- if (usageCheck.usageInfo && !options.silent) {
3552
- showUsageFooter(usageCheck.usageInfo, usageCheck.isAnonymous || false, true);
3553
- }
3554
- if (options.json) {
3555
- // Output JSON with base64
3556
- const jsonStr = JSON.stringify({
3557
- url: result.url,
3558
- format: result.format,
3559
- contentType: result.contentType,
3560
- screenshot: result.screenshot,
3561
- }, null, 2);
3562
- await new Promise((resolve, reject) => {
3563
- process.stdout.write(jsonStr + '\n', (err) => {
3564
- if (err)
3565
- reject(err);
3566
- else
3567
- resolve();
3568
- });
3569
- });
3570
- }
3571
- else {
3572
- // Save to file
3573
- const ext = result.format === 'jpeg' ? 'jpg' : 'png';
3574
- const outputPath = options.output || `screenshot.${ext}`;
3575
- const buffer = Buffer.from(result.screenshot, 'base64');
3576
- writeFileSync(outputPath, buffer);
3577
- if (!options.silent) {
3578
- console.error(`Screenshot saved to: ${outputPath} (${(buffer.length / 1024).toFixed(1)} KB)`);
3579
- }
3580
- }
3581
- await cleanup();
3582
- process.exit(0);
3583
- }
3584
- catch (error) {
3585
- if (spinner) {
3586
- spinner.fail('Screenshot failed');
3587
- }
3588
- if (error instanceof Error) {
3589
- console.error(`\nError: ${error.message}`);
3590
- }
3591
- else {
3592
- console.error('\nError: Unknown error occurred');
3593
- }
3594
- await cleanup();
3595
- process.exit(1);
3596
- }
3597
- });
3598
- // ── Top-level Apply command group ──────────────────────────────────────────
3599
- //
3600
- // webpeel apply <url> — submit a job application
3601
- // webpeel apply init — interactive profile setup wizard
3602
- // webpeel apply status — show application stats
3603
- // webpeel apply list — list tracked applications (with filters)
3604
- // webpeel apply rate — show rate-governor status
3605
- const applyCmd = program
3606
- .command('apply')
3607
- .description('Auto-apply pipeline: submit applications, track history, manage rate limits');
3608
- // apply <url> — auto-apply to a job posting
3609
- applyCmd
3610
- .command('submit <url>')
3611
- .description('Auto-apply to a job posting')
3612
- .alias('s')
3613
- .option('--profile-path <path>', 'Path to apply profile JSON', `${process.env.HOME ?? '~'}/.webpeel/profile.json`)
3614
- .option('--browser-profile <path>', 'Path to persistent browser data dir', `${process.env.HOME ?? '~'}/.webpeel/browser-profile`)
3615
- .option('--headed', 'Run browser visibly (default for apply)')
3616
- .option('--headless', 'Run browser invisibly')
3617
- .option('--confirm', 'Pause for confirmation before submit (default: true)')
3618
- .option('--no-confirm', 'Skip confirmation, auto-submit')
3619
- .option('--dry-run', 'Go through flow but do not submit')
3620
- .option('--generate-cover', 'Generate tailored cover letter (needs OPENAI_API_KEY)')
3621
- .option('--timeout <ms>', 'Timeout in ms (default: 300000)', '300000')
3622
- .option('--json', 'Output result as JSON')
3623
- .option('--silent', 'Silent mode')
3624
- .action(async (url, options) => {
3625
- const isSilent = options.silent;
3626
- const isJson = options.json;
3627
- // Load profile
3628
- const profilePath = options.profilePath;
3629
- let profile;
3630
- try {
3631
- const raw = readFileSync(profilePath, 'utf-8');
3632
- profile = JSON.parse(raw);
3633
- }
3634
- catch {
3635
- const msg = `Could not load profile from ${profilePath}. Run "webpeel apply init" to create one.`;
3636
- if (isJson) {
3637
- await writeStdout(JSON.stringify({ success: false, error: { type: 'fetch_failed', message: msg } }) + '\n');
3638
- }
3639
- else {
3640
- console.error(`Error: ${msg}`);
3641
- }
3642
- process.exit(1);
3643
- }
3644
- const spinner = isSilent ? null : ora('Applying...').start();
3645
- try {
3646
- const { applyToJob } = await import('./core/apply.js');
3647
- const result = await applyToJob({
3648
- url,
3649
- profile,
3650
- // Use sessionDir for persistent session storage (renamed from browserProfile)
3651
- sessionDir: options.browserProfile,
3652
- // Map dryRun flag → mode: 'dry-run'
3653
- mode: (options.dryRun ? 'dry-run' : (options.noConfirm ? 'auto' : 'review')),
3654
- timeout: parseInt(options.timeout, 10) || 300_000,
3655
- });
3656
- if (spinner)
3657
- spinner.stop();
3658
- // Normalize result to a consistent output shape
3659
- const success = result.submitted && !result.error;
3660
- const jobTitle = result.job?.title ?? '';
3661
- const jobCompany = result.job?.company ?? '';
3662
- if (isJson) {
3663
- await writeStdout(JSON.stringify(result, null, 2) + '\n');
3664
- process.exit(success ? 0 : 1);
3665
- }
3666
- const icon = success ? '✅' : '❌';
3667
- console.log(`\n${icon} ${success ? 'Application submitted!' : `Failed: ${result.error ?? 'Unknown error'}`}`);
3668
- if (jobTitle)
3669
- console.log(` ${jobTitle}${jobCompany ? ` @ ${jobCompany}` : ''}`);
3670
- if (options.dryRun)
3671
- console.log(' (Dry run — not submitted)');
3672
- console.log(` Time: ${(result.elapsed / 1000).toFixed(1)}s\n`);
3673
- process.exit(success ? 0 : 1);
3674
- }
3675
- catch (error) {
3676
- if (spinner)
3677
- spinner.fail('Application failed');
3678
- const msg = error instanceof Error ? error.message : 'Unknown error';
3679
- if (isJson) {
3680
- await writeStdout(JSON.stringify({ success: false, error: { type: 'fetch_failed', message: msg } }) + '\n');
3681
- }
3682
- else {
3683
- console.error(`Error: ${msg}`);
3684
- }
3685
- process.exit(1);
3686
- }
3687
- });
3688
- // apply init — interactive profile setup
3689
- applyCmd
3690
- .command('init')
3691
- .description('Interactive profile setup — creates ~/.webpeel/profile.json')
3692
- .action(async () => {
3693
- const { createInterface } = await import('readline');
3694
- const rl = createInterface({ input: process.stdin, output: process.stdout });
3695
- const ask = (q) => new Promise((resolve) => rl.question(q, (ans) => resolve(ans.trim())));
3696
- console.log('\n🤖 WebPeel Apply Setup — Create your applicant profile\n');
3697
- console.log('This creates ~/.webpeel/profile.json used by "webpeel apply submit".\n');
3698
- try {
3699
- const name = await ask('Full name: ');
3700
- const email = await ask('Email address: ');
3701
- const phone = await ask('Phone number (optional): ');
3702
- const resumePath = await ask('Path to resume PDF (e.g. /Users/you/resume.pdf): ');
3703
- const currentTitle = await ask('Current/most recent job title: ');
3704
- const yearsExp = await ask('Years of experience: ');
3705
- const skills = await ask('Skills (comma-separated, e.g. TypeScript, React, Node.js): ');
3706
- const education = await ask('Education (e.g. B.S. Computer Science, MIT): ');
3707
- const location = await ask('City, State (e.g. San Francisco, CA): ');
3708
- const workAuth = await ask('Work authorization (e.g. US Citizen, Permanent Resident, H-1B, Need Sponsorship): ');
3709
- const linkedinUrl = await ask('LinkedIn URL (optional): ');
3710
- const websiteUrl = await ask('Portfolio/website URL (optional): ');
3711
- const desiredSalary = await ask('Desired salary (optional, e.g. $150,000): ');
3712
- rl.close();
3713
- const { mkdirSync: mk, writeFileSync: wf } = await import('fs');
3714
- const { join: j } = await import('path');
3715
- const { homedir: hd } = await import('os');
3716
- const webpeelDir = j(hd(), '.webpeel');
3717
- mk(webpeelDir, { recursive: true });
3718
- const profile = {
3719
- name,
3720
- email,
3721
- ...(phone ? { phone } : {}),
3722
- resumePath,
3723
- currentTitle,
3724
- yearsExperience: parseInt(yearsExp, 10) || 0,
3725
- skills: skills.split(',').map((s) => s.trim()).filter(Boolean),
3726
- education,
3727
- location,
3728
- workAuthorization: workAuth,
3729
- ...(linkedinUrl ? { linkedinUrl } : {}),
3730
- ...(websiteUrl ? { websiteUrl } : {}),
3731
- ...(desiredSalary ? { desiredSalary } : {}),
3732
- };
3733
- const profilePath = j(webpeelDir, 'profile.json');
3734
- wf(profilePath, JSON.stringify(profile, null, 2), 'utf-8');
3735
- console.log(`\n✅ Profile saved to: ${profilePath}`);
3736
- console.log('\nNext steps:');
3737
- console.log(' • Apply to a job: webpeel apply submit <url>');
3738
- console.log(' • Dry run first: webpeel apply submit <url> --dry-run');
3739
- console.log(' • View stats: webpeel apply status\n');
3740
- }
3741
- catch (error) {
3742
- rl.close();
3743
- console.error(`\nError: ${error instanceof Error ? error.message : 'Unknown error'}`);
3744
- process.exit(1);
3745
- }
3746
- });
3747
- // apply status — application stats summary
3748
- applyCmd
3749
- .command('status')
3750
- .description('Show application stats')
3751
- .option('--json', 'Output as JSON')
3752
- .action(async (options) => {
3753
- try {
3754
- const { ApplicationTracker } = await import('./core/application-tracker.js');
3755
- const tracker = new ApplicationTracker();
3756
- const stats = tracker.stats();
3757
- if (options.json) {
3758
- await writeStdout(JSON.stringify(stats, null, 2) + '\n');
3759
- process.exit(0);
3760
- }
3761
- console.log('\n📊 Application Stats\n');
3762
- console.log(` Total: ${stats.total}`);
3763
- console.log(` Today: ${stats.today}`);
3764
- console.log(` This week: ${stats.thisWeek}`);
3765
- if (Object.keys(stats.byPlatform).length > 0) {
3766
- console.log('\n By Platform:');
3767
- for (const [platform, count] of Object.entries(stats.byPlatform)) {
3768
- console.log(` ${platform.padEnd(12)} ${count}`);
3769
- }
3770
- }
3771
- if (Object.keys(stats.byStatus).length > 0) {
3772
- console.log('\n By Status:');
3773
- for (const [status, count] of Object.entries(stats.byStatus)) {
3774
- console.log(` ${status.padEnd(12)} ${count}`);
3775
- }
3776
- }
3777
- console.log('');
3778
- process.exit(0);
3779
- }
3780
- catch (error) {
3781
- console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
3782
- process.exit(1);
3783
- }
3784
- });
3785
- // apply list — list applications with optional filters
3786
- applyCmd
3787
- .command('list')
3788
- .description('List tracked applications')
3789
- .option('--platform <platform>', 'Filter by platform (e.g. linkedin, upwork)')
3790
- .option('--status <status>', 'Filter by status (applied, interview, rejected, offer, ...)')
3791
- .option('--since <date>', 'Filter to applications on or after this date (YYYY-MM-DD)')
3792
- .option('--json', 'Output as JSON')
3793
- .option('--limit <n>', 'Max records to show (default: 50)', '50')
3794
- .action(async (options) => {
3795
- try {
3796
- const { ApplicationTracker } = await import('./core/application-tracker.js');
3797
- const tracker = new ApplicationTracker();
3798
- const limit = parseInt(options.limit, 10) || 50;
3799
- const records = tracker.list({
3800
- platform: options.platform,
3801
- status: options.status,
3802
- since: options.since,
3803
- }).slice(0, limit);
3804
- if (options.json) {
3805
- await writeStdout(JSON.stringify(records, null, 2) + '\n');
3806
- process.exit(0);
3807
- }
3808
- if (records.length === 0) {
3809
- console.log('\nNo applications found.\n');
3810
- process.exit(0);
3811
- }
3812
- console.log(`\n📋 Applications (${records.length})\n`);
3813
- const colDate = 12;
3814
- const colStatus = 10;
3815
- const colTitle = 35;
3816
- const colCompany = 20;
3817
- const pad = (s, w) => s.length > w ? s.slice(0, w - 1) + '…' : s.padEnd(w);
3818
- console.log(` ${'Date'.padEnd(colDate)} ${'Status'.padEnd(colStatus)} ${'Title'.padEnd(colTitle)} ${'Company'.padEnd(colCompany)}`);
3819
- console.log(` ${'-'.repeat(colDate)} ${'-'.repeat(colStatus)} ${'-'.repeat(colTitle)} ${'-'.repeat(colCompany)}`);
3820
- for (const r of records) {
3821
- const dateStr = r.appliedAt.slice(0, 10);
3822
- console.log(` ${pad(dateStr, colDate)} ${pad(r.status, colStatus)} ${pad(r.title, colTitle)} ${pad(r.company, colCompany)}`);
3823
- }
3824
- console.log('');
3825
- process.exit(0);
3826
- }
3827
- catch (error) {
3828
- console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
3829
- process.exit(1);
3830
- }
3831
- });
3832
- // apply rate — rate governor status
3833
- applyCmd
3834
- .command('rate')
3835
- .description('Show rate governor status (daily limits, cooldown, next allowed time)')
3836
- .option('--json', 'Output as JSON')
3837
- .option('--reset-cooldown', 'Clear any active cooldown (manual override)')
3838
- .action(async (options) => {
3839
- try {
3840
- const { RateGovernor, formatDuration } = await import('./core/rate-governor.js');
3841
- const governor = new RateGovernor();
3842
- if (options.resetCooldown) {
3843
- governor.resetCooldown();
3844
- console.log('✅ Cooldown cleared.');
3845
- process.exit(0);
3846
- }
3847
- const state = governor.getState();
3848
- const config = governor.getConfig();
3849
- const check = governor.canApply();
3850
- if (options.json) {
3851
- await writeStdout(JSON.stringify({
3852
- state,
3853
- config,
3854
- canApply: check.allowed,
3855
- reason: check.reason,
3856
- waitMs: check.waitMs,
3857
- nextDelayMs: governor.getNextDelay(),
3858
- }, null, 2) + '\n');
3859
- process.exit(0);
3860
- }
3861
- console.log('\n⏱ Rate Governor Status\n');
3862
- console.log(` Today's applications: ${state.todayCount} / ${config.maxPerDay}`);
3863
- console.log(` Total applications: ${state.totalApplications}`);
3864
- console.log(` Can apply now: ${check.allowed ? '✅ Yes' : '❌ No'}`);
3865
- if (!check.allowed && check.reason) {
3866
- console.log(` Reason: ${check.reason}`);
3867
- }
3868
- if (!check.allowed && check.waitMs) {
3869
- console.log(` Wait time: ${formatDuration(check.waitMs)}`);
3870
- }
3871
- if (state.cooldownUntil > 0) {
3872
- const remaining = state.cooldownUntil - Date.now();
3873
- console.log(` Cooldown: Active (${formatDuration(Math.max(0, remaining))} remaining)`);
3874
- }
3875
- console.log(` Min delay: ${formatDuration(config.minDelayMs)}`);
3876
- console.log(` Max delay: ${formatDuration(config.maxDelayMs)}`);
3877
- console.log(` Active hours: ${config.activeHours[0]}:00 – ${config.activeHours[1]}:00`);
3878
- console.log(` Weekdays only: ${config.weekdaysOnly ? 'Yes' : 'No'}`);
3879
- console.log('');
3880
- process.exit(0);
3881
- }
3882
- catch (error) {
3883
- console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
3884
- process.exit(1);
3885
- }
3886
- });
3887
- // ============================================================
3888
- // Profile management commands
3889
- // ============================================================
3890
- const profileCmd = program
3891
- .command('profile')
3892
- .description('Manage named browser profiles (saved login sessions)');
3893
- profileCmd
3894
- .command('create <name>')
3895
- .description('Create a new profile interactively (launches browser, log in, press Ctrl+C when done)')
3896
- .option('--description <text>', 'Optional description for this profile')
3897
- .action(async (name, opts) => {
3898
- try {
3899
- await createProfile(name, opts.description);
3900
- process.exit(0);
3901
- }
3902
- catch (error) {
3903
- console.error(`Error: ${error instanceof Error ? error.message : String(error)}`);
3904
- process.exit(1);
3905
- }
3906
- });
3907
- profileCmd
3908
- .command('list')
3909
- .description('List all saved browser profiles')
3910
- .action(() => {
3911
- const profiles = listProfiles();
3912
- if (profiles.length === 0) {
3913
- console.log('No profiles found.');
3914
- console.log('');
3915
- console.log('Create one with:');
3916
- console.log(' webpeel profile create <name>');
3917
- console.log('');
3918
- console.log('Then use it with:');
3919
- console.log(' webpeel <url> --profile <name>');
3920
- process.exit(0);
3921
- }
3922
- console.log('');
3923
- console.log('Saved profiles:');
3924
- console.log('');
3925
- // Column widths
3926
- const nameW = Math.max(8, ...profiles.map((p) => p.name.length));
3927
- const domainsW = Math.max(10, ...profiles.map((p) => (p.domains.join(', ') || '(none)').length));
3928
- const header = 'Name'.padEnd(nameW) + ' ' +
3929
- 'Domains'.padEnd(domainsW) + ' ' +
3930
- 'Last Used'.padEnd(12) + ' ' +
3931
- 'Created';
3932
- console.log(header);
3933
- console.log('─'.repeat(header.length + 4));
3934
- for (const p of profiles) {
3935
- const domainsStr = p.domains.length > 0 ? p.domains.join(', ') : '(none)';
3936
- const lastUsed = formatRelativeTime(new Date(p.lastUsed));
3937
- const created = new Date(p.created).toISOString().split('T')[0];
3938
- console.log(p.name.padEnd(nameW) + ' ' +
3939
- domainsStr.padEnd(domainsW) + ' ' +
3940
- lastUsed.padEnd(12) + ' ' +
3941
- created);
3942
- }
3943
- console.log('');
3944
- process.exit(0);
3945
- });
3946
- profileCmd
3947
- .command('show <name>')
3948
- .description('Show details for a profile')
3949
- .action((name) => {
3950
- const profilePath = getProfilePath(name);
3951
- if (!profilePath) {
3952
- console.error(`Error: Profile "${name}" not found.`);
3953
- console.error('Run "webpeel profile list" to see available profiles.');
3954
- process.exit(1);
3955
- }
3956
- try {
3957
- const meta = JSON.parse(readFileSync(`${profilePath}/metadata.json`, 'utf-8'));
3958
- console.log('');
3959
- console.log(`Profile: ${meta.name}`);
3960
- if (meta.description)
3961
- console.log(`Description: ${meta.description}`);
3962
- console.log(`Created: ${new Date(meta.created).toLocaleString()}`);
3963
- console.log(`Last used: ${new Date(meta.lastUsed).toLocaleString()}`);
3964
- console.log(`Domains: ${meta.domains.length > 0 ? meta.domains.join(', ') : '(none)'}`);
3965
- console.log(`Directory: ${profilePath}`);
3966
- console.log('');
3967
- process.exit(0);
3968
- }
3969
- catch (e) {
3970
- console.error(`Error reading profile: ${e instanceof Error ? e.message : String(e)}`);
3971
- process.exit(1);
3972
- }
3973
- });
3974
- profileCmd
3975
- .command('delete <name>')
3976
- .description('Delete a saved profile')
3977
- .action((name) => {
3978
- const deleted = deleteProfile(name);
3979
- if (deleted) {
3980
- console.log(`Profile "${name}" deleted.`);
3981
- process.exit(0);
3982
- }
3983
- else {
3984
- console.error(`Error: Profile "${name}" not found.`);
3985
- console.error('Run "webpeel profile list" to see available profiles.');
3986
- process.exit(1);
3987
- }
3988
- });
3989
- // ── Hotels command ─────────────────────────────────────────────────────────────
3990
- program
3991
- .command('hotels <destination>')
3992
- .description('Search multiple travel sites for hotels (Kayak, Booking.com, Google Travel)')
3993
- .option('--checkin <date>', 'Check-in date (ISO or relative, e.g. "tomorrow", "2026-02-20"). Default: tomorrow')
3994
- .option('--checkout <date>', 'Check-out date (ISO or relative). Default: checkin + 1 day')
3995
- .option('--sort <method>', 'Sort by: price, rating, value (default: price)', 'price')
3996
- .option('--limit <n>', 'Max results (default: 20)', '20')
3997
- .option('--source <name...>', 'Only use specific source(s): kayak, booking, google (repeatable)')
3998
- .option('--json', 'Output as JSON')
3999
- .option('--stealth', 'Use stealth mode for all sources')
4000
- .option('--proxy <url>', 'Proxy URL for requests (http://host:port, socks5://user:pass@host:port)')
4001
- .option('-s, --silent', 'Suppress progress messages')
4002
- .action(async (destination, options) => {
4003
- const isJson = options.json;
4004
- const isSilent = options.silent;
4005
- // Build checkin/checkout
4006
- const { parseDate, addDays: hotelAddDays } = await import('./core/hotel-search.js');
4007
- let checkinStr;
4008
- let checkoutStr;
4009
- try {
4010
- checkinStr = parseDate(options.checkin ?? 'tomorrow');
4011
- checkoutStr = options.checkout
4012
- ? parseDate(options.checkout)
4013
- : hotelAddDays(checkinStr, 1);
4014
- }
4015
- catch (err) {
4016
- const msg = err instanceof Error ? err.message : String(err);
4017
- if (isJson) {
4018
- await writeStdout(JSON.stringify({ success: false, error: { type: 'invalid_request', message: msg } }) + '\n');
4019
- }
4020
- else {
4021
- console.error(`Error: ${msg}`);
4022
- }
4023
- process.exit(1);
4024
- }
4025
- const sortMethod = (['price', 'rating', 'value'].includes(options.sort)
4026
- ? options.sort
4027
- : 'price');
4028
- const limit = Math.max(1, parseInt(options.limit, 10) || 20);
4029
- const sources = options.source
4030
- ? (Array.isArray(options.source) ? options.source : [options.source])
4031
- : undefined;
4032
- // Spinner per-source progress (non-silent, non-JSON)
4033
- let searchSpinner = null;
4034
- if (!isSilent && !isJson) {
4035
- searchSpinner = ora(`Searching hotels in ${destination}...`).start();
4036
- }
4037
- else if (!isSilent && !isJson) {
4038
- console.error(`⏳ Searching kayak.com...`);
4039
- console.error(`⏳ Searching booking.com...`);
4040
- console.error(`⏳ Searching google.com...`);
4041
- }
4042
- try {
4043
- const { searchHotels } = await import('./core/hotel-search.js');
4044
- const result = await searchHotels({
4045
- destination,
4046
- checkin: checkinStr,
4047
- checkout: checkoutStr,
4048
- sort: sortMethod,
4049
- limit,
4050
- sources,
4051
- stealth: options.stealth,
4052
- silent: isSilent,
4053
- proxy: options.proxy,
4054
- });
4055
- if (searchSpinner)
4056
- searchSpinner.stop();
4057
- // Show per-source status
4058
- if (!isSilent && !isJson) {
4059
- for (const src of result.sources) {
4060
- if (src.status === 'ok') {
4061
- console.error(`✅ ${src.name}: ${src.count} hotels found`);
4062
- }
4063
- else {
4064
- console.error(`❌ ${src.name}: ${src.status}${src.error ? ' — ' + src.error : ''}`);
4065
- }
4066
- }
4067
- }
4068
- if (isJson) {
4069
- await writeStdout(JSON.stringify(result, null, 2) + '\n');
4070
- await cleanup();
4071
- process.exit(0);
4072
- }
4073
- // Human-readable table output
4074
- const { formatDate: fmtDate } = {
4075
- formatDate: (iso) => {
4076
- const d = new Date(iso + 'T12:00:00Z');
4077
- return d.toLocaleDateString('en-US', { month: 'short', day: 'numeric', year: 'numeric', timeZone: 'UTC' });
4078
- },
4079
- };
4080
- const ci = fmtDate(result.checkin);
4081
- const co = fmtDate(result.checkout);
4082
- console.log(`\n🏨 Hotels in ${result.destination}`);
4083
- console.log(` ${ci} → ${co} | Sorted by ${sortMethod}\n`);
4084
- if (result.results.length === 0) {
4085
- console.log(' No hotels found.\n');
4086
- }
4087
- else {
4088
- const colNum = 3;
4089
- const colName = 42;
4090
- const colPrice = 8;
4091
- const colRating = 8;
4092
- const colSource = 10;
4093
- const padEnd = (s, w) => s.length > w ? s.slice(0, w - 1) + '…' : s.padEnd(w);
4094
- const padStart = (s, w) => s.padStart(w);
4095
- console.log(` ${padStart('#', colNum)} ${padEnd('Hotel', colName)} ${padEnd('Price', colPrice)} ${padEnd('Rating', colRating)} ${padEnd('Source', colSource)}`);
4096
- result.results.forEach((hotel, i) => {
4097
- const priceStr = hotel.priceDisplay || '—';
4098
- const ratingStr = hotel.rating !== null ? String(hotel.rating) : '—';
4099
- console.log(` ${padStart(String(i + 1), colNum)} ${padEnd(hotel.name, colName)} ${padEnd(priceStr, colPrice)} ${padEnd(ratingStr, colRating)} ${padEnd(hotel.source, colSource)}`);
4100
- });
4101
- console.log('');
4102
- const sourceSummary = result.sources
4103
- .map(s => `${s.name} (${s.count} ${s.status === 'ok' ? '✅' : s.status === 'blocked' ? '🚫' : '❌'})`)
4104
- .join(' | ');
4105
- console.log(`Sources: ${sourceSummary}`);
4106
- }
4107
- console.log('');
4108
- await cleanup();
4109
- process.exit(0);
4110
- }
4111
- catch (error) {
4112
- if (searchSpinner)
4113
- searchSpinner.fail('Hotel search failed');
4114
- const msg = error instanceof Error ? error.message : 'Unknown error';
4115
- if (isJson) {
4116
- await writeStdout(JSON.stringify({ success: false, error: { type: 'fetch_failed', message: msg } }) + '\n');
4117
- }
4118
- else {
4119
- console.error(`\nError: ${msg}`);
4120
- }
4121
- await cleanup();
4122
- process.exit(1);
4123
- }
4124
- });
4125
- // ============================================================
4126
- // answer command — LLM-free web Q&A (search + fetch + BM25)
4127
- // ============================================================
4128
- program
4129
- .command('webask <question>')
4130
- .alias('ask-web')
4131
- .description('Search the web and get a direct answer (no LLM key required)')
4132
- .option('-n, --sources <n>', 'Number of sources to check (1-5, default 3)', '3')
4133
- .option('--json', 'Output as JSON')
4134
- .option('-s, --silent', 'Silent mode')
4135
- .action(async (question, options) => {
4136
- const isJson = !!options.json;
4137
- const isSilent = !!options.silent;
4138
- const numSources = Math.min(Math.max(parseInt(options.sources) || 3, 1), 5);
4139
- const askCfg = loadConfig();
4140
- const askApiKey = askCfg.apiKey || process.env.WEBPEEL_API_KEY;
4141
- const askApiUrl = process.env.WEBPEEL_API_URL || 'https://api.webpeel.dev';
4142
- if (!askApiKey) {
4143
- console.error('No API key configured. Run: webpeel auth <your-key>');
4144
- console.error('Get a free key at: https://app.webpeel.dev/keys');
4145
- process.exit(2);
4146
- }
4147
- let spinner = null;
4148
- if (!isSilent && !isJson) {
4149
- const { default: ora } = await import('ora');
4150
- spinner = ora(`Searching for: ${question}`).start();
4151
- }
4152
- try {
4153
- const params = new URLSearchParams({ q: question, sources: String(numSources) });
4154
- const res = await fetch(`${askApiUrl}/v1/ask?${params}`, {
4155
- headers: { Authorization: `Bearer ${askApiKey}` },
4156
- signal: AbortSignal.timeout(60000),
4157
- });
4158
- if (res.status === 401) {
4159
- if (spinner)
4160
- spinner.fail('API key invalid or expired. Run: webpeel auth <new-key>');
4161
- process.exit(2);
4162
- }
4163
- if (res.status === 404) {
4164
- if (spinner)
4165
- spinner.fail('Ask endpoint not available on this server version');
4166
- process.exit(1);
4167
- }
4168
- if (!res.ok) {
4169
- const body = await res.text().catch(() => '');
4170
- if (spinner)
4171
- spinner.fail(`API error ${res.status}: ${body.slice(0, 100)}`);
4172
- process.exit(1);
4173
- }
4174
- const data = await res.json();
4175
- if (spinner) {
4176
- if (data.answer) {
4177
- spinner.succeed(`Found answer (confidence: ${Math.round((data.confidence || 0) * 100)}%)`);
4178
- }
4179
- else {
4180
- spinner.warn('No confident answer found');
4181
- }
4182
- }
4183
- if (isJson) {
4184
- console.log(JSON.stringify(data, null, 2));
4185
- }
4186
- else {
4187
- if (data.answer) {
4188
- console.log('\n' + data.answer);
4189
- if (data.sources?.length && !isSilent) {
4190
- console.log('\nSources:');
4191
- data.sources.slice(0, 3).forEach((s) => console.log(` • ${s.title || s.url} — ${s.url}`));
4192
- }
4193
- }
4194
- else {
4195
- console.log('\nNo confident answer found for:', question);
4196
- }
4197
- if (data.elapsed && !isSilent)
4198
- console.log(`\n⚡ ${data.elapsed}ms`);
4199
- }
4200
- }
4201
- catch (err) {
4202
- if (spinner)
4203
- spinner.fail(err.message);
4204
- process.exit(1);
4205
- }
4206
- });
4207
- // ============================================================
4208
- // research command — autonomous multi-step web research
4209
- // ============================================================
4210
- program
4211
- .command('research <query>')
4212
- .description('Conduct autonomous multi-step web research on a topic and synthesize a report')
4213
- .option('--max-sources <n>', 'Maximum sources to consult (default: 5)', '5')
4214
- .option('--max-depth <n>', 'Link-following depth (default: 1)', '1')
4215
- .option('--format <f>', 'Output format: report (default) or sources', 'report')
4216
- .option('--llm-key <key>', 'LLM API key for synthesis (or env OPENAI_API_KEY)')
4217
- .option('--llm-model <model>', 'LLM model for synthesis (default: gpt-4o-mini)')
4218
- .option('--llm-base-url <url>', 'LLM API base URL (default: https://api.openai.com/v1)')
4219
- .option('--timeout <ms>', 'Max research time in ms (default: 40000)', '60000')
4220
- .option('--json', 'Output result as JSON')
4221
- .option('-s, --silent', 'Suppress progress output')
4222
- .action(async (query, options) => {
4223
- const isSilent = !!options.silent;
4224
- const isJson = !!options.json;
4225
- const maxSources = parseInt(options.maxSources) || 5;
4226
- const maxDepth = parseInt(options.maxDepth) || 1;
4227
- const timeout = parseInt(options.timeout) || 60000;
4228
- const outputFormat = options.format === 'sources' ? 'sources' : 'report';
4229
- const apiKey = options.llmKey || process.env.OPENAI_API_KEY;
4230
- const model = options.llmModel;
4231
- const baseUrl = options.llmBaseUrl;
4232
- const phaseIcons = {
4233
- searching: '🔍',
4234
- fetching: '📄',
4235
- extracting: '🧠',
4236
- following: '🔗',
4237
- synthesizing: '✍️',
4238
- };
4239
- try {
4240
- const { research } = await import('./core/research.js');
4241
- const result = await research({
4242
- query,
4243
- maxSources,
4244
- maxDepth,
4245
- timeout,
4246
- outputFormat: outputFormat,
4247
- apiKey,
4248
- model,
4249
- baseUrl,
4250
- onProgress: (step) => {
4251
- if (!isSilent && !isJson) {
4252
- const icon = phaseIcons[step.phase] ?? '⚙️';
4253
- const extra = step.sourcesFound !== undefined
4254
- ? ` (found ${step.sourcesFound})`
4255
- : step.sourcesFetched !== undefined
4256
- ? ` (${step.sourcesFetched} fetched)`
4257
- : '';
4258
- process.stderr.write(`${icon} ${step.message}${extra}...\n`);
4259
- }
4260
- },
4261
- });
4262
- if (isJson) {
4263
- await writeStdout(JSON.stringify(result, null, 2) + '\n');
4264
- }
4265
- else {
4266
- await writeStdout(result.report + '\n');
4267
- if (!isSilent) {
4268
- const elapsed = (result.elapsed / 1000).toFixed(1);
4269
- const cost = result.cost !== undefined ? ` | cost: $${result.cost.toFixed(4)}` : '';
4270
- process.stderr.write(`\n📊 ${result.sourcesConsulted} sources consulted (${result.totalSourcesFound} found) | ${elapsed}s${cost}\n`);
4271
- }
4272
- }
4273
- await cleanup();
4274
- process.exit(0);
4275
- }
4276
- catch (error) {
4277
- const msg = error instanceof Error ? error.message : 'Unknown error';
4278
- if (isJson) {
4279
- await writeStdout(JSON.stringify({ success: false, error: { type: 'fetch_failed', message: msg } }) + '\n');
4280
- }
4281
- else {
4282
- console.error(`\nError: ${msg}`);
4283
- }
4284
- await cleanup();
4285
- process.exit(1);
4286
- }
4287
- });
4288
- // Schema templates listing command
4289
- program
4290
- .command('schemas')
4291
- .description('List available extraction schema templates')
4292
- .action(() => {
4293
- console.log('\nAvailable schema templates:\n');
4294
- for (const [key, template] of Object.entries(SCHEMA_TEMPLATES)) {
4295
- console.log(` ${key.padEnd(12)} ${template.description}`);
4296
- console.log(` ${''.padEnd(12)} Fields: ${Object.keys(template.fields).join(', ')}`);
4297
- console.log('');
4298
- }
4299
- console.log('Usage: webpeel "https://example.com" --schema product');
4300
- console.log(' webpeel "https://example.com" --schema \'{"field":"description"}\'');
4301
- });
4302
- // ── design-compare command ─────────────────────────────────────────────────────
4303
- //
4304
- // webpeel design-compare "https://subject.com" --ref "https://reference.com"
4305
- program
4306
- .command('design-compare <url>')
4307
- .description('Compare the design of a subject URL against a reference URL')
4308
- .option('--ref <url>', 'Reference URL to compare against (required)')
4309
- .option('--width <px>', 'Viewport width in pixels (default: 1440)', parseInt)
4310
- .option('--height <px>', 'Viewport height in pixels (default: 900)', parseInt)
4311
- .option('-o, --output <path>', 'Save comparison report to a JSON file')
4312
- .option('-s, --silent', 'Silent mode (no spinner)')
4313
- .option('--json', 'Output comparison as JSON to stdout')
4314
- .action(async (url, options) => {
4315
- // Validate subject URL
4316
- try {
4317
- const parsed = new URL(url);
4318
- if (!['http:', 'https:'].includes(parsed.protocol)) {
4319
- console.error('Error: Only HTTP and HTTPS protocols are allowed');
4320
- process.exit(1);
4321
- }
4322
- }
4323
- catch {
4324
- console.error(`Error: Invalid URL format: ${url}`);
4325
- process.exit(1);
4326
- }
4327
- // Validate --ref
4328
- if (!options.ref) {
4329
- console.error('Error: --ref <url> is required');
4330
- process.exit(1);
4331
- }
4332
- try {
4333
- const parsedRef = new URL(options.ref);
4334
- if (!['http:', 'https:'].includes(parsedRef.protocol)) {
4335
- console.error('Error: --ref must be an HTTP or HTTPS URL');
4336
- process.exit(1);
4337
- }
4338
- }
4339
- catch {
4340
- console.error(`Error: Invalid --ref URL format: ${options.ref}`);
4341
- process.exit(1);
4342
- }
4343
- const ora = (await import('ora')).default;
4344
- const spinner = options.silent ? null : ora(`Comparing designs: ${url} vs ${options.ref}...`).start();
4345
- try {
4346
- const { takeDesignComparison } = await import('./core/screenshot.js');
4347
- const result = await takeDesignComparison(url, options.ref, {
4348
- width: options.width,
4349
- height: options.height,
4350
- });
4351
- if (spinner)
4352
- spinner.succeed('Design comparison complete');
4353
- const { comparison } = result;
4354
- const output = {
4355
- subjectUrl: result.subjectUrl,
4356
- referenceUrl: result.referenceUrl,
4357
- score: comparison.score,
4358
- summary: comparison.summary,
4359
- gaps: comparison.gaps,
4360
- subjectAnalysis: comparison.subjectAnalysis,
4361
- referenceAnalysis: comparison.referenceAnalysis,
4362
- };
4363
- if (options.output) {
4364
- const { writeFileSync } = await import('fs');
4365
- writeFileSync(options.output, JSON.stringify(output, null, 2));
4366
- if (!options.silent)
4367
- console.error(`Report saved to: ${options.output}`);
4368
- }
4369
- if (options.json || !options.output) {
4370
- const jsonStr = JSON.stringify(output, null, 2);
4371
- await new Promise((resolve, reject) => {
4372
- process.stdout.write(jsonStr + '\n', (err) => {
4373
- if (err)
4374
- reject(err);
4375
- else
4376
- resolve();
4377
- });
4378
- });
4379
- }
4380
- else if (!options.silent) {
4381
- // Human-readable summary
4382
- console.log(`\n🎨 Design Comparison`);
4383
- console.log(`Subject: ${result.subjectUrl}`);
4384
- console.log(`Reference: ${result.referenceUrl}`);
4385
- console.log(`Score: ${comparison.score}/10`);
4386
- console.log(`\n${comparison.summary}`);
4387
- if (comparison.gaps.length > 0) {
4388
- console.log(`\nGaps (${comparison.gaps.length}):`);
4389
- for (const gap of comparison.gaps) {
4390
- const sev = gap.severity === 'high' ? '🔴' : gap.severity === 'medium' ? '🟡' : '🟢';
4391
- console.log(` ${sev} ${gap.property}: ${gap.description}`);
4392
- console.log(` Subject: ${gap.subject}`);
4393
- console.log(` Reference: ${gap.reference}`);
4394
- console.log(` Suggestion: ${gap.suggestion}`);
4395
- }
4396
- }
4397
- }
4398
- }
4399
- catch (error) {
4400
- if (spinner)
4401
- spinner.fail('Design comparison failed');
4402
- console.error(`Error: ${error.message}`);
4403
- process.exit(1);
4404
- }
4405
- });
56
+ // ── Update check (non-blocking, background) ───────────────────────────────────
57
+ void checkForUpdates();
58
+ // ── Register all command groups ───────────────────────────────────────────────
59
+ registerFetchCommands(program);
60
+ registerSearchCommands(program);
61
+ registerInteractCommands(program);
62
+ registerAuthCommands(program);
63
+ registerScreenshotCommands(program);
64
+ registerJobsCommands(program);
65
+ // ── Parse ─────────────────────────────────────────────────────────────────────
4406
66
  program.parse();
4407
- // ============================================================
4408
- // Time formatting helper
4409
- // ============================================================
4410
- /**
4411
- * Format a past Date relative to now (e.g. "2h ago", "5m ago").
4412
- */
4413
- function formatRelativeTime(past) {
4414
- const diffMs = Date.now() - past.getTime();
4415
- const diffSec = Math.round(diffMs / 1000);
4416
- if (diffSec < 60)
4417
- return `${diffSec}s ago`;
4418
- const diffMin = Math.round(diffSec / 60);
4419
- if (diffMin < 60)
4420
- return `${diffMin}m ago`;
4421
- const diffHr = Math.round(diffMin / 60);
4422
- if (diffHr < 24)
4423
- return `${diffHr}h ago`;
4424
- const diffDay = Math.round(diffHr / 24);
4425
- return `${diffDay}d ago`;
4426
- }
4427
- // ============================================================
4428
- // Error classification for JSON error output (#6)
4429
- // ============================================================
4430
- function classifyErrorCode(error) {
4431
- if (!(error instanceof Error))
4432
- return 'FETCH_FAILED';
4433
- // Check for our custom _code first (set in pre-fetch validation)
4434
- if (error._code)
4435
- return error._code;
4436
- const msg = error.message.toLowerCase();
4437
- const name = error.name || '';
4438
- if (name === 'TimeoutError' || msg.includes('timeout') || msg.includes('timed out')) {
4439
- return 'TIMEOUT';
4440
- }
4441
- if (name === 'BlockedError' || msg.includes('blocked') || msg.includes('403') || msg.includes('cloudflare')) {
4442
- return 'BLOCKED';
4443
- }
4444
- if (msg.includes('enotfound') || msg.includes('getaddrinfo') || msg.includes('dns resolution failed') || msg.includes('not found')) {
4445
- return 'DNS_FAILED';
4446
- }
4447
- if (msg.includes('invalid url') || msg.includes('invalid hostname') || msg.includes('only http')) {
4448
- return 'INVALID_URL';
4449
- }
4450
- return 'FETCH_FAILED';
4451
- }
4452
- /**
4453
- * Build a unified PeelEnvelope from a PeelResult.
4454
- *
4455
- * All existing PeelResult fields are spread first (backward compatibility),
4456
- * then canonical envelope fields override/extend them.
4457
- */
4458
- function buildEnvelope(result, extra) {
4459
- const envelope = {
4460
- // Spread all PeelResult fields for backward compatibility
4461
- ...result,
4462
- // Required envelope fields (override PeelResult where they overlap)
4463
- url: result.url,
4464
- status: 200,
4465
- content: result.content,
4466
- metadata: {
4467
- title: result.title,
4468
- ...result.metadata,
4469
- },
4470
- tokens: result.tokens,
4471
- cached: extra.cached ?? false,
4472
- elapsed: result.elapsed,
4473
- };
4474
- // Optional envelope fields — only include when meaningful
4475
- if (extra.structured !== undefined)
4476
- envelope.structured = extra.structured;
4477
- if (extra.truncated)
4478
- envelope.truncated = true;
4479
- if (extra.totalAvailable !== undefined)
4480
- envelope.totalAvailable = extra.totalAvailable;
4481
- return envelope;
4482
- }
4483
- // ============================================================
4484
- // Shared output helper
4485
- // ============================================================
4486
- async function outputResult(result, options, extra = {}) {
4487
- // --links: output only links
4488
- if (options.links) {
4489
- if (options.json) {
4490
- const jsonStr = JSON.stringify(result.links, null, 2);
4491
- await writeStdout(jsonStr + '\n');
4492
- }
4493
- else {
4494
- for (const link of result.links) {
4495
- await writeStdout(link + '\n');
4496
- }
4497
- }
4498
- return;
4499
- }
4500
- // --images: output only image URLs
4501
- if (options.images) {
4502
- // Extract image URLs from links that point to images
4503
- const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg', '.bmp', '.ico'];
4504
- const imageUrls = result.links.filter(link => {
4505
- const urlLower = link.toLowerCase();
4506
- return imageExtensions.some(ext => urlLower.includes(ext));
4507
- });
4508
- if (options.json) {
4509
- const jsonStr = JSON.stringify(imageUrls, null, 2);
4510
- await writeStdout(jsonStr + '\n');
4511
- }
4512
- else {
4513
- for (const imageUrl of imageUrls) {
4514
- await writeStdout(imageUrl + '\n');
4515
- }
4516
- }
4517
- return;
4518
- }
4519
- // --meta: output only metadata
4520
- if (options.meta) {
4521
- const meta = {
4522
- url: result.url,
4523
- title: result.title,
4524
- method: result.method,
4525
- elapsed: result.elapsed,
4526
- tokens: result.tokens,
4527
- cached: extra.cached ?? false,
4528
- ...result.metadata,
4529
- };
4530
- if (options.json) {
4531
- await writeStdout(JSON.stringify(meta, null, 2) + '\n');
4532
- }
4533
- else {
4534
- console.log(`Title: ${meta.title || '(none)'}`);
4535
- console.log(`URL: ${meta.url}`);
4536
- if (meta.description)
4537
- console.log(`Description: ${meta.description}`);
4538
- if (meta.author)
4539
- console.log(`Author: ${meta.author}`);
4540
- if (meta.published)
4541
- console.log(`Published: ${meta.published}`);
4542
- if (meta.canonical)
4543
- console.log(`Canonical: ${meta.canonical}`);
4544
- if (meta.image)
4545
- console.log(`OG Image: ${meta.image}`);
4546
- console.log(`Method: ${meta.method}`);
4547
- console.log(`Elapsed: ${meta.elapsed}ms`);
4548
- console.log(`Tokens: ${meta.tokens}`);
4549
- console.log(`Cached: ${meta.cached}`);
4550
- }
4551
- return;
4552
- }
4553
- // Default: full output
4554
- if (options.json) {
4555
- // Build clean JSON output with guaranteed top-level fields
4556
- const output = {
4557
- url: result.url,
4558
- title: result.metadata?.title || result.title || null,
4559
- tokens: result.tokens || 0,
4560
- fetchedAt: new Date().toISOString(),
4561
- method: result.method || 'simple',
4562
- elapsed: result.elapsed,
4563
- content: result.content,
4564
- };
4565
- // Add optional fields only if present (filter out undefined/null values from metadata)
4566
- if (result.metadata) {
4567
- const cleanMeta = {};
4568
- for (const [k, v] of Object.entries(result.metadata)) {
4569
- if (v !== undefined && v !== null)
4570
- cleanMeta[k] = v;
4571
- }
4572
- if (Object.keys(cleanMeta).length > 0)
4573
- output.metadata = cleanMeta;
4574
- }
4575
- if (result.links?.length)
4576
- output.links = result.links;
4577
- if (result.images?.length)
4578
- output.images = result.images;
4579
- if (result.structured)
4580
- output.structured = result.structured;
4581
- if (result.domainData)
4582
- output.domainData = result.domainData;
4583
- if (result.readability)
4584
- output.readability = result.readability;
4585
- if (result.quickAnswer)
4586
- output.quickAnswer = result.quickAnswer;
4587
- if (result.quality)
4588
- output.quality = result.quality;
4589
- if (result.contentType)
4590
- output.contentType = result.contentType;
4591
- if (result.chunks)
4592
- output.chunks = result.chunks;
4593
- if (result.totalChunks)
4594
- output.totalChunks = result.totalChunks;
4595
- if (result.warning)
4596
- output.warning = result.warning;
4597
- if (result.focusQuery)
4598
- output.focusQuery = result.focusQuery;
4599
- if (result.focusReduction)
4600
- output.focusReduction = result.focusReduction;
4601
- if (result.extracted)
4602
- output.extracted = result.extracted;
4603
- if (extra.cached)
4604
- output.cached = true;
4605
- if (extra.truncated)
4606
- output.truncated = true;
4607
- if (extra.totalAvailable !== undefined)
4608
- output.totalAvailable = extra.totalAvailable;
4609
- output._meta = { version: cliVersion, method: result.method || 'simple', timing: result.timing, serverMarkdown: result.serverMarkdown || false };
4610
- await writeStdout(JSON.stringify(output, null, 2) + '\n');
4611
- }
4612
- else {
4613
- // Smart terminal header (interactive mode only)
4614
- const isTerminalOutput = process.stdout.isTTY && !options.silent;
4615
- if (isTerminalOutput) {
4616
- const meta = result.metadata || {};
4617
- const parts = [];
4618
- if (meta.title || result.title)
4619
- parts.push(`\x1b[1m${meta.title || result.title}\x1b[0m`);
4620
- if (meta.author)
4621
- parts.push(`By ${meta.author}`);
4622
- if (meta.wordCount)
4623
- parts.push(`${meta.wordCount} words`);
4624
- const totalMs = result.timing?.total ?? result.elapsed;
4625
- if (totalMs)
4626
- parts.push(`${totalMs}ms`);
4627
- if (parts.length > 0) {
4628
- await writeStdout(`\n ${parts.join(' · ')}\n`);
4629
- await writeStdout(' ' + '─'.repeat(60) + '\n\n');
4630
- }
4631
- }
4632
- // Stream content immediately to stdout — consumer gets it without waiting
4633
- await writeStdout(result.content + '\n');
4634
- // Append timing summary to stderr so it doesn't pollute piped content
4635
- if (!options.silent) {
4636
- const totalMs = result.timing?.total ?? result.elapsed;
4637
- process.stderr.write(`\n--- ${result.tokens} tokens · ${totalMs}ms ---\n`);
4638
- }
4639
- }
4640
- }
4641
- function writeStdout(data) {
4642
- return new Promise((resolve, reject) => {
4643
- process.stdout.write(data, (err) => {
4644
- if (err)
4645
- reject(err);
4646
- else
4647
- resolve();
4648
- });
4649
- });
4650
- }
4651
- /**
4652
- * Convert an array of listing items to CSV.
4653
- */
4654
- function formatListingsCsv(items) {
4655
- if (items.length === 0)
4656
- return '';
4657
- // Collect all keys
4658
- const keySet = new Set();
4659
- for (const item of items) {
4660
- for (const key of Object.keys(item)) {
4661
- if (item[key] !== undefined)
4662
- keySet.add(key);
4663
- }
4664
- }
4665
- const keys = Array.from(keySet);
4666
- const escapeCsv = (s) => {
4667
- if (s === undefined || s === null)
4668
- return '""';
4669
- const str = String(s);
4670
- if (str.includes('"') || str.includes(',') || str.includes('\n') || str.includes('\r')) {
4671
- return '"' + str.replace(/"/g, '""') + '"';
4672
- }
4673
- return '"' + str + '"';
4674
- };
4675
- const lines = [keys.join(',')];
4676
- for (const item of items) {
4677
- lines.push(keys.map(k => escapeCsv(item[k])).join(','));
4678
- }
4679
- return lines.join('\n') + '\n';
4680
- }
4681
- /**
4682
- * Normalise the result of --extract (which may be a flat object or contain
4683
- * arrays) into an array of row objects suitable for CSV / table rendering.
4684
- */
4685
- function normaliseExtractedToRows(extracted) {
4686
- // If every value is an array of the same length, zip them into rows
4687
- const values = Object.values(extracted);
4688
- const allArrays = values.length > 0 && values.every(v => Array.isArray(v));
4689
- if (allArrays) {
4690
- const length = values[0].length;
4691
- const rows = [];
4692
- for (let i = 0; i < length; i++) {
4693
- const row = {};
4694
- for (const key of Object.keys(extracted)) {
4695
- const val = extracted[key][i];
4696
- row[key] = val != null ? String(val) : undefined;
4697
- }
4698
- rows.push(row);
4699
- }
4700
- return rows;
4701
- }
4702
- // Otherwise treat as a single row
4703
- const row = {};
4704
- for (const [k, v] of Object.entries(extracted)) {
4705
- row[k] = v != null ? String(v) : undefined;
4706
- }
4707
- return [row];
4708
- }
4709
- // Helper function to extract colors from content
4710
- function extractColors(content) {
4711
- const colors = [];
4712
- const hexRegex = /#[0-9A-Fa-f]{6}|#[0-9A-Fa-f]{3}/g;
4713
- const matches = content.match(hexRegex);
4714
- if (matches) {
4715
- colors.push(...[...new Set(matches)].slice(0, 10));
4716
- }
4717
- return colors;
4718
- }
4719
- // Helper function to extract font information
4720
- function extractFonts(content) {
4721
- const fonts = [];
4722
- const fontRegex = /font-family:\s*([^;}"'\n]+)/gi;
4723
- let match;
4724
- while ((match = fontRegex.exec(content)) !== null) {
4725
- fonts.push(match[1].trim());
4726
- }
4727
- return [...new Set(fonts)].slice(0, 5);
4728
- }
4729
- //# sourceMappingURL=cli.js.map