webpeel 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72):
  1. package/README.md +140 -500
  2. package/dist/cli-auth.d.ts +2 -0
  3. package/dist/cli-auth.d.ts.map +1 -1
  4. package/dist/cli-auth.js +16 -3
  5. package/dist/cli-auth.js.map +1 -1
  6. package/dist/cli.js +475 -77
  7. package/dist/cli.js.map +1 -1
  8. package/dist/core/actions.d.ts +19 -10
  9. package/dist/core/actions.d.ts.map +1 -1
  10. package/dist/core/actions.js +214 -43
  11. package/dist/core/actions.js.map +1 -1
  12. package/dist/core/agent.d.ts +60 -3
  13. package/dist/core/agent.d.ts.map +1 -1
  14. package/dist/core/agent.js +375 -86
  15. package/dist/core/agent.js.map +1 -1
  16. package/dist/core/answer.d.ts +43 -0
  17. package/dist/core/answer.d.ts.map +1 -0
  18. package/dist/core/answer.js +378 -0
  19. package/dist/core/answer.js.map +1 -0
  20. package/dist/core/cache.d.ts +14 -0
  21. package/dist/core/cache.d.ts.map +1 -0
  22. package/dist/core/cache.js +122 -0
  23. package/dist/core/cache.js.map +1 -0
  24. package/dist/core/dns-cache.d.ts +21 -0
  25. package/dist/core/dns-cache.d.ts.map +1 -0
  26. package/dist/core/dns-cache.js +184 -0
  27. package/dist/core/dns-cache.js.map +1 -0
  28. package/dist/core/documents.d.ts +24 -0
  29. package/dist/core/documents.d.ts.map +1 -0
  30. package/dist/core/documents.js +124 -0
  31. package/dist/core/documents.js.map +1 -0
  32. package/dist/core/extract-inline.d.ts +39 -0
  33. package/dist/core/extract-inline.d.ts.map +1 -0
  34. package/dist/core/extract-inline.js +214 -0
  35. package/dist/core/extract-inline.js.map +1 -0
  36. package/dist/core/fetcher.d.ts +33 -7
  37. package/dist/core/fetcher.d.ts.map +1 -1
  38. package/dist/core/fetcher.js +608 -41
  39. package/dist/core/fetcher.js.map +1 -1
  40. package/dist/core/jobs.d.ts +66 -0
  41. package/dist/core/jobs.d.ts.map +1 -0
  42. package/dist/core/jobs.js +513 -0
  43. package/dist/core/jobs.js.map +1 -0
  44. package/dist/core/markdown.d.ts.map +1 -1
  45. package/dist/core/markdown.js +141 -31
  46. package/dist/core/markdown.js.map +1 -1
  47. package/dist/core/pdf.d.ts.map +1 -1
  48. package/dist/core/pdf.js +3 -1
  49. package/dist/core/pdf.js.map +1 -1
  50. package/dist/core/screenshot.d.ts +33 -0
  51. package/dist/core/screenshot.d.ts.map +1 -0
  52. package/dist/core/screenshot.js +30 -0
  53. package/dist/core/screenshot.js.map +1 -0
  54. package/dist/core/search-provider.d.ts +46 -0
  55. package/dist/core/search-provider.d.ts.map +1 -0
  56. package/dist/core/search-provider.js +281 -0
  57. package/dist/core/search-provider.js.map +1 -0
  58. package/dist/core/strategies.d.ts +7 -10
  59. package/dist/core/strategies.d.ts.map +1 -1
  60. package/dist/core/strategies.js +370 -63
  61. package/dist/core/strategies.js.map +1 -1
  62. package/dist/index.d.ts +9 -3
  63. package/dist/index.d.ts.map +1 -1
  64. package/dist/index.js +61 -32
  65. package/dist/index.js.map +1 -1
  66. package/dist/mcp/server.js +335 -70
  67. package/dist/mcp/server.js.map +1 -1
  68. package/dist/types.d.ts +43 -1
  69. package/dist/types.d.ts.map +1 -1
  70. package/dist/types.js.map +1 -1
  71. package/llms.txt +85 -47
  72. package/package.json +11 -5
package/dist/cli.js CHANGED
@@ -16,7 +16,7 @@ import { Command } from 'commander';
16
16
  import ora from 'ora';
17
17
  import { writeFileSync, readFileSync } from 'fs';
18
18
  import { peel, peelBatch, cleanup } from './index.js';
19
- import { checkUsage, showUsageFooter, handleLogin, handleLogout, handleUsage, loadConfig } from './cli-auth.js';
19
+ import { checkUsage, showUsageFooter, handleLogin, handleLogout, handleUsage, loadConfig, saveConfig } from './cli-auth.js';
20
20
  import { getCache, setCache, parseTTL, clearCache, cacheStats } from './cache.js';
21
21
  const program = new Command();
22
22
  // Read version from package.json dynamically
@@ -35,9 +35,39 @@ program
35
35
  .description('Fast web fetcher for AI agents')
36
36
  .version(cliVersion)
37
37
  .enablePositionalOptions();
38
+ // Check for updates (non-blocking, runs in background)
39
+ async function checkForUpdates() {
40
+ try {
41
+ const res = await fetch('https://registry.npmjs.org/webpeel/latest', {
42
+ signal: AbortSignal.timeout(2000),
43
+ });
44
+ if (!res.ok)
45
+ return;
46
+ const data = await res.json();
47
+ const latest = data.version;
48
+ if (latest && latest !== cliVersion && cliVersion !== '0.0.0') {
49
+ console.error(`\n💡 WebPeel v${latest} available (you have v${cliVersion}). Update: npm i -g webpeel@latest\n`);
50
+ }
51
+ }
52
+ catch { /* silently ignore — don't slow down the user */ }
53
+ }
54
+ // Fire and forget — don't await, don't block
55
+ void checkForUpdates();
38
56
  /**
39
57
  * Parse action strings into PageAction array
40
- * Format: "type:value" where type is wait|click|scroll|type|fill|press|hover|waitFor
58
+ * Formats:
59
+ * click:.selector — click an element
60
+ * type:.selector=text — type text into an input
61
+ * fill:.selector=text — fill an input (replaces existing value)
62
+ * scroll:down:500 — scroll direction + amount
63
+ * scroll:bottom — scroll to bottom (legacy)
64
+ * scroll:top — scroll to top (legacy)
65
+ * wait:2000 — wait N ms
66
+ * press:Enter — press a keyboard key
67
+ * hover:.selector — hover over an element
68
+ * waitFor:.selector — wait for a selector to appear
69
+ * select:.selector=value — select dropdown option
70
+ * screenshot — take a screenshot
41
71
  */
42
72
  function parseActions(actionStrings) {
43
73
  return actionStrings.map(str => {
@@ -48,8 +78,25 @@ function parseActions(actionStrings) {
48
78
  return { type: 'wait', ms: parseInt(value) || 1000 };
49
79
  case 'click':
50
80
  return { type: 'click', selector: value };
51
- case 'scroll':
52
- return { type: 'scroll', to: value === 'top' ? 'top' : value === 'bottom' ? 'bottom' : parseInt(value) };
81
+ case 'scroll': {
82
+ // scroll:down:500 or scroll:bottom or scroll:500
83
+ const parts = value.split(':');
84
+ const dir = parts[0];
85
+ if (dir === 'top' || dir === 'bottom') {
86
+ return { type: 'scroll', to: dir };
87
+ }
88
+ if (dir === 'down' || dir === 'up' || dir === 'left' || dir === 'right') {
89
+ const amount = parseInt(parts[1] || '500', 10);
90
+ return { type: 'scroll', direction: dir, amount };
91
+ }
92
+ // Bare number: absolute position
93
+ const num = parseInt(dir, 10);
94
+ if (!isNaN(num)) {
95
+ return { type: 'scroll', to: num };
96
+ }
97
+ // Default: scroll to bottom
98
+ return { type: 'scroll', to: 'bottom' };
99
+ }
53
100
  case 'type': {
54
101
  const [sel, ...text] = value.split('=');
55
102
  return { type: 'type', selector: sel, value: text.join('=') };
@@ -58,12 +105,18 @@ function parseActions(actionStrings) {
58
105
  const [sel, ...text] = value.split('=');
59
106
  return { type: 'fill', selector: sel, value: text.join('=') };
60
107
  }
108
+ case 'select': {
109
+ const [sel, ...vals] = value.split('=');
110
+ return { type: 'select', selector: sel, value: vals.join('=') };
111
+ }
61
112
  case 'press':
62
113
  return { type: 'press', key: value };
63
114
  case 'hover':
64
115
  return { type: 'hover', selector: value };
65
116
  case 'waitFor':
66
117
  return { type: 'waitForSelector', selector: value };
118
+ case 'screenshot':
119
+ return { type: 'screenshot' };
67
120
  default:
68
121
  throw new Error(`Unknown action type: ${type}`);
69
122
  }
@@ -334,6 +387,26 @@ program
334
387
  }
335
388
  if (error instanceof Error) {
336
389
  console.error(`\nError: ${error.message}`);
390
+ // Provide actionable hints based on error type
391
+ const msg = error.message.toLowerCase();
392
+ if (msg.includes('timeout') || msg.includes('timed out')) {
393
+ console.error('\n💡 Hint: Try --render for JS-heavy sites, or --wait 5000 to wait longer.');
394
+ }
395
+ else if (msg.includes('blocked') || msg.includes('403') || msg.includes('cloudflare')) {
396
+ console.error('\n💡 Hint: Try --stealth to bypass bot detection (uses more credits).');
397
+ }
398
+ else if (msg.includes('enotfound') || msg.includes('getaddrinfo')) {
399
+ console.error('\n💡 Hint: Could not resolve hostname. Check the URL is correct.');
400
+ }
401
+ else if (msg.includes('econnrefused') || msg.includes('econnreset')) {
402
+ console.error('\n💡 Hint: Connection refused. The site may be down or blocking requests.');
403
+ }
404
+ else if (msg.includes('certificate') || msg.includes('ssl') || msg.includes('tls')) {
405
+ console.error('\n💡 Hint: SSL/TLS error. The site may have an invalid certificate.');
406
+ }
407
+ else if (msg.includes('usage') || msg.includes('quota') || msg.includes('limit')) {
408
+ console.error('\n💡 Hint: Run `webpeel usage` to check your quota, or `webpeel login` to authenticate.');
409
+ }
337
410
  }
338
411
  else {
339
412
  console.error('\nError: Unknown error occurred');
@@ -345,8 +418,10 @@ program
345
418
  // Search command
346
419
  program
347
420
  .command('search <query>')
348
- .description('Search using DuckDuckGo')
421
+ .description('Search the web (DuckDuckGo by default, or Brave with --provider brave)')
349
422
  .option('-n, --count <n>', 'Number of results (1-10)', '5')
423
+ .option('--provider <provider>', 'Search provider: duckduckgo (default) or brave')
424
+ .option('--search-api-key <key>', 'API key for the search provider (or env WEBPEEL_BRAVE_API_KEY)')
350
425
  .option('--json', 'Output as JSON')
351
426
  .option('-s, --silent', 'Silent mode')
352
427
  .action(async (query, options) => {
@@ -361,61 +436,21 @@ program
361
436
  }
362
437
  const spinner = isSilent ? null : ora('Searching...').start();
363
438
  try {
364
- // Import the search function dynamically
365
- const { fetch: undiciFetch } = await import('undici');
366
- const { load } = await import('cheerio');
367
- const searchUrl = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`;
368
- const response = await undiciFetch(searchUrl, {
369
- headers: {
370
- 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
371
- },
372
- });
373
- if (!response.ok) {
374
- throw new Error(`Search failed: HTTP ${response.status}`);
375
- }
376
- const html = await response.text();
377
- const $ = load(html);
378
- const results = [];
379
- $('.result').each((_i, elem) => {
380
- if (results.length >= count)
381
- return;
382
- const $result = $(elem);
383
- const title = $result.find('.result__title').text().trim();
384
- const rawUrl = $result.find('.result__a').attr('href') || '';
385
- const snippet = $result.find('.result__snippet').text().trim();
386
- if (!title || !rawUrl)
387
- return;
388
- // Extract actual URL from DuckDuckGo redirect
389
- let url = rawUrl;
390
- try {
391
- const ddgUrl = new URL(rawUrl, 'https://duckduckgo.com');
392
- const uddg = ddgUrl.searchParams.get('uddg');
393
- if (uddg) {
394
- url = decodeURIComponent(uddg);
395
- }
396
- }
397
- catch {
398
- // Use raw URL if parsing fails
399
- }
400
- // Validate final URL
401
- try {
402
- const parsed = new URL(url);
403
- if (!['http:', 'https:'].includes(parsed.protocol)) {
404
- return;
405
- }
406
- url = parsed.href;
407
- }
408
- catch {
409
- return;
410
- }
411
- results.push({
412
- title: title.slice(0, 200),
413
- url,
414
- snippet: snippet.slice(0, 500)
415
- });
439
+ const { getSearchProvider } = await import('./core/search-provider.js');
440
+ // Resolve provider
441
+ const providerId = (options.provider || 'duckduckgo');
442
+ const config = loadConfig();
443
+ const apiKey = options.searchApiKey
444
+ || process.env.WEBPEEL_BRAVE_API_KEY
445
+ || config.braveApiKey
446
+ || undefined;
447
+ const provider = getSearchProvider(providerId);
448
+ const results = await provider.searchWeb(query, {
449
+ count: Math.min(Math.max(count, 1), 10),
450
+ apiKey,
416
451
  });
417
452
  if (spinner) {
418
- spinner.succeed(`Found ${results.length} results`);
453
+ spinner.succeed(`Found ${results.length} results (${providerId})`);
419
454
  }
420
455
  // Show usage footer for free/anonymous users
421
456
  if (usageCheck.usageInfo && !isSilent) {
@@ -447,6 +482,14 @@ program
447
482
  }
448
483
  if (error instanceof Error) {
449
484
  console.error(`\nError: ${error.message}`);
485
+ const msg = error.message.toLowerCase();
486
+ if (msg.includes('brave') && msg.includes('api key')) {
487
+ console.error('\n💡 Hint: Set your Brave API key: webpeel config set braveApiKey YOUR_KEY');
488
+ console.error(' Or use free DuckDuckGo search (default, no key needed).');
489
+ }
490
+ else if (msg.includes('timeout') || msg.includes('timed out')) {
491
+ console.error('\n💡 Hint: Search timed out. Try a more specific query or try again.');
492
+ }
450
493
  }
451
494
  else {
452
495
  console.error('\nError: Unknown error occurred');
@@ -787,21 +830,34 @@ program
787
830
  .action(async () => {
788
831
  await import('./mcp/server.js');
789
832
  });
790
- // Config command
833
+ // Config command — webpeel config [get|set] [key] [value]
791
834
  program
792
835
  .command('config')
793
836
  .description('View or update CLI configuration')
794
- .argument('[key]', 'Config key to get or set')
837
+ .argument('[action]', '"get <key>", "set <key> <value>", or omit for overview')
838
+ .argument('[key]', 'Config key')
795
839
  .argument('[value]', 'Value to set')
796
- .action(async (key, value) => {
840
+ .action(async (action, key, value) => {
797
841
  const config = loadConfig();
798
- if (!key) {
842
+ // Settable config keys (safe for user modification)
843
+ const SETTABLE_KEYS = {
844
+ braveApiKey: 'Brave Search API key',
845
+ };
846
+ const maskSecret = (k, v) => {
847
+ if (!v)
848
+ return '(not set)';
849
+ if (k === 'apiKey' || k === 'braveApiKey')
850
+ return v.slice(0, 4) + '...' + v.slice(-4);
851
+ return String(v);
852
+ };
853
+ if (!action) {
799
854
  // Show all config
800
855
  console.log('WebPeel CLI Configuration');
801
856
  console.log(` Config file: ~/.webpeel/config.json`);
802
857
  console.log('');
803
- console.log(` apiKey: ${config.apiKey ? config.apiKey.slice(0, 7) + '...' + config.apiKey.slice(-4) : '(not set)'}`);
804
- console.log(` planTier: ${config.planTier || 'free'}`);
858
+ console.log(` apiKey: ${maskSecret('apiKey', config.apiKey)}`);
859
+ console.log(` braveApiKey: ${maskSecret('braveApiKey', config.braveApiKey)}`);
860
+ console.log(` planTier: ${config.planTier || 'free'}`);
805
861
  console.log(` anonymousUsage: ${config.anonymousUsage}`);
806
862
  const stats = cacheStats();
807
863
  console.log('');
@@ -809,21 +865,52 @@ program
809
865
  console.log(` entries: ${stats.entries}`);
810
866
  console.log(` size: ${(stats.sizeBytes / 1024).toFixed(1)} KB`);
811
867
  console.log(` dir: ${stats.dir}`);
868
+ console.log('');
869
+ console.log(' Settable keys: ' + Object.keys(SETTABLE_KEYS).join(', '));
870
+ console.log(' Usage: webpeel config set <key> <value>');
812
871
  process.exit(0);
813
872
  }
814
- if (key && !value) {
815
- // Get a specific key
816
- const val = config[key];
873
+ if (action === 'set') {
874
+ if (!key) {
875
+ console.error('Usage: webpeel config set <key> <value>');
876
+ console.error('Settable keys: ' + Object.keys(SETTABLE_KEYS).join(', '));
877
+ process.exit(1);
878
+ }
879
+ if (!(key in SETTABLE_KEYS)) {
880
+ console.error(`Cannot set "${key}". Settable keys: ${Object.keys(SETTABLE_KEYS).join(', ')}`);
881
+ process.exit(1);
882
+ }
883
+ if (!value) {
884
+ console.error(`Usage: webpeel config set ${key} <value>`);
885
+ process.exit(1);
886
+ }
887
+ config[key] = value;
888
+ saveConfig(config);
889
+ console.log(`✓ ${key} saved`);
890
+ process.exit(0);
891
+ }
892
+ if (action === 'get') {
893
+ const lookupKey = key || '';
894
+ const val = config[lookupKey];
817
895
  if (val !== undefined) {
818
- console.log(key === 'apiKey' && val ? val.slice(0, 7) + '...' + val.slice(-4) : val);
896
+ console.log(maskSecret(lookupKey, String(val)));
819
897
  }
820
898
  else {
821
- console.error(`Unknown config key: ${key}`);
899
+ console.error(`Unknown config key: ${lookupKey}`);
822
900
  process.exit(1);
823
901
  }
902
+ process.exit(0);
903
+ }
904
+ // Legacy: `webpeel config <key>` — treat action as the key name
905
+ const val = config[action];
906
+ if (val !== undefined) {
907
+ console.log(maskSecret(action, String(val)));
908
+ }
909
+ else {
910
+ console.error(`Unknown config key or action: ${action}`);
911
+ console.error('Usage: webpeel config [get|set] [key] [value]');
912
+ process.exit(1);
824
913
  }
825
- // Note: Setting config values directly is not supported for security
826
- // Use `webpeel login` for API key, plan is fetched from server
827
914
  process.exit(0);
828
915
  });
829
916
  // Cache management command
@@ -1058,10 +1145,121 @@ program
1058
1145
  process.exit(1);
1059
1146
  }
1060
1147
  });
1061
- // Jobs command - list active jobs
1148
+ // Jobs command - search job boards (LinkedIn, Indeed, Glassdoor)
1062
1149
  program
1063
- .command('jobs')
1064
- .description('List active jobs (crawl, batch)')
1150
+ .command('jobs <keywords>')
1151
+ .description('Search job boards for listings (LinkedIn, Indeed, Glassdoor)')
1152
+ .option('-l, --location <location>', 'Location filter')
1153
+ .option('-s, --source <source>', 'Job board: glassdoor, indeed, or linkedin (default: linkedin)', 'linkedin')
1154
+ .option('-n, --limit <number>', 'Max results (default: 25)', '25')
1155
+ .option('-d, --details <number>', 'Fetch full details for top N results (default: 0)', '0')
1156
+ .option('--json', 'Output raw JSON')
1157
+ .option('--timeout <ms>', 'Request timeout in ms (default: 30000)', '30000')
1158
+ .option('--silent', 'Silent mode (no spinner)')
1159
+ .action(async (keywords, options) => {
1160
+ const spinner = options.silent ? null : ora('Searching jobs...').start();
1161
+ try {
1162
+ const { searchJobs } = await import('./core/jobs.js');
1163
+ const source = (['glassdoor', 'indeed', 'linkedin'].includes(options.source) ? options.source : 'linkedin');
1164
+ const limit = Math.min(Math.max(parseInt(options.limit, 10) || 25, 1), 100);
1165
+ const fetchDetails = Math.min(Math.max(parseInt(options.details, 10) || 0, 0), limit);
1166
+ const timeout = parseInt(options.timeout, 10) || 30000;
1167
+ const result = await searchJobs({
1168
+ keywords,
1169
+ location: options.location,
1170
+ source,
1171
+ limit,
1172
+ fetchDetails,
1173
+ timeout,
1174
+ });
1175
+ if (spinner)
1176
+ spinner.stop();
1177
+ // --json: raw output
1178
+ if (options.json) {
1179
+ await writeStdout(JSON.stringify(result, null, 2) + '\n');
1180
+ process.exit(0);
1181
+ }
1182
+ // Formatted table output
1183
+ const totalLabel = result.totalFound >= 1000
1184
+ ? `${(result.totalFound / 1000).toFixed(0).replace(/\.0$/, '')}k+`
1185
+ : String(result.totalFound);
1186
+ const locationLabel = options.location ? ` in ${options.location}` : '';
1187
+ console.log(`\n🔍 Found ${totalLabel} ${keywords} jobs${locationLabel} (${result.source})\n`);
1188
+ if (result.jobs.length === 0) {
1189
+ console.log(' No jobs found.\n');
1190
+ process.exit(0);
1191
+ }
1192
+ // Column widths
1193
+ const colNum = 3;
1194
+ const colTitle = 40;
1195
+ const colCompany = 18;
1196
+ const colLocation = 16;
1197
+ const colSalary = 14;
1198
+ const colPosted = 10;
1199
+ const pad = (s, w) => s.length > w ? s.slice(0, w - 1) + '…' : s.padEnd(w);
1200
+ const rpad = (s, w) => s.padStart(w);
1201
+ // Header
1202
+ console.log(` ${rpad('#', colNum)} ${pad('Title', colTitle)} ${pad('Company', colCompany)} ${pad('Location', colLocation)} ${pad('Salary', colSalary)} ${pad('Posted', colPosted)}`);
1203
+ // Rows
1204
+ result.jobs.forEach((job, i) => {
1205
+ const title = job.title + (job.remote ? ' 🏠' : '');
1206
+ console.log(` ${rpad(String(i + 1), colNum)} ${pad(title, colTitle)} ${pad(job.company, colCompany)} ${pad(job.location, colLocation)} ${pad(job.salary || '', colSalary)} ${pad(job.postedAt || '', colPosted)}`);
1207
+ });
1208
+ // Footer
1209
+ const timeSec = (result.timeTakenMs / 1000).toFixed(1);
1210
+ const detailsNote = fetchDetails > 0 ? ` | Details: ${result.detailsFetched} fetched` : '';
1211
+ console.log(`\nFetched ${result.jobs.length} jobs in ${timeSec}s${detailsNote}\n`);
1212
+ // Detailed job cards (when --details > 0)
1213
+ const detailedJobs = result.jobs.filter((j) => 'description' in j);
1214
+ for (let i = 0; i < detailedJobs.length; i++) {
1215
+ const job = detailedJobs[i];
1216
+ console.log(`━━━ Job #${i + 1}: ${job.title} ━━━`);
1217
+ const metaParts = [`Company: ${job.company}`, `Location: ${job.location}`];
1218
+ if (job.salary)
1219
+ metaParts.push(`Salary: ${job.salary}`);
1220
+ console.log(metaParts.join(' | '));
1221
+ const typeParts = [];
1222
+ if (job.employmentType)
1223
+ typeParts.push(`Type: ${job.employmentType}`);
1224
+ if (job.experienceLevel)
1225
+ typeParts.push(`Level: ${job.experienceLevel}`);
1226
+ if (job.postedAt)
1227
+ typeParts.push(`Posted: ${job.postedAt}`);
1228
+ if (typeParts.length > 0)
1229
+ console.log(typeParts.join(' | '));
1230
+ if (job.description) {
1231
+ console.log(`\nDescription:\n ${job.description.slice(0, 500).replace(/\n/g, '\n ')}`);
1232
+ }
1233
+ if (job.requirements && job.requirements.length > 0) {
1234
+ console.log(`\nRequirements:`);
1235
+ job.requirements.forEach(r => console.log(` • ${r}`));
1236
+ }
1237
+ if (job.responsibilities && job.responsibilities.length > 0) {
1238
+ console.log(`\nResponsibilities:`);
1239
+ job.responsibilities.forEach(r => console.log(` • ${r}`));
1240
+ }
1241
+ if (job.benefits && job.benefits.length > 0) {
1242
+ console.log(`\nBenefits:`);
1243
+ job.benefits.forEach(b => console.log(` • ${b}`));
1244
+ }
1245
+ if (job.applyUrl) {
1246
+ console.log(`\nApply: ${job.applyUrl}`);
1247
+ }
1248
+ console.log('');
1249
+ }
1250
+ process.exit(0);
1251
+ }
1252
+ catch (error) {
1253
+ if (spinner)
1254
+ spinner.fail('Job search failed');
1255
+ console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
1256
+ process.exit(1);
1257
+ }
1258
+ });
1259
+ // Queue command - list active async jobs (crawl, batch)
1260
+ program
1261
+ .command('queue')
1262
+ .description('List active async jobs (crawl, batch)')
1065
1263
  .option('--json', 'Output as JSON')
1066
1264
  .action(async (options) => {
1067
1265
  try {
@@ -1079,12 +1277,13 @@ program
1079
1277
  if (!response.ok) {
1080
1278
  throw new Error(`API error: HTTP ${response.status}`);
1081
1279
  }
1082
- const jobs = await response.json();
1280
+ const data = await response.json();
1281
+ const jobs = data.jobs || data;
1083
1282
  if (options.json) {
1084
- console.log(JSON.stringify(jobs, null, 2));
1283
+ console.log(JSON.stringify(data, null, 2));
1085
1284
  }
1086
1285
  else {
1087
- if (jobs.length === 0) {
1286
+ if (!Array.isArray(jobs) || jobs.length === 0) {
1088
1287
  console.log('No active jobs.');
1089
1288
  }
1090
1289
  else {
@@ -1160,6 +1359,205 @@ program
1160
1359
  process.exit(1);
1161
1360
  }
1162
1361
  });
1362
+ // Answer command - search + fetch + LLM-generated answer
1363
+ program
1364
+ .command('answer <question>')
1365
+ .description('Ask a question, search the web, and get an AI-generated answer with citations (BYOK)')
1366
+ .option('--provider <provider>', 'Search provider: duckduckgo (default) or brave')
1367
+ .option('--search-api-key <key>', 'Search provider API key (or env WEBPEEL_BRAVE_API_KEY)')
1368
+ .option('--llm <provider>', 'LLM provider: openai, anthropic, or google (required)')
1369
+ .option('--llm-api-key <key>', 'LLM API key (or env OPENAI_API_KEY / ANTHROPIC_API_KEY / GOOGLE_API_KEY)')
1370
+ .option('--llm-model <model>', 'LLM model name (optional, uses provider default)')
1371
+ .option('--max-sources <n>', 'Maximum sources to fetch (1-10, default 5)', '5')
1372
+ .option('--json', 'Output as JSON')
1373
+ .option('-s, --silent', 'Silent mode')
1374
+ .action(async (question, options) => {
1375
+ const spinner = options.silent ? null : ora('Thinking...').start();
1376
+ try {
1377
+ const { answerQuestion } = await import('./core/answer.js');
1378
+ const config = loadConfig();
1379
+ const llmProvider = options.llm;
1380
+ if (!llmProvider || !['openai', 'anthropic', 'google'].includes(llmProvider)) {
1381
+ console.error('Error: --llm is required (openai, anthropic, or google)');
1382
+ process.exit(1);
1383
+ }
1384
+ const llmApiKey = options.llmApiKey
1385
+ || process.env.OPENAI_API_KEY
1386
+ || process.env.ANTHROPIC_API_KEY
1387
+ || process.env.GOOGLE_API_KEY
1388
+ || '';
1389
+ if (!llmApiKey) {
1390
+ console.error('Error: --llm-api-key is required (or set OPENAI_API_KEY / ANTHROPIC_API_KEY / GOOGLE_API_KEY)');
1391
+ process.exit(1);
1392
+ }
1393
+ const searchProvider = (options.provider || 'duckduckgo');
1394
+ const searchApiKey = options.searchApiKey
1395
+ || process.env.WEBPEEL_BRAVE_API_KEY
1396
+ || config.braveApiKey
1397
+ || undefined;
1398
+ const maxSources = Math.min(Math.max(parseInt(options.maxSources) || 5, 1), 10);
1399
+ if (spinner)
1400
+ spinner.text = 'Searching the web...';
1401
+ const result = await answerQuestion({
1402
+ question,
1403
+ searchProvider,
1404
+ searchApiKey,
1405
+ llmProvider,
1406
+ llmApiKey,
1407
+ llmModel: options.llmModel,
1408
+ maxSources,
1409
+ stream: false,
1410
+ });
1411
+ if (spinner)
1412
+ spinner.succeed('Done');
1413
+ if (options.json) {
1414
+ const jsonStr = JSON.stringify(result, null, 2);
1415
+ await new Promise((resolve, reject) => {
1416
+ process.stdout.write(jsonStr + '\n', (err) => {
1417
+ if (err)
1418
+ reject(err);
1419
+ else
1420
+ resolve();
1421
+ });
1422
+ });
1423
+ }
1424
+ else {
1425
+ console.log(`\n${result.answer}`);
1426
+ console.log(`\nSources:`);
1427
+ result.citations.forEach((c, i) => {
1428
+ console.log(` [${i + 1}] ${c.title}`);
1429
+ console.log(` ${c.url}`);
1430
+ });
1431
+ console.log(`\nModel: ${result.llmModel} (${result.llmProvider})`);
1432
+ }
1433
+ await cleanup();
1434
+ process.exit(0);
1435
+ }
1436
+ catch (error) {
1437
+ if (spinner)
1438
+ spinner.fail('Answer generation failed');
1439
+ console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
1440
+ await cleanup();
1441
+ process.exit(1);
1442
+ }
1443
+ });
1444
+ // Screenshot command
1445
+ program
1446
+ .command('screenshot <url>')
1447
+ .description('Take a screenshot of a URL and save as PNG/JPEG')
1448
+ .option('--full-page', 'Capture full page (not just viewport)')
1449
+ .option('--width <px>', 'Viewport width in pixels (default: 1280)', parseInt)
1450
+ .option('--height <px>', 'Viewport height in pixels (default: 720)', parseInt)
1451
+ .option('--format <fmt>', 'Image format: png (default) or jpeg', 'png')
1452
+ .option('--quality <n>', 'JPEG quality 1-100 (ignored for PNG)', parseInt)
1453
+ .option('-w, --wait <ms>', 'Wait time after page load (ms)', parseInt)
1454
+ .option('-t, --timeout <ms>', 'Request timeout (ms)', parseInt, 30000)
1455
+ .option('--stealth', 'Use stealth mode to bypass bot detection')
1456
+ .option('--action <actions...>', 'Page actions before screenshot (e.g., "click:.btn" "wait:2000")')
1457
+ .option('-o, --output <path>', 'Output file path (default: screenshot.png)')
1458
+ .option('-s, --silent', 'Silent mode (no spinner)')
1459
+ .option('--json', 'Output base64 JSON instead of binary file')
1460
+ .action(async (url, options) => {
1461
+ // Validate URL
1462
+ try {
1463
+ const parsed = new URL(url);
1464
+ if (!['http:', 'https:'].includes(parsed.protocol)) {
1465
+ console.error('Error: Only HTTP and HTTPS protocols are allowed');
1466
+ process.exit(1);
1467
+ }
1468
+ }
1469
+ catch {
1470
+ console.error(`Error: Invalid URL format: ${url}`);
1471
+ process.exit(1);
1472
+ }
1473
+ // Check usage quota
1474
+ const usageCheck = await checkUsage();
1475
+ if (!usageCheck.allowed) {
1476
+ console.error(usageCheck.message);
1477
+ process.exit(1);
1478
+ }
1479
+ const spinner = options.silent ? null : ora('Taking screenshot...').start();
1480
+ try {
1481
+ // Validate format
1482
+ const format = options.format?.toLowerCase();
1483
+ if (format && !['png', 'jpeg', 'jpg'].includes(format)) {
1484
+ console.error('Error: --format must be png, jpeg, or jpg');
1485
+ process.exit(1);
1486
+ }
1487
+ // Parse actions
1488
+ let actions;
1489
+ if (options.action && options.action.length > 0) {
1490
+ try {
1491
+ actions = parseActions(options.action);
1492
+ }
1493
+ catch (e) {
1494
+ console.error(`Error: ${e.message}`);
1495
+ process.exit(1);
1496
+ }
1497
+ }
1498
+ const { takeScreenshot } = await import('./core/screenshot.js');
1499
+ const result = await takeScreenshot(url, {
1500
+ fullPage: options.fullPage || false,
1501
+ width: options.width,
1502
+ height: options.height,
1503
+ format: format || 'png',
1504
+ quality: options.quality,
1505
+ waitFor: options.wait,
1506
+ timeout: options.timeout,
1507
+ stealth: options.stealth || false,
1508
+ actions,
1509
+ });
1510
+ if (spinner) {
1511
+ spinner.succeed(`Screenshot taken (${result.format})`);
1512
+ }
1513
+ // Show usage footer for free/anonymous users
1514
+ if (usageCheck.usageInfo && !options.silent) {
1515
+ showUsageFooter(usageCheck.usageInfo, usageCheck.isAnonymous || false, true);
1516
+ }
1517
+ if (options.json) {
1518
+ // Output JSON with base64
1519
+ const jsonStr = JSON.stringify({
1520
+ url: result.url,
1521
+ format: result.format,
1522
+ contentType: result.contentType,
1523
+ screenshot: result.screenshot,
1524
+ }, null, 2);
1525
+ await new Promise((resolve, reject) => {
1526
+ process.stdout.write(jsonStr + '\n', (err) => {
1527
+ if (err)
1528
+ reject(err);
1529
+ else
1530
+ resolve();
1531
+ });
1532
+ });
1533
+ }
1534
+ else {
1535
+ // Save to file
1536
+ const ext = result.format === 'jpeg' ? 'jpg' : 'png';
1537
+ const outputPath = options.output || `screenshot.${ext}`;
1538
+ const buffer = Buffer.from(result.screenshot, 'base64');
1539
+ writeFileSync(outputPath, buffer);
1540
+ if (!options.silent) {
1541
+ console.error(`Screenshot saved to: ${outputPath} (${(buffer.length / 1024).toFixed(1)} KB)`);
1542
+ }
1543
+ }
1544
+ await cleanup();
1545
+ process.exit(0);
1546
+ }
1547
+ catch (error) {
1548
+ if (spinner) {
1549
+ spinner.fail('Screenshot failed');
1550
+ }
1551
+ if (error instanceof Error) {
1552
+ console.error(`\nError: ${error.message}`);
1553
+ }
1554
+ else {
1555
+ console.error('\nError: Unknown error occurred');
1556
+ }
1557
+ await cleanup();
1558
+ process.exit(1);
1559
+ }
1560
+ });
1163
1561
  program.parse();
1164
1562
  // ============================================================
1165
1563
  // Shared output helper