webpeel 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +140 -500
- package/dist/cli-auth.d.ts +2 -0
- package/dist/cli-auth.d.ts.map +1 -1
- package/dist/cli-auth.js +16 -3
- package/dist/cli-auth.js.map +1 -1
- package/dist/cli.js +475 -77
- package/dist/cli.js.map +1 -1
- package/dist/core/actions.d.ts +19 -10
- package/dist/core/actions.d.ts.map +1 -1
- package/dist/core/actions.js +214 -43
- package/dist/core/actions.js.map +1 -1
- package/dist/core/agent.d.ts +60 -3
- package/dist/core/agent.d.ts.map +1 -1
- package/dist/core/agent.js +375 -86
- package/dist/core/agent.js.map +1 -1
- package/dist/core/answer.d.ts +43 -0
- package/dist/core/answer.d.ts.map +1 -0
- package/dist/core/answer.js +378 -0
- package/dist/core/answer.js.map +1 -0
- package/dist/core/cache.d.ts +14 -0
- package/dist/core/cache.d.ts.map +1 -0
- package/dist/core/cache.js +122 -0
- package/dist/core/cache.js.map +1 -0
- package/dist/core/dns-cache.d.ts +21 -0
- package/dist/core/dns-cache.d.ts.map +1 -0
- package/dist/core/dns-cache.js +184 -0
- package/dist/core/dns-cache.js.map +1 -0
- package/dist/core/documents.d.ts +24 -0
- package/dist/core/documents.d.ts.map +1 -0
- package/dist/core/documents.js +124 -0
- package/dist/core/documents.js.map +1 -0
- package/dist/core/extract-inline.d.ts +39 -0
- package/dist/core/extract-inline.d.ts.map +1 -0
- package/dist/core/extract-inline.js +214 -0
- package/dist/core/extract-inline.js.map +1 -0
- package/dist/core/fetcher.d.ts +33 -7
- package/dist/core/fetcher.d.ts.map +1 -1
- package/dist/core/fetcher.js +608 -41
- package/dist/core/fetcher.js.map +1 -1
- package/dist/core/jobs.d.ts +66 -0
- package/dist/core/jobs.d.ts.map +1 -0
- package/dist/core/jobs.js +513 -0
- package/dist/core/jobs.js.map +1 -0
- package/dist/core/markdown.d.ts.map +1 -1
- package/dist/core/markdown.js +141 -31
- package/dist/core/markdown.js.map +1 -1
- package/dist/core/pdf.d.ts.map +1 -1
- package/dist/core/pdf.js +3 -1
- package/dist/core/pdf.js.map +1 -1
- package/dist/core/screenshot.d.ts +33 -0
- package/dist/core/screenshot.d.ts.map +1 -0
- package/dist/core/screenshot.js +30 -0
- package/dist/core/screenshot.js.map +1 -0
- package/dist/core/search-provider.d.ts +46 -0
- package/dist/core/search-provider.d.ts.map +1 -0
- package/dist/core/search-provider.js +281 -0
- package/dist/core/search-provider.js.map +1 -0
- package/dist/core/strategies.d.ts +7 -10
- package/dist/core/strategies.d.ts.map +1 -1
- package/dist/core/strategies.js +370 -63
- package/dist/core/strategies.js.map +1 -1
- package/dist/index.d.ts +9 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +61 -32
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.js +335 -70
- package/dist/mcp/server.js.map +1 -1
- package/dist/types.d.ts +43 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/llms.txt +85 -47
- package/package.json +11 -5
package/dist/cli.js
CHANGED
|
@@ -16,7 +16,7 @@ import { Command } from 'commander';
|
|
|
16
16
|
import ora from 'ora';
|
|
17
17
|
import { writeFileSync, readFileSync } from 'fs';
|
|
18
18
|
import { peel, peelBatch, cleanup } from './index.js';
|
|
19
|
-
import { checkUsage, showUsageFooter, handleLogin, handleLogout, handleUsage, loadConfig } from './cli-auth.js';
|
|
19
|
+
import { checkUsage, showUsageFooter, handleLogin, handleLogout, handleUsage, loadConfig, saveConfig } from './cli-auth.js';
|
|
20
20
|
import { getCache, setCache, parseTTL, clearCache, cacheStats } from './cache.js';
|
|
21
21
|
const program = new Command();
|
|
22
22
|
// Read version from package.json dynamically
|
|
@@ -35,9 +35,39 @@ program
|
|
|
35
35
|
.description('Fast web fetcher for AI agents')
|
|
36
36
|
.version(cliVersion)
|
|
37
37
|
.enablePositionalOptions();
|
|
38
|
+
// Check for updates (non-blocking, runs in background)
|
|
39
|
+
/**
 * Ask the npm registry for the latest published webpeel version and print an
 * upgrade hint to stderr when that version is newer than the running CLI.
 *
 * Best-effort only: the request is aborted after 2 seconds and every failure
 * (network error, non-2xx response, malformed JSON) is swallowed on purpose so
 * the check can never slow down or break a command.
 *
 * Reads the module-level `cliVersion`; a value of '0.0.0' disables the hint.
 *
 * @returns {Promise<void>}
 */
async function checkForUpdates() {
    // Split "1.2.3" / "v1.2.3-beta.1" into a numeric core plus a prerelease flag.
    const parseVersion = (version) => {
        const match = /^v?(\d+)\.(\d+)\.(\d+)(-.+)?/.exec(String(version));
        if (!match) {
            return null;
        }
        return {
            core: [Number(match[1]), Number(match[2]), Number(match[3])],
            prerelease: Boolean(match[4]),
        };
    };
    // True only when `candidate` is strictly ahead of `current`. A plain
    // inequality would also nag users running a build newer than the registry.
    const isNewer = (candidate, current) => {
        const next = parseVersion(candidate);
        const mine = parseVersion(current);
        if (!next || !mine) {
            // Unparseable version string: fall back to the simple inequality check.
            return candidate !== current;
        }
        for (let i = 0; i < 3; i++) {
            if (next.core[i] !== mine.core[i]) {
                return next.core[i] > mine.core[i];
            }
        }
        // Same core version: a final release supersedes an installed prerelease.
        return mine.prerelease && !next.prerelease;
    };
    try {
        const res = await fetch('https://registry.npmjs.org/webpeel/latest', {
            signal: AbortSignal.timeout(2000),
        });
        if (!res.ok) {
            return;
        }
        const data = await res.json();
        const latest = data.version;
        if (typeof latest === 'string' && latest && cliVersion !== '0.0.0' && isNewer(latest, cliVersion)) {
            console.error(`\n💡 WebPeel v${latest} available (you have v${cliVersion}). Update: npm i -g webpeel@latest\n`);
        }
    }
    catch { /* silently ignore — don't slow down the user */ }
}
|
|
54
|
+
// Fire and forget — don't await, don't block
|
|
55
|
+
void checkForUpdates();
|
|
38
56
|
/**
|
|
39
57
|
* Parse action strings into PageAction array
|
|
40
|
-
*
|
|
58
|
+
* Formats:
|
|
59
|
+
* click:.selector — click an element
|
|
60
|
+
* type:.selector=text — type text into an input
|
|
61
|
+
* fill:.selector=text — fill an input (replaces existing value)
|
|
62
|
+
* scroll:down:500 — scroll direction + amount
|
|
63
|
+
* scroll:bottom — scroll to bottom (legacy)
|
|
64
|
+
* scroll:top — scroll to top (legacy)
|
|
65
|
+
* wait:2000 — wait N ms
|
|
66
|
+
* press:Enter — press a keyboard key
|
|
67
|
+
* hover:.selector — hover over an element
|
|
68
|
+
* waitFor:.selector — wait for a selector to appear
|
|
69
|
+
* select:.selector=value — select dropdown option
|
|
70
|
+
* screenshot — take a screenshot
|
|
41
71
|
*/
|
|
42
72
|
function parseActions(actionStrings) {
|
|
43
73
|
return actionStrings.map(str => {
|
|
@@ -48,8 +78,25 @@ function parseActions(actionStrings) {
|
|
|
48
78
|
return { type: 'wait', ms: parseInt(value) || 1000 };
|
|
49
79
|
case 'click':
|
|
50
80
|
return { type: 'click', selector: value };
|
|
51
|
-
case 'scroll':
|
|
52
|
-
|
|
81
|
+
case 'scroll': {
|
|
82
|
+
// scroll:down:500 or scroll:bottom or scroll:500
|
|
83
|
+
const parts = value.split(':');
|
|
84
|
+
const dir = parts[0];
|
|
85
|
+
if (dir === 'top' || dir === 'bottom') {
|
|
86
|
+
return { type: 'scroll', to: dir };
|
|
87
|
+
}
|
|
88
|
+
if (dir === 'down' || dir === 'up' || dir === 'left' || dir === 'right') {
|
|
89
|
+
const amount = parseInt(parts[1] || '500', 10);
|
|
90
|
+
return { type: 'scroll', direction: dir, amount };
|
|
91
|
+
}
|
|
92
|
+
// Bare number: absolute position
|
|
93
|
+
const num = parseInt(dir, 10);
|
|
94
|
+
if (!isNaN(num)) {
|
|
95
|
+
return { type: 'scroll', to: num };
|
|
96
|
+
}
|
|
97
|
+
// Default: scroll to bottom
|
|
98
|
+
return { type: 'scroll', to: 'bottom' };
|
|
99
|
+
}
|
|
53
100
|
case 'type': {
|
|
54
101
|
const [sel, ...text] = value.split('=');
|
|
55
102
|
return { type: 'type', selector: sel, value: text.join('=') };
|
|
@@ -58,12 +105,18 @@ function parseActions(actionStrings) {
|
|
|
58
105
|
const [sel, ...text] = value.split('=');
|
|
59
106
|
return { type: 'fill', selector: sel, value: text.join('=') };
|
|
60
107
|
}
|
|
108
|
+
case 'select': {
|
|
109
|
+
const [sel, ...vals] = value.split('=');
|
|
110
|
+
return { type: 'select', selector: sel, value: vals.join('=') };
|
|
111
|
+
}
|
|
61
112
|
case 'press':
|
|
62
113
|
return { type: 'press', key: value };
|
|
63
114
|
case 'hover':
|
|
64
115
|
return { type: 'hover', selector: value };
|
|
65
116
|
case 'waitFor':
|
|
66
117
|
return { type: 'waitForSelector', selector: value };
|
|
118
|
+
case 'screenshot':
|
|
119
|
+
return { type: 'screenshot' };
|
|
67
120
|
default:
|
|
68
121
|
throw new Error(`Unknown action type: ${type}`);
|
|
69
122
|
}
|
|
@@ -334,6 +387,26 @@ program
|
|
|
334
387
|
}
|
|
335
388
|
if (error instanceof Error) {
|
|
336
389
|
console.error(`\nError: ${error.message}`);
|
|
390
|
+
// Provide actionable hints based on error type
|
|
391
|
+
const msg = error.message.toLowerCase();
|
|
392
|
+
if (msg.includes('timeout') || msg.includes('timed out')) {
|
|
393
|
+
console.error('\n💡 Hint: Try --render for JS-heavy sites, or --wait 5000 to wait longer.');
|
|
394
|
+
}
|
|
395
|
+
else if (msg.includes('blocked') || msg.includes('403') || msg.includes('cloudflare')) {
|
|
396
|
+
console.error('\n💡 Hint: Try --stealth to bypass bot detection (uses more credits).');
|
|
397
|
+
}
|
|
398
|
+
else if (msg.includes('enotfound') || msg.includes('getaddrinfo')) {
|
|
399
|
+
console.error('\n💡 Hint: Could not resolve hostname. Check the URL is correct.');
|
|
400
|
+
}
|
|
401
|
+
else if (msg.includes('econnrefused') || msg.includes('econnreset')) {
|
|
402
|
+
console.error('\n💡 Hint: Connection refused. The site may be down or blocking requests.');
|
|
403
|
+
}
|
|
404
|
+
else if (msg.includes('certificate') || msg.includes('ssl') || msg.includes('tls')) {
|
|
405
|
+
console.error('\n💡 Hint: SSL/TLS error. The site may have an invalid certificate.');
|
|
406
|
+
}
|
|
407
|
+
else if (msg.includes('usage') || msg.includes('quota') || msg.includes('limit')) {
|
|
408
|
+
console.error('\n💡 Hint: Run `webpeel usage` to check your quota, or `webpeel login` to authenticate.');
|
|
409
|
+
}
|
|
337
410
|
}
|
|
338
411
|
else {
|
|
339
412
|
console.error('\nError: Unknown error occurred');
|
|
@@ -345,8 +418,10 @@ program
|
|
|
345
418
|
// Search command
|
|
346
419
|
program
|
|
347
420
|
.command('search <query>')
|
|
348
|
-
.description('Search
|
|
421
|
+
.description('Search the web (DuckDuckGo by default, or Brave with --provider brave)')
|
|
349
422
|
.option('-n, --count <n>', 'Number of results (1-10)', '5')
|
|
423
|
+
.option('--provider <provider>', 'Search provider: duckduckgo (default) or brave')
|
|
424
|
+
.option('--search-api-key <key>', 'API key for the search provider (or env WEBPEEL_BRAVE_API_KEY)')
|
|
350
425
|
.option('--json', 'Output as JSON')
|
|
351
426
|
.option('-s, --silent', 'Silent mode')
|
|
352
427
|
.action(async (query, options) => {
|
|
@@ -361,61 +436,21 @@ program
|
|
|
361
436
|
}
|
|
362
437
|
const spinner = isSilent ? null : ora('Searching...').start();
|
|
363
438
|
try {
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
const
|
|
367
|
-
const
|
|
368
|
-
const
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
const html = await response.text();
|
|
377
|
-
const $ = load(html);
|
|
378
|
-
const results = [];
|
|
379
|
-
$('.result').each((_i, elem) => {
|
|
380
|
-
if (results.length >= count)
|
|
381
|
-
return;
|
|
382
|
-
const $result = $(elem);
|
|
383
|
-
const title = $result.find('.result__title').text().trim();
|
|
384
|
-
const rawUrl = $result.find('.result__a').attr('href') || '';
|
|
385
|
-
const snippet = $result.find('.result__snippet').text().trim();
|
|
386
|
-
if (!title || !rawUrl)
|
|
387
|
-
return;
|
|
388
|
-
// Extract actual URL from DuckDuckGo redirect
|
|
389
|
-
let url = rawUrl;
|
|
390
|
-
try {
|
|
391
|
-
const ddgUrl = new URL(rawUrl, 'https://duckduckgo.com');
|
|
392
|
-
const uddg = ddgUrl.searchParams.get('uddg');
|
|
393
|
-
if (uddg) {
|
|
394
|
-
url = decodeURIComponent(uddg);
|
|
395
|
-
}
|
|
396
|
-
}
|
|
397
|
-
catch {
|
|
398
|
-
// Use raw URL if parsing fails
|
|
399
|
-
}
|
|
400
|
-
// Validate final URL
|
|
401
|
-
try {
|
|
402
|
-
const parsed = new URL(url);
|
|
403
|
-
if (!['http:', 'https:'].includes(parsed.protocol)) {
|
|
404
|
-
return;
|
|
405
|
-
}
|
|
406
|
-
url = parsed.href;
|
|
407
|
-
}
|
|
408
|
-
catch {
|
|
409
|
-
return;
|
|
410
|
-
}
|
|
411
|
-
results.push({
|
|
412
|
-
title: title.slice(0, 200),
|
|
413
|
-
url,
|
|
414
|
-
snippet: snippet.slice(0, 500)
|
|
415
|
-
});
|
|
439
|
+
const { getSearchProvider } = await import('./core/search-provider.js');
|
|
440
|
+
// Resolve provider
|
|
441
|
+
const providerId = (options.provider || 'duckduckgo');
|
|
442
|
+
const config = loadConfig();
|
|
443
|
+
const apiKey = options.searchApiKey
|
|
444
|
+
|| process.env.WEBPEEL_BRAVE_API_KEY
|
|
445
|
+
|| config.braveApiKey
|
|
446
|
+
|| undefined;
|
|
447
|
+
const provider = getSearchProvider(providerId);
|
|
448
|
+
const results = await provider.searchWeb(query, {
|
|
449
|
+
count: Math.min(Math.max(count, 1), 10),
|
|
450
|
+
apiKey,
|
|
416
451
|
});
|
|
417
452
|
if (spinner) {
|
|
418
|
-
spinner.succeed(`Found ${results.length} results`);
|
|
453
|
+
spinner.succeed(`Found ${results.length} results (${providerId})`);
|
|
419
454
|
}
|
|
420
455
|
// Show usage footer for free/anonymous users
|
|
421
456
|
if (usageCheck.usageInfo && !isSilent) {
|
|
@@ -447,6 +482,14 @@ program
|
|
|
447
482
|
}
|
|
448
483
|
if (error instanceof Error) {
|
|
449
484
|
console.error(`\nError: ${error.message}`);
|
|
485
|
+
const msg = error.message.toLowerCase();
|
|
486
|
+
if (msg.includes('brave') && msg.includes('api key')) {
|
|
487
|
+
console.error('\n💡 Hint: Set your Brave API key: webpeel config set braveApiKey YOUR_KEY');
|
|
488
|
+
console.error(' Or use free DuckDuckGo search (default, no key needed).');
|
|
489
|
+
}
|
|
490
|
+
else if (msg.includes('timeout') || msg.includes('timed out')) {
|
|
491
|
+
console.error('\n💡 Hint: Search timed out. Try a more specific query or try again.');
|
|
492
|
+
}
|
|
450
493
|
}
|
|
451
494
|
else {
|
|
452
495
|
console.error('\nError: Unknown error occurred');
|
|
@@ -787,21 +830,34 @@ program
|
|
|
787
830
|
.action(async () => {
|
|
788
831
|
await import('./mcp/server.js');
|
|
789
832
|
});
|
|
790
|
-
// Config command
|
|
833
|
+
// Config command — webpeel config [get|set] [key] [value]
|
|
791
834
|
program
|
|
792
835
|
.command('config')
|
|
793
836
|
.description('View or update CLI configuration')
|
|
794
|
-
.argument('[
|
|
837
|
+
.argument('[action]', '"get <key>", "set <key> <value>", or omit for overview')
|
|
838
|
+
.argument('[key]', 'Config key')
|
|
795
839
|
.argument('[value]', 'Value to set')
|
|
796
|
-
.action(async (key, value) => {
|
|
840
|
+
.action(async (action, key, value) => {
|
|
797
841
|
const config = loadConfig();
|
|
798
|
-
|
|
842
|
+
// Settable config keys (safe for user modification)
|
|
843
|
+
const SETTABLE_KEYS = {
|
|
844
|
+
braveApiKey: 'Brave Search API key',
|
|
845
|
+
};
|
|
846
|
+
/**
 * Format a config value for display, hiding most of any secret.
 *
 * @param {string} k - Config key; 'apiKey' and 'braveApiKey' are treated as secrets.
 * @param {*} v - Stored value (may be undefined/empty).
 * @returns {string} '(not set)' for falsy values, a masked preview for
 *   secrets, otherwise the value converted with String().
 */
const maskSecret = (k, v) => {
    if (!v)
        return '(not set)';
    if (k === 'apiKey' || k === 'braveApiKey') {
        const secret = String(v);
        // The "first 4 ... last 4" preview reveals 8 characters. For short
        // secrets the two slices overlap (or nearly do) and would print the
        // whole key, so anything under 12 characters is masked completely.
        if (secret.length < 12)
            return '*'.repeat(secret.length);
        return secret.slice(0, 4) + '...' + secret.slice(-4);
    }
    return String(v);
};
|
|
853
|
+
if (!action) {
|
|
799
854
|
// Show all config
|
|
800
855
|
console.log('WebPeel CLI Configuration');
|
|
801
856
|
console.log(` Config file: ~/.webpeel/config.json`);
|
|
802
857
|
console.log('');
|
|
803
|
-
console.log(` apiKey:
|
|
804
|
-
console.log(`
|
|
858
|
+
console.log(` apiKey: ${maskSecret('apiKey', config.apiKey)}`);
|
|
859
|
+
console.log(` braveApiKey: ${maskSecret('braveApiKey', config.braveApiKey)}`);
|
|
860
|
+
console.log(` planTier: ${config.planTier || 'free'}`);
|
|
805
861
|
console.log(` anonymousUsage: ${config.anonymousUsage}`);
|
|
806
862
|
const stats = cacheStats();
|
|
807
863
|
console.log('');
|
|
@@ -809,21 +865,52 @@ program
|
|
|
809
865
|
console.log(` entries: ${stats.entries}`);
|
|
810
866
|
console.log(` size: ${(stats.sizeBytes / 1024).toFixed(1)} KB`);
|
|
811
867
|
console.log(` dir: ${stats.dir}`);
|
|
868
|
+
console.log('');
|
|
869
|
+
console.log(' Settable keys: ' + Object.keys(SETTABLE_KEYS).join(', '));
|
|
870
|
+
console.log(' Usage: webpeel config set <key> <value>');
|
|
812
871
|
process.exit(0);
|
|
813
872
|
}
|
|
814
|
-
if (
|
|
815
|
-
|
|
816
|
-
|
|
873
|
+
if (action === 'set') {
|
|
874
|
+
if (!key) {
|
|
875
|
+
console.error('Usage: webpeel config set <key> <value>');
|
|
876
|
+
console.error('Settable keys: ' + Object.keys(SETTABLE_KEYS).join(', '));
|
|
877
|
+
process.exit(1);
|
|
878
|
+
}
|
|
879
|
+
if (!(key in SETTABLE_KEYS)) {
|
|
880
|
+
console.error(`Cannot set "${key}". Settable keys: ${Object.keys(SETTABLE_KEYS).join(', ')}`);
|
|
881
|
+
process.exit(1);
|
|
882
|
+
}
|
|
883
|
+
if (!value) {
|
|
884
|
+
console.error(`Usage: webpeel config set ${key} <value>`);
|
|
885
|
+
process.exit(1);
|
|
886
|
+
}
|
|
887
|
+
config[key] = value;
|
|
888
|
+
saveConfig(config);
|
|
889
|
+
console.log(`✓ ${key} saved`);
|
|
890
|
+
process.exit(0);
|
|
891
|
+
}
|
|
892
|
+
if (action === 'get') {
|
|
893
|
+
const lookupKey = key || '';
|
|
894
|
+
const val = config[lookupKey];
|
|
817
895
|
if (val !== undefined) {
|
|
818
|
-
console.log(
|
|
896
|
+
console.log(maskSecret(lookupKey, String(val)));
|
|
819
897
|
}
|
|
820
898
|
else {
|
|
821
|
-
console.error(`Unknown config key: ${
|
|
899
|
+
console.error(`Unknown config key: ${lookupKey}`);
|
|
822
900
|
process.exit(1);
|
|
823
901
|
}
|
|
902
|
+
process.exit(0);
|
|
903
|
+
}
|
|
904
|
+
// Legacy: `webpeel config <key>` — treat action as the key name
|
|
905
|
+
const val = config[action];
|
|
906
|
+
if (val !== undefined) {
|
|
907
|
+
console.log(maskSecret(action, String(val)));
|
|
908
|
+
}
|
|
909
|
+
else {
|
|
910
|
+
console.error(`Unknown config key or action: ${action}`);
|
|
911
|
+
console.error('Usage: webpeel config [get|set] [key] [value]');
|
|
912
|
+
process.exit(1);
|
|
824
913
|
}
|
|
825
|
-
// Note: Setting config values directly is not supported for security
|
|
826
|
-
// Use `webpeel login` for API key, plan is fetched from server
|
|
827
914
|
process.exit(0);
|
|
828
915
|
});
|
|
829
916
|
// Cache management command
|
|
@@ -1058,10 +1145,121 @@ program
|
|
|
1058
1145
|
process.exit(1);
|
|
1059
1146
|
}
|
|
1060
1147
|
});
|
|
1061
|
-
// Jobs command -
|
|
1148
|
+
// Jobs command - search job boards (LinkedIn, Indeed, Glassdoor)
|
|
1062
1149
|
program
    .command('jobs <keywords>')
    .description('Search job boards for listings (LinkedIn, Indeed, Glassdoor)')
    .option('-l, --location <location>', 'Location filter')
    .option('-s, --source <source>', 'Job board: glassdoor, indeed, or linkedin (default: linkedin)', 'linkedin')
    .option('-n, --limit <number>', 'Max results (default: 25)', '25')
    .option('-d, --details <number>', 'Fetch full details for top N results (default: 0)', '0')
    .option('--json', 'Output raw JSON')
    .option('--timeout <ms>', 'Request timeout in ms (default: 30000)', '30000')
    .option('--silent', 'Silent mode (no spinner)')
    /**
     * Search a job board and print either raw JSON (--json) or a table,
     * followed by detail cards for every job that carries a `description`.
     * Always terminates the process: exit code 0 on success, 1 on failure.
     */
    .action(async (keywords, options) => {
    const spinner = options.silent ? null : ora('Searching jobs...').start();
    try {
        const { searchJobs } = await import('./core/jobs.js');
        // Unknown sources silently fall back to LinkedIn.
        const source = (['glassdoor', 'indeed', 'linkedin'].includes(options.source) ? options.source : 'linkedin');
        // Clamp numeric options: limit to 1..100, details to 0..limit.
        const limit = Math.min(Math.max(parseInt(options.limit, 10) || 25, 1), 100);
        const fetchDetails = Math.min(Math.max(parseInt(options.details, 10) || 0, 0), limit);
        const timeout = parseInt(options.timeout, 10) || 30000;
        const result = await searchJobs({
            keywords,
            location: options.location,
            source,
            limit,
            fetchDetails,
            timeout,
        });
        if (spinner)
            spinner.stop();
        // --json: raw output
        if (options.json) {
            await writeStdout(JSON.stringify(result, null, 2) + '\n');
            process.exit(0);
        }
        // Formatted table output.
        // Floor to one decimal so the "+" suffix stays truthful: 1500 -> "1.5k+".
        // (Rounding with toFixed(0) reported 1500 as "2k+".)
        const totalLabel = result.totalFound >= 1000
            ? `${Math.floor(result.totalFound / 100) / 10}k+`
            : String(result.totalFound);
        const locationLabel = options.location ? ` in ${options.location}` : '';
        console.log(`\n🔍 Found ${totalLabel} ${keywords} jobs${locationLabel} (${result.source})\n`);
        if (result.jobs.length === 0) {
            console.log(' No jobs found.\n');
            process.exit(0);
        }
        // Column widths
        const colNum = 3;
        const colTitle = 40;
        const colCompany = 18;
        const colLocation = 16;
        const colSalary = 14;
        const colPosted = 10;
        // pad: truncate with an ellipsis or right-pad to exactly `w` characters.
        const pad = (s, w) => s.length > w ? s.slice(0, w - 1) + '…' : s.padEnd(w);
        // rpad: right-align within `w` characters.
        const rpad = (s, w) => s.padStart(w);
        // Header
        console.log(` ${rpad('#', colNum)} ${pad('Title', colTitle)} ${pad('Company', colCompany)} ${pad('Location', colLocation)} ${pad('Salary', colSalary)} ${pad('Posted', colPosted)}`);
        // Rows
        result.jobs.forEach((job, i) => {
            const title = job.title + (job.remote ? ' 🏠' : '');
            console.log(` ${rpad(String(i + 1), colNum)} ${pad(title, colTitle)} ${pad(job.company, colCompany)} ${pad(job.location, colLocation)} ${pad(job.salary || '', colSalary)} ${pad(job.postedAt || '', colPosted)}`);
        });
        // Footer
        const timeSec = (result.timeTakenMs / 1000).toFixed(1);
        const detailsNote = fetchDetails > 0 ? ` | Details: ${result.detailsFetched} fetched` : '';
        console.log(`\nFetched ${result.jobs.length} jobs in ${timeSec}s${detailsNote}\n`);
        // Detailed job cards (when --details > 0): only jobs with a `description` key.
        const detailedJobs = result.jobs.filter((j) => 'description' in j);
        for (let i = 0; i < detailedJobs.length; i++) {
            const job = detailedJobs[i];
            console.log(`━━━ Job #${i + 1}: ${job.title} ━━━`);
            const metaParts = [`Company: ${job.company}`, `Location: ${job.location}`];
            if (job.salary)
                metaParts.push(`Salary: ${job.salary}`);
            console.log(metaParts.join(' | '));
            const typeParts = [];
            if (job.employmentType)
                typeParts.push(`Type: ${job.employmentType}`);
            if (job.experienceLevel)
                typeParts.push(`Level: ${job.experienceLevel}`);
            if (job.postedAt)
                typeParts.push(`Posted: ${job.postedAt}`);
            if (typeParts.length > 0)
                console.log(typeParts.join(' | '));
            if (job.description) {
                // Only the first 500 characters are shown, re-indented per line.
                console.log(`\nDescription:\n ${job.description.slice(0, 500).replace(/\n/g, '\n ')}`);
            }
            if (job.requirements && job.requirements.length > 0) {
                console.log(`\nRequirements:`);
                job.requirements.forEach(r => console.log(` • ${r}`));
            }
            if (job.responsibilities && job.responsibilities.length > 0) {
                console.log(`\nResponsibilities:`);
                job.responsibilities.forEach(r => console.log(` • ${r}`));
            }
            if (job.benefits && job.benefits.length > 0) {
                console.log(`\nBenefits:`);
                job.benefits.forEach(b => console.log(` • ${b}`));
            }
            if (job.applyUrl) {
                console.log(`\nApply: ${job.applyUrl}`);
            }
            console.log('');
        }
        process.exit(0);
    }
    catch (error) {
        if (spinner)
            spinner.fail('Job search failed');
        console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
        process.exit(1);
    }
});
|
|
1259
|
+
// Queue command - list active async jobs (crawl, batch)
|
|
1260
|
+
program
|
|
1261
|
+
.command('queue')
|
|
1262
|
+
.description('List active async jobs (crawl, batch)')
|
|
1065
1263
|
.option('--json', 'Output as JSON')
|
|
1066
1264
|
.action(async (options) => {
|
|
1067
1265
|
try {
|
|
@@ -1079,12 +1277,13 @@ program
|
|
|
1079
1277
|
if (!response.ok) {
|
|
1080
1278
|
throw new Error(`API error: HTTP ${response.status}`);
|
|
1081
1279
|
}
|
|
1082
|
-
const
|
|
1280
|
+
const data = await response.json();
|
|
1281
|
+
const jobs = data.jobs || data;
|
|
1083
1282
|
if (options.json) {
|
|
1084
|
-
console.log(JSON.stringify(
|
|
1283
|
+
console.log(JSON.stringify(data, null, 2));
|
|
1085
1284
|
}
|
|
1086
1285
|
else {
|
|
1087
|
-
if (jobs.length === 0) {
|
|
1286
|
+
if (!Array.isArray(jobs) || jobs.length === 0) {
|
|
1088
1287
|
console.log('No active jobs.');
|
|
1089
1288
|
}
|
|
1090
1289
|
else {
|
|
@@ -1160,6 +1359,205 @@ program
|
|
|
1160
1359
|
process.exit(1);
|
|
1161
1360
|
}
|
|
1162
1361
|
});
|
|
1362
|
+
// Answer command - search + fetch + LLM-generated answer
|
|
1363
|
+
program
    .command('answer <question>')
    .description('Ask a question, search the web, and get an AI-generated answer with citations (BYOK)')
    .option('--provider <provider>', 'Search provider: duckduckgo (default) or brave')
    .option('--search-api-key <key>', 'Search provider API key (or env WEBPEEL_BRAVE_API_KEY)')
    .option('--llm <provider>', 'LLM provider: openai, anthropic, or google (required)')
    .option('--llm-api-key <key>', 'LLM API key (or env OPENAI_API_KEY / ANTHROPIC_API_KEY / GOOGLE_API_KEY)')
    .option('--llm-model <model>', 'LLM model name (optional, uses provider default)')
    .option('--max-sources <n>', 'Maximum sources to fetch (1-10, default 5)', '5')
    .option('--json', 'Output as JSON')
    .option('-s, --silent', 'Silent mode')
    /**
     * Validate the LLM/search options, call answerQuestion(), and print the
     * answer with its citations (or the raw result as JSON with --json).
     * Always terminates the process: exit code 0 on success, 1 on failure.
     */
    .action(async (question, options) => {
    // Environment variable holding the API key for each supported LLM provider.
    const LLM_ENV_KEYS = {
        openai: 'OPENAI_API_KEY',
        anthropic: 'ANTHROPIC_API_KEY',
        google: 'GOOGLE_API_KEY',
    };
    const spinner = options.silent ? null : ora('Thinking...').start();
    try {
        const { answerQuestion } = await import('./core/answer.js');
        const config = loadConfig();
        const llmProvider = options.llm;
        if (!llmProvider || !Object.keys(LLM_ENV_KEYS).includes(llmProvider)) {
            // Stop the spinner first so it does not garble the error line.
            if (spinner)
                spinner.stop();
            console.error('Error: --llm is required (openai, anthropic, or google)');
            process.exit(1);
        }
        // Fall back only to the env var of the SELECTED provider. Taking the
        // first variable that happens to be set could send, for example, an
        // OpenAI key to Anthropic when both are exported.
        const llmApiKey = options.llmApiKey
            || process.env[LLM_ENV_KEYS[llmProvider]]
            || '';
        if (!llmApiKey) {
            if (spinner)
                spinner.stop();
            console.error('Error: --llm-api-key is required (or set OPENAI_API_KEY / ANTHROPIC_API_KEY / GOOGLE_API_KEY)');
            process.exit(1);
        }
        const searchProvider = (options.provider || 'duckduckgo');
        // Precedence: CLI flag, then environment, then saved config.
        const searchApiKey = options.searchApiKey
            || process.env.WEBPEEL_BRAVE_API_KEY
            || config.braveApiKey
            || undefined;
        // Clamp to 1..10; non-numeric input falls back to 5.
        const maxSources = Math.min(Math.max(parseInt(options.maxSources, 10) || 5, 1), 10);
        if (spinner)
            spinner.text = 'Searching the web...';
        const result = await answerQuestion({
            question,
            searchProvider,
            searchApiKey,
            llmProvider,
            llmApiKey,
            llmModel: options.llmModel,
            maxSources,
            stream: false,
        });
        if (spinner)
            spinner.succeed('Done');
        if (options.json) {
            const jsonStr = JSON.stringify(result, null, 2);
            // Wait for the write callback so process.exit() below cannot
            // truncate large output on a piped stdout.
            await new Promise((resolve, reject) => {
                process.stdout.write(jsonStr + '\n', (err) => {
                    if (err)
                        reject(err);
                    else
                        resolve();
                });
            });
        }
        else {
            console.log(`\n${result.answer}`);
            console.log(`\nSources:`);
            result.citations.forEach((c, i) => {
                console.log(` [${i + 1}] ${c.title}`);
                console.log(` ${c.url}`);
            });
            console.log(`\nModel: ${result.llmModel} (${result.llmProvider})`);
        }
        await cleanup();
        process.exit(0);
    }
    catch (error) {
        if (spinner)
            spinner.fail('Answer generation failed');
        console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
        await cleanup();
        process.exit(1);
    }
});
|
|
1444
|
+
// Screenshot command
|
|
1445
|
+
program
    .command('screenshot <url>')
    .description('Take a screenshot of a URL and save as PNG/JPEG')
    .option('--full-page', 'Capture full page (not just viewport)')
    // NOTE: Commander invokes an option parser as (value, previousValue).
    // Passing bare `parseInt` therefore uses the previous/default value as
    // the radix; for --timeout (default 30000) that made every user-supplied
    // value parse to NaN. Each numeric option gets an explicit base-10 parser.
    .option('--width <px>', 'Viewport width in pixels (default: 1280)', (value) => parseInt(value, 10))
    .option('--height <px>', 'Viewport height in pixels (default: 720)', (value) => parseInt(value, 10))
    .option('--format <fmt>', 'Image format: png (default) or jpeg', 'png')
    .option('--quality <n>', 'JPEG quality 1-100 (ignored for PNG)', (value) => parseInt(value, 10))
    .option('-w, --wait <ms>', 'Wait time after page load (ms)', (value) => parseInt(value, 10))
    .option('-t, --timeout <ms>', 'Request timeout (ms)', (value) => parseInt(value, 10), 30000)
    .option('--stealth', 'Use stealth mode to bypass bot detection')
    .option('--action <actions...>', 'Page actions before screenshot (e.g., "click:.btn" "wait:2000")')
    .option('-o, --output <path>', 'Output file path (default: screenshot.png)')
    .option('-s, --silent', 'Silent mode (no spinner)')
    .option('--json', 'Output base64 JSON instead of binary file')
    /**
     * Validate the URL and options, check the usage quota, capture the
     * screenshot, then either print base64 JSON (--json) or write the image
     * to disk. Always terminates the process: exit code 0 on success, 1 on
     * any validation or capture failure.
     */
    .action(async (url, options) => {
    // Validate URL: must parse and use http(s).
    try {
        const parsed = new URL(url);
        if (!['http:', 'https:'].includes(parsed.protocol)) {
            console.error('Error: Only HTTP and HTTPS protocols are allowed');
            process.exit(1);
        }
    }
    catch {
        console.error(`Error: Invalid URL format: ${url}`);
        process.exit(1);
    }
    // Check usage quota
    const usageCheck = await checkUsage();
    if (!usageCheck.allowed) {
        console.error(usageCheck.message);
        process.exit(1);
    }
    const spinner = options.silent ? null : ora('Taking screenshot...').start();
    try {
        // Validate format
        const format = options.format?.toLowerCase();
        if (format && !['png', 'jpeg', 'jpg'].includes(format)) {
            console.error('Error: --format must be png, jpeg, or jpg');
            process.exit(1);
        }
        // Parse actions
        let actions;
        if (options.action && options.action.length > 0) {
            try {
                actions = parseActions(options.action);
            }
            catch (e) {
                console.error(`Error: ${e.message}`);
                process.exit(1);
            }
        }
        const { takeScreenshot } = await import('./core/screenshot.js');
        const result = await takeScreenshot(url, {
            fullPage: options.fullPage || false,
            width: options.width,
            height: options.height,
            format: format || 'png',
            quality: options.quality,
            waitFor: options.wait,
            timeout: options.timeout,
            stealth: options.stealth || false,
            actions,
        });
        if (spinner) {
            spinner.succeed(`Screenshot taken (${result.format})`);
        }
        // Show usage footer for free/anonymous users
        if (usageCheck.usageInfo && !options.silent) {
            showUsageFooter(usageCheck.usageInfo, usageCheck.isAnonymous || false, true);
        }
        if (options.json) {
            // Output JSON with base64
            const jsonStr = JSON.stringify({
                url: result.url,
                format: result.format,
                contentType: result.contentType,
                screenshot: result.screenshot,
            }, null, 2);
            // Wait for the write callback so process.exit() below cannot
            // truncate the (potentially large) payload on a piped stdout.
            await new Promise((resolve, reject) => {
                process.stdout.write(jsonStr + '\n', (err) => {
                    if (err)
                        reject(err);
                    else
                        resolve();
                });
            });
        }
        else {
            // Save to file.
            // NOTE(review): 'jpg' is an accepted --format value; whether
            // takeScreenshot normalizes it to 'jpeg' in result.format is not
            // visible here, so both spellings map to a .jpg extension.
            const ext = (result.format === 'jpeg' || result.format === 'jpg') ? 'jpg' : 'png';
            const outputPath = options.output || `screenshot.${ext}`;
            const buffer = Buffer.from(result.screenshot, 'base64');
            writeFileSync(outputPath, buffer);
            if (!options.silent) {
                // Status goes to stderr, keeping stdout free for data.
                console.error(`Screenshot saved to: ${outputPath} (${(buffer.length / 1024).toFixed(1)} KB)`);
            }
        }
        await cleanup();
        process.exit(0);
    }
    catch (error) {
        if (spinner) {
            spinner.fail('Screenshot failed');
        }
        if (error instanceof Error) {
            console.error(`\nError: ${error.message}`);
        }
        else {
            console.error('\nError: Unknown error occurred');
        }
        await cleanup();
        process.exit(1);
    }
});
|
|
1163
1561
|
program.parse();
|
|
1164
1562
|
// ============================================================
|
|
1165
1563
|
// Shared output helper
|