webpeel 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -4
- package/dist/cli-auth.d.ts +6 -0
- package/dist/cli-auth.d.ts.map +1 -1
- package/dist/cli-auth.js.map +1 -1
- package/dist/cli.js +463 -22
- package/dist/cli.js.map +1 -1
- package/dist/core/challenge-detection.d.ts.map +1 -1
- package/dist/core/challenge-detection.js +39 -6
- package/dist/core/challenge-detection.js.map +1 -1
- package/dist/core/extract-listings.d.ts.map +1 -1
- package/dist/core/extract-listings.js +167 -36
- package/dist/core/extract-listings.js.map +1 -1
- package/dist/core/fetcher.d.ts +6 -0
- package/dist/core/fetcher.d.ts.map +1 -1
- package/dist/core/fetcher.js +147 -11
- package/dist/core/fetcher.js.map +1 -1
- package/dist/core/hotel-search.d.ts +121 -0
- package/dist/core/hotel-search.d.ts.map +1 -0
- package/dist/core/hotel-search.js +381 -0
- package/dist/core/hotel-search.js.map +1 -0
- package/dist/core/llm-extract.d.ts +42 -0
- package/dist/core/llm-extract.d.ts.map +1 -0
- package/dist/core/llm-extract.js +144 -0
- package/dist/core/llm-extract.js.map +1 -0
- package/dist/core/profiles.d.ts +48 -0
- package/dist/core/profiles.d.ts.map +1 -0
- package/dist/core/profiles.js +211 -0
- package/dist/core/profiles.js.map +1 -0
- package/dist/core/schema-extraction.d.ts +67 -0
- package/dist/core/schema-extraction.d.ts.map +1 -0
- package/dist/core/schema-extraction.js +353 -0
- package/dist/core/schema-extraction.js.map +1 -0
- package/dist/core/strategies.d.ts +5 -0
- package/dist/core/strategies.d.ts.map +1 -1
- package/dist/core/strategies.js +9 -2
- package/dist/core/strategies.js.map +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/dist/types.d.ts +6 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -14,7 +14,8 @@
|
|
|
14
14
|
*/
|
|
15
15
|
import { Command } from 'commander';
|
|
16
16
|
import ora from 'ora';
|
|
17
|
-
import { writeFileSync, readFileSync } from 'fs';
|
|
17
|
+
import { writeFileSync, readFileSync, existsSync } from 'fs';
|
|
18
|
+
import { getProfilePath, loadStorageState, touchProfile, listProfiles, deleteProfile, createProfile } from './core/profiles.js';
|
|
18
19
|
import { peel, peelBatch, cleanup } from './index.js';
|
|
19
20
|
import { checkUsage, showUsageFooter, handleLogin, handleLogout, handleUsage, loadConfig, saveConfig } from './cli-auth.js';
|
|
20
21
|
import { getCache, setCache, parseTTL, clearCache, cacheStats } from './cache.js';
|
|
@@ -154,14 +155,18 @@ program
|
|
|
154
155
|
.option('--raw', 'Return full page without smart content extraction')
|
|
155
156
|
.option('--action <actions...>', 'Page actions before scraping (e.g., "click:.btn" "wait:2000" "scroll:bottom")')
|
|
156
157
|
.option('--extract <json>', 'Extract structured data using CSS selectors (JSON object of field:selector pairs)')
|
|
157
|
-
.option('--llm-extract
|
|
158
|
+
.option('--llm-extract [instruction]', 'Extract structured data using LLM (optional instruction, e.g. "extract hotel names and prices")')
|
|
158
159
|
.option('--llm-key <key>', 'LLM API key for AI features (or use OPENAI_API_KEY env var)')
|
|
160
|
+
.option('--llm-model <model>', 'LLM model to use (default: gpt-4o-mini)')
|
|
161
|
+
.option('--llm-base-url <url>', 'LLM API base URL (default: https://api.openai.com/v1)')
|
|
159
162
|
.option('--summary', 'Generate AI summary of content (requires --llm-key or OPENAI_API_KEY)')
|
|
160
163
|
.option('--location <country>', 'ISO country code for geo-targeting (e.g., "US", "DE", "JP")')
|
|
161
164
|
.option('--language <lang>', 'Language preference (e.g., "en", "de", "ja")')
|
|
162
165
|
.option('--max-tokens <n>', 'Maximum token count for output (truncate if exceeded)', parseInt)
|
|
163
166
|
.option('--budget <n>', 'Smart token budget — distill content to fit within N tokens (heuristic, no LLM key needed)', parseInt)
|
|
164
167
|
.option('--extract-all', 'Auto-detect and extract repeated listing items (e.g., search results)')
|
|
168
|
+
.option('--schema <name>', 'Force a specific extraction schema by name or domain (e.g., "booking.com", "amazon")')
|
|
169
|
+
.option('--list-schemas', 'List all available extraction schemas and their supported domains')
|
|
165
170
|
.option('--scroll-extract [count]', 'Scroll page N times to load lazy content, then extract (implies --render)', (v) => parseInt(v, 10))
|
|
166
171
|
.option('--csv', 'Output extraction results as CSV')
|
|
167
172
|
.option('--table', 'Output extraction results as a formatted table')
|
|
@@ -182,6 +187,31 @@ program
|
|
|
182
187
|
options.budget = 4000;
|
|
183
188
|
}
|
|
184
189
|
const isJson = options.json;
|
|
190
|
+
// --- --list-schemas: print all available schemas and exit ---
|
|
191
|
+
if (options.listSchemas) {
|
|
192
|
+
const { loadBundledSchemas } = await import('./core/schema-extraction.js');
|
|
193
|
+
const schemas = loadBundledSchemas();
|
|
194
|
+
if (isJson) {
|
|
195
|
+
await writeStdout(JSON.stringify(schemas.map(s => ({
|
|
196
|
+
name: s.name,
|
|
197
|
+
version: s.version,
|
|
198
|
+
domains: s.domains,
|
|
199
|
+
urlPatterns: s.urlPatterns,
|
|
200
|
+
})), null, 2) + '\n');
|
|
201
|
+
}
|
|
202
|
+
else {
|
|
203
|
+
console.log(`\nAvailable extraction schemas (${schemas.length}):\n`);
|
|
204
|
+
for (const s of schemas) {
|
|
205
|
+
console.log(` ${s.name} (v${s.version})`);
|
|
206
|
+
console.log(` Domains: ${s.domains.join(', ')}`);
|
|
207
|
+
if (s.urlPatterns && s.urlPatterns.length > 0) {
|
|
208
|
+
console.log(` URL patterns: ${s.urlPatterns.join(', ')}`);
|
|
209
|
+
}
|
|
210
|
+
console.log('');
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
process.exit(0);
|
|
214
|
+
}
|
|
185
215
|
// --- #5: Concise error for missing URL (no help dump) ---
|
|
186
216
|
if (!url || url.trim() === '') {
|
|
187
217
|
if (isJson) {
|
|
@@ -265,6 +295,33 @@ program
|
|
|
265
295
|
cachedResult.content = distillToBudget(cachedResult.content, options.budget, fmt);
|
|
266
296
|
cachedResult.tokens = Math.ceil(cachedResult.content.length / 4);
|
|
267
297
|
}
|
|
298
|
+
// LLM extraction from cached content
|
|
299
|
+
if (options.llmExtract) {
|
|
300
|
+
const { extractWithLLM } = await import('./core/llm-extract.js');
|
|
301
|
+
const llmCfgCached = loadConfig();
|
|
302
|
+
const llmApiKeyCached = options.llmKey || llmCfgCached.llm?.apiKey || process.env.OPENAI_API_KEY;
|
|
303
|
+
if (!llmApiKeyCached) {
|
|
304
|
+
console.error('Error: LLM extraction requires an API key.\nSet OPENAI_API_KEY environment variable or use --llm-key <key>');
|
|
305
|
+
process.exit(1);
|
|
306
|
+
}
|
|
307
|
+
const llmModelCached = options.llmModel || llmCfgCached.llm?.model || process.env.WEBPEEL_LLM_MODEL || 'gpt-4o-mini';
|
|
308
|
+
const llmBaseUrlCached = options.llmBaseUrl || llmCfgCached.llm?.baseUrl || process.env.WEBPEEL_LLM_BASE_URL || 'https://api.openai.com/v1';
|
|
309
|
+
const llmInstructionCached = typeof options.llmExtract === 'string' ? options.llmExtract : undefined;
|
|
310
|
+
const llmResultCached = await extractWithLLM({
|
|
311
|
+
content: cachedResult.content,
|
|
312
|
+
instruction: llmInstructionCached,
|
|
313
|
+
apiKey: llmApiKeyCached,
|
|
314
|
+
model: llmModelCached,
|
|
315
|
+
baseUrl: llmBaseUrlCached,
|
|
316
|
+
});
|
|
317
|
+
await writeStdout(JSON.stringify(llmResultCached.items, null, 2) + '\n');
|
|
318
|
+
if (!options.silent) {
|
|
319
|
+
const { input, output } = llmResultCached.tokensUsed;
|
|
320
|
+
const costStr = llmResultCached.cost !== undefined ? ` | Est. cost: $${llmResultCached.cost.toFixed(6)}` : '';
|
|
321
|
+
console.error(`\n🤖 LLM extraction: ${llmResultCached.items.length} items | ${input} input + ${output} output tokens${costStr} | model: ${llmResultCached.model}`);
|
|
322
|
+
}
|
|
323
|
+
process.exit(0);
|
|
324
|
+
}
|
|
268
325
|
await outputResult(cachedResult, options, { cached: true });
|
|
269
326
|
process.exit(0);
|
|
270
327
|
}
|
|
@@ -302,16 +359,15 @@ program
|
|
|
302
359
|
// Parse extract
|
|
303
360
|
let extract;
|
|
304
361
|
if (options.llmExtract) {
|
|
305
|
-
// LLM-based extraction
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
if (!extract.llmApiKey) {
|
|
313
|
-
throw Object.assign(new Error('--llm-extract requires OPENAI_API_KEY environment variable'), { _code: 'FETCH_FAILED' });
|
|
362
|
+
// LLM-based extraction is handled post-fetch (after peel returns markdown).
|
|
363
|
+
// Early-validate that an API key is available so we fail fast.
|
|
364
|
+
const llmCfg = loadConfig();
|
|
365
|
+
const llmApiKey = options.llmKey || llmCfg.llm?.apiKey || process.env.OPENAI_API_KEY;
|
|
366
|
+
if (!llmApiKey) {
|
|
367
|
+
throw Object.assign(new Error('LLM extraction requires an API key.\n' +
|
|
368
|
+
'Set OPENAI_API_KEY environment variable or use --llm-key <key>'), { _code: 'FETCH_FAILED' });
|
|
314
369
|
}
|
|
370
|
+
// Do NOT set extract here — peel runs normally, LLM extraction happens below.
|
|
315
371
|
}
|
|
316
372
|
else if (options.extract) {
|
|
317
373
|
// CSS-based extraction
|
|
@@ -351,6 +407,26 @@ program
|
|
|
351
407
|
locationOptions.languages = [options.language];
|
|
352
408
|
}
|
|
353
409
|
}
|
|
410
|
+
// ── Resolve --profile: name → path + storage state ─────────────────
|
|
411
|
+
let resolvedProfileDir;
|
|
412
|
+
let resolvedStorageState;
|
|
413
|
+
let resolvedProfileName;
|
|
414
|
+
if (options.profile) {
|
|
415
|
+
const profilePath = getProfilePath(options.profile);
|
|
416
|
+
if (profilePath) {
|
|
417
|
+
// It's a named profile in ~/.webpeel/profiles/
|
|
418
|
+
resolvedProfileDir = profilePath;
|
|
419
|
+
resolvedStorageState = loadStorageState(options.profile) ?? undefined;
|
|
420
|
+
resolvedProfileName = options.profile;
|
|
421
|
+
}
|
|
422
|
+
else if (existsSync(options.profile)) {
|
|
423
|
+
// It's a raw directory path (backward compat)
|
|
424
|
+
resolvedProfileDir = options.profile;
|
|
425
|
+
}
|
|
426
|
+
else {
|
|
427
|
+
exitWithJsonError(`Profile "${options.profile}" not found. Run "webpeel profile list" to see available profiles.`, 'PROFILE_NOT_FOUND');
|
|
428
|
+
}
|
|
429
|
+
}
|
|
354
430
|
// Build peel options
|
|
355
431
|
// --stealth auto-enables --render (stealth requires browser)
|
|
356
432
|
// --action auto-enables --render (actions require browser)
|
|
@@ -391,8 +467,9 @@ program
|
|
|
391
467
|
extract,
|
|
392
468
|
images: options.images || false,
|
|
393
469
|
location: locationOptions,
|
|
394
|
-
profileDir:
|
|
470
|
+
profileDir: resolvedProfileDir,
|
|
395
471
|
headed: options.headed || false,
|
|
472
|
+
storageState: resolvedStorageState,
|
|
396
473
|
};
|
|
397
474
|
// Add summary option if requested
|
|
398
475
|
if (options.summary) {
|
|
@@ -419,6 +496,10 @@ program
|
|
|
419
496
|
}
|
|
420
497
|
// Fetch the page
|
|
421
498
|
const result = await peel(url, peelOptions);
|
|
499
|
+
// Update lastUsed timestamp for named profiles
|
|
500
|
+
if (resolvedProfileName) {
|
|
501
|
+
touchProfile(resolvedProfileName);
|
|
502
|
+
}
|
|
422
503
|
if (spinner) {
|
|
423
504
|
spinner.succeed(`Fetched in ${result.elapsed}ms using ${result.method} method`);
|
|
424
505
|
}
|
|
@@ -477,19 +558,77 @@ program
|
|
|
477
558
|
console.error(`⚠ ${warningMsg}`);
|
|
478
559
|
}
|
|
479
560
|
}
|
|
561
|
+
// --- LLM-based extraction (post-peel) ---
|
|
562
|
+
if (options.llmExtract) {
|
|
563
|
+
const { extractWithLLM } = await import('./core/llm-extract.js');
|
|
564
|
+
const llmCfg = loadConfig();
|
|
565
|
+
const llmApiKey = options.llmKey || llmCfg.llm?.apiKey || process.env.OPENAI_API_KEY;
|
|
566
|
+
const llmModel = options.llmModel || llmCfg.llm?.model || process.env.WEBPEEL_LLM_MODEL || 'gpt-4o-mini';
|
|
567
|
+
const llmBaseUrl = options.llmBaseUrl || llmCfg.llm?.baseUrl || process.env.WEBPEEL_LLM_BASE_URL || 'https://api.openai.com/v1';
|
|
568
|
+
const llmInstruction = typeof options.llmExtract === 'string' ? options.llmExtract : undefined;
|
|
569
|
+
const llmResult = await extractWithLLM({
|
|
570
|
+
content: result.content,
|
|
571
|
+
instruction: llmInstruction,
|
|
572
|
+
apiKey: llmApiKey,
|
|
573
|
+
model: llmModel,
|
|
574
|
+
baseUrl: llmBaseUrl,
|
|
575
|
+
});
|
|
576
|
+
// Output structured items as JSON
|
|
577
|
+
await writeStdout(JSON.stringify(llmResult.items, null, 2) + '\n');
|
|
578
|
+
// Show token usage and estimated cost
|
|
579
|
+
if (!options.silent) {
|
|
580
|
+
const { input, output } = llmResult.tokensUsed;
|
|
581
|
+
const costStr = llmResult.cost !== undefined
|
|
582
|
+
? ` | Est. cost: $${llmResult.cost.toFixed(6)}`
|
|
583
|
+
: '';
|
|
584
|
+
console.error(`\n🤖 LLM extraction: ${llmResult.items.length} items | ${input} input + ${output} output tokens${costStr} | model: ${llmResult.model}`);
|
|
585
|
+
}
|
|
586
|
+
await cleanup();
|
|
587
|
+
process.exit(0);
|
|
588
|
+
}
|
|
480
589
|
// --- Extract-all / pagination / output formatting ---
|
|
481
590
|
const wantsExtractAll = options.extractAll || options.scrollExtract !== undefined;
|
|
482
591
|
const pagesCount = Math.min(Math.max(options.pages || 1, 1), 10);
|
|
483
592
|
if (wantsExtractAll) {
|
|
484
593
|
const { extractListings } = await import('./core/extract-listings.js');
|
|
485
594
|
const { findNextPageUrl } = await import('./core/paginate.js');
|
|
595
|
+
const { findSchemaForUrl, extractWithSchema, loadBundledSchemas } = await import('./core/schema-extraction.js');
|
|
596
|
+
// Resolve which schema to use (explicit --schema flag or auto-detect)
|
|
597
|
+
let activeSchema = null;
|
|
598
|
+
if (options.schema) {
|
|
599
|
+
// Find schema by name or domain match
|
|
600
|
+
const schemaQuery = options.schema.toLowerCase();
|
|
601
|
+
const allSchemas = loadBundledSchemas();
|
|
602
|
+
activeSchema = allSchemas.find(s => s.name.toLowerCase().includes(schemaQuery) ||
|
|
603
|
+
s.domains.some(d => d.toLowerCase().includes(schemaQuery))) ?? null;
|
|
604
|
+
if (!activeSchema && !options.silent) {
|
|
605
|
+
console.error(`Warning: No schema found for "${options.schema}", falling back to auto-detection`);
|
|
606
|
+
}
|
|
607
|
+
}
|
|
608
|
+
else {
|
|
609
|
+
// Auto-detect from URL
|
|
610
|
+
activeSchema = findSchemaForUrl(result.url || url);
|
|
611
|
+
}
|
|
486
612
|
// We need the raw HTML for extraction. Re-fetch with format=html if needed.
|
|
487
613
|
let allListings = [];
|
|
488
614
|
// Fetch HTML for extraction
|
|
489
615
|
const htmlResult = peelOptions.format === 'html'
|
|
490
616
|
? result
|
|
491
617
|
: await peel(url, { ...peelOptions, format: 'html', maxTokens: undefined });
|
|
492
|
-
|
|
618
|
+
// Try schema extraction first, fall back to generic
|
|
619
|
+
if (activeSchema) {
|
|
620
|
+
const schemaListings = extractWithSchema(htmlResult.content, activeSchema, result.url);
|
|
621
|
+
if (schemaListings.length > 0) {
|
|
622
|
+
allListings.push(...schemaListings);
|
|
623
|
+
}
|
|
624
|
+
else {
|
|
625
|
+
// Schema returned nothing — fall back to generic
|
|
626
|
+
allListings.push(...extractListings(htmlResult.content, result.url));
|
|
627
|
+
}
|
|
628
|
+
}
|
|
629
|
+
else {
|
|
630
|
+
allListings.push(...extractListings(htmlResult.content, result.url));
|
|
631
|
+
}
|
|
493
632
|
// Pagination: follow "Next" links
|
|
494
633
|
if (pagesCount > 1) {
|
|
495
634
|
let currentHtml = htmlResult.content;
|
|
@@ -500,7 +639,16 @@ program
|
|
|
500
639
|
break;
|
|
501
640
|
try {
|
|
502
641
|
const nextResult = await peel(nextUrl, { ...peelOptions, format: 'html', maxTokens: undefined });
|
|
503
|
-
|
|
642
|
+
let pageListings;
|
|
643
|
+
if (activeSchema) {
|
|
644
|
+
const schemaPage = extractWithSchema(nextResult.content, activeSchema, nextResult.url);
|
|
645
|
+
pageListings = schemaPage.length > 0
|
|
646
|
+
? schemaPage
|
|
647
|
+
: extractListings(nextResult.content, nextResult.url);
|
|
648
|
+
}
|
|
649
|
+
else {
|
|
650
|
+
pageListings = extractListings(nextResult.content, nextResult.url);
|
|
651
|
+
}
|
|
504
652
|
allListings.push(...pageListings);
|
|
505
653
|
currentHtml = nextResult.content;
|
|
506
654
|
currentUrl = nextResult.url;
|
|
@@ -658,7 +806,17 @@ program
|
|
|
658
806
|
.option('--csv', 'Output site-search results as CSV (requires --site)')
|
|
659
807
|
.option('--budget <n>', 'Token budget for site-search result content', parseInt)
|
|
660
808
|
.option('-s, --silent', 'Silent mode')
|
|
809
|
+
.option('--agent', 'Agent mode: sets --json, --silent, and --budget 4000 (override with --budget N)')
|
|
661
810
|
.action(async (query, options) => {
|
|
811
|
+
// --agent sets sensible defaults for AI agents; explicit flags override
|
|
812
|
+
if (options.agent) {
|
|
813
|
+
if (!options.json)
|
|
814
|
+
options.json = true;
|
|
815
|
+
if (!options.silent)
|
|
816
|
+
options.silent = true;
|
|
817
|
+
if (options.budget === undefined)
|
|
818
|
+
options.budget = 4000;
|
|
819
|
+
}
|
|
662
820
|
const isJson = options.json;
|
|
663
821
|
const isSilent = options.silent;
|
|
664
822
|
// --top overrides --count when both are provided
|
|
@@ -773,10 +931,24 @@ program
|
|
|
773
931
|
|| config.braveApiKey
|
|
774
932
|
|| undefined;
|
|
775
933
|
const provider = getSearchProvider(providerId);
|
|
776
|
-
|
|
934
|
+
let results = await provider.searchWeb(query, {
|
|
777
935
|
count: Math.min(Math.max(count, 1), 10),
|
|
778
936
|
apiKey,
|
|
779
937
|
});
|
|
938
|
+
// Apply budget to search results if requested (trim results to fit token budget)
|
|
939
|
+
if (options.budget && options.budget > 0 && results.length > 0) {
|
|
940
|
+
let totalTokens = 0;
|
|
941
|
+
let maxResults = 0;
|
|
942
|
+
for (const r of results) {
|
|
943
|
+
// Estimate ~4 chars per token for title + url + snippet
|
|
944
|
+
const resultTokens = Math.ceil((`${r.title || ''}\n${r.url || ''}\n${r.snippet || ''}`).length / 4);
|
|
945
|
+
if (totalTokens + resultTokens > options.budget)
|
|
946
|
+
break;
|
|
947
|
+
totalTokens += resultTokens;
|
|
948
|
+
maxResults++;
|
|
949
|
+
}
|
|
950
|
+
results = results.slice(0, Math.max(maxResults, 1));
|
|
951
|
+
}
|
|
780
952
|
if (spinner) {
|
|
781
953
|
spinner.succeed(`Found ${results.length} results (${providerId})`);
|
|
782
954
|
}
|
|
@@ -1370,24 +1542,52 @@ program
|
|
|
1370
1542
|
program
|
|
1371
1543
|
.command('config')
|
|
1372
1544
|
.description('View or update CLI configuration')
|
|
1373
|
-
.argument('[action]', '"get <key>", "set <key> <value>", or omit for overview')
|
|
1545
|
+
.argument('[action]', '"list", "get <key>", "set <key> <value>", or omit for overview')
|
|
1374
1546
|
.argument('[key]', 'Config key')
|
|
1375
1547
|
.argument('[value]', 'Value to set')
|
|
1376
1548
|
.action(async (action, key, value) => {
|
|
1377
1549
|
const config = loadConfig();
|
|
1378
1550
|
// Settable config keys (safe for user modification)
|
|
1551
|
+
// Supports dot-notation for nested keys (e.g., llm.apiKey)
|
|
1379
1552
|
const SETTABLE_KEYS = {
|
|
1380
1553
|
braveApiKey: 'Brave Search API key',
|
|
1554
|
+
'llm.apiKey': 'LLM API key for AI-powered extraction (OpenAI-compatible)',
|
|
1555
|
+
'llm.model': 'LLM model name (default: gpt-4o-mini)',
|
|
1556
|
+
'llm.baseUrl': 'LLM API base URL (default: https://api.openai.com/v1)',
|
|
1381
1557
|
};
|
|
1382
1558
|
const maskSecret = (k, v) => {
|
|
1383
1559
|
if (!v)
|
|
1384
1560
|
return '(not set)';
|
|
1385
|
-
if (k === 'apiKey' || k === 'braveApiKey')
|
|
1561
|
+
if (k === 'apiKey' || k === 'braveApiKey' || k === 'llm.apiKey') {
|
|
1386
1562
|
return v.slice(0, 4) + '...' + v.slice(-4);
|
|
1563
|
+
}
|
|
1387
1564
|
return String(v);
|
|
1388
1565
|
};
|
|
1389
|
-
|
|
1390
|
-
|
|
1566
|
+
/** Get a potentially nested value using dot-notation (e.g., "llm.apiKey") */
|
|
1567
|
+
function getNestedValue(obj, path) {
|
|
1568
|
+
const parts = path.split('.');
|
|
1569
|
+
let cur = obj;
|
|
1570
|
+
for (const part of parts) {
|
|
1571
|
+
if (cur == null || typeof cur !== 'object')
|
|
1572
|
+
return undefined;
|
|
1573
|
+
cur = cur[part];
|
|
1574
|
+
}
|
|
1575
|
+
return cur;
|
|
1576
|
+
}
|
|
1577
|
+
/** Set a potentially nested value using dot-notation (e.g., "llm.apiKey") */
|
|
1578
|
+
function setNestedValue(obj, path, val) {
|
|
1579
|
+
const parts = path.split('.');
|
|
1580
|
+
let cur = obj;
|
|
1581
|
+
for (let i = 0; i < parts.length - 1; i++) {
|
|
1582
|
+
const part = parts[i];
|
|
1583
|
+
if (cur[part] == null || typeof cur[part] !== 'object')
|
|
1584
|
+
cur[part] = {};
|
|
1585
|
+
cur = cur[part];
|
|
1586
|
+
}
|
|
1587
|
+
cur[parts[parts.length - 1]] = val;
|
|
1588
|
+
}
|
|
1589
|
+
if (!action || action === 'list') {
|
|
1590
|
+
// Show all config (also triggered by `webpeel config list`)
|
|
1391
1591
|
console.log('WebPeel CLI Configuration');
|
|
1392
1592
|
console.log(` Config file: ~/.webpeel/config.json`);
|
|
1393
1593
|
console.log('');
|
|
@@ -1395,6 +1595,11 @@ program
|
|
|
1395
1595
|
console.log(` braveApiKey: ${maskSecret('braveApiKey', config.braveApiKey)}`);
|
|
1396
1596
|
console.log(` planTier: ${config.planTier || 'free'}`);
|
|
1397
1597
|
console.log(` anonymousUsage: ${config.anonymousUsage}`);
|
|
1598
|
+
console.log('');
|
|
1599
|
+
console.log(' LLM:');
|
|
1600
|
+
console.log(` llm.apiKey: ${maskSecret('llm.apiKey', config.llm?.apiKey)}`);
|
|
1601
|
+
console.log(` llm.model: ${config.llm?.model || '(not set, default: gpt-4o-mini)'}`);
|
|
1602
|
+
console.log(` llm.baseUrl: ${config.llm?.baseUrl || '(not set, default: https://api.openai.com/v1)'}`);
|
|
1398
1603
|
const stats = cacheStats();
|
|
1399
1604
|
console.log('');
|
|
1400
1605
|
console.log(' Cache:');
|
|
@@ -1420,14 +1625,14 @@ program
|
|
|
1420
1625
|
console.error(`Usage: webpeel config set ${key} <value>`);
|
|
1421
1626
|
process.exit(1);
|
|
1422
1627
|
}
|
|
1423
|
-
config
|
|
1628
|
+
setNestedValue(config, key, value);
|
|
1424
1629
|
saveConfig(config);
|
|
1425
1630
|
console.log(`✓ ${key} saved`);
|
|
1426
1631
|
process.exit(0);
|
|
1427
1632
|
}
|
|
1428
1633
|
if (action === 'get') {
|
|
1429
1634
|
const lookupKey = key || '';
|
|
1430
|
-
const val = config[lookupKey];
|
|
1635
|
+
const val = getNestedValue(config, lookupKey) ?? config[lookupKey];
|
|
1431
1636
|
if (val !== undefined) {
|
|
1432
1637
|
console.log(maskSecret(lookupKey, String(val)));
|
|
1433
1638
|
}
|
|
@@ -1438,7 +1643,7 @@ program
|
|
|
1438
1643
|
process.exit(0);
|
|
1439
1644
|
}
|
|
1440
1645
|
// Legacy: `webpeel config <key>` — treat action as the key name
|
|
1441
|
-
const val = config[action];
|
|
1646
|
+
const val = getNestedValue(config, action) ?? config[action];
|
|
1442
1647
|
if (val !== undefined) {
|
|
1443
1648
|
console.log(maskSecret(action, String(val)));
|
|
1444
1649
|
}
|
|
@@ -2635,6 +2840,242 @@ applyCmd
|
|
|
2635
2840
|
process.exit(1);
|
|
2636
2841
|
}
|
|
2637
2842
|
});
|
|
2843
|
+
// ============================================================
|
|
2844
|
+
// Profile management commands
|
|
2845
|
+
// ============================================================
|
|
2846
|
+
const profileCmd = program
|
|
2847
|
+
.command('profile')
|
|
2848
|
+
.description('Manage named browser profiles (saved login sessions)');
|
|
2849
|
+
profileCmd
|
|
2850
|
+
.command('create <name>')
|
|
2851
|
+
.description('Create a new profile interactively (launches browser, log in, press Ctrl+C when done)')
|
|
2852
|
+
.option('--description <text>', 'Optional description for this profile')
|
|
2853
|
+
.action(async (name, opts) => {
|
|
2854
|
+
try {
|
|
2855
|
+
await createProfile(name, opts.description);
|
|
2856
|
+
process.exit(0);
|
|
2857
|
+
}
|
|
2858
|
+
catch (error) {
|
|
2859
|
+
console.error(`Error: ${error instanceof Error ? error.message : String(error)}`);
|
|
2860
|
+
process.exit(1);
|
|
2861
|
+
}
|
|
2862
|
+
});
|
|
2863
|
+
profileCmd
|
|
2864
|
+
.command('list')
|
|
2865
|
+
.description('List all saved browser profiles')
|
|
2866
|
+
.action(() => {
|
|
2867
|
+
const profiles = listProfiles();
|
|
2868
|
+
if (profiles.length === 0) {
|
|
2869
|
+
console.log('No profiles found.');
|
|
2870
|
+
console.log('');
|
|
2871
|
+
console.log('Create one with:');
|
|
2872
|
+
console.log(' webpeel profile create <name>');
|
|
2873
|
+
console.log('');
|
|
2874
|
+
console.log('Then use it with:');
|
|
2875
|
+
console.log(' webpeel <url> --profile <name>');
|
|
2876
|
+
process.exit(0);
|
|
2877
|
+
}
|
|
2878
|
+
console.log('');
|
|
2879
|
+
console.log('Saved profiles:');
|
|
2880
|
+
console.log('');
|
|
2881
|
+
// Column widths
|
|
2882
|
+
const nameW = Math.max(8, ...profiles.map((p) => p.name.length));
|
|
2883
|
+
const domainsW = Math.max(10, ...profiles.map((p) => (p.domains.join(', ') || '(none)').length));
|
|
2884
|
+
const header = 'Name'.padEnd(nameW) + ' ' +
|
|
2885
|
+
'Domains'.padEnd(domainsW) + ' ' +
|
|
2886
|
+
'Last Used'.padEnd(12) + ' ' +
|
|
2887
|
+
'Created';
|
|
2888
|
+
console.log(header);
|
|
2889
|
+
console.log('─'.repeat(header.length + 4));
|
|
2890
|
+
for (const p of profiles) {
|
|
2891
|
+
const domainsStr = p.domains.length > 0 ? p.domains.join(', ') : '(none)';
|
|
2892
|
+
const lastUsed = formatRelativeTime(new Date(p.lastUsed));
|
|
2893
|
+
const created = new Date(p.created).toISOString().split('T')[0];
|
|
2894
|
+
console.log(p.name.padEnd(nameW) + ' ' +
|
|
2895
|
+
domainsStr.padEnd(domainsW) + ' ' +
|
|
2896
|
+
lastUsed.padEnd(12) + ' ' +
|
|
2897
|
+
created);
|
|
2898
|
+
}
|
|
2899
|
+
console.log('');
|
|
2900
|
+
process.exit(0);
|
|
2901
|
+
});
|
|
2902
|
+
profileCmd
|
|
2903
|
+
.command('show <name>')
|
|
2904
|
+
.description('Show details for a profile')
|
|
2905
|
+
.action((name) => {
|
|
2906
|
+
const profilePath = getProfilePath(name);
|
|
2907
|
+
if (!profilePath) {
|
|
2908
|
+
console.error(`Error: Profile "${name}" not found.`);
|
|
2909
|
+
console.error('Run "webpeel profile list" to see available profiles.');
|
|
2910
|
+
process.exit(1);
|
|
2911
|
+
}
|
|
2912
|
+
try {
|
|
2913
|
+
const meta = JSON.parse(readFileSync(`${profilePath}/metadata.json`, 'utf-8'));
|
|
2914
|
+
console.log('');
|
|
2915
|
+
console.log(`Profile: ${meta.name}`);
|
|
2916
|
+
if (meta.description)
|
|
2917
|
+
console.log(`Description: ${meta.description}`);
|
|
2918
|
+
console.log(`Created: ${new Date(meta.created).toLocaleString()}`);
|
|
2919
|
+
console.log(`Last used: ${new Date(meta.lastUsed).toLocaleString()}`);
|
|
2920
|
+
console.log(`Domains: ${meta.domains.length > 0 ? meta.domains.join(', ') : '(none)'}`);
|
|
2921
|
+
console.log(`Directory: ${profilePath}`);
|
|
2922
|
+
console.log('');
|
|
2923
|
+
process.exit(0);
|
|
2924
|
+
}
|
|
2925
|
+
catch (e) {
|
|
2926
|
+
console.error(`Error reading profile: ${e instanceof Error ? e.message : String(e)}`);
|
|
2927
|
+
process.exit(1);
|
|
2928
|
+
}
|
|
2929
|
+
});
|
|
2930
|
+
profileCmd
|
|
2931
|
+
.command('delete <name>')
|
|
2932
|
+
.description('Delete a saved profile')
|
|
2933
|
+
.action((name) => {
|
|
2934
|
+
const deleted = deleteProfile(name);
|
|
2935
|
+
if (deleted) {
|
|
2936
|
+
console.log(`Profile "${name}" deleted.`);
|
|
2937
|
+
process.exit(0);
|
|
2938
|
+
}
|
|
2939
|
+
else {
|
|
2940
|
+
console.error(`Error: Profile "${name}" not found.`);
|
|
2941
|
+
console.error('Run "webpeel profile list" to see available profiles.');
|
|
2942
|
+
process.exit(1);
|
|
2943
|
+
}
|
|
2944
|
+
});
|
|
2945
|
+
// ── Hotels command ─────────────────────────────────────────────────────────────
|
|
2946
|
+
program
|
|
2947
|
+
.command('hotels <destination>')
|
|
2948
|
+
.description('Search multiple travel sites for hotels (Kayak, Booking.com, Google Travel)')
|
|
2949
|
+
.option('--checkin <date>', 'Check-in date (ISO or relative, e.g. "tomorrow", "2026-02-20"). Default: tomorrow')
|
|
2950
|
+
.option('--checkout <date>', 'Check-out date (ISO or relative). Default: checkin + 1 day')
|
|
2951
|
+
.option('--sort <method>', 'Sort by: price, rating, value (default: price)', 'price')
|
|
2952
|
+
.option('--limit <n>', 'Max results (default: 20)', '20')
|
|
2953
|
+
.option('--source <name...>', 'Only use specific source(s): kayak, booking, google (repeatable)')
|
|
2954
|
+
.option('--json', 'Output as JSON')
|
|
2955
|
+
.option('--stealth', 'Use stealth mode for all sources')
|
|
2956
|
+
.option('-s, --silent', 'Suppress progress messages')
|
|
2957
|
+
.action(async (destination, options) => {
|
|
2958
|
+
const isJson = options.json;
|
|
2959
|
+
const isSilent = options.silent;
|
|
2960
|
+
// Build checkin/checkout
|
|
2961
|
+
const { parseDate, addDays: hotelAddDays } = await import('./core/hotel-search.js');
|
|
2962
|
+
let checkinStr;
|
|
2963
|
+
let checkoutStr;
|
|
2964
|
+
try {
|
|
2965
|
+
checkinStr = parseDate(options.checkin ?? 'tomorrow');
|
|
2966
|
+
checkoutStr = options.checkout
|
|
2967
|
+
? parseDate(options.checkout)
|
|
2968
|
+
: hotelAddDays(checkinStr, 1);
|
|
2969
|
+
}
|
|
2970
|
+
catch (err) {
|
|
2971
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
2972
|
+
if (isJson) {
|
|
2973
|
+
await writeStdout(JSON.stringify({ error: msg, code: 'INVALID_DATE' }) + '\n');
|
|
2974
|
+
}
|
|
2975
|
+
else {
|
|
2976
|
+
console.error(`Error: ${msg}`);
|
|
2977
|
+
}
|
|
2978
|
+
process.exit(1);
|
|
2979
|
+
}
|
|
2980
|
+
const sortMethod = (['price', 'rating', 'value'].includes(options.sort)
|
|
2981
|
+
? options.sort
|
|
2982
|
+
: 'price');
|
|
2983
|
+
const limit = Math.max(1, parseInt(options.limit, 10) || 20);
|
|
2984
|
+
const sources = options.source
|
|
2985
|
+
? (Array.isArray(options.source) ? options.source : [options.source])
|
|
2986
|
+
: undefined;
|
|
2987
|
+
// Spinner per-source progress (non-silent, non-JSON)
|
|
2988
|
+
let searchSpinner = null;
|
|
2989
|
+
if (!isSilent && !isJson) {
|
|
2990
|
+
searchSpinner = ora(`Searching hotels in ${destination}...`).start();
|
|
2991
|
+
}
|
|
2992
|
+
else if (!isSilent && !isJson) {
|
|
2993
|
+
console.error(`⏳ Searching kayak.com...`);
|
|
2994
|
+
console.error(`⏳ Searching booking.com...`);
|
|
2995
|
+
console.error(`⏳ Searching google.com...`);
|
|
2996
|
+
}
|
|
2997
|
+
try {
|
|
2998
|
+
const { searchHotels } = await import('./core/hotel-search.js');
|
|
2999
|
+
const result = await searchHotels({
|
|
3000
|
+
destination,
|
|
3001
|
+
checkin: checkinStr,
|
|
3002
|
+
checkout: checkoutStr,
|
|
3003
|
+
sort: sortMethod,
|
|
3004
|
+
limit,
|
|
3005
|
+
sources,
|
|
3006
|
+
stealth: options.stealth,
|
|
3007
|
+
silent: isSilent,
|
|
3008
|
+
});
|
|
3009
|
+
if (searchSpinner)
|
|
3010
|
+
searchSpinner.stop();
|
|
3011
|
+
// Show per-source status
|
|
3012
|
+
if (!isSilent && !isJson) {
|
|
3013
|
+
for (const src of result.sources) {
|
|
3014
|
+
if (src.status === 'ok') {
|
|
3015
|
+
console.error(`✅ ${src.name}: ${src.count} hotels found`);
|
|
3016
|
+
}
|
|
3017
|
+
else {
|
|
3018
|
+
console.error(`❌ ${src.name}: ${src.status}${src.error ? ' — ' + src.error : ''}`);
|
|
3019
|
+
}
|
|
3020
|
+
}
|
|
3021
|
+
}
|
|
3022
|
+
if (isJson) {
|
|
3023
|
+
await writeStdout(JSON.stringify(result, null, 2) + '\n');
|
|
3024
|
+
await cleanup();
|
|
3025
|
+
process.exit(0);
|
|
3026
|
+
}
|
|
3027
|
+
// Human-readable table output
|
|
3028
|
+
const { formatDate: fmtDate } = {
|
|
3029
|
+
formatDate: (iso) => {
|
|
3030
|
+
const d = new Date(iso + 'T12:00:00Z');
|
|
3031
|
+
return d.toLocaleDateString('en-US', { month: 'short', day: 'numeric', year: 'numeric', timeZone: 'UTC' });
|
|
3032
|
+
},
|
|
3033
|
+
};
|
|
3034
|
+
const ci = fmtDate(result.checkin);
|
|
3035
|
+
const co = fmtDate(result.checkout);
|
|
3036
|
+
console.log(`\n🏨 Hotels in ${result.destination}`);
|
|
3037
|
+
console.log(` ${ci} → ${co} | Sorted by ${sortMethod}\n`);
|
|
3038
|
+
if (result.results.length === 0) {
|
|
3039
|
+
console.log(' No hotels found.\n');
|
|
3040
|
+
}
|
|
3041
|
+
else {
|
|
3042
|
+
const colNum = 3;
|
|
3043
|
+
const colName = 42;
|
|
3044
|
+
const colPrice = 8;
|
|
3045
|
+
const colRating = 8;
|
|
3046
|
+
const colSource = 10;
|
|
3047
|
+
const padEnd = (s, w) => s.length > w ? s.slice(0, w - 1) + '…' : s.padEnd(w);
|
|
3048
|
+
const padStart = (s, w) => s.padStart(w);
|
|
3049
|
+
console.log(` ${padStart('#', colNum)} ${padEnd('Hotel', colName)} ${padEnd('Price', colPrice)} ${padEnd('Rating', colRating)} ${padEnd('Source', colSource)}`);
|
|
3050
|
+
result.results.forEach((hotel, i) => {
|
|
3051
|
+
const priceStr = hotel.priceDisplay || '—';
|
|
3052
|
+
const ratingStr = hotel.rating !== null ? String(hotel.rating) : '—';
|
|
3053
|
+
console.log(` ${padStart(String(i + 1), colNum)} ${padEnd(hotel.name, colName)} ${padEnd(priceStr, colPrice)} ${padEnd(ratingStr, colRating)} ${padEnd(hotel.source, colSource)}`);
|
|
3054
|
+
});
|
|
3055
|
+
console.log('');
|
|
3056
|
+
const sourceSummary = result.sources
|
|
3057
|
+
.map(s => `${s.name} (${s.count} ${s.status === 'ok' ? '✅' : s.status === 'blocked' ? '🚫' : '❌'})`)
|
|
3058
|
+
.join(' | ');
|
|
3059
|
+
console.log(`Sources: ${sourceSummary}`);
|
|
3060
|
+
}
|
|
3061
|
+
console.log('');
|
|
3062
|
+
await cleanup();
|
|
3063
|
+
process.exit(0);
|
|
3064
|
+
}
|
|
3065
|
+
catch (error) {
|
|
3066
|
+
if (searchSpinner)
|
|
3067
|
+
searchSpinner.fail('Hotel search failed');
|
|
3068
|
+
const msg = error instanceof Error ? error.message : 'Unknown error';
|
|
3069
|
+
if (isJson) {
|
|
3070
|
+
await writeStdout(JSON.stringify({ error: msg, code: 'FETCH_FAILED' }) + '\n');
|
|
3071
|
+
}
|
|
3072
|
+
else {
|
|
3073
|
+
console.error(`\nError: ${msg}`);
|
|
3074
|
+
}
|
|
3075
|
+
await cleanup();
|
|
3076
|
+
process.exit(1);
|
|
3077
|
+
}
|
|
3078
|
+
});
|
|
2638
3079
|
program.parse();
|
|
2639
3080
|
// ============================================================
|
|
2640
3081
|
// Time formatting helper
|