webpeel 0.9.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/README.md +39 -4
  2. package/dist/cli-auth.d.ts +6 -0
  3. package/dist/cli-auth.d.ts.map +1 -1
  4. package/dist/cli-auth.js.map +1 -1
  5. package/dist/cli.js +506 -23
  6. package/dist/cli.js.map +1 -1
  7. package/dist/core/challenge-detection.d.ts.map +1 -1
  8. package/dist/core/challenge-detection.js +39 -6
  9. package/dist/core/challenge-detection.js.map +1 -1
  10. package/dist/core/extract-listings.d.ts.map +1 -1
  11. package/dist/core/extract-listings.js +167 -36
  12. package/dist/core/extract-listings.js.map +1 -1
  13. package/dist/core/fetcher.d.ts +14 -1
  14. package/dist/core/fetcher.d.ts.map +1 -1
  15. package/dist/core/fetcher.js +176 -14
  16. package/dist/core/fetcher.js.map +1 -1
  17. package/dist/core/hotel-search.d.ts +123 -0
  18. package/dist/core/hotel-search.d.ts.map +1 -0
  19. package/dist/core/hotel-search.js +383 -0
  20. package/dist/core/hotel-search.js.map +1 -0
  21. package/dist/core/llm-extract.d.ts +56 -0
  22. package/dist/core/llm-extract.d.ts.map +1 -0
  23. package/dist/core/llm-extract.js +264 -0
  24. package/dist/core/llm-extract.js.map +1 -0
  25. package/dist/core/profiles.d.ts +48 -0
  26. package/dist/core/profiles.d.ts.map +1 -0
  27. package/dist/core/profiles.js +211 -0
  28. package/dist/core/profiles.js.map +1 -0
  29. package/dist/core/schema-extraction.d.ts +67 -0
  30. package/dist/core/schema-extraction.d.ts.map +1 -0
  31. package/dist/core/schema-extraction.js +353 -0
  32. package/dist/core/schema-extraction.js.map +1 -0
  33. package/dist/core/strategies.d.ts +11 -0
  34. package/dist/core/strategies.d.ts.map +1 -1
  35. package/dist/core/strategies.js +17 -5
  36. package/dist/core/strategies.js.map +1 -1
  37. package/dist/index.d.ts.map +1 -1
  38. package/dist/index.js +3 -1
  39. package/dist/index.js.map +1 -1
  40. package/dist/mcp/server.js +47 -3
  41. package/dist/mcp/server.js.map +1 -1
  42. package/dist/types.d.ts +16 -0
  43. package/dist/types.d.ts.map +1 -1
  44. package/dist/types.js.map +1 -1
  45. package/package.json +1 -1
package/dist/cli.js CHANGED
@@ -14,7 +14,8 @@
14
14
  */
15
15
  import { Command } from 'commander';
16
16
  import ora from 'ora';
17
- import { writeFileSync, readFileSync } from 'fs';
17
+ import { writeFileSync, readFileSync, existsSync } from 'fs';
18
+ import { getProfilePath, loadStorageState, touchProfile, listProfiles, deleteProfile, createProfile } from './core/profiles.js';
18
19
  import { peel, peelBatch, cleanup } from './index.js';
19
20
  import { checkUsage, showUsageFooter, handleLogin, handleLogout, handleUsage, loadConfig, saveConfig } from './cli-auth.js';
20
21
  import { getCache, setCache, parseTTL, clearCache, cacheStats } from './cache.js';
@@ -130,6 +131,7 @@ program
130
131
  .argument('[url]', 'URL to fetch')
131
132
  .option('-r, --render', 'Use headless browser (for JS-heavy sites)')
132
133
  .option('--stealth', 'Use stealth mode to bypass bot detection (auto-enables --render)')
134
+ .option('--proxy <url>', 'Proxy URL for requests (http://host:port, socks5://user:pass@host:port)')
133
135
  .option('-w, --wait <ms>', 'Wait time after page load (ms)', parseInt)
134
136
  .option('--html', 'Output raw HTML instead of markdown')
135
137
  .option('--text', 'Output plain text instead of markdown')
@@ -154,14 +156,19 @@ program
154
156
  .option('--raw', 'Return full page without smart content extraction')
155
157
  .option('--action <actions...>', 'Page actions before scraping (e.g., "click:.btn" "wait:2000" "scroll:bottom")')
156
158
  .option('--extract <json>', 'Extract structured data using CSS selectors (JSON object of field:selector pairs)')
157
- .option('--llm-extract <prompt>', 'AI-powered extraction using LLM (requires OPENAI_API_KEY env var)')
159
+ .option('--llm-extract [instruction]', 'Extract structured data using LLM (optional instruction, e.g. "extract hotel names and prices")')
160
+ .option('--extract-schema <schema>', 'JSON schema for structured extraction (requires LLM key). Pass inline JSON or @file.json')
158
161
  .option('--llm-key <key>', 'LLM API key for AI features (or use OPENAI_API_KEY env var)')
162
+ .option('--llm-model <model>', 'LLM model to use (default: gpt-4o-mini)')
163
+ .option('--llm-base-url <url>', 'LLM API base URL (default: https://api.openai.com/v1)')
159
164
  .option('--summary', 'Generate AI summary of content (requires --llm-key or OPENAI_API_KEY)')
160
165
  .option('--location <country>', 'ISO country code for geo-targeting (e.g., "US", "DE", "JP")')
161
166
  .option('--language <lang>', 'Language preference (e.g., "en", "de", "ja")')
162
167
  .option('--max-tokens <n>', 'Maximum token count for output (truncate if exceeded)', parseInt)
163
168
  .option('--budget <n>', 'Smart token budget — distill content to fit within N tokens (heuristic, no LLM key needed)', parseInt)
164
169
  .option('--extract-all', 'Auto-detect and extract repeated listing items (e.g., search results)')
170
+ .option('--schema <name>', 'Force a specific extraction schema by name or domain (e.g., "booking.com", "amazon")')
171
+ .option('--list-schemas', 'List all available extraction schemas and their supported domains')
165
172
  .option('--scroll-extract [count]', 'Scroll page N times to load lazy content, then extract (implies --render)', (v) => parseInt(v, 10))
166
173
  .option('--csv', 'Output extraction results as CSV')
167
174
  .option('--table', 'Output extraction results as a formatted table')
@@ -182,6 +189,31 @@ program
182
189
  options.budget = 4000;
183
190
  }
184
191
  const isJson = options.json;
192
+ // --- --list-schemas: print all available schemas and exit ---
193
+ if (options.listSchemas) {
194
+ const { loadBundledSchemas } = await import('./core/schema-extraction.js');
195
+ const schemas = loadBundledSchemas();
196
+ if (isJson) {
197
+ await writeStdout(JSON.stringify(schemas.map(s => ({
198
+ name: s.name,
199
+ version: s.version,
200
+ domains: s.domains,
201
+ urlPatterns: s.urlPatterns,
202
+ })), null, 2) + '\n');
203
+ }
204
+ else {
205
+ console.log(`\nAvailable extraction schemas (${schemas.length}):\n`);
206
+ for (const s of schemas) {
207
+ console.log(` ${s.name} (v${s.version})`);
208
+ console.log(` Domains: ${s.domains.join(', ')}`);
209
+ if (s.urlPatterns && s.urlPatterns.length > 0) {
210
+ console.log(` URL patterns: ${s.urlPatterns.join(', ')}`);
211
+ }
212
+ console.log('');
213
+ }
214
+ }
215
+ process.exit(0);
216
+ }
185
217
  // --- #5: Concise error for missing URL (no help dump) ---
186
218
  if (!url || url.trim() === '') {
187
219
  if (isJson) {
@@ -265,6 +297,49 @@ program
265
297
  cachedResult.content = distillToBudget(cachedResult.content, options.budget, fmt);
266
298
  cachedResult.tokens = Math.ceil(cachedResult.content.length / 4);
267
299
  }
300
+ // LLM extraction from cached content
301
+ if (options.llmExtract || options.extractSchema) {
302
+ const { extractWithLLM } = await import('./core/llm-extract.js');
303
+ const llmCfgCached = loadConfig();
304
+ const llmApiKeyCached = options.llmKey || llmCfgCached.llm?.apiKey || process.env.OPENAI_API_KEY;
305
+ if (!llmApiKeyCached) {
306
+ console.error('Error: LLM extraction requires an API key.\nSet OPENAI_API_KEY environment variable or use --llm-key <key>');
307
+ process.exit(1);
308
+ }
309
+ const llmModelCached = options.llmModel || llmCfgCached.llm?.model || process.env.WEBPEEL_LLM_MODEL || 'gpt-4o-mini';
310
+ const llmBaseUrlCached = options.llmBaseUrl || llmCfgCached.llm?.baseUrl || process.env.WEBPEEL_LLM_BASE_URL || 'https://api.openai.com/v1';
311
+ const llmInstructionCached = typeof options.llmExtract === 'string' ? options.llmExtract : undefined;
312
+ // Parse schema if provided
313
+ let llmSchemaCached;
314
+ if (options.extractSchema) {
315
+ let schemaStr = options.extractSchema;
316
+ if (schemaStr.startsWith('@')) {
317
+ schemaStr = readFileSync(schemaStr.slice(1), 'utf-8');
318
+ }
319
+ try {
320
+ llmSchemaCached = JSON.parse(schemaStr);
321
+ }
322
+ catch {
323
+ console.error('Error: --extract-schema must be valid JSON or a valid @file.json path');
324
+ process.exit(1);
325
+ }
326
+ }
327
+ const llmResultCached = await extractWithLLM({
328
+ content: cachedResult.content,
329
+ instruction: llmInstructionCached,
330
+ schema: llmSchemaCached,
331
+ apiKey: llmApiKeyCached,
332
+ model: llmModelCached,
333
+ baseUrl: llmBaseUrlCached,
334
+ });
335
+ await writeStdout(JSON.stringify(llmResultCached.items, null, 2) + '\n');
336
+ if (!options.silent) {
337
+ const { input, output } = llmResultCached.tokensUsed;
338
+ const costStr = llmResultCached.cost !== undefined ? ` | Est. cost: $${llmResultCached.cost.toFixed(6)}` : '';
339
+ console.error(`\n🤖 LLM extraction: ${llmResultCached.items.length} items | ${input} input + ${output} output tokens${costStr} | model: ${llmResultCached.model}`);
340
+ }
341
+ process.exit(0);
342
+ }
268
343
  await outputResult(cachedResult, options, { cached: true });
269
344
  process.exit(0);
270
345
  }
@@ -299,19 +374,22 @@ program
299
374
  throw Object.assign(new Error(e.message), { _code: 'FETCH_FAILED' });
300
375
  }
301
376
  }
377
+ // --extract-schema auto-enables JSON output
378
+ if (options.extractSchema) {
379
+ options.json = true;
380
+ }
302
381
  // Parse extract
303
382
  let extract;
304
- if (options.llmExtract) {
305
- // LLM-based extraction
306
- extract = {
307
- prompt: options.llmExtract,
308
- llmApiKey: process.env.OPENAI_API_KEY,
309
- llmModel: process.env.WEBPEEL_LLM_MODEL || 'gpt-4o-mini',
310
- llmBaseUrl: process.env.WEBPEEL_LLM_BASE_URL || 'https://api.openai.com/v1',
311
- };
312
- if (!extract.llmApiKey) {
313
- throw Object.assign(new Error('--llm-extract requires OPENAI_API_KEY environment variable'), { _code: 'FETCH_FAILED' });
383
+ if (options.llmExtract || options.extractSchema) {
384
+ // LLM-based extraction is handled post-fetch (after peel returns markdown).
385
+ // Early-validate that an API key is available so we fail fast.
386
+ const llmCfg = loadConfig();
387
+ const llmApiKey = options.llmKey || llmCfg.llm?.apiKey || process.env.OPENAI_API_KEY;
388
+ if (!llmApiKey) {
389
+ throw Object.assign(new Error('LLM extraction requires an API key.\n' +
390
+ 'Set OPENAI_API_KEY environment variable or use --llm-key <key>'), { _code: 'FETCH_FAILED' });
314
391
  }
392
+ // Do NOT set extract here — peel runs normally, LLM extraction happens below.
315
393
  }
316
394
  else if (options.extract) {
317
395
  // CSS-based extraction
@@ -351,6 +429,26 @@ program
351
429
  locationOptions.languages = [options.language];
352
430
  }
353
431
  }
432
+ // ── Resolve --profile: name → path + storage state ─────────────────
433
+ let resolvedProfileDir;
434
+ let resolvedStorageState;
435
+ let resolvedProfileName;
436
+ if (options.profile) {
437
+ const profilePath = getProfilePath(options.profile);
438
+ if (profilePath) {
439
+ // It's a named profile in ~/.webpeel/profiles/
440
+ resolvedProfileDir = profilePath;
441
+ resolvedStorageState = loadStorageState(options.profile) ?? undefined;
442
+ resolvedProfileName = options.profile;
443
+ }
444
+ else if (existsSync(options.profile)) {
445
+ // It's a raw directory path (backward compat)
446
+ resolvedProfileDir = options.profile;
447
+ }
448
+ else {
449
+ exitWithJsonError(`Profile "${options.profile}" not found. Run "webpeel profile list" to see available profiles.`, 'PROFILE_NOT_FOUND');
450
+ }
451
+ }
354
452
  // Build peel options
355
453
  // --stealth auto-enables --render (stealth requires browser)
356
454
  // --action auto-enables --render (actions require browser)
@@ -391,8 +489,10 @@ program
391
489
  extract,
392
490
  images: options.images || false,
393
491
  location: locationOptions,
394
- profileDir: options.profile || undefined,
492
+ profileDir: resolvedProfileDir,
395
493
  headed: options.headed || false,
494
+ storageState: resolvedStorageState,
495
+ proxy: options.proxy,
396
496
  };
397
497
  // Add summary option if requested
398
498
  if (options.summary) {
@@ -419,6 +519,10 @@ program
419
519
  }
420
520
  // Fetch the page
421
521
  const result = await peel(url, peelOptions);
522
+ // Update lastUsed timestamp for named profiles
523
+ if (resolvedProfileName) {
524
+ touchProfile(resolvedProfileName);
525
+ }
422
526
  if (spinner) {
423
527
  spinner.succeed(`Fetched in ${result.elapsed}ms using ${result.method} method`);
424
528
  }
@@ -477,19 +581,92 @@ program
477
581
  console.error(`⚠ ${warningMsg}`);
478
582
  }
479
583
  }
584
+ // --- LLM-based extraction (post-peel) ---
585
+ if (options.llmExtract || options.extractSchema) {
586
+ const { extractWithLLM } = await import('./core/llm-extract.js');
587
+ const llmCfg = loadConfig();
588
+ const llmApiKey = options.llmKey || llmCfg.llm?.apiKey || process.env.OPENAI_API_KEY;
589
+ const llmModel = options.llmModel || llmCfg.llm?.model || process.env.WEBPEEL_LLM_MODEL || 'gpt-4o-mini';
590
+ const llmBaseUrl = options.llmBaseUrl || llmCfg.llm?.baseUrl || process.env.WEBPEEL_LLM_BASE_URL || 'https://api.openai.com/v1';
591
+ const llmInstruction = typeof options.llmExtract === 'string' ? options.llmExtract : undefined;
592
+ // Parse --extract-schema if provided
593
+ let llmSchema;
594
+ if (options.extractSchema) {
595
+ let schemaStr = options.extractSchema;
596
+ if (schemaStr.startsWith('@')) {
597
+ schemaStr = readFileSync(schemaStr.slice(1), 'utf-8');
598
+ }
599
+ try {
600
+ llmSchema = JSON.parse(schemaStr);
601
+ }
602
+ catch {
603
+ exitWithJsonError('--extract-schema must be valid JSON or a valid @file.json path', 'FETCH_FAILED');
604
+ }
605
+ }
606
+ const llmResult = await extractWithLLM({
607
+ content: result.content,
608
+ instruction: llmInstruction,
609
+ schema: llmSchema,
610
+ apiKey: llmApiKey,
611
+ model: llmModel,
612
+ baseUrl: llmBaseUrl,
613
+ });
614
+ // Output structured items as JSON
615
+ await writeStdout(JSON.stringify(llmResult.items, null, 2) + '\n');
616
+ // Show token usage and estimated cost
617
+ if (!options.silent) {
618
+ const { input, output } = llmResult.tokensUsed;
619
+ const costStr = llmResult.cost !== undefined
620
+ ? ` | Est. cost: $${llmResult.cost.toFixed(6)}`
621
+ : '';
622
+ console.error(`\n🤖 LLM extraction: ${llmResult.items.length} items | ${input} input + ${output} output tokens${costStr} | model: ${llmResult.model}`);
623
+ }
624
+ await cleanup();
625
+ process.exit(0);
626
+ }
480
627
  // --- Extract-all / pagination / output formatting ---
481
628
  const wantsExtractAll = options.extractAll || options.scrollExtract !== undefined;
482
629
  const pagesCount = Math.min(Math.max(options.pages || 1, 1), 10);
483
630
  if (wantsExtractAll) {
484
631
  const { extractListings } = await import('./core/extract-listings.js');
485
632
  const { findNextPageUrl } = await import('./core/paginate.js');
633
+ const { findSchemaForUrl, extractWithSchema, loadBundledSchemas } = await import('./core/schema-extraction.js');
634
+ // Resolve which schema to use (explicit --schema flag or auto-detect)
635
+ let activeSchema = null;
636
+ if (options.schema) {
637
+ // Find schema by name or domain match
638
+ const schemaQuery = options.schema.toLowerCase();
639
+ const allSchemas = loadBundledSchemas();
640
+ activeSchema = allSchemas.find(s => s.name.toLowerCase().includes(schemaQuery) ||
641
+ s.domains.some(d => d.toLowerCase().includes(schemaQuery))) ?? null;
642
+ if (!activeSchema && !options.silent) {
643
+ console.error(`Warning: No schema found for "${options.schema}", falling back to auto-detection`);
644
+ }
645
+ }
646
+ else {
647
+ // Auto-detect from URL
648
+ activeSchema = findSchemaForUrl(result.url || url);
649
+ }
486
650
  // We need the raw HTML for extraction. Re-fetch with format=html if needed.
487
651
  let allListings = [];
488
652
  // Fetch HTML for extraction
489
653
  const htmlResult = peelOptions.format === 'html'
490
654
  ? result
491
655
  : await peel(url, { ...peelOptions, format: 'html', maxTokens: undefined });
492
- allListings.push(...extractListings(htmlResult.content, result.url));
656
+ // Try schema extraction first, fall back to generic
657
+ if (activeSchema) {
658
+ const schemaListings = extractWithSchema(htmlResult.content, activeSchema, result.url);
659
+ if (schemaListings.length > 0) {
660
+ allListings.push(...schemaListings);
661
+ }
662
+ else {
663
+ // Schema returned nothing — fall back to generic
664
+ allListings.push(...extractListings(htmlResult.content, result.url));
665
+ }
666
+ }
667
+ else {
668
+ allListings.push(...extractListings(htmlResult.content, result.url));
669
+ }
493
670
  // Pagination: follow "Next" links
494
671
  if (pagesCount > 1) {
495
672
  let currentHtml = htmlResult.content;
@@ -500,7 +677,16 @@ program
500
677
  break;
501
678
  try {
502
679
  const nextResult = await peel(nextUrl, { ...peelOptions, format: 'html', maxTokens: undefined });
503
- const pageListings = extractListings(nextResult.content, nextResult.url);
680
+ let pageListings;
681
+ if (activeSchema) {
682
+ const schemaPage = extractWithSchema(nextResult.content, activeSchema, nextResult.url);
683
+ pageListings = schemaPage.length > 0
684
+ ? schemaPage
685
+ : extractListings(nextResult.content, nextResult.url);
686
+ }
687
+ else {
688
+ pageListings = extractListings(nextResult.content, nextResult.url);
689
+ }
504
690
  allListings.push(...pageListings);
505
691
  currentHtml = nextResult.content;
506
692
  currentUrl = nextResult.url;
@@ -658,7 +844,18 @@ program
658
844
  .option('--csv', 'Output site-search results as CSV (requires --site)')
659
845
  .option('--budget <n>', 'Token budget for site-search result content', parseInt)
660
846
  .option('-s, --silent', 'Silent mode')
847
+ .option('--proxy <url>', 'Proxy URL for requests (http://host:port, socks5://user:pass@host:port)')
848
+ .option('--agent', 'Agent mode: sets --json, --silent, and --budget 4000 (override with --budget N)')
661
849
  .action(async (query, options) => {
850
+ // --agent sets sensible defaults for AI agents; explicit flags override
851
+ if (options.agent) {
852
+ if (!options.json)
853
+ options.json = true;
854
+ if (!options.silent)
855
+ options.silent = true;
856
+ if (options.budget === undefined)
857
+ options.budget = 4000;
858
+ }
662
859
  const isJson = options.json;
663
860
  const isSilent = options.silent;
664
861
  // --top overrides --count when both are provided
@@ -679,6 +876,7 @@ program
679
876
  const htmlResult = await peel(siteResult.url, {
680
877
  format: 'html',
681
878
  timeout: 30000,
879
+ proxy: options.proxy,
682
880
  });
683
881
  if (spinner) {
684
882
  spinner.succeed(`Fetched ${siteResult.site} in ${htmlResult.elapsed}ms`);
@@ -773,10 +971,24 @@ program
773
971
  || config.braveApiKey
774
972
  || undefined;
775
973
  const provider = getSearchProvider(providerId);
776
- const results = await provider.searchWeb(query, {
974
+ let results = await provider.searchWeb(query, {
777
975
  count: Math.min(Math.max(count, 1), 10),
778
976
  apiKey,
779
977
  });
978
+ // Apply budget to search results if requested (trim results to fit token budget)
979
+ if (options.budget && options.budget > 0 && results.length > 0) {
980
+ let totalTokens = 0;
981
+ let maxResults = 0;
982
+ for (const r of results) {
983
+ // Estimate ~4 chars per token for title + url + snippet
984
+ const resultTokens = Math.ceil((`${r.title || ''}\n${r.url || ''}\n${r.snippet || ''}`).length / 4);
985
+ if (totalTokens + resultTokens > options.budget)
986
+ break;
987
+ totalTokens += resultTokens;
988
+ maxResults++;
989
+ }
990
+ results = results.slice(0, Math.max(maxResults, 1));
991
+ }
780
992
  if (spinner) {
781
993
  spinner.succeed(`Found ${results.length} results (${providerId})`);
782
994
  }
@@ -1370,24 +1582,52 @@ program
1370
1582
  program
1371
1583
  .command('config')
1372
1584
  .description('View or update CLI configuration')
1373
- .argument('[action]', '"get <key>", "set <key> <value>", or omit for overview')
1585
+ .argument('[action]', '"list", "get <key>", "set <key> <value>", or omit for overview')
1374
1586
  .argument('[key]', 'Config key')
1375
1587
  .argument('[value]', 'Value to set')
1376
1588
  .action(async (action, key, value) => {
1377
1589
  const config = loadConfig();
1378
1590
  // Settable config keys (safe for user modification)
1591
+ // Supports dot-notation for nested keys (e.g., llm.apiKey)
1379
1592
  const SETTABLE_KEYS = {
1380
1593
  braveApiKey: 'Brave Search API key',
1594
+ 'llm.apiKey': 'LLM API key for AI-powered extraction (OpenAI-compatible)',
1595
+ 'llm.model': 'LLM model name (default: gpt-4o-mini)',
1596
+ 'llm.baseUrl': 'LLM API base URL (default: https://api.openai.com/v1)',
1381
1597
  };
1382
1598
  const maskSecret = (k, v) => {
1383
1599
  if (!v)
1384
1600
  return '(not set)';
1385
- if (k === 'apiKey' || k === 'braveApiKey')
1601
+ if (k === 'apiKey' || k === 'braveApiKey' || k === 'llm.apiKey') {
1386
1602
  return v.slice(0, 4) + '...' + v.slice(-4);
1603
+ }
1387
1604
  return String(v);
1388
1605
  };
1389
- if (!action) {
1390
- // Show all config
1606
+ /** Get a potentially nested value using dot-notation (e.g., "llm.apiKey") */
1607
+ function getNestedValue(obj, path) {
1608
+ const parts = path.split('.');
1609
+ let cur = obj;
1610
+ for (const part of parts) {
1611
+ if (cur == null || typeof cur !== 'object')
1612
+ return undefined;
1613
+ cur = cur[part];
1614
+ }
1615
+ return cur;
1616
+ }
1617
+ /** Set a potentially nested value using dot-notation (e.g., "llm.apiKey") */
1618
+ function setNestedValue(obj, path, val) {
1619
+ const parts = path.split('.');
1620
+ let cur = obj;
1621
+ for (let i = 0; i < parts.length - 1; i++) {
1622
+ const part = parts[i];
1623
+ if (cur[part] == null || typeof cur[part] !== 'object')
1624
+ cur[part] = {};
1625
+ cur = cur[part];
1626
+ }
1627
+ cur[parts[parts.length - 1]] = val;
1628
+ }
1629
+ if (!action || action === 'list') {
1630
+ // Show all config (also triggered by `webpeel config list`)
1391
1631
  console.log('WebPeel CLI Configuration');
1392
1632
  console.log(` Config file: ~/.webpeel/config.json`);
1393
1633
  console.log('');
@@ -1395,6 +1635,11 @@ program
1395
1635
  console.log(` braveApiKey: ${maskSecret('braveApiKey', config.braveApiKey)}`);
1396
1636
  console.log(` planTier: ${config.planTier || 'free'}`);
1397
1637
  console.log(` anonymousUsage: ${config.anonymousUsage}`);
1638
+ console.log('');
1639
+ console.log(' LLM:');
1640
+ console.log(` llm.apiKey: ${maskSecret('llm.apiKey', config.llm?.apiKey)}`);
1641
+ console.log(` llm.model: ${config.llm?.model || '(not set, default: gpt-4o-mini)'}`);
1642
+ console.log(` llm.baseUrl: ${config.llm?.baseUrl || '(not set, default: https://api.openai.com/v1)'}`);
1398
1643
  const stats = cacheStats();
1399
1644
  console.log('');
1400
1645
  console.log(' Cache:');
@@ -1420,14 +1665,14 @@ program
1420
1665
  console.error(`Usage: webpeel config set ${key} <value>`);
1421
1666
  process.exit(1);
1422
1667
  }
1423
- config[key] = value;
1668
+ setNestedValue(config, key, value);
1424
1669
  saveConfig(config);
1425
1670
  console.log(`✓ ${key} saved`);
1426
1671
  process.exit(0);
1427
1672
  }
1428
1673
  if (action === 'get') {
1429
1674
  const lookupKey = key || '';
1430
- const val = config[lookupKey];
1675
+ const val = getNestedValue(config, lookupKey) ?? config[lookupKey];
1431
1676
  if (val !== undefined) {
1432
1677
  console.log(maskSecret(lookupKey, String(val)));
1433
1678
  }
@@ -1438,7 +1683,7 @@ program
1438
1683
  process.exit(0);
1439
1684
  }
1440
1685
  // Legacy: `webpeel config <key>` — treat action as the key name
1441
- const val = config[action];
1686
+ const val = getNestedValue(config, action) ?? config[action];
1442
1687
  if (val !== undefined) {
1443
1688
  console.log(maskSecret(action, String(val)));
1444
1689
  }
@@ -2635,6 +2880,244 @@ applyCmd
2635
2880
  process.exit(1);
2636
2881
  }
2637
2882
  });
2883
+ // ============================================================
2884
+ // Profile management commands
2885
+ // ============================================================
2886
+ const profileCmd = program
2887
+ .command('profile')
2888
+ .description('Manage named browser profiles (saved login sessions)');
2889
+ profileCmd
2890
+ .command('create <name>')
2891
+ .description('Create a new profile interactively (launches browser, log in, press Ctrl+C when done)')
2892
+ .option('--description <text>', 'Optional description for this profile')
2893
+ .action(async (name, opts) => {
2894
+ try {
2895
+ await createProfile(name, opts.description);
2896
+ process.exit(0);
2897
+ }
2898
+ catch (error) {
2899
+ console.error(`Error: ${error instanceof Error ? error.message : String(error)}`);
2900
+ process.exit(1);
2901
+ }
2902
+ });
2903
+ profileCmd
2904
+ .command('list')
2905
+ .description('List all saved browser profiles')
2906
+ .action(() => {
2907
+ const profiles = listProfiles();
2908
+ if (profiles.length === 0) {
2909
+ console.log('No profiles found.');
2910
+ console.log('');
2911
+ console.log('Create one with:');
2912
+ console.log(' webpeel profile create <name>');
2913
+ console.log('');
2914
+ console.log('Then use it with:');
2915
+ console.log(' webpeel <url> --profile <name>');
2916
+ process.exit(0);
2917
+ }
2918
+ console.log('');
2919
+ console.log('Saved profiles:');
2920
+ console.log('');
2921
+ // Column widths
2922
+ const nameW = Math.max(8, ...profiles.map((p) => p.name.length));
2923
+ const domainsW = Math.max(10, ...profiles.map((p) => (p.domains.join(', ') || '(none)').length));
2924
+ const header = 'Name'.padEnd(nameW) + ' ' +
2925
+ 'Domains'.padEnd(domainsW) + ' ' +
2926
+ 'Last Used'.padEnd(12) + ' ' +
2927
+ 'Created';
2928
+ console.log(header);
2929
+ console.log('─'.repeat(header.length + 4));
2930
+ for (const p of profiles) {
2931
+ const domainsStr = p.domains.length > 0 ? p.domains.join(', ') : '(none)';
2932
+ const lastUsed = formatRelativeTime(new Date(p.lastUsed));
2933
+ const created = new Date(p.created).toISOString().split('T')[0];
2934
+ console.log(p.name.padEnd(nameW) + ' ' +
2935
+ domainsStr.padEnd(domainsW) + ' ' +
2936
+ lastUsed.padEnd(12) + ' ' +
2937
+ created);
2938
+ }
2939
+ console.log('');
2940
+ process.exit(0);
2941
+ });
2942
+ profileCmd
2943
+ .command('show <name>')
2944
+ .description('Show details for a profile')
2945
+ .action((name) => {
2946
+ const profilePath = getProfilePath(name);
2947
+ if (!profilePath) {
2948
+ console.error(`Error: Profile "${name}" not found.`);
2949
+ console.error('Run "webpeel profile list" to see available profiles.');
2950
+ process.exit(1);
2951
+ }
2952
+ try {
2953
+ const meta = JSON.parse(readFileSync(`${profilePath}/metadata.json`, 'utf-8'));
2954
+ console.log('');
2955
+ console.log(`Profile: ${meta.name}`);
2956
+ if (meta.description)
2957
+ console.log(`Description: ${meta.description}`);
2958
+ console.log(`Created: ${new Date(meta.created).toLocaleString()}`);
2959
+ console.log(`Last used: ${new Date(meta.lastUsed).toLocaleString()}`);
2960
+ console.log(`Domains: ${meta.domains.length > 0 ? meta.domains.join(', ') : '(none)'}`);
2961
+ console.log(`Directory: ${profilePath}`);
2962
+ console.log('');
2963
+ process.exit(0);
2964
+ }
2965
+ catch (e) {
2966
+ console.error(`Error reading profile: ${e instanceof Error ? e.message : String(e)}`);
2967
+ process.exit(1);
2968
+ }
2969
+ });
2970
+ profileCmd
2971
+ .command('delete <name>')
2972
+ .description('Delete a saved profile')
2973
+ .action((name) => {
2974
+ const deleted = deleteProfile(name);
2975
+ if (deleted) {
2976
+ console.log(`Profile "${name}" deleted.`);
2977
+ process.exit(0);
2978
+ }
2979
+ else {
2980
+ console.error(`Error: Profile "${name}" not found.`);
2981
+ console.error('Run "webpeel profile list" to see available profiles.');
2982
+ process.exit(1);
2983
+ }
2984
+ });
2985
+ // ── Hotels command ─────────────────────────────────────────────────────────────
2986
+ program
2987
+ .command('hotels <destination>')
2988
+ .description('Search multiple travel sites for hotels (Kayak, Booking.com, Google Travel)')
2989
+ .option('--checkin <date>', 'Check-in date (ISO or relative, e.g. "tomorrow", "2026-02-20"). Default: tomorrow')
2990
+ .option('--checkout <date>', 'Check-out date (ISO or relative). Default: checkin + 1 day')
2991
+ .option('--sort <method>', 'Sort by: price, rating, value (default: price)', 'price')
2992
+ .option('--limit <n>', 'Max results (default: 20)', '20')
2993
+ .option('--source <name...>', 'Only use specific source(s): kayak, booking, google (repeatable)')
2994
+ .option('--json', 'Output as JSON')
2995
+ .option('--stealth', 'Use stealth mode for all sources')
2996
+ .option('--proxy <url>', 'Proxy URL for requests (http://host:port, socks5://user:pass@host:port)')
2997
+ .option('-s, --silent', 'Suppress progress messages')
2998
+ .action(async (destination, options) => {
2999
+ const isJson = options.json;
3000
+ const isSilent = options.silent;
3001
+ // Build checkin/checkout
3002
+ const { parseDate, addDays: hotelAddDays } = await import('./core/hotel-search.js');
3003
+ let checkinStr;
3004
+ let checkoutStr;
3005
+ try {
3006
+ checkinStr = parseDate(options.checkin ?? 'tomorrow');
3007
+ checkoutStr = options.checkout
3008
+ ? parseDate(options.checkout)
3009
+ : hotelAddDays(checkinStr, 1);
3010
+ }
3011
+ catch (err) {
3012
+ const msg = err instanceof Error ? err.message : String(err);
3013
+ if (isJson) {
3014
+ await writeStdout(JSON.stringify({ error: msg, code: 'INVALID_DATE' }) + '\n');
3015
+ }
3016
+ else {
3017
+ console.error(`Error: ${msg}`);
3018
+ }
3019
+ process.exit(1);
3020
+ }
3021
+ const sortMethod = (['price', 'rating', 'value'].includes(options.sort)
3022
+ ? options.sort
3023
+ : 'price');
3024
+ const limit = Math.max(1, parseInt(options.limit, 10) || 20);
3025
+ const sources = options.source
3026
+ ? (Array.isArray(options.source) ? options.source : [options.source])
3027
+ : undefined;
3028
+ // Spinner per-source progress (non-silent, non-JSON)
3029
+ let searchSpinner = null;
3030
+ if (!isSilent && !isJson) {
3031
+ searchSpinner = ora(`Searching hotels in ${destination}...`).start();
3032
+ }
3033
+ else if (!isSilent && !isJson) {
3034
+ console.error(`⏳ Searching kayak.com...`);
3035
+ console.error(`⏳ Searching booking.com...`);
3036
+ console.error(`⏳ Searching google.com...`);
3037
+ }
3038
+ try {
3039
+ const { searchHotels } = await import('./core/hotel-search.js');
3040
+ const result = await searchHotels({
3041
+ destination,
3042
+ checkin: checkinStr,
3043
+ checkout: checkoutStr,
3044
+ sort: sortMethod,
3045
+ limit,
3046
+ sources,
3047
+ stealth: options.stealth,
3048
+ silent: isSilent,
3049
+ proxy: options.proxy,
3050
+ });
3051
+ if (searchSpinner)
3052
+ searchSpinner.stop();
3053
+ // Show per-source status
3054
+ if (!isSilent && !isJson) {
3055
+ for (const src of result.sources) {
3056
+ if (src.status === 'ok') {
3057
+ console.error(`✅ ${src.name}: ${src.count} hotels found`);
3058
+ }
3059
+ else {
3060
+ console.error(`❌ ${src.name}: ${src.status}${src.error ? ' — ' + src.error : ''}`);
3061
+ }
3062
+ }
3063
+ }
3064
+ if (isJson) {
3065
+ await writeStdout(JSON.stringify(result, null, 2) + '\n');
3066
+ await cleanup();
3067
+ process.exit(0);
3068
+ }
3069
+ // Human-readable table output
3070
+ const { formatDate: fmtDate } = {
3071
+ formatDate: (iso) => {
3072
+ const d = new Date(iso + 'T12:00:00Z');
3073
+ return d.toLocaleDateString('en-US', { month: 'short', day: 'numeric', year: 'numeric', timeZone: 'UTC' });
3074
+ },
3075
+ };
3076
+ const ci = fmtDate(result.checkin);
3077
+ const co = fmtDate(result.checkout);
3078
+ console.log(`\n🏨 Hotels in ${result.destination}`);
3079
+ console.log(` ${ci} → ${co} | Sorted by ${sortMethod}\n`);
3080
+ if (result.results.length === 0) {
3081
+ console.log(' No hotels found.\n');
3082
+ }
3083
+ else {
3084
+ const colNum = 3;
3085
+ const colName = 42;
3086
+ const colPrice = 8;
3087
+ const colRating = 8;
3088
+ const colSource = 10;
3089
+ const padEnd = (s, w) => s.length > w ? s.slice(0, w - 1) + '…' : s.padEnd(w);
3090
+ const padStart = (s, w) => s.padStart(w);
3091
+ console.log(` ${padStart('#', colNum)} ${padEnd('Hotel', colName)} ${padEnd('Price', colPrice)} ${padEnd('Rating', colRating)} ${padEnd('Source', colSource)}`);
3092
+ result.results.forEach((hotel, i) => {
3093
+ const priceStr = hotel.priceDisplay || '—';
3094
+ const ratingStr = hotel.rating !== null ? String(hotel.rating) : '—';
3095
+ console.log(` ${padStart(String(i + 1), colNum)} ${padEnd(hotel.name, colName)} ${padEnd(priceStr, colPrice)} ${padEnd(ratingStr, colRating)} ${padEnd(hotel.source, colSource)}`);
3096
+ });
3097
+ console.log('');
3098
+ const sourceSummary = result.sources
3099
+ .map(s => `${s.name} (${s.count} ${s.status === 'ok' ? '✅' : s.status === 'blocked' ? '🚫' : '❌'})`)
3100
+ .join(' | ');
3101
+ console.log(`Sources: ${sourceSummary}`);
3102
+ }
3103
+ console.log('');
3104
+ await cleanup();
3105
+ process.exit(0);
3106
+ }
3107
+ catch (error) {
3108
+ if (searchSpinner)
3109
+ searchSpinner.fail('Hotel search failed');
3110
+ const msg = error instanceof Error ? error.message : 'Unknown error';
3111
+ if (isJson) {
3112
+ await writeStdout(JSON.stringify({ error: msg, code: 'FETCH_FAILED' }) + '\n');
3113
+ }
3114
+ else {
3115
+ console.error(`\nError: ${msg}`);
3116
+ }
3117
+ await cleanup();
3118
+ process.exit(1);
3119
+ }
3120
+ });
2638
3121
  program.parse();
2639
3122
  // ============================================================
2640
3123
  // Time formatting helper