webpeel 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/README.md +39 -4
  2. package/dist/cli-auth.d.ts +6 -0
  3. package/dist/cli-auth.d.ts.map +1 -1
  4. package/dist/cli-auth.js.map +1 -1
  5. package/dist/cli.js +463 -22
  6. package/dist/cli.js.map +1 -1
  7. package/dist/core/challenge-detection.d.ts.map +1 -1
  8. package/dist/core/challenge-detection.js +39 -6
  9. package/dist/core/challenge-detection.js.map +1 -1
  10. package/dist/core/extract-listings.d.ts.map +1 -1
  11. package/dist/core/extract-listings.js +167 -36
  12. package/dist/core/extract-listings.js.map +1 -1
  13. package/dist/core/fetcher.d.ts +6 -0
  14. package/dist/core/fetcher.d.ts.map +1 -1
  15. package/dist/core/fetcher.js +147 -11
  16. package/dist/core/fetcher.js.map +1 -1
  17. package/dist/core/hotel-search.d.ts +121 -0
  18. package/dist/core/hotel-search.d.ts.map +1 -0
  19. package/dist/core/hotel-search.js +381 -0
  20. package/dist/core/hotel-search.js.map +1 -0
  21. package/dist/core/llm-extract.d.ts +42 -0
  22. package/dist/core/llm-extract.d.ts.map +1 -0
  23. package/dist/core/llm-extract.js +144 -0
  24. package/dist/core/llm-extract.js.map +1 -0
  25. package/dist/core/profiles.d.ts +48 -0
  26. package/dist/core/profiles.d.ts.map +1 -0
  27. package/dist/core/profiles.js +211 -0
  28. package/dist/core/profiles.js.map +1 -0
  29. package/dist/core/schema-extraction.d.ts +67 -0
  30. package/dist/core/schema-extraction.d.ts.map +1 -0
  31. package/dist/core/schema-extraction.js +353 -0
  32. package/dist/core/schema-extraction.js.map +1 -0
  33. package/dist/core/strategies.d.ts +5 -0
  34. package/dist/core/strategies.d.ts.map +1 -1
  35. package/dist/core/strategies.js +9 -2
  36. package/dist/core/strategies.js.map +1 -1
  37. package/dist/index.d.ts.map +1 -1
  38. package/dist/index.js +2 -1
  39. package/dist/index.js.map +1 -1
  40. package/dist/types.d.ts +6 -0
  41. package/dist/types.d.ts.map +1 -1
  42. package/dist/types.js.map +1 -1
  43. package/package.json +1 -1
package/dist/cli.js CHANGED
@@ -14,7 +14,8 @@
14
14
  */
15
15
  import { Command } from 'commander';
16
16
  import ora from 'ora';
17
- import { writeFileSync, readFileSync } from 'fs';
17
+ import { writeFileSync, readFileSync, existsSync } from 'fs';
18
+ import { getProfilePath, loadStorageState, touchProfile, listProfiles, deleteProfile, createProfile } from './core/profiles.js';
18
19
  import { peel, peelBatch, cleanup } from './index.js';
19
20
  import { checkUsage, showUsageFooter, handleLogin, handleLogout, handleUsage, loadConfig, saveConfig } from './cli-auth.js';
20
21
  import { getCache, setCache, parseTTL, clearCache, cacheStats } from './cache.js';
@@ -154,14 +155,18 @@ program
154
155
  .option('--raw', 'Return full page without smart content extraction')
155
156
  .option('--action <actions...>', 'Page actions before scraping (e.g., "click:.btn" "wait:2000" "scroll:bottom")')
156
157
  .option('--extract <json>', 'Extract structured data using CSS selectors (JSON object of field:selector pairs)')
157
- .option('--llm-extract <prompt>', 'AI-powered extraction using LLM (requires OPENAI_API_KEY env var)')
158
+ .option('--llm-extract [instruction]', 'Extract structured data using LLM (optional instruction, e.g. "extract hotel names and prices")')
158
159
  .option('--llm-key <key>', 'LLM API key for AI features (or use OPENAI_API_KEY env var)')
160
+ .option('--llm-model <model>', 'LLM model to use (default: gpt-4o-mini)')
161
+ .option('--llm-base-url <url>', 'LLM API base URL (default: https://api.openai.com/v1)')
159
162
  .option('--summary', 'Generate AI summary of content (requires --llm-key or OPENAI_API_KEY)')
160
163
  .option('--location <country>', 'ISO country code for geo-targeting (e.g., "US", "DE", "JP")')
161
164
  .option('--language <lang>', 'Language preference (e.g., "en", "de", "ja")')
162
165
  .option('--max-tokens <n>', 'Maximum token count for output (truncate if exceeded)', parseInt)
163
166
  .option('--budget <n>', 'Smart token budget — distill content to fit within N tokens (heuristic, no LLM key needed)', parseInt)
164
167
  .option('--extract-all', 'Auto-detect and extract repeated listing items (e.g., search results)')
168
+ .option('--schema <name>', 'Force a specific extraction schema by name or domain (e.g., "booking.com", "amazon")')
169
+ .option('--list-schemas', 'List all available extraction schemas and their supported domains')
165
170
  .option('--scroll-extract [count]', 'Scroll page N times to load lazy content, then extract (implies --render)', (v) => parseInt(v, 10))
166
171
  .option('--csv', 'Output extraction results as CSV')
167
172
  .option('--table', 'Output extraction results as a formatted table')
@@ -182,6 +187,31 @@ program
182
187
  options.budget = 4000;
183
188
  }
184
189
  const isJson = options.json;
190
+ // --- --list-schemas: print all available schemas and exit ---
191
+ if (options.listSchemas) {
192
+ const { loadBundledSchemas } = await import('./core/schema-extraction.js');
193
+ const schemas = loadBundledSchemas();
194
+ if (isJson) {
195
+ await writeStdout(JSON.stringify(schemas.map(s => ({
196
+ name: s.name,
197
+ version: s.version,
198
+ domains: s.domains,
199
+ urlPatterns: s.urlPatterns,
200
+ })), null, 2) + '\n');
201
+ }
202
+ else {
203
+ console.log(`\nAvailable extraction schemas (${schemas.length}):\n`);
204
+ for (const s of schemas) {
205
+ console.log(` ${s.name} (v${s.version})`);
206
+ console.log(` Domains: ${s.domains.join(', ')}`);
207
+ if (s.urlPatterns && s.urlPatterns.length > 0) {
208
+ console.log(` URL patterns: ${s.urlPatterns.join(', ')}`);
209
+ }
210
+ console.log('');
211
+ }
212
+ }
213
+ process.exit(0);
214
+ }
185
215
  // --- #5: Concise error for missing URL (no help dump) ---
186
216
  if (!url || url.trim() === '') {
187
217
  if (isJson) {
@@ -265,6 +295,33 @@ program
265
295
  cachedResult.content = distillToBudget(cachedResult.content, options.budget, fmt);
266
296
  cachedResult.tokens = Math.ceil(cachedResult.content.length / 4);
267
297
  }
298
+ // LLM extraction from cached content
299
+ if (options.llmExtract) {
300
+ const { extractWithLLM } = await import('./core/llm-extract.js');
301
+ const llmCfgCached = loadConfig();
302
+ const llmApiKeyCached = options.llmKey || llmCfgCached.llm?.apiKey || process.env.OPENAI_API_KEY;
303
+ if (!llmApiKeyCached) {
304
+ console.error('Error: LLM extraction requires an API key.\nSet OPENAI_API_KEY environment variable or use --llm-key <key>');
305
+ process.exit(1);
306
+ }
307
+ const llmModelCached = options.llmModel || llmCfgCached.llm?.model || process.env.WEBPEEL_LLM_MODEL || 'gpt-4o-mini';
308
+ const llmBaseUrlCached = options.llmBaseUrl || llmCfgCached.llm?.baseUrl || process.env.WEBPEEL_LLM_BASE_URL || 'https://api.openai.com/v1';
309
+ const llmInstructionCached = typeof options.llmExtract === 'string' ? options.llmExtract : undefined;
310
+ const llmResultCached = await extractWithLLM({
311
+ content: cachedResult.content,
312
+ instruction: llmInstructionCached,
313
+ apiKey: llmApiKeyCached,
314
+ model: llmModelCached,
315
+ baseUrl: llmBaseUrlCached,
316
+ });
317
+ await writeStdout(JSON.stringify(llmResultCached.items, null, 2) + '\n');
318
+ if (!options.silent) {
319
+ const { input, output } = llmResultCached.tokensUsed;
320
+ const costStr = llmResultCached.cost !== undefined ? ` | Est. cost: $${llmResultCached.cost.toFixed(6)}` : '';
321
+ console.error(`\n🤖 LLM extraction: ${llmResultCached.items.length} items | ${input} input + ${output} output tokens${costStr} | model: ${llmResultCached.model}`);
322
+ }
323
+ process.exit(0);
324
+ }
268
325
  await outputResult(cachedResult, options, { cached: true });
269
326
  process.exit(0);
270
327
  }
@@ -302,16 +359,15 @@ program
302
359
  // Parse extract
303
360
  let extract;
304
361
  if (options.llmExtract) {
305
- // LLM-based extraction
306
- extract = {
307
- prompt: options.llmExtract,
308
- llmApiKey: process.env.OPENAI_API_KEY,
309
- llmModel: process.env.WEBPEEL_LLM_MODEL || 'gpt-4o-mini',
310
- llmBaseUrl: process.env.WEBPEEL_LLM_BASE_URL || 'https://api.openai.com/v1',
311
- };
312
- if (!extract.llmApiKey) {
313
- throw Object.assign(new Error('--llm-extract requires OPENAI_API_KEY environment variable'), { _code: 'FETCH_FAILED' });
362
+ // LLM-based extraction is handled post-fetch (after peel returns markdown).
363
+ // Early-validate that an API key is available so we fail fast.
364
+ const llmCfg = loadConfig();
365
+ const llmApiKey = options.llmKey || llmCfg.llm?.apiKey || process.env.OPENAI_API_KEY;
366
+ if (!llmApiKey) {
367
+ throw Object.assign(new Error('LLM extraction requires an API key.\n' +
368
+ 'Set OPENAI_API_KEY environment variable or use --llm-key <key>'), { _code: 'FETCH_FAILED' });
314
369
  }
370
+ // Do NOT set extract here — peel runs normally, LLM extraction happens below.
315
371
  }
316
372
  else if (options.extract) {
317
373
  // CSS-based extraction
@@ -351,6 +407,26 @@ program
351
407
  locationOptions.languages = [options.language];
352
408
  }
353
409
  }
410
+ // ── Resolve --profile: name → path + storage state ─────────────────
411
+ let resolvedProfileDir;
412
+ let resolvedStorageState;
413
+ let resolvedProfileName;
414
+ if (options.profile) {
415
+ const profilePath = getProfilePath(options.profile);
416
+ if (profilePath) {
417
+ // It's a named profile in ~/.webpeel/profiles/
418
+ resolvedProfileDir = profilePath;
419
+ resolvedStorageState = loadStorageState(options.profile) ?? undefined;
420
+ resolvedProfileName = options.profile;
421
+ }
422
+ else if (existsSync(options.profile)) {
423
+ // It's a raw directory path (backward compat)
424
+ resolvedProfileDir = options.profile;
425
+ }
426
+ else {
427
+ exitWithJsonError(`Profile "${options.profile}" not found. Run "webpeel profile list" to see available profiles.`, 'PROFILE_NOT_FOUND');
428
+ }
429
+ }
354
430
  // Build peel options
355
431
  // --stealth auto-enables --render (stealth requires browser)
356
432
  // --action auto-enables --render (actions require browser)
@@ -391,8 +467,9 @@ program
391
467
  extract,
392
468
  images: options.images || false,
393
469
  location: locationOptions,
394
- profileDir: options.profile || undefined,
470
+ profileDir: resolvedProfileDir,
395
471
  headed: options.headed || false,
472
+ storageState: resolvedStorageState,
396
473
  };
397
474
  // Add summary option if requested
398
475
  if (options.summary) {
@@ -419,6 +496,10 @@ program
419
496
  }
420
497
  // Fetch the page
421
498
  const result = await peel(url, peelOptions);
499
+ // Update lastUsed timestamp for named profiles
500
+ if (resolvedProfileName) {
501
+ touchProfile(resolvedProfileName);
502
+ }
422
503
  if (spinner) {
423
504
  spinner.succeed(`Fetched in ${result.elapsed}ms using ${result.method} method`);
424
505
  }
@@ -477,19 +558,77 @@ program
477
558
  console.error(`⚠ ${warningMsg}`);
478
559
  }
479
560
  }
561
+ // --- LLM-based extraction (post-peel) ---
562
+ if (options.llmExtract) {
563
+ const { extractWithLLM } = await import('./core/llm-extract.js');
564
+ const llmCfg = loadConfig();
565
+ const llmApiKey = options.llmKey || llmCfg.llm?.apiKey || process.env.OPENAI_API_KEY;
566
+ const llmModel = options.llmModel || llmCfg.llm?.model || process.env.WEBPEEL_LLM_MODEL || 'gpt-4o-mini';
567
+ const llmBaseUrl = options.llmBaseUrl || llmCfg.llm?.baseUrl || process.env.WEBPEEL_LLM_BASE_URL || 'https://api.openai.com/v1';
568
+ const llmInstruction = typeof options.llmExtract === 'string' ? options.llmExtract : undefined;
569
+ const llmResult = await extractWithLLM({
570
+ content: result.content,
571
+ instruction: llmInstruction,
572
+ apiKey: llmApiKey,
573
+ model: llmModel,
574
+ baseUrl: llmBaseUrl,
575
+ });
576
+ // Output structured items as JSON
577
+ await writeStdout(JSON.stringify(llmResult.items, null, 2) + '\n');
578
+ // Show token usage and estimated cost
579
+ if (!options.silent) {
580
+ const { input, output } = llmResult.tokensUsed;
581
+ const costStr = llmResult.cost !== undefined
582
+ ? ` | Est. cost: $${llmResult.cost.toFixed(6)}`
583
+ : '';
584
+ console.error(`\n🤖 LLM extraction: ${llmResult.items.length} items | ${input} input + ${output} output tokens${costStr} | model: ${llmResult.model}`);
585
+ }
586
+ await cleanup();
587
+ process.exit(0);
588
+ }
480
589
  // --- Extract-all / pagination / output formatting ---
481
590
  const wantsExtractAll = options.extractAll || options.scrollExtract !== undefined;
482
591
  const pagesCount = Math.min(Math.max(options.pages || 1, 1), 10);
483
592
  if (wantsExtractAll) {
484
593
  const { extractListings } = await import('./core/extract-listings.js');
485
594
  const { findNextPageUrl } = await import('./core/paginate.js');
595
+ const { findSchemaForUrl, extractWithSchema, loadBundledSchemas } = await import('./core/schema-extraction.js');
596
+ // Resolve which schema to use (explicit --schema flag or auto-detect)
597
+ let activeSchema = null;
598
+ if (options.schema) {
599
+ // Find schema by name or domain match
600
+ const schemaQuery = options.schema.toLowerCase();
601
+ const allSchemas = loadBundledSchemas();
602
+ activeSchema = allSchemas.find(s => s.name.toLowerCase().includes(schemaQuery) ||
603
+ s.domains.some(d => d.toLowerCase().includes(schemaQuery))) ?? null;
604
+ if (!activeSchema && !options.silent) {
605
+ console.error(`Warning: No schema found for "${options.schema}", falling back to auto-detection`);
606
+ }
607
+ }
608
+ else {
609
+ // Auto-detect from URL
610
+ activeSchema = findSchemaForUrl(result.url || url);
611
+ }
486
612
  // We need the raw HTML for extraction. Re-fetch with format=html if needed.
487
613
  let allListings = [];
488
614
  // Fetch HTML for extraction
489
615
  const htmlResult = peelOptions.format === 'html'
490
616
  ? result
491
617
  : await peel(url, { ...peelOptions, format: 'html', maxTokens: undefined });
492
- allListings.push(...extractListings(htmlResult.content, result.url));
618
+ // Try schema extraction first, fall back to generic
619
+ if (activeSchema) {
620
+ const schemaListings = extractWithSchema(htmlResult.content, activeSchema, result.url);
621
+ if (schemaListings.length > 0) {
622
+ allListings.push(...schemaListings);
623
+ }
624
+ else {
625
+ // Schema returned nothing — fall back to generic
626
+ allListings.push(...extractListings(htmlResult.content, result.url));
627
+ }
628
+ }
629
+ else {
630
+ allListings.push(...extractListings(htmlResult.content, result.url));
631
+ }
493
632
  // Pagination: follow "Next" links
494
633
  if (pagesCount > 1) {
495
634
  let currentHtml = htmlResult.content;
@@ -500,7 +639,16 @@ program
500
639
  break;
501
640
  try {
502
641
  const nextResult = await peel(nextUrl, { ...peelOptions, format: 'html', maxTokens: undefined });
503
- const pageListings = extractListings(nextResult.content, nextResult.url);
642
+ let pageListings;
643
+ if (activeSchema) {
644
+ const schemaPage = extractWithSchema(nextResult.content, activeSchema, nextResult.url);
645
+ pageListings = schemaPage.length > 0
646
+ ? schemaPage
647
+ : extractListings(nextResult.content, nextResult.url);
648
+ }
649
+ else {
650
+ pageListings = extractListings(nextResult.content, nextResult.url);
651
+ }
504
652
  allListings.push(...pageListings);
505
653
  currentHtml = nextResult.content;
506
654
  currentUrl = nextResult.url;
@@ -658,7 +806,17 @@ program
658
806
  .option('--csv', 'Output site-search results as CSV (requires --site)')
659
807
  .option('--budget <n>', 'Token budget for site-search result content', parseInt)
660
808
  .option('-s, --silent', 'Silent mode')
809
+ .option('--agent', 'Agent mode: sets --json, --silent, and --budget 4000 (override with --budget N)')
661
810
  .action(async (query, options) => {
811
+ // --agent sets sensible defaults for AI agents; explicit flags override
812
+ if (options.agent) {
813
+ if (!options.json)
814
+ options.json = true;
815
+ if (!options.silent)
816
+ options.silent = true;
817
+ if (options.budget === undefined)
818
+ options.budget = 4000;
819
+ }
662
820
  const isJson = options.json;
663
821
  const isSilent = options.silent;
664
822
  // --top overrides --count when both are provided
@@ -773,10 +931,24 @@ program
773
931
  || config.braveApiKey
774
932
  || undefined;
775
933
  const provider = getSearchProvider(providerId);
776
- const results = await provider.searchWeb(query, {
934
+ let results = await provider.searchWeb(query, {
777
935
  count: Math.min(Math.max(count, 1), 10),
778
936
  apiKey,
779
937
  });
938
+ // Apply budget to search results if requested (trim results to fit token budget)
939
+ if (options.budget && options.budget > 0 && results.length > 0) {
940
+ let totalTokens = 0;
941
+ let maxResults = 0;
942
+ for (const r of results) {
943
+ // Estimate ~4 chars per token for title + url + snippet
944
+ const resultTokens = Math.ceil((`${r.title || ''}\n${r.url || ''}\n${r.snippet || ''}`).length / 4);
945
+ if (totalTokens + resultTokens > options.budget)
946
+ break;
947
+ totalTokens += resultTokens;
948
+ maxResults++;
949
+ }
950
+ results = results.slice(0, Math.max(maxResults, 1));
951
+ }
780
952
  if (spinner) {
781
953
  spinner.succeed(`Found ${results.length} results (${providerId})`);
782
954
  }
@@ -1370,24 +1542,52 @@ program
1370
1542
  program
1371
1543
  .command('config')
1372
1544
  .description('View or update CLI configuration')
1373
- .argument('[action]', '"get <key>", "set <key> <value>", or omit for overview')
1545
+ .argument('[action]', '"list", "get <key>", "set <key> <value>", or omit for overview')
1374
1546
  .argument('[key]', 'Config key')
1375
1547
  .argument('[value]', 'Value to set')
1376
1548
  .action(async (action, key, value) => {
1377
1549
  const config = loadConfig();
1378
1550
  // Settable config keys (safe for user modification)
1551
+ // Supports dot-notation for nested keys (e.g., llm.apiKey)
1379
1552
  const SETTABLE_KEYS = {
1380
1553
  braveApiKey: 'Brave Search API key',
1554
+ 'llm.apiKey': 'LLM API key for AI-powered extraction (OpenAI-compatible)',
1555
+ 'llm.model': 'LLM model name (default: gpt-4o-mini)',
1556
+ 'llm.baseUrl': 'LLM API base URL (default: https://api.openai.com/v1)',
1381
1557
  };
1382
1558
  const maskSecret = (k, v) => {
1383
1559
  if (!v)
1384
1560
  return '(not set)';
1385
- if (k === 'apiKey' || k === 'braveApiKey')
1561
+ if (k === 'apiKey' || k === 'braveApiKey' || k === 'llm.apiKey') {
1386
1562
  return v.slice(0, 4) + '...' + v.slice(-4);
1563
+ }
1387
1564
  return String(v);
1388
1565
  };
1389
- if (!action) {
1390
- // Show all config
1566
+ /** Get a potentially nested value using dot-notation (e.g., "llm.apiKey") */
1567
+ function getNestedValue(obj, path) {
1568
+ const parts = path.split('.');
1569
+ let cur = obj;
1570
+ for (const part of parts) {
1571
+ if (cur == null || typeof cur !== 'object')
1572
+ return undefined;
1573
+ cur = cur[part];
1574
+ }
1575
+ return cur;
1576
+ }
1577
+ /** Set a potentially nested value using dot-notation (e.g., "llm.apiKey") */
1578
+ function setNestedValue(obj, path, val) {
1579
+ const parts = path.split('.');
1580
+ let cur = obj;
1581
+ for (let i = 0; i < parts.length - 1; i++) {
1582
+ const part = parts[i];
1583
+ if (cur[part] == null || typeof cur[part] !== 'object')
1584
+ cur[part] = {};
1585
+ cur = cur[part];
1586
+ }
1587
+ cur[parts[parts.length - 1]] = val;
1588
+ }
1589
+ if (!action || action === 'list') {
1590
+ // Show all config (also triggered by `webpeel config list`)
1391
1591
  console.log('WebPeel CLI Configuration');
1392
1592
  console.log(` Config file: ~/.webpeel/config.json`);
1393
1593
  console.log('');
@@ -1395,6 +1595,11 @@ program
1395
1595
  console.log(` braveApiKey: ${maskSecret('braveApiKey', config.braveApiKey)}`);
1396
1596
  console.log(` planTier: ${config.planTier || 'free'}`);
1397
1597
  console.log(` anonymousUsage: ${config.anonymousUsage}`);
1598
+ console.log('');
1599
+ console.log(' LLM:');
1600
+ console.log(` llm.apiKey: ${maskSecret('llm.apiKey', config.llm?.apiKey)}`);
1601
+ console.log(` llm.model: ${config.llm?.model || '(not set, default: gpt-4o-mini)'}`);
1602
+ console.log(` llm.baseUrl: ${config.llm?.baseUrl || '(not set, default: https://api.openai.com/v1)'}`);
1398
1603
  const stats = cacheStats();
1399
1604
  console.log('');
1400
1605
  console.log(' Cache:');
@@ -1420,14 +1625,14 @@ program
1420
1625
  console.error(`Usage: webpeel config set ${key} <value>`);
1421
1626
  process.exit(1);
1422
1627
  }
1423
- config[key] = value;
1628
+ setNestedValue(config, key, value);
1424
1629
  saveConfig(config);
1425
1630
  console.log(`✓ ${key} saved`);
1426
1631
  process.exit(0);
1427
1632
  }
1428
1633
  if (action === 'get') {
1429
1634
  const lookupKey = key || '';
1430
- const val = config[lookupKey];
1635
+ const val = getNestedValue(config, lookupKey) ?? config[lookupKey];
1431
1636
  if (val !== undefined) {
1432
1637
  console.log(maskSecret(lookupKey, String(val)));
1433
1638
  }
@@ -1438,7 +1643,7 @@ program
1438
1643
  process.exit(0);
1439
1644
  }
1440
1645
  // Legacy: `webpeel config <key>` — treat action as the key name
1441
- const val = config[action];
1646
+ const val = getNestedValue(config, action) ?? config[action];
1442
1647
  if (val !== undefined) {
1443
1648
  console.log(maskSecret(action, String(val)));
1444
1649
  }
@@ -2635,6 +2840,242 @@ applyCmd
2635
2840
  process.exit(1);
2636
2841
  }
2637
2842
  });
2843
+ // ============================================================
2844
+ // Profile management commands
2845
+ // ============================================================
2846
+ const profileCmd = program
2847
+ .command('profile')
2848
+ .description('Manage named browser profiles (saved login sessions)');
2849
+ profileCmd
2850
+ .command('create <name>')
2851
+ .description('Create a new profile interactively (launches browser, log in, press Ctrl+C when done)')
2852
+ .option('--description <text>', 'Optional description for this profile')
2853
+ .action(async (name, opts) => {
2854
+ try {
2855
+ await createProfile(name, opts.description);
2856
+ process.exit(0);
2857
+ }
2858
+ catch (error) {
2859
+ console.error(`Error: ${error instanceof Error ? error.message : String(error)}`);
2860
+ process.exit(1);
2861
+ }
2862
+ });
2863
+ profileCmd
2864
+ .command('list')
2865
+ .description('List all saved browser profiles')
2866
+ .action(() => {
2867
+ const profiles = listProfiles();
2868
+ if (profiles.length === 0) {
2869
+ console.log('No profiles found.');
2870
+ console.log('');
2871
+ console.log('Create one with:');
2872
+ console.log(' webpeel profile create <name>');
2873
+ console.log('');
2874
+ console.log('Then use it with:');
2875
+ console.log(' webpeel <url> --profile <name>');
2876
+ process.exit(0);
2877
+ }
2878
+ console.log('');
2879
+ console.log('Saved profiles:');
2880
+ console.log('');
2881
+ // Column widths
2882
+ const nameW = Math.max(8, ...profiles.map((p) => p.name.length));
2883
+ const domainsW = Math.max(10, ...profiles.map((p) => (p.domains.join(', ') || '(none)').length));
2884
+ const header = 'Name'.padEnd(nameW) + ' ' +
2885
+ 'Domains'.padEnd(domainsW) + ' ' +
2886
+ 'Last Used'.padEnd(12) + ' ' +
2887
+ 'Created';
2888
+ console.log(header);
2889
+ console.log('─'.repeat(header.length + 4));
2890
+ for (const p of profiles) {
2891
+ const domainsStr = p.domains.length > 0 ? p.domains.join(', ') : '(none)';
2892
+ const lastUsed = formatRelativeTime(new Date(p.lastUsed));
2893
+ const created = new Date(p.created).toISOString().split('T')[0];
2894
+ console.log(p.name.padEnd(nameW) + ' ' +
2895
+ domainsStr.padEnd(domainsW) + ' ' +
2896
+ lastUsed.padEnd(12) + ' ' +
2897
+ created);
2898
+ }
2899
+ console.log('');
2900
+ process.exit(0);
2901
+ });
2902
+ profileCmd
2903
+ .command('show <name>')
2904
+ .description('Show details for a profile')
2905
+ .action((name) => {
2906
+ const profilePath = getProfilePath(name);
2907
+ if (!profilePath) {
2908
+ console.error(`Error: Profile "${name}" not found.`);
2909
+ console.error('Run "webpeel profile list" to see available profiles.');
2910
+ process.exit(1);
2911
+ }
2912
+ try {
2913
+ const meta = JSON.parse(readFileSync(`${profilePath}/metadata.json`, 'utf-8'));
2914
+ console.log('');
2915
+ console.log(`Profile: ${meta.name}`);
2916
+ if (meta.description)
2917
+ console.log(`Description: ${meta.description}`);
2918
+ console.log(`Created: ${new Date(meta.created).toLocaleString()}`);
2919
+ console.log(`Last used: ${new Date(meta.lastUsed).toLocaleString()}`);
2920
+ console.log(`Domains: ${meta.domains.length > 0 ? meta.domains.join(', ') : '(none)'}`);
2921
+ console.log(`Directory: ${profilePath}`);
2922
+ console.log('');
2923
+ process.exit(0);
2924
+ }
2925
+ catch (e) {
2926
+ console.error(`Error reading profile: ${e instanceof Error ? e.message : String(e)}`);
2927
+ process.exit(1);
2928
+ }
2929
+ });
2930
+ profileCmd
2931
+ .command('delete <name>')
2932
+ .description('Delete a saved profile')
2933
+ .action((name) => {
2934
+ const deleted = deleteProfile(name);
2935
+ if (deleted) {
2936
+ console.log(`Profile "${name}" deleted.`);
2937
+ process.exit(0);
2938
+ }
2939
+ else {
2940
+ console.error(`Error: Profile "${name}" not found.`);
2941
+ console.error('Run "webpeel profile list" to see available profiles.');
2942
+ process.exit(1);
2943
+ }
2944
+ });
2945
+ // ── Hotels command ─────────────────────────────────────────────────────────────
2946
+ program
2947
+ .command('hotels <destination>')
2948
+ .description('Search multiple travel sites for hotels (Kayak, Booking.com, Google Travel)')
2949
+ .option('--checkin <date>', 'Check-in date (ISO or relative, e.g. "tomorrow", "2026-02-20"). Default: tomorrow')
2950
+ .option('--checkout <date>', 'Check-out date (ISO or relative). Default: checkin + 1 day')
2951
+ .option('--sort <method>', 'Sort by: price, rating, value (default: price)', 'price')
2952
+ .option('--limit <n>', 'Max results (default: 20)', '20')
2953
+ .option('--source <name...>', 'Only use specific source(s): kayak, booking, google (repeatable)')
2954
+ .option('--json', 'Output as JSON')
2955
+ .option('--stealth', 'Use stealth mode for all sources')
2956
+ .option('-s, --silent', 'Suppress progress messages')
2957
+ .action(async (destination, options) => {
2958
+ const isJson = options.json;
2959
+ const isSilent = options.silent;
2960
+ // Build checkin/checkout
2961
+ const { parseDate, addDays: hotelAddDays } = await import('./core/hotel-search.js');
2962
+ let checkinStr;
2963
+ let checkoutStr;
2964
+ try {
2965
+ checkinStr = parseDate(options.checkin ?? 'tomorrow');
2966
+ checkoutStr = options.checkout
2967
+ ? parseDate(options.checkout)
2968
+ : hotelAddDays(checkinStr, 1);
2969
+ }
2970
+ catch (err) {
2971
+ const msg = err instanceof Error ? err.message : String(err);
2972
+ if (isJson) {
2973
+ await writeStdout(JSON.stringify({ error: msg, code: 'INVALID_DATE' }) + '\n');
2974
+ }
2975
+ else {
2976
+ console.error(`Error: ${msg}`);
2977
+ }
2978
+ process.exit(1);
2979
+ }
2980
+ const sortMethod = (['price', 'rating', 'value'].includes(options.sort)
2981
+ ? options.sort
2982
+ : 'price');
2983
+ const limit = Math.max(1, parseInt(options.limit, 10) || 20);
2984
+ const sources = options.source
2985
+ ? (Array.isArray(options.source) ? options.source : [options.source])
2986
+ : undefined;
2987
+ // Spinner per-source progress (non-silent, non-JSON)
2988
+ let searchSpinner = null;
2989
+ if (!isSilent && !isJson) {
2990
+ searchSpinner = ora(`Searching hotels in ${destination}...`).start();
2991
+ }
2992
+ else if (!isSilent && !isJson) {
2993
+ console.error(`⏳ Searching kayak.com...`);
2994
+ console.error(`⏳ Searching booking.com...`);
2995
+ console.error(`⏳ Searching google.com...`);
2996
+ }
2997
+ try {
2998
+ const { searchHotels } = await import('./core/hotel-search.js');
2999
+ const result = await searchHotels({
3000
+ destination,
3001
+ checkin: checkinStr,
3002
+ checkout: checkoutStr,
3003
+ sort: sortMethod,
3004
+ limit,
3005
+ sources,
3006
+ stealth: options.stealth,
3007
+ silent: isSilent,
3008
+ });
3009
+ if (searchSpinner)
3010
+ searchSpinner.stop();
3011
+ // Show per-source status
3012
+ if (!isSilent && !isJson) {
3013
+ for (const src of result.sources) {
3014
+ if (src.status === 'ok') {
3015
+ console.error(`✅ ${src.name}: ${src.count} hotels found`);
3016
+ }
3017
+ else {
3018
+ console.error(`❌ ${src.name}: ${src.status}${src.error ? ' — ' + src.error : ''}`);
3019
+ }
3020
+ }
3021
+ }
3022
+ if (isJson) {
3023
+ await writeStdout(JSON.stringify(result, null, 2) + '\n');
3024
+ await cleanup();
3025
+ process.exit(0);
3026
+ }
3027
+ // Human-readable table output
3028
+ const { formatDate: fmtDate } = {
3029
+ formatDate: (iso) => {
3030
+ const d = new Date(iso + 'T12:00:00Z');
3031
+ return d.toLocaleDateString('en-US', { month: 'short', day: 'numeric', year: 'numeric', timeZone: 'UTC' });
3032
+ },
3033
+ };
3034
+ const ci = fmtDate(result.checkin);
3035
+ const co = fmtDate(result.checkout);
3036
+ console.log(`\n🏨 Hotels in ${result.destination}`);
3037
+ console.log(` ${ci} → ${co} | Sorted by ${sortMethod}\n`);
3038
+ if (result.results.length === 0) {
3039
+ console.log(' No hotels found.\n');
3040
+ }
3041
+ else {
3042
+ const colNum = 3;
3043
+ const colName = 42;
3044
+ const colPrice = 8;
3045
+ const colRating = 8;
3046
+ const colSource = 10;
3047
+ const padEnd = (s, w) => s.length > w ? s.slice(0, w - 1) + '…' : s.padEnd(w);
3048
+ const padStart = (s, w) => s.padStart(w);
3049
+ console.log(` ${padStart('#', colNum)} ${padEnd('Hotel', colName)} ${padEnd('Price', colPrice)} ${padEnd('Rating', colRating)} ${padEnd('Source', colSource)}`);
3050
+ result.results.forEach((hotel, i) => {
3051
+ const priceStr = hotel.priceDisplay || '—';
3052
+ const ratingStr = hotel.rating !== null ? String(hotel.rating) : '—';
3053
+ console.log(` ${padStart(String(i + 1), colNum)} ${padEnd(hotel.name, colName)} ${padEnd(priceStr, colPrice)} ${padEnd(ratingStr, colRating)} ${padEnd(hotel.source, colSource)}`);
3054
+ });
3055
+ console.log('');
3056
+ const sourceSummary = result.sources
3057
+ .map(s => `${s.name} (${s.count} ${s.status === 'ok' ? '✅' : s.status === 'blocked' ? '🚫' : '❌'})`)
3058
+ .join(' | ');
3059
+ console.log(`Sources: ${sourceSummary}`);
3060
+ }
3061
+ console.log('');
3062
+ await cleanup();
3063
+ process.exit(0);
3064
+ }
3065
+ catch (error) {
3066
+ if (searchSpinner)
3067
+ searchSpinner.fail('Hotel search failed');
3068
+ const msg = error instanceof Error ? error.message : 'Unknown error';
3069
+ if (isJson) {
3070
+ await writeStdout(JSON.stringify({ error: msg, code: 'FETCH_FAILED' }) + '\n');
3071
+ }
3072
+ else {
3073
+ console.error(`\nError: ${msg}`);
3074
+ }
3075
+ await cleanup();
3076
+ process.exit(1);
3077
+ }
3078
+ });
2638
3079
  program.parse();
2639
3080
  // ============================================================
2640
3081
  // Time formatting helper