webpeel 0.21.79 → 0.21.80

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,6 +6,31 @@ import { readFileSync } from 'fs';
6
6
  import { peel, peelBatch, cleanup } from '../../index.js';
7
7
  import { checkUsage, showUsageFooter, loadConfig } from '../../cli-auth.js';
8
8
  import { writeStdout, formatListingsCsv } from '../utils.js';
9
/**
 * Parse a date range string like "Mar29-Apr4" into an array of date labels.
 *
 * Accepted forms: a month name (three-letter abbreviation or longer, e.g.
 * "Mar", "Sept", "March"), a day of month, a separator (one or more
 * hyphens/dashes, or the word "to"), then a second month name and day.
 * Whitespace between the parts is optional and matching is case-insensitive.
 *
 * The range is resolved against the current calendar year. When the end
 * month is earlier than the start month (e.g. "Dec30-Jan2") the end date is
 * taken to be in the following year, so the range wraps over New Year.
 *
 * @param {string} range - Range text, e.g. "Mar29-Apr4" or "March 29 to April 4".
 * @returns {string[]} Labels such as ["Mar 29", "Mar 30", ..., "Apr 4"], or an
 *   empty array when the input is not a string, cannot be parsed, names a day
 *   that does not exist in its month, or ends before it starts within the
 *   same month.
 */
function parseDateRange(range) {
    if (typeof range !== 'string')
        return [];
    // Capture only the first three letters of each month name; any remaining
    // letters ("ch" in "March") and an optional trailing dot are skipped.
    const match = range.match(/\b([a-z]{3})[a-z]*\.?\s*(\d{1,2})\s*(?:[-–—]+|to)\s*([a-z]{3})[a-z]*\.?\s*(\d{1,2})/i);
    if (!match)
        return [];
    const months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
    const monthIndexOf = (name) => months.findIndex(m => m.toLowerCase() === name.toLowerCase());
    const startMonthIdx = monthIndexOf(match[1]);
    const endMonthIdx = monthIndexOf(match[3]);
    if (startMonthIdx === -1 || endMonthIdx === -1)
        return [];
    const startDay = Number.parseInt(match[2], 10);
    const endDay = Number.parseInt(match[4], 10);
    // Build a local date at noon and reject values the Date constructor had to
    // roll over into another month (e.g. "Apr 31" becoming May 1).
    const buildDate = (y, monthIdx, day) => {
        const candidate = new Date(y, monthIdx, day, 12);
        const isExact = candidate.getMonth() === monthIdx && candidate.getDate() === day;
        return isExact ? candidate : null;
    };
    const year = new Date().getFullYear();
    // An end month before the start month means the range crosses New Year.
    const endYear = endMonthIdx < startMonthIdx ? year + 1 : year;
    const start = buildDate(year, startMonthIdx, startDay);
    const end = buildDate(endYear, endMonthIdx, endDay);
    if (!start || !end)
        return [];
    const dates = [];
    for (const d = new Date(start); d <= end; d.setDate(d.getDate() + 1)) {
        dates.push(`${months[d.getMonth()]} ${d.getDate()}`);
    }
    return dates;
}
9
34
  export function registerSearchCommands(program) {
10
35
  // ── search command ────────────────────────────────────────────────────────
11
36
  program
@@ -595,9 +620,90 @@ export function registerSearchCommands(program) {
595
620
  .option('--one-way', 'One-way flight (default)')
596
621
  .option('--round-trip', 'Round-trip flight')
597
622
  .option('-n, --count <n>', 'Max flights to show', '10')
623
+ .option('--dates <range>', 'Compare prices across date range (e.g., "Mar29-Apr4")')
598
624
  .option('--json', 'Output as JSON')
599
625
  .option('-s, --silent', 'Silent mode')
600
626
  .action(async (query, options) => {
627
+ // ── --dates: compare cheapest flight across a date range ──────────────
628
+ if (options.dates) {
629
+ const dates = parseDateRange(options.dates);
630
+ if (dates.length === 0) {
631
+ console.error('Could not parse date range. Format: "Mar29-Apr4"');
632
+ process.exit(1);
633
+ }
634
+ const spinner = options.silent ? null : ora(`Comparing flights across ${dates.length} dates...`).start();
635
+ const tripType = options.roundTrip ? '' : ' one way';
636
+ const rows = [];
637
+ for (const date of dates) {
638
+ if (spinner)
639
+ spinner.text = `Fetching flights for ${date}...`;
640
+ try {
641
+ const dateQuery = `Flights from ${query} ${date}${tripType}`;
642
+ const encoded = encodeURIComponent(dateQuery);
643
+ const url = `https://www.google.com/travel/flights?q=${encoded}`;
644
+ const result = await peel(url, { render: true, timeout: 30000 });
645
+ // Try to extract cheapest flight from structured data or content
646
+ let price = null;
647
+ let airline = null;
648
+ let time = null;
649
+ const flights = result.domainData?.structured?.flights || [];
650
+ if (flights.length > 0) {
651
+ const cheapest = flights.reduce((a, b) => {
652
+ const ap = parseFloat(String(a.price || '').replace(/[^0-9.]/g, '')) || Infinity;
653
+ const bp = parseFloat(String(b.price || '').replace(/[^0-9.]/g, '')) || Infinity;
654
+ return ap <= bp ? a : b;
655
+ });
656
+ price = cheapest.priceStr || (cheapest.price ? `$${cheapest.price}` : null);
657
+ airline = cheapest.airline || cheapest.carrier || null;
658
+ time = cheapest.departTime && cheapest.arriveTime
659
+ ? `${cheapest.departTime} → ${cheapest.arriveTime}`
660
+ : (cheapest.time || cheapest.departure || null);
661
+ }
662
+ else {
663
+ // Extract from markdown content — look for price patterns
664
+ const priceMatch = result.content.match(/\$(\d+)/);
665
+ if (priceMatch)
666
+ price = `$${priceMatch[1]}`;
667
+ const airlineMatch = result.content.match(/\b(American|Delta|United|Southwest|Spirit|JetBlue|Alaska|Frontier|Allegiant|Sun Country)\b/i);
668
+ if (airlineMatch)
669
+ airline = airlineMatch[1];
670
+ const timeMatch = result.content.match(/(\d{1,2}:\d{2}\s*(?:AM|PM))\s*[–—→]\s*(\d{1,2}:\d{2}\s*(?:AM|PM))/i);
671
+ if (timeMatch)
672
+ time = `${timeMatch[1]} → ${timeMatch[2]}`;
673
+ }
674
+ const priceNum = price ? parseFloat(price.replace(/[^0-9.]/g, '')) || Infinity : Infinity;
675
+ rows.push({ date, price, airline, time, priceNum });
676
+ }
677
+ catch {
678
+ rows.push({ date, price: null, airline: null, time: null, priceNum: Infinity });
679
+ }
680
+ }
681
+ if (spinner)
682
+ spinner.succeed(`Compared ${rows.length} dates`);
683
+ if (options.json) {
684
+ console.log(JSON.stringify({ query, dateRange: options.dates, rows }, null, 2));
685
+ }
686
+ else {
687
+ // Find best price
688
+ const best = rows.reduce((a, b) => a.priceNum <= b.priceNum ? a : b);
689
+ console.log(`\n# ✈️ Flight Price Comparison — ${query}\n`);
690
+ console.log('| Date | Airline | Time | Price |');
691
+ console.log('|------|---------|------|-------|');
692
+ for (const row of rows) {
693
+ const star = row.priceNum === best.priceNum ? ' ⭐' : '';
694
+ const priceStr = row.price ? `${row.price}${star}` : 'N/A';
695
+ const airlineStr = row.airline || 'Unknown';
696
+ const timeStr = row.time || '—';
697
+ console.log(`| ${row.date} | ${airlineStr} | ${timeStr} | ${priceStr} |`);
698
+ }
699
+ if (best.price) {
700
+ console.log(`\n⭐ Best price: ${best.date} — ${best.airline || 'Unknown'} ${best.price}`);
701
+ }
702
+ }
703
+ await cleanup();
704
+ process.exit(0);
705
+ }
706
+ // ── Single date (default) ─────────────────────────────────────────────
601
707
  const tripType = options.roundTrip ? '' : ' one way';
602
708
  const encoded = encodeURIComponent(`Flights from ${query}${tripType}`);
603
709
  const url = `https://www.google.com/travel/flights?q=${encoded}`;
package/dist/cli.js CHANGED
File without changes
@@ -123,6 +123,7 @@ const REGISTRY = [
123
123
  { match: (h) => h === 'redfin.com' || h === 'www.redfin.com', extractor: redfinExtractor },
124
124
  // ── Travel ──────────────────────────────────────────────────────────────
125
125
  { match: (h, url = '') => (h === 'www.google.com' || h === 'google.com') && url.includes('/travel/flights'), extractor: googleFlightsExtractor },
126
+ { match: (h, url = '') => (h === 'www.kayak.com' || h === 'kayak.com') && url.includes('/cars/'), extractor: kayakCarRentalExtractor },
126
127
  ];
127
128
  /**
128
129
  * Returns the domain extractor for a URL, or null if none matches.
@@ -6069,3 +6070,273 @@ async function googleFlightsExtractor(_html, url) {
6069
6070
  cleanContent: md.join('\n'),
6070
6071
  };
6071
6072
  }
6073
+ // ---------------------------------------------------------------------------
6074
+ // Kayak Car Rental extractor
6075
+ // ---------------------------------------------------------------------------
6076
/**
 * Extract car-rental listings from the text of a Kayak car search page.
 *
 * The page text is scanned line by line. A line of the form
 * "or similar {Class}" marks a listing: the preceding line is taken as the car
 * name, and up to 29 following lines are searched for pickup location,
 * distance from city center, rating, company, cancellation policy and total
 * price. Listings without a positive total price are dropped.
 *
 * @param {string} _html - Page content; either raw HTML (tags are stripped)
 *   or already-extracted text with one item per line.
 * @param {string} url - Page URL. Pickup/drop-off dates and the location are
 *   read from a path shaped like /cars/{Location}/YYYY-MM-DD/YYYY-MM-DD.
 * @returns {Promise<null | {domain: string, type: string, structured: object, cleanContent: string}>}
 *   `null` when the URL does not contain "/cars/" or no listing was found;
 *   otherwise the structured listings plus a markdown rendering.
 */
async function kayakCarRentalExtractor(_html, url) {
    if (typeof url !== 'string' || !url.includes('/cars/'))
        return null;
    // Rental company homepage URLs. A Map is used so that a scraped company
    // name such as "constructor" cannot resolve to an Object.prototype member.
    const rentalCompanyUrls = new Map([
        ['Hertz', 'https://www.hertz.com'],
        ['Budget', 'https://www.budget.com'],
        ['Avis', 'https://www.avis.com'],
        ['Enterprise', 'https://www.enterprise.com'],
        ['National', 'https://www.nationalcar.com'],
        ['Alamo', 'https://www.alamo.com'],
        ['Dollar', 'https://www.dollar.com'],
        ['Thrifty', 'https://www.thrifty.com'],
        ['Sixt', 'https://www.sixt.com'],
        ['Fox', 'https://www.foxrentacar.com'],
        ['Payless', 'https://www.paylesscar.com'],
        ['Turn', 'https://www.turn.com'],
        ['EconomyBookings', 'https://www.economybookings.com'],
        ['Priceline', 'https://www.priceline.com'],
        ['Expedia', 'https://www.expedia.com'],
        ['Turo', 'https://www.turo.com'],
        ['KAYAK', 'https://www.kayak.com'],
        ['Booking.com', 'https://www.booking.com'],
        ['DiscoverCars', 'https://www.discovercars.com'],
        ['RentalCars', 'https://www.rentalcars.com'],
        ['Car Rental 8', 'https://www.carrental8.com'],
        ['Hotwire', 'https://www.hotwire.com'],
    ]);
    // Company names recognised when they appear alone on a line.
    const KNOWN_COMPANIES = new Set(rentalCompanyUrls.keys());
    const MS_PER_DAY = 1000 * 60 * 60 * 24;
    const MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
    // Homepage for a company, falling back to Kayak for unlisted names.
    function getCompanyUrl(company) {
        return rentalCompanyUrls.get(company) ?? 'https://www.kayak.com';
    }
    // Parse "$1,234"-style digit groups; every comma is removed, not just the first.
    function parsePrice(digits) {
        return Number.parseInt(digits.replace(/,/g, ''), 10);
    }
    // Parse dates from URL: /cars/Location/YYYY-MM-DD/YYYY-MM-DD
    let numDays = 1;
    let pickupDate = '';
    let dropoffDate = '';
    let locationName = '';
    const dateMatch = url.match(/\/cars\/([^/]+)\/(\d{4}-\d{2}-\d{2})\/(\d{4}-\d{2}-\d{2})/);
    if (dateMatch) {
        try {
            locationName = decodeURIComponent(dateMatch[1]);
        }
        catch {
            // Malformed percent-encoding: keep the raw path segment rather than
            // failing the whole extraction.
            locationName = dateMatch[1];
        }
        pickupDate = dateMatch[2];
        dropoffDate = dateMatch[3];
        const spanDays = Math.round((new Date(dropoffDate).getTime() - new Date(pickupDate).getTime()) / MS_PER_DAY);
        // Math.max(1, NaN) is NaN, so dates that match the pattern but are not
        // real calendar dates must be handled explicitly.
        numDays = Number.isFinite(spanDays) ? Math.max(1, spanDays) : 1;
    }
    // Format date range for display (e.g. "Apr 1–3")
    function formatDateRange(from, to) {
        if (!from || !to)
            return '';
        // Parse as UTC (trailing "Z") because the UTC getters are used below;
        // without it the string is read as local time and the day can shift.
        const fromDate = new Date(`${from}T12:00:00Z`);
        const toDate = new Date(`${to}T12:00:00Z`);
        if (Number.isNaN(fromDate.getTime()) || Number.isNaN(toDate.getTime()))
            return '';
        const fromMonth = MONTHS[fromDate.getUTCMonth()];
        const toMonth = MONTHS[toDate.getUTCMonth()];
        const fromDay = fromDate.getUTCDate();
        const toDay = toDate.getUTCDate();
        if (fromMonth === toMonth)
            return `${fromMonth} ${fromDay}–${toDay}`;
        return `${fromMonth} ${fromDay}–${toMonth} ${toDay}`;
    }
    // Format location name nicely (e.g. "Punta-Gorda,FL-c34451" → "Punta Gorda, FL")
    function formatLocation(loc) {
        return loc
            .replace(/-c\d+$/, '') // remove trailing "-c12345"
            .replace(/-/g, ' ') // hyphens to spaces
            .replace(/,(\S)/g, ', $1'); // ensure space after comma
    }
    // Process content: strip HTML if needed
    let text = typeof _html === 'string' ? _html : '';
    if (text.includes('<!DOCTYPE') || text.includes('<html')) {
        text = text
            .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
            .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
            .replace(/<[^>]+>/g, '\n')
            .replace(/&lt;/g, '<')
            .replace(/&gt;/g, '>')
            .replace(/&#\d+;/g, '')
            // "&amp;" is decoded last so that "&amp;lt;" becomes "&lt;", not "<".
            .replace(/&amp;/g, '&')
            .replace(/\n{2,}/g, '\n');
    }
    const lines = text.split('\n').map(l => l.trim()).filter(Boolean);
    const listings = [];
    for (let i = 0; i < lines.length; i++) {
        // Detect: "or similar {Class}" — this signals a standard car rental listing.
        // The car name is the line BEFORE "or similar".
        const orSimilarMatch = lines[i].match(/^or similar\s+(.+)$/);
        if (!orSimilarMatch)
            continue;
        const carClass = orSimilarMatch[1].trim();
        const carName = i > 0 ? lines[i - 1] : '';
        if (!carName || carName.length > 60)
            continue;
        // Look ahead for: pickup location, rating, company, price
        let location = '';
        let distanceFromCenter = '';
        let rating = null;
        let company = '';
        let totalPrice = 0;
        let cancellation = '';
        const lookaheadEnd = Math.min(i + 30, lines.length);
        for (let j = i + 1; j < lookaheadEnd; j++) {
            const l = lines[j];
            // Pickup location
            if (!location && l.startsWith('Pick-up')) {
                const locMatch = l.match(/Pick-up (?:city|airport):\s*(.+)/);
                if (locMatch)
                    location = locMatch[1].trim();
                continue;
            }
            // Distance from center
            if (!distanceFromCenter) {
                const distM = l.match(/^([\d.]+)\s+mi\s+from\s+city\s+center/);
                if (distM) {
                    distanceFromCenter = `${distM[1]} mi from city center`;
                    continue;
                }
            }
            // Rating (a bare decimal line like "9.2", "8.5", "7.2")
            if (rating === null) {
                const ratingM = l.match(/^(\d+\.\d+)$/);
                if (ratingM) {
                    rating = parseFloat(ratingM[1]);
                    continue;
                }
            }
            // Company from "X offer from {Company}" or a standalone known company name
            if (!company) {
                const offerMatch = l.match(/offer from (.+)$/);
                if (offerMatch) {
                    company = offerMatch[1].trim();
                    continue;
                }
                if (KNOWN_COMPANIES.has(l)) {
                    company = l;
                    continue;
                }
            }
            // Cancellation policy
            if (!cancellation && (l.includes('Free cancellation') || l.includes('No free cancellation'))) {
                cancellation = l;
                continue;
            }
            // Price — "$NNN" on its own line, followed by a line containing "Total"
            const priceM = l.match(/^\$(\d[\d,]*)$/);
            if (priceM && (lines[j + 1] || '').includes('Total')) {
                totalPrice = parsePrice(priceM[1]);
                break;
            }
            // Also catch price and "Total" on the same line
            const inlinePriceM = l.match(/\$(\d[\d,]*)\s*Total/);
            if (inlinePriceM) {
                totalPrice = parsePrice(inlinePriceM[1]);
                break;
            }
            // Stop if we hit another car listing marker
            if (l.match(/^or similar\s/) || l === 'Show more results')
                break;
        }
        if (totalPrice > 0) {
            listings.push({
                name: carName,
                carClass: carClass.replace('Full size', 'Full-size'),
                totalPrice,
                perDayPrice: Math.round(totalPrice / numDays),
                company: company || 'Unknown',
                location: location || 'See booking',
                distanceFromCenter,
                rating,
                cancellation,
                isTuro: false,
            });
        }
    }
    // Deduplicate by name+price; when two entries collide, an entry with a
    // real company replaces one whose company is "Unknown".
    const byKey = new Map();
    for (const c of listings) {
        const key = `${c.name.toLowerCase()}-${c.totalPrice}`;
        const existing = byKey.get(key);
        if (!existing || (existing.company === 'Unknown' && c.company !== 'Unknown')) {
            byKey.set(key, c);
        }
    }
    const unique = Array.from(byKey.values());
    if (unique.length === 0)
        return null;
    // Drop "Unknown"-company entries whenever at least one listing has a company.
    const knownCompanyListings = unique.filter(c => c.company !== 'Unknown');
    const finalListings = knownCompanyListings.length > 0 ? knownCompanyListings : unique;
    // Sort by price
    finalListings.sort((a, b) => a.totalPrice - b.totalPrice);
    // Get total count from page if mentioned; otherwise the deduplicated count
    let totalFound = unique.length;
    for (const l of lines) {
        const countMatch = l.match(/^(\d+)\s+results?$/) || l.match(/(\d+)\s+cars?\s+found/);
        if (countMatch) {
            totalFound = Number.parseInt(countMatch[1], 10);
            break;
        }
    }
    const dateRange = formatDateRange(pickupDate, dropoffDate);
    const displayLocation = formatLocation(locationName);
    const daysLabel = numDays === 1 ? '1 day' : `${numDays} days`;
    const md = [
        `# 🚗 Car Rentals — ${displayLocation} · ${dateRange} (${daysLabel})`,
        '',
        `*${totalFound} cars found · Source: [Kayak](${url})*`,
        `*Free cancellation available on most rentals*`,
        '',
    ];
    finalListings.forEach((c, idx) => {
        md.push(`## ${idx + 1}. ${c.name} (${c.carClass}) — $${c.totalPrice} total · $${c.perDayPrice}/day`);
        if (c.distanceFromCenter) {
            md.push(`📍 ${c.distanceFromCenter}`);
        }
        else if (c.location && c.location !== 'See booking') {
            md.push(`📍 ${c.location}`);
        }
        const ratingStr = c.rating !== null ? ` · Rating: ${c.rating}` : '';
        md.push(`🏪 via ${c.company}${ratingStr}`);
        if (c.cancellation)
            md.push(`✅ ${c.cancellation}`);
        md.push(`🔍 [See price on Kayak](${url})`);
        md.push(`🛒 [Book on ${c.company}](${getCompanyUrl(c.company)})`);
        md.push('');
    });
    md.push('---');
    md.push(`📌 *Prices verified via [Kayak](${url}). Click "See price" to confirm current rate, then book with the rental company.*`);
    return {
        domain: 'kayak.com/cars',
        type: 'car-rental',
        structured: {
            cars: finalListings,
            location: displayLocation,
            pickupDate,
            dropoffDate,
            numDays,
            totalFound,
            source: 'Kayak',
            sourceUrl: url,
        },
        cleanContent: md.join('\n'),
    };
}
@@ -426,6 +426,22 @@ export async function fetchContent(ctx) {
426
426
  // @ts-ignore — proprietary module, gitignored
427
427
  const { searchFallback } = await import('./search-fallback.js');
428
428
  const searchResult = await searchFallback(ctx.url);
429
+ // If DDG/primary returned very little, also try Bing for richer snippets
430
+ if (!searchResult.cachedContent || searchResult.cachedContent.length < 400) {
431
+ try {
432
+ const { simpleFetch } = await import('./http-fetch.js');
433
+ const bingUrl = `https://www.bing.com/search?q=${encodeURIComponent(ctx.url)}`;
434
+ const bingResult = await simpleFetch(bingUrl, ctx.userAgent, 8000);
435
+ if (bingResult.html && bingResult.html.length > 500) {
436
+ const snippetMatch = bingResult.html.match(/<p[^>]*class="[^"]*snippet[^"]*"[^>]*>(.*?)<\/p>/gi);
437
+ if (snippetMatch) {
438
+ const bingSnippet = snippetMatch.map(s => s.replace(/<[^>]+>/g, '')).join('\n');
439
+ searchResult.cachedContent = (searchResult.cachedContent || '') + '\n\n---\n*Additional context from Bing:*\n' + bingSnippet;
440
+ }
441
+ }
442
+ }
443
+ catch { /* Bing fallback is best-effort */ }
444
+ }
429
445
  if (searchResult.cachedContent && searchResult.cachedContent.length > 50) {
430
446
  ctx.timer.end('fetch');
431
447
  ctx.content = searchResult.cachedContent;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "webpeel",
3
- "version": "0.21.79",
3
+ "version": "0.21.80",
4
4
  "description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
5
5
  "author": "Jake Liu",
6
6
  "license": "AGPL-3.0-only",