webpeel 0.21.78 → 0.21.80

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,6 +6,31 @@ import { readFileSync } from 'fs';
6
6
  import { peel, peelBatch, cleanup } from '../../index.js';
7
7
  import { checkUsage, showUsageFooter, loadConfig } from '../../cli-auth.js';
8
8
  import { writeStdout, formatListingsCsv } from '../utils.js';
9
/**
 * Expand a date range such as "Mar29-Apr4" into one label per calendar day.
 *
 * Accepted input:
 *   - three-letter or full month names ("Mar", "March"), case-insensitive;
 *   - optional whitespace between month and day;
 *   - "-", "–", "—" or the word "to" as the separator.
 *
 * A range whose end month precedes its start month (e.g. "Dec30-Jan2") is
 * treated as crossing into the following year instead of being rejected.
 * Days that do not exist in their month (e.g. "Apr31") make the whole range
 * invalid rather than silently rolling over into the next month.
 *
 * @param {string} range - User-supplied range, e.g. "Mar29-Apr4".
 * @param {number} [year] - Year the range starts in; defaults to the current year.
 * @returns {string[]} Labels such as ["Mar 29", "Mar 30", ..., "Apr 4"], or an
 *   empty array when the range cannot be parsed or is not a valid span.
 */
function parseDateRange(range, year = new Date().getFullYear()) {
    const MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
    const match = String(range ?? '').match(/([a-z]{3})[a-z]*\.?\s*(\d{1,2})\s*(?:[-–—]+|to)\s*([a-z]{3})[a-z]*\.?\s*(\d{1,2})/i);
    if (!match) {
        return [];
    }
    const [, startName, startDigits, endName, endDigits] = match;
    const monthIndexOf = (name) => MONTHS.findIndex((m) => m.toLowerCase() === name.toLowerCase());
    const startMonthIdx = monthIndexOf(startName);
    const endMonthIdx = monthIndexOf(endName);
    if (startMonthIdx === -1 || endMonthIdx === -1) {
        return [];
    }
    // Build the date at local noon so a DST shift at midnight can never move it
    // onto a neighbouring day; reject days the Date constructor had to roll over.
    const makeDate = (y, monthIdx, day) => {
        const date = new Date(y, monthIdx, day, 12);
        return date.getMonth() === monthIdx && date.getDate() === day ? date : null;
    };
    // "Dec30-Jan2": the end month is earlier in the calendar, so it belongs to next year.
    const endYear = endMonthIdx < startMonthIdx ? year + 1 : year;
    const start = makeDate(year, startMonthIdx, Number.parseInt(startDigits, 10));
    const end = makeDate(endYear, endMonthIdx, Number.parseInt(endDigits, 10));
    if (!start || !end) {
        return [];
    }
    const dates = [];
    const cursor = new Date(start);
    while (cursor <= end) {
        dates.push(`${MONTHS[cursor.getMonth()]} ${cursor.getDate()}`);
        cursor.setDate(cursor.getDate() + 1);
    }
    return dates;
}
9
34
  export function registerSearchCommands(program) {
10
35
  // ── search command ────────────────────────────────────────────────────────
11
36
  program
@@ -595,9 +620,90 @@ export function registerSearchCommands(program) {
595
620
  .option('--one-way', 'One-way flight (default)')
596
621
  .option('--round-trip', 'Round-trip flight')
597
622
  .option('-n, --count <n>', 'Max flights to show', '10')
623
+ .option('--dates <range>', 'Compare prices across date range (e.g., "Mar29-Apr4")')
598
624
  .option('--json', 'Output as JSON')
599
625
  .option('-s, --silent', 'Silent mode')
600
626
  .action(async (query, options) => {
627
// ── --dates: compare cheapest flight across a date range ──────────────
// Runs one Google Flights lookup per day in the range (sequentially), picks the
// cheapest flight found for each day, prints a comparison table (or JSON) and
// exits the process. A failed lookup for one day does not abort the others.
if (options.dates) {
    const dates = parseDateRange(options.dates);
    if (dates.length === 0) {
        console.error('Could not parse date range. Format: "Mar29-Apr4"');
        process.exit(1);
    }
    // Load ora lazily, the same way the rental and cars commands do, so this
    // branch does not depend on an `ora` binding being in scope at this point.
    const spinner = options.silent
        ? null
        : (await import('ora')).default(`Comparing flights across ${dates.length} dates...`).start();
    const tripType = options.roundTrip ? '' : ' one way';
    // Numeric value of a price-like field; Infinity when missing or unparseable
    // so such entries can never win a "cheapest" comparison.
    const toPriceNumber = (value) => parseFloat(String(value || '').replace(/[^0-9.]/g, '')) || Infinity;
    // NOTE(review): the shape of `flight.price` is not visible here — it may be a
    // number or an already-formatted string; avoid emitting "$$123" in the latter case.
    const formatPrice = (value) => (/^\s*\$/.test(String(value)) ? String(value).trim() : `$${value}`);
    const rows = [];
    for (const date of dates) {
        if (spinner) {
            spinner.text = `Fetching flights for ${date}...`;
        }
        try {
            const encoded = encodeURIComponent(`Flights from ${query} ${date}${tripType}`);
            const url = `https://www.google.com/travel/flights?q=${encoded}`;
            const result = await peel(url, { render: true, timeout: 30000 });
            let price = null;
            let airline = null;
            let time = null;
            const flights = result.domainData?.structured?.flights || [];
            if (flights.length > 0) {
                // Prefer structured extractor output when available.
                const cheapest = flights.reduce((a, b) => (toPriceNumber(a.price) <= toPriceNumber(b.price) ? a : b));
                price = cheapest.priceStr || (cheapest.price ? formatPrice(cheapest.price) : null);
                airline = cheapest.airline || cheapest.carrier || null;
                time = cheapest.departTime && cheapest.arriveTime
                    ? `${cheapest.departTime} → ${cheapest.arriveTime}`
                    : (cheapest.time || cheapest.departure || null);
            }
            else {
                // Fall back to scraping the rendered text: first price, airline and time span.
                const content = result.content || '';
                // Accept thousands separators so "$1,234" is not truncated to "$1".
                const priceMatch = content.match(/\$(\d{1,3}(?:,\d{3})+|\d+)/);
                if (priceMatch) {
                    price = `$${priceMatch[1]}`;
                }
                const airlineMatch = content.match(/\b(American|Delta|United|Southwest|Spirit|JetBlue|Alaska|Frontier|Allegiant|Sun Country)\b/i);
                if (airlineMatch) {
                    airline = airlineMatch[1];
                }
                const timeMatch = content.match(/(\d{1,2}:\d{2}\s*(?:AM|PM))\s*[–—→]\s*(\d{1,2}:\d{2}\s*(?:AM|PM))/i);
                if (timeMatch) {
                    time = `${timeMatch[1]} → ${timeMatch[2]}`;
                }
            }
            rows.push({ date, price, airline, time, priceNum: price ? toPriceNumber(price) : Infinity });
        }
        catch (error) {
            // Best-effort: keep going, but record why this date has no data
            // instead of discarding the failure entirely.
            rows.push({
                date,
                price: null,
                airline: null,
                time: null,
                priceNum: Infinity,
                error: error instanceof Error ? error.message : 'Unknown error',
            });
        }
    }
    if (spinner) {
        spinner.succeed(`Compared ${rows.length} dates`);
    }
    if (options.json) {
        console.log(JSON.stringify({ query, dateRange: options.dates, rows }, null, 2));
    }
    else {
        // `rows` is never empty here because `dates` is non-empty.
        const best = rows.reduce((a, b) => (a.priceNum <= b.priceNum ? a : b));
        console.log(`\n# ✈️ Flight Price Comparison — ${query}\n`);
        console.log('| Date | Airline | Time | Price |');
        console.log('|------|---------|------|-------|');
        for (const row of rows) {
            // Every row tied for the lowest price gets a star.
            const star = row.priceNum === best.priceNum ? ' ⭐' : '';
            const priceStr = row.price ? `${row.price}${star}` : 'N/A';
            const airlineStr = row.airline || 'Unknown';
            const timeStr = row.time || '—';
            console.log(`| ${row.date} | ${airlineStr} | ${timeStr} | ${priceStr} |`);
        }
        if (best.price) {
            console.log(`\n⭐ Best price: ${best.date} — ${best.airline || 'Unknown'} ${best.price}`);
        }
    }
    await cleanup();
    process.exit(0);
}
706
+ // ── Single date (default) ─────────────────────────────────────────────
601
707
  const tripType = options.roundTrip ? '' : ' one way';
602
708
  const encoded = encodeURIComponent(`Flights from ${query}${tripType}`);
603
709
  const url = `https://www.google.com/travel/flights?q=${encoded}`;
@@ -631,6 +737,122 @@ export function registerSearchCommands(program) {
631
737
  process.exit(1);
632
738
  }
633
739
  });
740
// ── rental command ────────────────────────────────────────────────────────
// Builds a Kayak car-rental search URL from a free-form query such as
// "Punta Gorda FL Apr 1-3", fetches it with browser rendering, prints the
// result (markdown or JSON) and exits the process.
program
    .command('rental <query>')
    .alias('car-rental')
    .description('Search for car rentals via Kayak — e.g. "Punta Gorda FL Apr 1-3"')
    .option('--json', 'Output as JSON')
    .option('-s, --silent', 'Silent mode')
    .action(async (query, options) => {
        const MONTH_NUMBERS = {
            jan: '01', feb: '02', mar: '03', apr: '04', may: '05', jun: '06',
            jul: '07', aug: '08', sep: '09', oct: '10', nov: '11', dec: '12',
        };
        const USAGE_HINT = 'Format: "Punta Gorda FL Apr 1-3"';
        const pad2 = (value) => String(value).padStart(2, '0');
        const toIsoDate = (date) => `${date.getFullYear()}-${pad2(date.getMonth() + 1)}-${pad2(date.getDate())}`;
        // True only when year/month/day name a real calendar day (rejects "Apr 31").
        const isRealDate = (y, mm, dd) => {
            const probe = new Date(Number(y), Number(mm) - 1, Number(dd));
            return probe.getMonth() === Number(mm) - 1 && probe.getDate() === Number(dd);
        };
        // Location is everything before the first "<month> <day>" in the query.
        const location = query.replace(/\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\w*\s+\d+.*/i, '').trim();
        if (!location) {
            console.error(`Could not determine rental location. ${USAGE_HINT}`);
            process.exit(1);
        }
        const encodedLocation = encodeURIComponent(location.replace(/\s+/g, '-'));
        const today = new Date();
        const year = today.getFullYear();
        let pickupDate;
        let returnDate;
        // Matches "Apr 1-3", "Apr 1 – 3", "Apr 1 to Apr 3", "Dec 30 - Jan 2".
        const rangeMatch = query.match(/\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\w*\s+(\d{1,2})\s*(?:[-–—]+|to)\s*(?:(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\w*\s+)?(\d{1,2})\b/i);
        if (rangeMatch) {
            const startMonth = MONTH_NUMBERS[rangeMatch[1].toLowerCase().slice(0, 3)];
            const endMonth = rangeMatch[3] ? MONTH_NUMBERS[rangeMatch[3].toLowerCase().slice(0, 3)] : startMonth;
            const startDay = pad2(rangeMatch[2]);
            const endDay = pad2(rangeMatch[4]);
            // A return month earlier than the pickup month means the rental spans New Year.
            const returnYear = endMonth < startMonth ? year + 1 : year;
            if (!isRealDate(year, startMonth, startDay) || !isRealDate(returnYear, endMonth, endDay)) {
                console.error(`Invalid rental dates in "${query}". ${USAGE_HINT}`);
                process.exit(1);
            }
            pickupDate = `${year}-${startMonth}-${startDay}`;
            returnDate = `${returnYear}-${endMonth}-${endDay}`;
        }
        else {
            // No dates in the query: search an upcoming window (tomorrow, two days)
            // instead of a fixed calendar date that may already be in the past.
            pickupDate = toIsoDate(new Date(today.getFullYear(), today.getMonth(), today.getDate() + 1));
            returnDate = toIsoDate(new Date(today.getFullYear(), today.getMonth(), today.getDate() + 3));
            if (!options.silent) {
                console.error(`No dates found in query — using ${pickupDate} to ${returnDate}. ${USAGE_HINT}`);
            }
        }
        const searchUrl = `https://www.kayak.com/cars/${encodedLocation}/${pickupDate}/${returnDate}?sort=price_a`;
        const spinner = options.silent ? null : (await import('ora')).default(`Searching car rentals: ${query}...`).start();
        let exitCode = 0;
        try {
            const result = await peel(searchUrl, { render: true, timeout: 40000 });
            if (spinner) {
                spinner.succeed('Car rentals loaded');
            }
            if (options.json) {
                console.log(JSON.stringify({
                    query,
                    location,
                    pickupDate,
                    returnDate,
                    url: searchUrl,
                    content: result.content,
                    tokens: result.tokens,
                }, null, 2));
            }
            else {
                console.log(result.content);
            }
        }
        catch (error) {
            exitCode = 1;
            if (spinner) {
                spinner.fail('Car rental search failed');
            }
            console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
        finally {
            // Release fetch resources on both paths before the process exits.
            await cleanup();
        }
        process.exit(exitCode);
    });
799
// ── cars command ──────────────────────────────────────────────────────────
// Builds a Cars.com search URL from the query and filter options, fetches it
// (plain fetch, no forced rendering), prints the result and exits the process.
program
    .command('cars <query>')
    .description('Search for cars to buy via Cars.com — e.g. "Honda Civic"')
    .option('--zip <zip>', 'ZIP code for local search', '10001')
    .option('--distance <miles>', 'Max distance in miles', '30')
    .option('--max-price <price>', 'Maximum listing price')
    .option('--min-price <price>', 'Minimum listing price')
    .option('--json', 'Output as JSON')
    .option('-s, --silent', 'Silent mode')
    .action(async (query, options) => {
        // `||` (not `??`) on purpose: an explicitly empty value falls back to the default too.
        const zip = options.zip || '10001';
        const distance = options.distance || '30';
        const maxPrice = options.maxPrice || '';
        const minPrice = options.minPrice || '';
        const params = new URLSearchParams({
            keyword: query,
            sort: 'list_price',
            stock_type: 'all',
            zip,
            maximum_distance: distance,
        });
        // Price bounds are only sent when the user supplied them.
        if (maxPrice) {
            params.set('list_price_max', maxPrice);
        }
        if (minPrice) {
            params.set('list_price_min', minPrice);
        }
        const url = `https://www.cars.com/shopping/results/?${params.toString()}`;
        const spinner = options.silent ? null : (await import('ora')).default(`Searching cars: ${query}...`).start();
        let exitCode = 0;
        try {
            const result = await peel(url, { timeout: 25000 });
            if (spinner) {
                spinner.succeed('Cars loaded');
            }
            if (options.json) {
                console.log(JSON.stringify({
                    query,
                    zip,
                    distance,
                    maxPrice,
                    // Reported alongside maxPrice so the JSON reflects every filter applied.
                    minPrice,
                    url,
                    content: result.content,
                    tokens: result.tokens,
                }, null, 2));
            }
            else {
                console.log(result.content);
            }
        }
        catch (error) {
            exitCode = 1;
            if (spinner) {
                spinner.fail('Car search failed');
            }
            console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
        finally {
            // Release fetch resources on both paths before the process exits.
            await cleanup();
        }
        process.exit(exitCode);
    });
634
856
  // ── extractors command ────────────────────────────────────────────────────
635
857
  program
636
858
  .command('extractors')
package/dist/cli.js CHANGED
File without changes
@@ -56,6 +56,12 @@ export declare function browserFetch(url: string, options?: {
56
56
  waitSelector?: string;
57
57
  /** Block resource types for faster loading: 'image', 'stylesheet', 'font', 'media', 'script' */
58
58
  blockResources?: string[];
59
+ /**
60
+ * Whether the target is a Single-Page Application (Kayak, Google Flights, Expedia, etc).
61
+ * When true, the DOM stability check uses a longer timeout (12s) to wait for async data loads.
62
+ * When false (default), a shorter 3s stability window is used.
63
+ */
64
+ isSPA?: boolean;
59
65
  }): Promise<FetchResult>;
60
66
  /**
61
67
  * Capture a screenshot of a URL using headless Chromium via Playwright.
@@ -30,7 +30,7 @@ let activePagesCount = 0;
30
30
  export async function browserFetch(url, options = {}) {
31
31
  // SECURITY: Validate URL to prevent SSRF
32
32
  validateUrl(url);
33
- const { userAgent, waitMs = 0, timeoutMs = 30000, screenshot = false, screenshotFullPage = false, headers, cookies, stealth = false, actions, keepPageOpen = false, signal, profileDir, headed = false, storageState, proxy, device = 'desktop', viewportWidth: optViewportWidth, viewportHeight: optViewportHeight, waitUntil: optWaitUntil, waitSelector, blockResources, } = options;
33
+ const { userAgent, waitMs = 0, timeoutMs = 30000, screenshot = false, screenshotFullPage = false, headers, cookies, stealth = false, actions, keepPageOpen = false, signal, profileDir, headed = false, storageState, proxy, device = 'desktop', viewportWidth: optViewportWidth, viewportHeight: optViewportHeight, waitUntil: optWaitUntil, waitSelector, blockResources, isSPA = false, } = options;
34
34
  // Device emulation profiles
35
35
  const deviceProfiles = {
36
36
  desktop: { width: 1920, height: 1080, userAgent: undefined },
@@ -317,27 +317,37 @@ export async function browserFetch(url, options = {}) {
317
317
  throwIfAborted();
318
318
  }
319
319
  // DOM stability check: wait for SPA hydration to settle.
320
- // Polls innerText length every 500ms — if still growing, keep waiting (max 3s extra).
320
+ // Polls innerText length every 500ms — if still growing, keep waiting.
321
+ // SPAs (Kayak, Google Flights, Expedia) get a longer timeout to allow async data loads.
321
322
  {
322
323
  const stabilityStart = Date.now();
323
- const MAX_STABILITY_WAIT_MS = 3000;
324
+ // SPA sites (Kayak, Google Flights, Expedia) need up to 12s for results to load.
325
+ // Normal rendered pages need just 3s extra.
326
+ const MAX_STABILITY_WAIT_MS = isSPA ? 12000 : 3000;
327
+ // SPA: must be stable for 2s (4 consecutive 500ms checks). Normal: 1s (2 checks).
328
+ const STABLE_CHECKS_REQUIRED = isSPA ? 4 : 2;
324
329
  const POLL_INTERVAL_MS = 500;
330
+ const MIN_CONTENT_LENGTH = 200; // Don't consider near-empty pages stable
325
331
  let prevLength = await page.evaluate('document.body?.innerText?.length || 0').catch(() => 0);
326
332
  let stableCount = 0;
327
333
  while (Date.now() - stabilityStart < MAX_STABILITY_WAIT_MS) {
328
334
  throwIfAborted();
329
335
  await page.waitForTimeout(POLL_INTERVAL_MS);
330
336
  const curLength = await page.evaluate('document.body?.innerText?.length || 0').catch(() => 0);
331
- if (curLength === prevLength) {
332
- stableCount++;
333
- if (stableCount >= 2)
334
- break; // stable for 2 consecutive checks (~1s)
335
- }
336
- else {
337
+ if (curLength !== prevLength) {
338
+ // Content changed — reset stability counter
337
339
  stableCount = 0;
338
340
  }
341
+ else if (curLength >= MIN_CONTENT_LENGTH) {
342
+ stableCount++;
343
+ if (stableCount >= STABLE_CHECKS_REQUIRED)
344
+ break; // stable long enough
345
+ }
339
346
  prevLength = curLength;
340
347
  }
348
+ if (isSPA) {
349
+ log.debug(`SPA stability check: ${Date.now() - stabilityStart}ms, length=${prevLength}`);
350
+ }
341
351
  }
342
352
  const finalUrl = page.url();
343
353
  const contentType = response?.headers()?.['content-type'] || '';
@@ -123,6 +123,7 @@ const REGISTRY = [
123
123
  { match: (h) => h === 'redfin.com' || h === 'www.redfin.com', extractor: redfinExtractor },
124
124
  // ── Travel ──────────────────────────────────────────────────────────────
125
125
  { match: (h, url = '') => (h === 'www.google.com' || h === 'google.com') && url.includes('/travel/flights'), extractor: googleFlightsExtractor },
126
+ { match: (h, url = '') => (h === 'www.kayak.com' || h === 'kayak.com') && url.includes('/cars/'), extractor: kayakCarRentalExtractor },
126
127
  ];
127
128
  /**
128
129
  * Returns the domain extractor for a URL, or null if none matches.
@@ -6069,3 +6070,273 @@ async function googleFlightsExtractor(_html, url) {
6069
6070
  cleanContent: md.join('\n'),
6070
6071
  };
6071
6072
  }
6073
// ---------------------------------------------------------------------------
// Kayak Car Rental extractor
// ---------------------------------------------------------------------------
/**
 * Extract car-rental listings from the text of a Kayak car search results page.
 *
 * The page text is split into trimmed, non-empty lines. A line of the form
 * "or similar <Class>" marks a listing: the line before it is the car name, and
 * the following lines (up to 30) are scanned for pickup location, distance from
 * city center, rating, rental company, cancellation policy and the total price.
 * Listings without a total price are dropped; the rest are de-duplicated by
 * name + price and sorted by price ascending.
 *
 * @param {string} _html - Page content; raw HTML is stripped to text first,
 *   anything else is treated as already-extracted text.
 * @param {string} url - Kayak URL, expected to look like
 *   /cars/<Location>/<YYYY-MM-DD>/<YYYY-MM-DD>; dates and location are read from it.
 * @returns {Promise<null | {domain: string, type: string, structured: object, cleanContent: string}>}
 *   The structured listings plus a markdown rendering, or null when the URL is
 *   not a car search, the content is not a string, or no priced listing was found.
 */
async function kayakCarRentalExtractor(_html, url) {
    if (typeof _html !== 'string' || typeof url !== 'string' || !url.includes('/cars/')) {
        return null;
    }
    const KAYAK_HOME = 'https://www.kayak.com';
    const MS_PER_DAY = 1000 * 60 * 60 * 24;
    const MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
    // Rental company homepage URLs. A Map (not a plain object) so that scraped
    // names such as "constructor" can never resolve to an inherited property.
    const rentalCompanyUrls = new Map([
        ['Hertz', 'https://www.hertz.com'],
        ['Budget', 'https://www.budget.com'],
        ['Avis', 'https://www.avis.com'],
        ['Enterprise', 'https://www.enterprise.com'],
        ['National', 'https://www.nationalcar.com'],
        ['Alamo', 'https://www.alamo.com'],
        ['Dollar', 'https://www.dollar.com'],
        ['Thrifty', 'https://www.thrifty.com'],
        ['Sixt', 'https://www.sixt.com'],
        ['Fox', 'https://www.foxrentacar.com'],
        ['Payless', 'https://www.paylesscar.com'],
        ['Turn', 'https://www.turn.com'],
        ['EconomyBookings', 'https://www.economybookings.com'],
        ['Priceline', 'https://www.priceline.com'],
        ['Expedia', 'https://www.expedia.com'],
        ['Turo', 'https://www.turo.com'],
        ['KAYAK', 'https://www.kayak.com'],
        ['Booking.com', 'https://www.booking.com'],
        ['DiscoverCars', 'https://www.discovercars.com'],
        ['RentalCars', 'https://www.rentalcars.com'],
        ['Car Rental 8', 'https://www.carrental8.com'],
        ['Hotwire', 'https://www.hotwire.com'],
    ]);
    // Unknown companies link back to Kayak.
    const getCompanyUrl = (company) => rentalCompanyUrls.get(company) || KAYAK_HOME;
    // "1,234,567" → 1234567 (all thousands separators removed, base 10).
    const parsePrice = (digits) => Number.parseInt(digits.replace(/,/g, ''), 10);
    // Parse dates from URL: /cars/Location/YYYY-MM-DD/YYYY-MM-DD
    let numDays = 1;
    let pickupDate = '';
    let dropoffDate = '';
    let locationName = '';
    const dateMatch = url.match(/\/cars\/([^/]+)\/(\d{4}-\d{2}-\d{2})\/(\d{4}-\d{2}-\d{2})/);
    if (dateMatch) {
        try {
            locationName = decodeURIComponent(dateMatch[1]);
        }
        catch {
            // Malformed percent-encoding: keep the raw path segment rather than failing.
            locationName = dateMatch[1];
        }
        pickupDate = dateMatch[2];
        dropoffDate = dateMatch[3];
        const spanMs = new Date(dropoffDate).getTime() - new Date(pickupDate).getTime();
        // Impossible dates (e.g. month 13) yield NaN; keep the 1-day default then.
        if (Number.isFinite(spanMs)) {
            numDays = Math.max(1, Math.round(spanMs / MS_PER_DAY));
        }
    }
    // Format date range for display (e.g. "Apr 1–3")
    function formatDateRange(from, to) {
        if (!from || !to) {
            return '';
        }
        // Parse as UTC noon and read UTC fields so the label never shifts by a
        // day depending on the machine's timezone.
        const fromDate = new Date(`${from}T12:00:00Z`);
        const toDate = new Date(`${to}T12:00:00Z`);
        if (Number.isNaN(fromDate.getTime()) || Number.isNaN(toDate.getTime())) {
            return '';
        }
        const fromMonth = MONTHS[fromDate.getUTCMonth()];
        const toMonth = MONTHS[toDate.getUTCMonth()];
        const fromDay = fromDate.getUTCDate();
        const toDay = toDate.getUTCDate();
        if (fromMonth === toMonth) {
            return `${fromMonth} ${fromDay}–${toDay}`;
        }
        return `${fromMonth} ${fromDay}–${toMonth} ${toDay}`;
    }
    // Format location name nicely (e.g. "Punta-Gorda,FL-c34451" → "Punta Gorda, FL")
    function formatLocation(loc) {
        return loc
            .replace(/-c\d+$/, '') // remove trailing "-c12345"
            .replace(/-/g, ' ') // hyphens to spaces
            .replace(/,(\S)/g, ', $1'); // ensure space after comma
    }
    // Process content: strip HTML if needed
    let text = _html;
    if (text.includes('<!DOCTYPE') || text.includes('<html')) {
        text = text
            .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
            .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
            .replace(/<[^>]+>/g, '\n')
            .replace(/&amp;/g, '&')
            .replace(/&lt;/g, '<')
            .replace(/&gt;/g, '>')
            .replace(/&#\d+;/g, '')
            .replace(/\n{2,}/g, '\n');
    }
    const lines = text.split('\n').map((l) => l.trim()).filter(Boolean);
    const listings = [];
    for (let i = 0; i < lines.length; i++) {
        // Detect: "or similar {Class}" — this signals a standard car rental listing.
        // The car name is the line BEFORE "or similar".
        const orSimilarMatch = lines[i].match(/^or similar\s+(.+)$/);
        if (!orSimilarMatch) {
            continue;
        }
        const carClass = orSimilarMatch[1].trim();
        const carName = i > 0 ? lines[i - 1] : '';
        if (!carName || carName.length > 60) {
            continue;
        }
        // Look ahead for: pickup location, rating, company, price
        let location = '';
        let distanceFromCenter = '';
        let rating = null;
        let company = '';
        let totalPrice = 0;
        let cancellation = '';
        const scanEnd = Math.min(i + 30, lines.length);
        for (let j = i + 1; j < scanEnd; j++) {
            const l = lines[j];
            // Pickup location (a "Pick-up…" line is consumed even if it has no address)
            if (!location && l.startsWith('Pick-up')) {
                const locMatch = l.match(/Pick-up (?:city|airport):\s*(.+)/);
                if (locMatch) {
                    location = locMatch[1].trim();
                }
                continue;
            }
            // Distance from center
            if (!distanceFromCenter) {
                const distM = l.match(/^([\d.]+)\s+mi\s+from\s+city\s+center/);
                if (distM) {
                    distanceFromCenter = `${distM[1]} mi from city center`;
                    continue;
                }
            }
            // Rating (a bare decimal like "9.2", "8.5", "7.2")
            if (rating === null) {
                const ratingM = l.match(/^(\d+\.\d+)$/);
                if (ratingM) {
                    rating = parseFloat(ratingM[1]);
                    continue;
                }
            }
            // Company from "X offer from {Company}" or a standalone known company name
            if (!company) {
                const offerMatch = l.match(/offer from (.+)$/);
                if (offerMatch) {
                    company = offerMatch[1].trim();
                    continue;
                }
                if (rentalCompanyUrls.has(l)) {
                    company = l;
                    continue;
                }
            }
            // Cancellation policy
            if (!cancellation && (l.includes('Free cancellation') || l.includes('No free cancellation'))) {
                cancellation = l;
                continue;
            }
            // Price — "$NNN" on its own line, followed by a line mentioning "Total"
            const priceM = l.match(/^\$(\d[\d,]*)$/);
            if (priceM && (lines[j + 1] || '').includes('Total')) {
                totalPrice = parsePrice(priceM[1]);
                break;
            }
            // Also catch price and "Total" on the same line
            const inlinePriceM = l.match(/\$(\d[\d,]*)\s*Total/);
            if (inlinePriceM) {
                totalPrice = parsePrice(inlinePriceM[1]);
                break;
            }
            // Stop if we hit another car listing marker
            if (l.match(/^or similar\s/) || l === 'Show more results') {
                break;
            }
        }
        if (totalPrice > 0) {
            listings.push({
                name: carName,
                carClass: carClass.replace('Full size', 'Full-size'),
                totalPrice,
                perDayPrice: Math.round(totalPrice / numDays),
                company: company || 'Unknown',
                location: location || 'See booking',
                distanceFromCenter,
                rating,
                cancellation,
                isTuro: false,
            });
        }
    }
    // Deduplicate by name+price; when two entries collide, prefer the one that
    // carries a real company over "Unknown".
    const byKey = new Map();
    for (const c of listings) {
        const key = `${c.name.toLowerCase()}-${c.totalPrice}`;
        const existing = byKey.get(key);
        if (!existing || (existing.company === 'Unknown' && c.company !== 'Unknown')) {
            byKey.set(key, c);
        }
    }
    const unique = Array.from(byKey.values());
    if (unique.length === 0) {
        return null;
    }
    // Drop "Unknown"-company entries whenever at least one attributed listing
    // exists; otherwise keep everything.
    const knownCompanyListings = unique.filter((c) => c.company !== 'Unknown');
    const finalListings = knownCompanyListings.length > 0 ? knownCompanyListings : unique;
    // Sort by price
    finalListings.sort((a, b) => a.totalPrice - b.totalPrice);
    // Get total count from page if mentioned; otherwise the number of unique listings.
    let totalFound = unique.length;
    for (const l of lines) {
        const m = l.match(/^(\d+)\s+results?$/) || l.match(/(\d+)\s+cars?\s+found/);
        if (m) {
            totalFound = Number.parseInt(m[1], 10);
            break;
        }
    }
    const dateRange = formatDateRange(pickupDate, dropoffDate);
    const displayLocation = formatLocation(locationName);
    const daysLabel = numDays === 1 ? '1 day' : `${numDays} days`;
    const md = [
        `# 🚗 Car Rentals — ${displayLocation} · ${dateRange} (${daysLabel})`,
        '',
        `*${totalFound} cars found · Source: [Kayak](${url})*`,
        `*Free cancellation available on most rentals*`,
        '',
    ];
    finalListings.forEach((c, idx) => {
        md.push(`## ${idx + 1}. ${c.name} (${c.carClass}) — $${c.totalPrice} total · $${c.perDayPrice}/day`);
        if (c.distanceFromCenter) {
            md.push(`📍 ${c.distanceFromCenter}`);
        }
        else if (c.location && c.location !== 'See booking') {
            md.push(`📍 ${c.location}`);
        }
        const ratingStr = c.rating !== null ? ` · Rating: ${c.rating}` : '';
        md.push(`🏪 via ${c.company}${ratingStr}`);
        if (c.cancellation) {
            md.push(`✅ ${c.cancellation}`);
        }
        md.push(`🔍 [See price on Kayak](${url})`);
        md.push(`🛒 [Book on ${c.company}](${getCompanyUrl(c.company)})`);
        md.push('');
    });
    md.push('---');
    md.push(`📌 *Prices verified via [Kayak](${url}). Click "See price" to confirm current rate, then book with the rental company.*`);
    return {
        domain: 'kayak.com/cars',
        type: 'car-rental',
        structured: {
            cars: finalListings,
            location: displayLocation,
            pickupDate,
            dropoffDate,
            numDays,
            totalFound,
            source: 'Kayak',
            sourceUrl: url,
        },
        cleanContent: md.join('\n'),
    };
}
@@ -426,6 +426,22 @@ export async function fetchContent(ctx) {
426
426
  // @ts-ignore — proprietary module, gitignored
427
427
  const { searchFallback } = await import('./search-fallback.js');
428
428
  const searchResult = await searchFallback(ctx.url);
429
+ // If DDG/primary returned very little, also try Bing for richer snippets
430
+ if (!searchResult.cachedContent || searchResult.cachedContent.length < 400) {
431
+ try {
432
+ const { simpleFetch } = await import('./http-fetch.js');
433
+ const bingUrl = `https://www.bing.com/search?q=${encodeURIComponent(ctx.url)}`;
434
+ const bingResult = await simpleFetch(bingUrl, ctx.userAgent, 8000);
435
+ if (bingResult.html && bingResult.html.length > 500) {
436
+ const snippetMatch = bingResult.html.match(/<p[^>]*class="[^"]*snippet[^"]*"[^>]*>(.*?)<\/p>/gi);
437
+ if (snippetMatch) {
438
+ const bingSnippet = snippetMatch.map(s => s.replace(/<[^>]+>/g, '')).join('\n');
439
+ searchResult.cachedContent = (searchResult.cachedContent || '') + '\n\n---\n*Additional context from Bing:*\n' + bingSnippet;
440
+ }
441
+ }
442
+ }
443
+ catch { /* Bing fallback is best-effort */ }
444
+ }
429
445
  if (searchResult.cachedContent && searchResult.cachedContent.length > 50) {
430
446
  ctx.timer.end('fetch');
431
447
  ctx.content = searchResult.cachedContent;
@@ -232,7 +232,7 @@ function prefetchDns(url) {
232
232
  }
233
233
  }
234
234
  async function fetchWithBrowserStrategy(url, options) {
235
- const { userAgent, waitMs, timeoutMs, screenshot, screenshotFullPage, headers, cookies, actions, keepPageOpen, effectiveStealth, signal, profileDir, headed, storageState, proxy, device, viewportWidth, viewportHeight, waitUntil, waitSelector, blockResources, } = options;
235
+ const { userAgent, waitMs, timeoutMs, screenshot, screenshotFullPage, headers, cookies, actions, keepPageOpen, effectiveStealth, signal, profileDir, headed, storageState, proxy, device, viewportWidth, viewportHeight, waitUntil, waitSelector, blockResources, isSPA, } = options;
236
236
  try {
237
237
  const result = await browserFetch(url, {
238
238
  userAgent,
@@ -256,6 +256,7 @@ async function fetchWithBrowserStrategy(url, options) {
256
256
  waitUntil,
257
257
  waitSelector,
258
258
  blockResources,
259
+ isSPA,
259
260
  });
260
261
  return {
261
262
  ...result,
@@ -500,6 +501,21 @@ export async function smartFetch(url, options = {}) {
500
501
  if (storageState) {
501
502
  effectiveForceBrowser = true;
502
503
  }
504
+ // Detect SPA for smarter DOM stability wait
505
+ const SPA_FETCH_DOMAINS = new Set([
506
+ 'www.google.com', 'flights.google.com', 'www.airbnb.com', 'www.booking.com',
507
+ 'www.expedia.com', 'www.kayak.com', 'www.skyscanner.com', 'www.tripadvisor.com',
508
+ 'www.indeed.com', 'www.glassdoor.com', 'www.zillow.com', 'app.webpeel.dev',
509
+ ]);
510
+ const SPA_FETCH_URL_PATTERNS = [
511
+ /google\.com\/travel/, /google\.com\/maps/, /google\.com\/shopping/,
512
+ ];
513
+ let isSPAUrl = false;
514
+ try {
515
+ const parsedHostname = new URL(url).hostname;
516
+ isSPAUrl = SPA_FETCH_DOMAINS.has(parsedHostname) || SPA_FETCH_URL_PATTERNS.some(p => p.test(url));
517
+ }
518
+ catch { /* invalid URL — ignore */ }
503
519
  const browserOptions = {
504
520
  userAgent,
505
521
  waitMs,
@@ -521,6 +537,7 @@ export async function smartFetch(url, options = {}) {
521
537
  waitUntil,
522
538
  waitSelector,
523
539
  blockResources,
540
+ isSPA: isSPAUrl,
524
541
  };
525
542
  /* ---- Strategy: simple fetch (with optional race) --------------------- */
526
543
  if (!shouldUseBrowser) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "webpeel",
3
- "version": "0.21.78",
3
+ "version": "0.21.80",
4
4
  "description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
5
5
  "author": "Jake Liu",
6
6
  "license": "AGPL-3.0-only",