webpeel 0.21.74 → 0.21.76

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -588,6 +588,49 @@ export function registerSearchCommands(program) {
588
588
  process.exit(1);
589
589
  }
590
590
  });
591
+ // ── flights command ───────────────────────────────────────────────────────
592
// Registers `webpeel flights <query>`: builds a Google Flights search URL from
// the free-text query, fetches it with browser rendering, and prints either the
// page content or a JSON envelope. The process always exits explicitly once the
// command finishes (0 on success, 1 on failure).
program
    .command('flights <query>')
    .description('Search for flights (via Google Flights) — e.g. "NYC to Fort Myers Apr 4"')
    .option('--one-way', 'One-way flight (default)')
    .option('--round-trip', 'Round-trip flight')
    .option('-n, --count <n>', 'Max flights to show', '10')
    .option('--json', 'Output as JSON')
    .option('-s, --silent', 'Silent mode')
    .action(async (query, options) => {
        // One-way is the default; only --round-trip drops the " one way" suffix.
        const tripType = options.roundTrip ? '' : ' one way';
        const encoded = encodeURIComponent(`Flights from ${query}${tripType}`);
        const url = `https://www.google.com/travel/flights?q=${encoded}`;
        // --count arrives as a string; fall back to the declared default (10)
        // when it is not a positive integer.
        const parsedCount = Number.parseInt(options.count, 10);
        const maxFlights = Number.isInteger(parsedCount) && parsedCount > 0 ? parsedCount : 10;
        const spinner = options.silent ? null : ora(`Searching flights: ${query}...`).start();
        let exitCode = 0;
        try {
            // render is forced automatically by SPA auto-detect, but be explicit here
            const result = await peel(url, { render: true, timeout: 30000 });
            if (spinner) {
                spinner.succeed('Flights loaded');
            }
            if (options.json) {
                const structuredFlights = result.domainData?.structured?.flights;
                const flights = Array.isArray(structuredFlights) ? structuredFlights.slice(0, maxFlights) : [];
                console.log(JSON.stringify({
                    query,
                    url,
                    flights,
                    source: 'Google Flights',
                    content: result.content,
                    tokens: result.tokens,
                }, null, 2));
            }
            else {
                // NOTE(review): the text output is printed as returned by peel();
                // --count currently only limits the structured `flights` list above.
                console.log(result.content);
            }
        }
        catch (error) {
            if (spinner) {
                spinner.fail('Flight search failed');
            }
            console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
            exitCode = 1;
        }
        finally {
            // Single cleanup point for both the success and the failure path.
            await cleanup();
        }
        process.exit(exitCode);
    });
591
634
  // ── extractors command ────────────────────────────────────────────────────
592
635
  program
593
636
  .command('extractors')
@@ -5986,6 +5986,39 @@ async function googleFlightsExtractor(_html, url) {
5986
5986
  if (unique.length === 0)
5987
5987
  return null;
5988
5988
  unique.sort((a, b) => a.price - b.price);
5989
+ // Helper: get airline booking URL
5990
/**
 * Build a booking link for a single flight result.
 *
 * For the airlines listed below, returns a link to the airline's own site with
 * origin, destination, departure date and one passenger placed in the query
 * string (whether each airline honours these parameters is not verified here).
 * For any other airline, returns a Google Flights search URL.
 *
 * @param {string} airline - Airline name; must match a key below exactly (e.g. "United").
 * @param {string} from - Origin airport code; upper-cased before use.
 * @param {string} to - Destination airport code; upper-cased before use.
 * @param {string} [dateStr] - Display date such as "Sat, Apr 4". May be missing;
 *   the date parameter is then left empty instead of throwing.
 * @param {Date} [now=new Date()] - Reference "today" used to choose the year.
 * @returns {string} Booking or search URL.
 */
function getAirlineBookingUrl(airline, from, to, dateStr, now = new Date()) {
    const monthNumbers = { jan: '01', feb: '02', mar: '03', apr: '04', may: '05', jun: '06', jul: '07', aug: '08', sep: '09', oct: '10', nov: '11', dec: '12' };
    const airlineName = String(airline ?? '').trim();
    const fromText = String(from ?? '').trim();
    const toText = String(to ?? '').trim();
    const dateText = typeof dateStr === 'string' ? dateStr.trim() : '';
    // Parse date from "Sat, Apr 4" → "2026-04-04" and "04/04/2026".
    // Only real month names are accepted so that weekday fragments or other
    // three-letter tokens can never be mistaken for January.
    let isoDate = '';
    let mmddDate = '';
    const dm = dateText.match(/\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)[a-z]*\.?\s+(\d{1,2})\b/i);
    if (dm) {
        const mon = monthNumbers[dm[1].toLowerCase()];
        const dayNum = Number.parseInt(dm[2], 10);
        if (dayNum >= 1 && dayNum <= 31) {
            const day = String(dayNum).padStart(2, '0');
            // The display date carries no year. Use the current year unless that
            // day has already passed, in which case the flight must be next year.
            let year = now.getFullYear();
            const today = new Date(year, now.getMonth(), now.getDate());
            if (new Date(year, Number(mon) - 1, dayNum) < today) {
                year += 1;
            }
            isoDate = `${year}-${mon}-${day}`;
            mmddDate = `${mon}/${day}/${year}`;
        }
    }
    // Encoded so an unexpected value (e.g. a city name with spaces) cannot break the link.
    const fromUp = encodeURIComponent(fromText.toUpperCase());
    const toUp = encodeURIComponent(toText.toUpperCase());
    // Deep links with pre-filled origin, destination, date, 1 passenger
    const urlMap = {
        'United': `https://www.united.com/ual/en/us/flight-search/book-a-flight/results/rev?f=${fromUp}&t=${toUp}&d=${isoDate}&tt=1&at=1&sc=7&px=1&taxng=1&newHP=True&clm=7&st=bestmatches&tqp=A`,
        'Delta': `https://www.delta.com/flight-search/book-a-flight?departure=${fromUp}&arrival=${toUp}&departureDate=${mmddDate}&paxCount=1&tripType=ONE_WAY`,
        'JetBlue': `https://www.jetblue.com/booking/flights?from=${fromUp}&to=${toUp}&depart=${isoDate}&is498=true&pax=1`,
        'American': `https://www.aa.com/booking/find-flights?origin=${fromUp}&destination=${toUp}&departureDate=${isoDate}&tripType=OneWay&pax=1`,
        'Spirit': `https://www.spirit.com/book/flights?origin=${fromUp}&destination=${toUp}&departureDate=${isoDate}&adults=1&tripType=oneWay`,
        'Frontier': `https://www.flyfrontier.com/booking/flights?origin=${fromUp}&destination=${toUp}&date=${isoDate}&adults=1`,
        'Southwest': `https://www.southwest.com/air/booking/select.html?originationAirportCode=${fromUp}&destinationAirportCode=${toUp}&departureDate=${isoDate}&adultPassengersCount=1&tripType=oneway`,
        'Breeze': `https://www.flybreeze.com/home?from=${fromUp}&to=${toUp}&depart=${isoDate}&pax=1`,
        'Alaska': `https://www.alaskaair.com/shopping/flights?A=${fromUp}&B=${toUp}&DT=${isoDate}&FT=ow&C=1`,
        'Hawaiian': `https://www.hawaiianairlines.com/book/results?origin=${fromUp}&destination=${toUp}&departDate=${isoDate}&adults=1&tripType=ow`,
        'Sun Country': `https://www.suncountry.com/booking/search?from=${fromUp}&to=${toUp}&depart=${isoDate}&passengers=1`,
        'Avelo': `https://www.aveloair.com/book?from=${fromUp}&to=${toUp}&depart=${isoDate}`,
    };
    // Own-property check: a name such as "constructor" must not resolve to an
    // inherited Object.prototype member.
    if (Object.prototype.hasOwnProperty.call(urlMap, airlineName)) {
        return urlMap[airlineName];
    }
    // Unknown airline: fall back to a Google Flights search, skipping empty parts.
    const searchText = [airlineName, 'flights', fromText, 'to', toText, dateText].filter(Boolean).join(' ');
    return `https://www.google.com/travel/flights?q=${encodeURIComponent(searchText)}`;
}
5989
6022
  // Parse route from URL
5990
6023
  const u = new URL(url);
5991
6024
  const query = (u.searchParams.get('q') || '').replace(/Flights?\s+(from\s+)?/i, '').replace(/\s+one\s+way/i, '').trim();
@@ -5998,11 +6031,13 @@ async function googleFlightsExtractor(_html, url) {
5998
6031
  ];
5999
6032
  for (let idx = 0; idx < unique.length; idx++) {
6000
6033
  const f = unique[idx];
6034
+ const bookingUrl = getAirlineBookingUrl(f.airline, f.fromAirport, f.toAirport, f.departDate);
6001
6035
  md.push(`## ${idx + 1}. ${f.airline} — ${f.priceStr}`);
6002
6036
  md.push(`🕐 Depart **${f.departTime}** → Arrive **${f.arriveTime}**${f.departDate ? ` · ${f.departDate}` : ''}`);
6003
6037
  md.push(`🛫 ${f.fromAirport} → ${f.toAirport} · ${f.duration} · ${f.stops}`);
6004
6038
  if (f.bags)
6005
6039
  md.push(`🧳 ${f.bags}`);
6040
+ md.push(`🔗 [Book on ${f.airline}](${bookingUrl})`);
6006
6041
  md.push('');
6007
6042
  }
6008
6043
  md.push('---');
@@ -146,6 +146,43 @@ export function normalizeOptions(ctx) {
146
146
  if (autoScrollOpts) {
147
147
  ctx.render = true;
148
148
  }
149
+ // Auto-detect SPAs that require browser rendering (no --render flag needed)
150
// Turn on browser rendering for hosts/paths known to be client-rendered, unless
// rendering was already requested.
if (!ctx.render) {
    const SPA_DOMAINS = new Set([
        'www.google.com', // Google Flights, Maps, Shopping etc.
        'flights.google.com',
        'www.airbnb.com',
        'www.booking.com',
        'www.expedia.com',
        'www.kayak.com',
        'www.skyscanner.com',
        'www.tripadvisor.com',
        'www.indeed.com',
        'www.glassdoor.com',
        'www.zillow.com', // already handled but backup
        'app.webpeel.dev', // our own dashboard is a SPA
    ]);
    // Path-specific rules for google.com hosts not listed above (bare domain or
    // other subdomains). Anchored at the host so that look-alike domains and
    // query-string contents cannot trigger a match.
    const SPA_URL_PATTERNS = [
        /^(?:[\w-]+\.)*google\.com\/travel/,
        /^(?:[\w-]+\.)*google\.com\/maps/,
        /^(?:[\w-]+\.)*google\.com\/shopping/,
    ];
    try {
        const { hostname, pathname } = new URL(ctx.url);
        // Match on host + path only; the raw URL may carry "google.com/travel"
        // inside a query parameter of an unrelated site.
        const hostAndPath = `${hostname}${pathname}`;
        if (SPA_DOMAINS.has(hostname)) {
            ctx.render = true;
            log.debug(`Auto-enabling render: SPA domain detected (${hostname})`);
        }
        else if (SPA_URL_PATTERNS.some((pattern) => pattern.test(hostAndPath))) {
            ctx.render = true;
            log.debug(`Auto-enabling render: SPA URL pattern matched`);
        }
    }
    catch {
        // Invalid URL — skip SPA detection
    }
}
149
186
  }
150
187
  // ---------------------------------------------------------------------------
151
188
  // Stage 2: handleYouTube
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "webpeel",
3
- "version": "0.21.74",
3
+ "version": "0.21.76",
4
4
  "description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
5
5
  "author": "Jake Liu",
6
6
  "license": "AGPL-3.0-only",