webpeel 0.21.76 → 0.21.78

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5114,14 +5114,38 @@ async function carsComExtractor(html, url) {
5114
5114
  const mileage = v.mileage ? `${Number(v.mileage).toLocaleString()} mi` : '';
5115
5115
  const bodyStyle = v.bodyStyle || '';
5116
5116
  const fuelType = v.fuelType || '';
5117
+ const sellerZip = v.seller?.zip || '';
5117
5118
  if (title && title !== 'Used ') {
5118
- listings.push({ title, price, mileage, bodyStyle, fuelType, url: cardLink });
5119
+ listings.push({ title, price, mileage, bodyStyle, fuelType, url: cardLink, sellerZip });
5119
5120
  }
5120
5121
  }
5121
5122
  catch { /* skip malformed */ }
5122
5123
  });
5123
5124
  if (listings.length === 0)
5124
5125
  return null; // Let pipeline handle it
5126
+ // Extract dealer names from page HTML (text_style:"small", font_color:"grey")
5127
+ const dealerPattern = /"text":"([^"]{3,50})","on_click_interactions":\[\],"text_style":"small","font_color":"grey/g;
5128
+ const dealerNames = [];
5129
+ let _dm;
5130
+ while ((_dm = dealerPattern.exec(html)) !== null) {
5131
+ const name = _dm[1];
5132
+ if (!name.match(/^\d|^Used|^New|mi\)|^Review|^\$/))
5133
+ dealerNames.push(name);
5134
+ }
5135
+ // Extract locations: "City, ST (X mi)" (e.g., "Ridgefield, NJ (8 mi)")
5136
+ const locPattern = /([A-Z][a-z]+(?:\s[A-Z][a-z]+)*,\s[A-Z]{2}\s\(\d+\s*mi\))/g;
5137
+ const locationList = [];
5138
+ let _lm;
5139
+ while ((_lm = locPattern.exec(html)) !== null) {
5140
+ locationList.push(_lm[1]);
5141
+ }
5142
+ // Match dealers and locations to listings (they appear in page order)
5143
+ for (let i = 0; i < listings.length; i++) {
5144
+ if (i < dealerNames.length)
5145
+ listings[i].dealer = dealerNames[i];
5146
+ if (i < locationList.length)
5147
+ listings[i].location = locationList[i];
5148
+ }
5125
5149
  const priceRange = [minPrice && `$${minPrice}`, maxPrice && `$${maxPrice}`].filter(Boolean).join(' โ€“ ');
5126
5150
  const header = [
5127
5151
  `# ๐Ÿš— Cars.com โ€” ${keyword || 'Vehicle Search'}`,
@@ -5139,9 +5163,16 @@ async function carsComExtractor(html, url) {
5139
5163
  l.price,
5140
5164
  l.mileage,
5141
5165
  l.bodyStyle,
5142
- l.url && `[โ†’](https://www.cars.com${l.url})`,
5143
5166
  ].filter(Boolean);
5144
- return parts.join(' ยท ');
5167
+ const line = parts.join(' ยท ');
5168
+ const details = [];
5169
+ if (l.location)
5170
+ details.push(`๐Ÿ“ ${l.location}`);
5171
+ if (l.dealer)
5172
+ details.push(`๐Ÿช ${l.dealer}`);
5173
+ if (l.url)
5174
+ details.push(`๐Ÿ”— [View listing](https://www.cars.com${l.url})`);
5175
+ return line + (details.length ? '\n ' + details.join(' ยท ') : '');
5145
5176
  });
5146
5177
  return {
5147
5178
  domain: 'cars.com',
@@ -5988,36 +6019,24 @@ async function googleFlightsExtractor(_html, url) {
5988
6019
  unique.sort((a, b) => a.price - b.price);
5989
6020
  // Helper: get airline booking URL
5990
6021
  function getAirlineBookingUrl(airline, from, to, dateStr) {
5991
- // Parse date from "Sat, Apr 4" → "2026-04-04" or "04/04/2026" etc.
5992
- const months = { Jan: '01', Feb: '02', Mar: '03', Apr: '04', May: '05', Jun: '06', Jul: '07', Aug: '08', Sep: '09', Oct: '10', Nov: '11', Dec: '12' };
5993
- let isoDate = '';
5994
- let mmddDate = '';
5995
- const dm = dateStr.match(/(\w{3})\s+(\d{1,2})/);
5996
- if (dm) {
5997
- const mon = months[dm[1]] || '01';
5998
- const day = dm[2].padStart(2, '0');
5999
- const year = new Date().getFullYear(); // current year
6000
- isoDate = `${year}-${mon}-${day}`;
6001
- mmddDate = `${mon}/${day}/${year}`;
6002
- }
6003
6022
  const fromUp = from.toUpperCase();
6004
6023
  const toUp = to.toUpperCase();
6005
- // Deep links with pre-filled origin, destination, date, 1 passenger
6024
+ // Airline homepages — direct booking pages (deep links don't work without session/cookies)
6006
6025
  const urlMap = {
6007
- 'United': `https://www.united.com/ual/en/us/flight-search/book-a-flight/results/rev?f=${fromUp}&t=${toUp}&d=${isoDate}&tt=1&at=1&sc=7&px=1&taxng=1&newHP=True&clm=7&st=bestmatches&tqp=A`,
6008
- 'Delta': `https://www.delta.com/flight-search/book-a-flight?departure=${fromUp}&arrival=${toUp}&departureDate=${mmddDate}&paxCount=1&tripType=ONE_WAY`,
6009
- 'JetBlue': `https://www.jetblue.com/booking/flights?from=${fromUp}&to=${toUp}&depart=${isoDate}&is498=true&is498=${true}&pax=1`,
6010
- 'American': `https://www.aa.com/booking/find-flights?origin=${fromUp}&destination=${toUp}&departureDate=${isoDate}&tripType=OneWay&pax=1`,
6011
- 'Spirit': `https://www.spirit.com/book/flights?origin=${fromUp}&destination=${toUp}&departureDate=${isoDate}&adults=1&tripType=oneWay`,
6012
- 'Frontier': `https://www.flyfrontier.com/booking/flights?origin=${fromUp}&destination=${toUp}&date=${isoDate}&adults=1`,
6013
- 'Southwest': `https://www.southwest.com/air/booking/select.html?originationAirportCode=${fromUp}&destinationAirportCode=${toUp}&departureDate=${isoDate}&adultPassengersCount=1&tripType=oneway`,
6014
- 'Breeze': `https://www.flybreeze.com/home?from=${fromUp}&to=${toUp}&depart=${isoDate}&pax=1`,
6015
- 'Alaska': `https://www.alaskaair.com/shopping/flights?A=${fromUp}&B=${toUp}&DT=${isoDate}&FT=ow&C=1`,
6016
- 'Hawaiian': `https://www.hawaiianairlines.com/book/results?origin=${fromUp}&destination=${toUp}&departDate=${isoDate}&adults=1&tripType=ow`,
6017
- 'Sun Country': `https://www.suncountry.com/booking/search?from=${fromUp}&to=${toUp}&depart=${isoDate}&passengers=1`,
6018
- 'Avelo': `https://www.aveloair.com/book?from=${fromUp}&to=${toUp}&depart=${isoDate}`,
6026
+ 'United': `https://www.united.com`,
6027
+ 'Delta': `https://www.delta.com`,
6028
+ 'JetBlue': `https://www.jetblue.com`,
6029
+ 'American': `https://www.aa.com`,
6030
+ 'Spirit': `https://www.spirit.com`,
6031
+ 'Frontier': `https://www.flyfrontier.com`,
6032
+ 'Southwest': `https://www.southwest.com`,
6033
+ 'Breeze': `https://www.flybreeze.com`,
6034
+ 'Alaska': `https://www.alaskaair.com`,
6035
+ 'Hawaiian': `https://www.hawaiianairlines.com`,
6036
+ 'Sun Country': `https://www.suncountry.com`,
6037
+ 'Avelo': `https://www.aveloair.com`,
6019
6038
  };
6020
- return urlMap[airline] || `https://www.google.com/travel/flights?q=${encodeURIComponent(`${airline} flights ${from} to ${to} ${dateStr}`)}`;
6039
+ return urlMap[airline] || `https://www.google.com/travel/flights?q=${encodeURIComponent(`${airline} flights ${fromUp} to ${toUp} ${dateStr}`)}`;
6021
6040
  }
6022
6041
  // Parse route from URL
6023
6042
  const u = new URL(url);
@@ -6037,11 +6056,12 @@ async function googleFlightsExtractor(_html, url) {
6037
6056
  md.push(`๐Ÿ›ซ ${f.fromAirport} โ†’ ${f.toAirport} ยท ${f.duration} ยท ${f.stops}`);
6038
6057
  if (f.bags)
6039
6058
  md.push(`๐Ÿงณ ${f.bags}`);
6040
- md.push(`๐Ÿ”— [Book on ${f.airline}](${bookingUrl})`);
6059
+ md.push(`๐Ÿ” [See price on Google Flights](${url})`);
6060
+ md.push(`๐Ÿ›’ [Book on ${f.airline}](${bookingUrl})`);
6041
6061
  md.push('');
6042
6062
  }
6043
6063
  md.push('---');
6044
- md.push(`๐Ÿ“Œ *Prices change frequently. [View live prices on Google Flights](${url})*`);
6064
+ md.push(`๐Ÿ“Œ *All prices verified via [Google Flights](${url}). Click "See price" to confirm, then book directly with the airline.*`);
6045
6065
  return {
6046
6066
  domain: 'google.com/travel/flights',
6047
6067
  type: 'flights',
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "webpeel",
3
- "version": "0.21.76",
3
+ "version": "0.21.78",
4
4
  "description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
5
5
  "author": "Jake Liu",
6
6
  "license": "AGPL-3.0-only",