webpeel 0.21.76 → 0.21.78
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/domain-extractors.js +51 -31
- package/package.json +1 -1
|
@@ -5114,14 +5114,38 @@ async function carsComExtractor(html, url) {
|
|
|
5114
5114
|
const mileage = v.mileage ? `${Number(v.mileage).toLocaleString()} mi` : '';
|
|
5115
5115
|
const bodyStyle = v.bodyStyle || '';
|
|
5116
5116
|
const fuelType = v.fuelType || '';
|
|
5117
|
+
const sellerZip = v.seller?.zip || '';
|
|
5117
5118
|
if (title && title !== 'Used ') {
|
|
5118
|
-
listings.push({ title, price, mileage, bodyStyle, fuelType, url: cardLink });
|
|
5119
|
+
listings.push({ title, price, mileage, bodyStyle, fuelType, url: cardLink, sellerZip });
|
|
5119
5120
|
}
|
|
5120
5121
|
}
|
|
5121
5122
|
catch { /* skip malformed */ }
|
|
5122
5123
|
});
|
|
5123
5124
|
if (listings.length === 0)
|
|
5124
5125
|
return null; // Let pipeline handle it
|
|
5126
|
+
// Extract dealer names from page HTML (text_style:"small", font_color:"grey")
|
|
5127
|
+
const dealerPattern = /"text":"([^"]{3,50})","on_click_interactions":\[\],"text_style":"small","font_color":"grey/g;
|
|
5128
|
+
const dealerNames = [];
|
|
5129
|
+
let _dm;
|
|
5130
|
+
while ((_dm = dealerPattern.exec(html)) !== null) {
|
|
5131
|
+
const name = _dm[1];
|
|
5132
|
+
if (!name.match(/^\d|^Used|^New|mi\)|^Review|^\$/))
|
|
5133
|
+
dealerNames.push(name);
|
|
5134
|
+
}
|
|
5135
|
+
// Extract locations: "City, ST (X mi)" (e.g., "Ridgefield, NJ (8 mi)")
|
|
5136
|
+
const locPattern = /([A-Z][a-z]+(?:\s[A-Z][a-z]+)*,\s[A-Z]{2}\s\(\d+\s*mi\))/g;
|
|
5137
|
+
const locationList = [];
|
|
5138
|
+
let _lm;
|
|
5139
|
+
while ((_lm = locPattern.exec(html)) !== null) {
|
|
5140
|
+
locationList.push(_lm[1]);
|
|
5141
|
+
}
|
|
5142
|
+
// Match dealers and locations to listings (they appear in page order)
|
|
5143
|
+
for (let i = 0; i < listings.length; i++) {
|
|
5144
|
+
if (i < dealerNames.length)
|
|
5145
|
+
listings[i].dealer = dealerNames[i];
|
|
5146
|
+
if (i < locationList.length)
|
|
5147
|
+
listings[i].location = locationList[i];
|
|
5148
|
+
}
|
|
5125
5149
|
const priceRange = [minPrice && `$${minPrice}`, maxPrice && `$${maxPrice}`].filter(Boolean).join(' โ ');
|
|
5126
5150
|
const header = [
|
|
5127
5151
|
`# ๐ Cars.com โ ${keyword || 'Vehicle Search'}`,
|
|
@@ -5139,9 +5163,16 @@ async function carsComExtractor(html, url) {
|
|
|
5139
5163
|
l.price,
|
|
5140
5164
|
l.mileage,
|
|
5141
5165
|
l.bodyStyle,
|
|
5142
|
-
l.url && `[โ](https://www.cars.com${l.url})`,
|
|
5143
5166
|
].filter(Boolean);
|
|
5144
|
-
|
|
5167
|
+
const line = parts.join(' ยท ');
|
|
5168
|
+
const details = [];
|
|
5169
|
+
if (l.location)
|
|
5170
|
+
details.push(`๐ ${l.location}`);
|
|
5171
|
+
if (l.dealer)
|
|
5172
|
+
details.push(`๐ช ${l.dealer}`);
|
|
5173
|
+
if (l.url)
|
|
5174
|
+
details.push(`๐ [View listing](https://www.cars.com${l.url})`);
|
|
5175
|
+
return line + (details.length ? '\n ' + details.join(' ยท ') : '');
|
|
5145
5176
|
});
|
|
5146
5177
|
return {
|
|
5147
5178
|
domain: 'cars.com',
|
|
@@ -5988,36 +6019,24 @@ async function googleFlightsExtractor(_html, url) {
|
|
|
5988
6019
|
unique.sort((a, b) => a.price - b.price);
|
|
5989
6020
|
// Helper: get airline booking URL
|
|
5990
6021
|
function getAirlineBookingUrl(airline, from, to, dateStr) {
|
|
5991
|
-
// Parse date from "Sat, Apr 4" โ "2026-04-04" or "04/04/2026" etc.
|
|
5992
|
-
const months = { Jan: '01', Feb: '02', Mar: '03', Apr: '04', May: '05', Jun: '06', Jul: '07', Aug: '08', Sep: '09', Oct: '10', Nov: '11', Dec: '12' };
|
|
5993
|
-
let isoDate = '';
|
|
5994
|
-
let mmddDate = '';
|
|
5995
|
-
const dm = dateStr.match(/(\w{3})\s+(\d{1,2})/);
|
|
5996
|
-
if (dm) {
|
|
5997
|
-
const mon = months[dm[1]] || '01';
|
|
5998
|
-
const day = dm[2].padStart(2, '0');
|
|
5999
|
-
const year = new Date().getFullYear(); // current year
|
|
6000
|
-
isoDate = `${year}-${mon}-${day}`;
|
|
6001
|
-
mmddDate = `${mon}/${day}/${year}`;
|
|
6002
|
-
}
|
|
6003
6022
|
const fromUp = from.toUpperCase();
|
|
6004
6023
|
const toUp = to.toUpperCase();
|
|
6005
|
-
//
|
|
6024
|
+
// Airline homepages โ direct booking pages (deep links don't work without session/cookies)
|
|
6006
6025
|
const urlMap = {
|
|
6007
|
-
'United': `https://www.united.com
|
|
6008
|
-
'Delta': `https://www.delta.com
|
|
6009
|
-
'JetBlue': `https://www.jetblue.com
|
|
6010
|
-
'American': `https://www.aa.com
|
|
6011
|
-
'Spirit': `https://www.spirit.com
|
|
6012
|
-
'Frontier': `https://www.flyfrontier.com
|
|
6013
|
-
'Southwest': `https://www.southwest.com
|
|
6014
|
-
'Breeze': `https://www.flybreeze.com
|
|
6015
|
-
'Alaska': `https://www.alaskaair.com
|
|
6016
|
-
'Hawaiian': `https://www.hawaiianairlines.com
|
|
6017
|
-
'Sun Country': `https://www.suncountry.com
|
|
6018
|
-
'Avelo': `https://www.aveloair.com
|
|
6026
|
+
'United': `https://www.united.com`,
|
|
6027
|
+
'Delta': `https://www.delta.com`,
|
|
6028
|
+
'JetBlue': `https://www.jetblue.com`,
|
|
6029
|
+
'American': `https://www.aa.com`,
|
|
6030
|
+
'Spirit': `https://www.spirit.com`,
|
|
6031
|
+
'Frontier': `https://www.flyfrontier.com`,
|
|
6032
|
+
'Southwest': `https://www.southwest.com`,
|
|
6033
|
+
'Breeze': `https://www.flybreeze.com`,
|
|
6034
|
+
'Alaska': `https://www.alaskaair.com`,
|
|
6035
|
+
'Hawaiian': `https://www.hawaiianairlines.com`,
|
|
6036
|
+
'Sun Country': `https://www.suncountry.com`,
|
|
6037
|
+
'Avelo': `https://www.aveloair.com`,
|
|
6019
6038
|
};
|
|
6020
|
-
return urlMap[airline] || `https://www.google.com/travel/flights?q=${encodeURIComponent(`${airline} flights ${
|
|
6039
|
+
return urlMap[airline] || `https://www.google.com/travel/flights?q=${encodeURIComponent(`${airline} flights ${fromUp} to ${toUp} ${dateStr}`)}`;
|
|
6021
6040
|
}
|
|
6022
6041
|
// Parse route from URL
|
|
6023
6042
|
const u = new URL(url);
|
|
@@ -6037,11 +6056,12 @@ async function googleFlightsExtractor(_html, url) {
|
|
|
6037
6056
|
md.push(`๐ซ ${f.fromAirport} โ ${f.toAirport} ยท ${f.duration} ยท ${f.stops}`);
|
|
6038
6057
|
if (f.bags)
|
|
6039
6058
|
md.push(`๐งณ ${f.bags}`);
|
|
6040
|
-
md.push(
|
|
6059
|
+
md.push(`๐ [See price on Google Flights](${url})`);
|
|
6060
|
+
md.push(`๐ [Book on ${f.airline}](${bookingUrl})`);
|
|
6041
6061
|
md.push('');
|
|
6042
6062
|
}
|
|
6043
6063
|
md.push('---');
|
|
6044
|
-
md.push(`๐ *
|
|
6064
|
+
md.push(`๐ *All prices verified via [Google Flights](${url}). Click "See price" to confirm, then book directly with the airline.*`);
|
|
6045
6065
|
return {
|
|
6046
6066
|
domain: 'google.com/travel/flights',
|
|
6047
6067
|
type: 'flights',
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "webpeel",
|
|
3
|
-
"version": "0.21.
|
|
3
|
+
"version": "0.21.78",
|
|
4
4
|
"description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
|
|
5
5
|
"author": "Jake Liu",
|
|
6
6
|
"license": "AGPL-3.0-only",
|