webpeel 0.21.74 → 0.21.76
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -588,6 +588,49 @@ export function registerSearchCommands(program) {
|
|
|
588
588
|
process.exit(1);
|
|
589
589
|
}
|
|
590
590
|
});
|
|
591
|
+
// ── flights command ───────────────────────────────────────────────────────
|
|
592
|
+
program
|
|
593
|
+
.command('flights <query>')
|
|
594
|
+
.description('Search for flights (via Google Flights) — e.g. "NYC to Fort Myers Apr 4"')
|
|
595
|
+
.option('--one-way', 'One-way flight (default)')
|
|
596
|
+
.option('--round-trip', 'Round-trip flight')
|
|
597
|
+
.option('-n, --count <n>', 'Max flights to show', '10')
|
|
598
|
+
.option('--json', 'Output as JSON')
|
|
599
|
+
.option('-s, --silent', 'Silent mode')
|
|
600
|
+
.action(async (query, options) => {
|
|
601
|
+
const tripType = options.roundTrip ? '' : ' one way';
|
|
602
|
+
const encoded = encodeURIComponent(`Flights from ${query}${tripType}`);
|
|
603
|
+
const url = `https://www.google.com/travel/flights?q=${encoded}`;
|
|
604
|
+
const spinner = options.silent ? null : ora(`Searching flights: ${query}...`).start();
|
|
605
|
+
try {
|
|
606
|
+
// render is forced automatically by SPA auto-detect, but be explicit here
|
|
607
|
+
const result = await peel(url, { render: true, timeout: 30000 });
|
|
608
|
+
if (spinner)
|
|
609
|
+
spinner.succeed('Flights loaded');
|
|
610
|
+
if (options.json) {
|
|
611
|
+
console.log(JSON.stringify({
|
|
612
|
+
query,
|
|
613
|
+
url,
|
|
614
|
+
flights: result.domainData?.structured?.flights || [],
|
|
615
|
+
source: 'Google Flights',
|
|
616
|
+
content: result.content,
|
|
617
|
+
tokens: result.tokens,
|
|
618
|
+
}, null, 2));
|
|
619
|
+
}
|
|
620
|
+
else {
|
|
621
|
+
console.log(result.content);
|
|
622
|
+
}
|
|
623
|
+
await cleanup();
|
|
624
|
+
process.exit(0);
|
|
625
|
+
}
|
|
626
|
+
catch (error) {
|
|
627
|
+
if (spinner)
|
|
628
|
+
spinner.fail('Flight search failed');
|
|
629
|
+
console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
630
|
+
await cleanup();
|
|
631
|
+
process.exit(1);
|
|
632
|
+
}
|
|
633
|
+
});
|
|
591
634
|
// ── extractors command ────────────────────────────────────────────────────
|
|
592
635
|
program
|
|
593
636
|
.command('extractors')
|
|
@@ -5986,6 +5986,39 @@ async function googleFlightsExtractor(_html, url) {
|
|
|
5986
5986
|
if (unique.length === 0)
|
|
5987
5987
|
return null;
|
|
5988
5988
|
unique.sort((a, b) => a.price - b.price);
|
|
5989
|
+
// Helper: get airline booking URL
|
|
5990
|
+
/**
 * Build a booking link for a single flight result.
 *
 * For the airlines listed in `urlMap` this returns a link to that airline's
 * site with origin, destination, departure date and one passenger placed in
 * the query string. For any other airline it returns a Google Flights search
 * URL built from the airline name, route and date text.
 *
 * @param {string} airline - Airline name; matched case-sensitively against the `urlMap` keys.
 * @param {string} from - Origin airport code (upper-cased and URL-encoded for airline links).
 * @param {string} to - Destination airport code (upper-cased and URL-encoded for airline links).
 * @param {string} [dateStr] - Display date such as "Sat, Apr 4". When it is missing or
 *   cannot be parsed, the date parameters in the link are left empty.
 * @param {Date} [now] - Reference "today" used to choose the year; defaults to the current time.
 * @returns {string} The booking or search URL.
 */
function getAirlineBookingUrl(airline, from, to, dateStr, now = new Date()) {
    const months = { Jan: '01', Feb: '02', Mar: '03', Apr: '04', May: '05', Jun: '06', Jul: '07', Aug: '08', Sep: '09', Oct: '10', Nov: '11', Dec: '12' };
    // The caller treats the departure date as optional, so never call string methods on it blindly.
    const dateText = typeof dateStr === 'string' ? dateStr : '';
    const fromText = String(from ?? '');
    const toText = String(to ?? '');
    let isoDate = '';
    let mmddDate = '';
    // "Sat, Apr 4" -> month "Apr", day "4". Longer month spellings ("April 4") are reduced to
    // their first three letters; the lookahead keeps a year ("Apr 2026") from being read as a day.
    const dm = dateText.match(/\b([A-Za-z]{3})[A-Za-z]*\.?\s+(\d{1,2})(?!\d)/);
    if (dm) {
        const monKey = dm[1][0].toUpperCase() + dm[1].slice(1).toLowerCase();
        // An unknown month token leaves the date empty instead of silently becoming January.
        const mon = Object.prototype.hasOwnProperty.call(months, monKey) ? months[monKey] : '';
        const dayNum = Number(dm[2]);
        if (mon && dayNum >= 1 && dayNum <= 31) {
            const day = dm[2].padStart(2, '0');
            // The display date carries no year. Use the current year unless that date has
            // already passed, in which case the flight must be in the following year.
            const today = new Date(now.getFullYear(), now.getMonth(), now.getDate());
            let year = today.getFullYear();
            if (new Date(year, Number(mon) - 1, dayNum) < today) {
                year += 1;
            }
            isoDate = `${year}-${mon}-${day}`;
            mmddDate = `${mon}/${day}/${year}`;
        }
    }
    const fromUp = encodeURIComponent(fromText.toUpperCase());
    const toUp = encodeURIComponent(toText.toUpperCase());
    // Deep links with pre-filled origin, destination, date, 1 passenger
    const urlMap = {
        'United': `https://www.united.com/ual/en/us/flight-search/book-a-flight/results/rev?f=${fromUp}&t=${toUp}&d=${isoDate}&tt=1&at=1&sc=7&px=1&taxng=1&newHP=True&clm=7&st=bestmatches&tqp=A`,
        'Delta': `https://www.delta.com/flight-search/book-a-flight?departure=${fromUp}&arrival=${toUp}&departureDate=${mmddDate}&paxCount=1&tripType=ONE_WAY`,
        // The previous template emitted `is498=true` twice with the same value; once is enough.
        'JetBlue': `https://www.jetblue.com/booking/flights?from=${fromUp}&to=${toUp}&depart=${isoDate}&is498=true&pax=1`,
        'American': `https://www.aa.com/booking/find-flights?origin=${fromUp}&destination=${toUp}&departureDate=${isoDate}&tripType=OneWay&pax=1`,
        'Spirit': `https://www.spirit.com/book/flights?origin=${fromUp}&destination=${toUp}&departureDate=${isoDate}&adults=1&tripType=oneWay`,
        'Frontier': `https://www.flyfrontier.com/booking/flights?origin=${fromUp}&destination=${toUp}&date=${isoDate}&adults=1`,
        'Southwest': `https://www.southwest.com/air/booking/select.html?originationAirportCode=${fromUp}&destinationAirportCode=${toUp}&departureDate=${isoDate}&adultPassengersCount=1&tripType=oneway`,
        'Breeze': `https://www.flybreeze.com/home?from=${fromUp}&to=${toUp}&depart=${isoDate}&pax=1`,
        'Alaska': `https://www.alaskaair.com/shopping/flights?A=${fromUp}&B=${toUp}&DT=${isoDate}&FT=ow&C=1`,
        'Hawaiian': `https://www.hawaiianairlines.com/book/results?origin=${fromUp}&destination=${toUp}&departDate=${isoDate}&adults=1&tripType=ow`,
        'Sun Country': `https://www.suncountry.com/booking/search?from=${fromUp}&to=${toUp}&depart=${isoDate}&passengers=1`,
        'Avelo': `https://www.aveloair.com/book?from=${fromUp}&to=${toUp}&depart=${isoDate}`,
    };
    // Own-property check so names like "constructor" do not resolve to Object.prototype members.
    if (Object.prototype.hasOwnProperty.call(urlMap, airline)) {
        return urlMap[airline];
    }
    // Unknown airline: fall back to a Google Flights search (no literal "undefined" for a missing date).
    const fallbackQuery = `${airline} flights ${fromText} to ${toText} ${dateText}`.trim();
    return `https://www.google.com/travel/flights?q=${encodeURIComponent(fallbackQuery)}`;
}
|
|
5989
6022
|
// Parse route from URL
|
|
5990
6023
|
const u = new URL(url);
|
|
5991
6024
|
const query = (u.searchParams.get('q') || '').replace(/Flights?\s+(from\s+)?/i, '').replace(/\s+one\s+way/i, '').trim();
|
|
@@ -5998,11 +6031,13 @@ async function googleFlightsExtractor(_html, url) {
|
|
|
5998
6031
|
];
|
|
5999
6032
|
for (let idx = 0; idx < unique.length; idx++) {
|
|
6000
6033
|
const f = unique[idx];
|
|
6034
|
+
const bookingUrl = getAirlineBookingUrl(f.airline, f.fromAirport, f.toAirport, f.departDate);
|
|
6001
6035
|
md.push(`## ${idx + 1}. ${f.airline} — ${f.priceStr}`);
|
|
6002
6036
|
md.push(`🕐 Depart **${f.departTime}** → Arrive **${f.arriveTime}**${f.departDate ? ` · ${f.departDate}` : ''}`);
|
|
6003
6037
|
md.push(`🛫 ${f.fromAirport} → ${f.toAirport} · ${f.duration} · ${f.stops}`);
|
|
6004
6038
|
if (f.bags)
|
|
6005
6039
|
md.push(`🧳 ${f.bags}`);
|
|
6040
|
+
md.push(`🔗 [Book on ${f.airline}](${bookingUrl})`);
|
|
6006
6041
|
md.push('');
|
|
6007
6042
|
}
|
|
6008
6043
|
md.push('---');
|
package/dist/core/pipeline.js
CHANGED
|
@@ -146,6 +146,43 @@ export function normalizeOptions(ctx) {
|
|
|
146
146
|
if (autoScrollOpts) {
|
|
147
147
|
ctx.render = true;
|
|
148
148
|
}
|
|
149
|
+
// Auto-detect SPAs that require browser rendering (no --render flag needed)
|
|
150
|
+
if (!ctx.render) {
|
|
151
|
+
const SPA_DOMAINS = new Set([
|
|
152
|
+
'www.google.com', // Google Flights, Maps, Shopping etc.
|
|
153
|
+
'flights.google.com',
|
|
154
|
+
'www.airbnb.com',
|
|
155
|
+
'www.booking.com',
|
|
156
|
+
'www.expedia.com',
|
|
157
|
+
'www.kayak.com',
|
|
158
|
+
'www.skyscanner.com',
|
|
159
|
+
'www.tripadvisor.com',
|
|
160
|
+
'www.indeed.com',
|
|
161
|
+
'www.glassdoor.com',
|
|
162
|
+
'www.zillow.com', // already handled but backup
|
|
163
|
+
'app.webpeel.dev', // our own dashboard is a SPA
|
|
164
|
+
]);
|
|
165
|
+
// More specific: some google.com paths need render, not all
|
|
166
|
+
const SPA_URL_PATTERNS = [
|
|
167
|
+
/google\.com\/travel/,
|
|
168
|
+
/google\.com\/maps/,
|
|
169
|
+
/google\.com\/shopping/,
|
|
170
|
+
];
|
|
171
|
+
try {
|
|
172
|
+
const hostname = new URL(ctx.url).hostname;
|
|
173
|
+
if (SPA_DOMAINS.has(hostname)) {
|
|
174
|
+
ctx.render = true;
|
|
175
|
+
log.debug(`Auto-enabling render: SPA domain detected (${hostname})`);
|
|
176
|
+
}
|
|
177
|
+
else if (SPA_URL_PATTERNS.some(p => p.test(ctx.url))) {
|
|
178
|
+
ctx.render = true;
|
|
179
|
+
log.debug(`Auto-enabling render: SPA URL pattern matched`);
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
catch {
|
|
183
|
+
// Invalid URL — skip SPA detection
|
|
184
|
+
}
|
|
185
|
+
}
|
|
149
186
|
}
|
|
150
187
|
// ---------------------------------------------------------------------------
|
|
151
188
|
// Stage 2: handleYouTube
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "webpeel",
|
|
3
|
-
"version": "0.21.74",
|
|
3
|
+
"version": "0.21.76",
|
|
4
4
|
"description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
|
|
5
5
|
"author": "Jake Liu",
|
|
6
6
|
"license": "AGPL-3.0-only",
|