webpeel 0.21.74 → 0.21.75
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -588,6 +588,49 @@ export function registerSearchCommands(program) {
|
|
|
588
588
|
process.exit(1);
|
|
589
589
|
}
|
|
590
590
|
});
|
|
591
|
+
// ── flights command ───────────────────────────────────────────────────────
// `flights <query>`: turns the free-text query into a Google Flights search
// URL, fetches it through peel() with `render: true`, and prints either the
// page content or a JSON envelope. The handler always ends the process:
// exit code 0 on success, 1 on failure.
program
    .command('flights <query>')
    .description('Search for flights (via Google Flights) — e.g. "NYC to Fort Myers Apr 4"')
    .option('--one-way', 'One-way flight (default)')
    .option('--round-trip', 'Round-trip flight')
    .option('-n, --count <n>', 'Max flights to show', '10')
    .option('--json', 'Output as JSON')
    .option('-s, --silent', 'Silent mode')
    .action(async (query, options) => {
        // One-way is the default: the " one way" hint is appended unless
        // --round-trip was passed (so --one-way itself never needs reading).
        const tripType = options.roundTrip ? '' : ' one way';
        const encoded = encodeURIComponent(`Flights from ${query}${tripType}`);
        const url = `https://www.google.com/travel/flights?q=${encoded}`;
        // --count is declared with the string default '10'; parse it and fall
        // back to that default when the value is not a positive integer.
        const parsedCount = Number.parseInt(options.count, 10);
        const maxFlights = Number.isInteger(parsedCount) && parsedCount > 0 ? parsedCount : 10;
        const spinner = options.silent ? null : ora(`Searching flights: ${query}...`).start();
        try {
            // render is forced automatically by SPA auto-detect, but be explicit here
            const result = await peel(url, { render: true, timeout: 30000 });
            if (spinner)
                spinner.succeed('Flights loaded');
            if (options.json) {
                // Honour --count on the structured list. Only slice real
                // arrays so an unexpected shape is passed through unchanged.
                const extracted = result.domainData?.structured?.flights;
                const flights = Array.isArray(extracted)
                    ? extracted.slice(0, maxFlights)
                    : (extracted || []);
                console.log(JSON.stringify({
                    query,
                    url,
                    flights,
                    source: 'Google Flights',
                    content: result.content,
                    tokens: result.tokens,
                }, null, 2));
            }
            else {
                // Plain output prints the content as returned by peel();
                // --count is not applied to this text.
                console.log(result.content);
            }
            await cleanup();
            process.exit(0);
        }
        catch (error) {
            if (spinner)
                spinner.fail('Flight search failed');
            console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
            await cleanup();
            process.exit(1);
        }
    });
|
|
591
634
|
// ── extractors command ────────────────────────────────────────────────────
|
|
592
635
|
program
|
|
593
636
|
.command('extractors')
|
|
@@ -5986,6 +5986,26 @@ async function googleFlightsExtractor(_html, url) {
|
|
|
5986
5986
|
if (unique.length === 0)
|
|
5987
5987
|
return null;
|
|
5988
5988
|
unique.sort((a, b) => a.price - b.price);
|
|
5989
|
+
// Helper: get airline booking URL
/**
 * Return a booking URL for a flight.
 *
 * Known airlines map to a fixed booking page; United additionally gets a
 * route-specific page built from the two airport values. Any other airline
 * falls back to a Google search for "<airline> flights <from> to <to>".
 *
 * @param {string} airline - Airline name; matched exactly (case-sensitive).
 * @param {string} [from] - Origin airport value; missing values are treated as ''.
 * @param {string} [to] - Destination airport value; missing values are treated as ''.
 * @returns {string} Booking URL, or a Google search URL for unknown airlines.
 */
function getAirlineBookingUrl(airline, from, to) {
    // Missing airports must not throw: most airlines never use the route.
    const origin = String(from ?? '');
    const destination = String(to ?? '');
    // United is the only airline with a per-route page; it needs both ends.
    if (airline === 'United' && origin && destination) {
        const fromSegment = encodeURIComponent(origin.toLowerCase());
        const toSegment = encodeURIComponent(destination.toLowerCase());
        return `https://www.united.com/en-us/flights-from-${fromSegment}-to-${toSegment}`;
    }
    // A Map avoids prototype-chain hits: on a plain object an airline named
    // "constructor" or "toString" would resolve to an inherited function.
    const staticUrls = new Map([
        ['Delta', 'https://www.delta.com/flight-search/search'],
        ['JetBlue', 'https://www.jetblue.com/booking/flights'],
        ['American', 'https://www.aa.com/booking/find-flights'],
        ['Spirit', 'https://www.spirit.com/book/flights'],
        ['Frontier', 'https://www.flyfrontier.com/booking/'],
        ['Southwest', 'https://www.southwest.com/air/booking/'],
        ['Breeze', 'https://www.flybreeze.com/home'],
        ['Alaska', 'https://www.alaskaair.com/booking/flights'],
        ['Hawaiian', 'https://www.hawaiianairlines.com/book-a-trip'],
        ['Sun Country', 'https://www.suncountry.com/booking/search'],
        ['Avelo', 'https://www.aveloair.com/book'],
    ]);
    const knownUrl = staticUrls.get(airline);
    if (knownUrl) {
        return knownUrl;
    }
    return `https://www.google.com/search?q=${encodeURIComponent(`${airline} flights ${origin} to ${destination}`)}`;
}
|
|
5989
6009
|
// Parse route from URL
|
|
5990
6010
|
const u = new URL(url);
|
|
5991
6011
|
const query = (u.searchParams.get('q') || '').replace(/Flights?\s+(from\s+)?/i, '').replace(/\s+one\s+way/i, '').trim();
|
|
@@ -5998,11 +6018,13 @@ async function googleFlightsExtractor(_html, url) {
|
|
|
5998
6018
|
];
|
|
5999
6019
|
for (let idx = 0; idx < unique.length; idx++) {
|
|
6000
6020
|
const f = unique[idx];
|
|
6021
|
+
const bookingUrl = getAirlineBookingUrl(f.airline, f.fromAirport, f.toAirport);
|
|
6001
6022
|
md.push(`## ${idx + 1}. ${f.airline} — ${f.priceStr}`);
|
|
6002
6023
|
md.push(`🕐 Depart **${f.departTime}** → Arrive **${f.arriveTime}**${f.departDate ? ` · ${f.departDate}` : ''}`);
|
|
6003
6024
|
md.push(`🛫 ${f.fromAirport} → ${f.toAirport} · ${f.duration} · ${f.stops}`);
|
|
6004
6025
|
if (f.bags)
|
|
6005
6026
|
md.push(`🧳 ${f.bags}`);
|
|
6027
|
+
md.push(`🔗 [Book on ${f.airline}](${bookingUrl})`);
|
|
6006
6028
|
md.push('');
|
|
6007
6029
|
}
|
|
6008
6030
|
md.push('---');
|
package/dist/core/pipeline.js
CHANGED
|
@@ -146,6 +146,43 @@ export function normalizeOptions(ctx) {
|
|
|
146
146
|
if (autoScrollOpts) {
|
|
147
147
|
ctx.render = true;
|
|
148
148
|
}
|
|
149
|
+
// Auto-detect SPAs that require browser rendering (no --render flag needed).
// Runs only when rendering has not already been enabled; sets ctx.render to
// true when the URL's hostname is a listed SPA host or the URL matches one of
// the Google path patterns. Unparseable URLs are left untouched.
if (!ctx.render) {
    // Exact hostnames for which rendering is always switched on.
    // NOTE(review): 'www.google.com' enables rendering for every
    // www.google.com URL, so the path patterns below only take effect for
    // other Google hosts — confirm that is intended.
    const SPA_DOMAINS = new Set([
        'www.google.com', // Google Flights, Maps, Shopping etc.
        'flights.google.com',
        'www.airbnb.com',
        'www.booking.com',
        'www.expedia.com',
        'www.kayak.com',
        'www.skyscanner.com',
        'www.tripadvisor.com',
        'www.indeed.com',
        'www.glassdoor.com',
        'www.zillow.com', // already handled but backup
        'app.webpeel.dev', // our own dashboard is a SPA
    ]);
    // More specific: some google.com paths need render, not all.
    // The patterns are anchored and tested against "<hostname><pathname>" so
    // they match only google.com (or a subdomain of it) followed by the path;
    // look-alike hosts such as "notgoogle.com" and occurrences inside a query
    // string no longer trigger rendering.
    const SPA_URL_PATTERNS = [
        /^([^/]+\.)?google\.com\/travel/,
        /^([^/]+\.)?google\.com\/maps/,
        /^([^/]+\.)?google\.com\/shopping/,
    ];
    try {
        const { hostname, pathname } = new URL(ctx.url);
        const hostAndPath = `${hostname}${pathname}`;
        if (SPA_DOMAINS.has(hostname)) {
            ctx.render = true;
            log.debug(`Auto-enabling render: SPA domain detected (${hostname})`);
        }
        else if (SPA_URL_PATTERNS.some((pattern) => pattern.test(hostAndPath))) {
            ctx.render = true;
            log.debug(`Auto-enabling render: SPA URL pattern matched`);
        }
    }
    catch {
        // Invalid URL — skip SPA detection (deliberate best-effort)
    }
}
|
|
149
186
|
}
|
|
150
187
|
// ---------------------------------------------------------------------------
|
|
151
188
|
// Stage 2: handleYouTube
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "webpeel",
|
|
3
|
-
"version": "0.21.74",
|
|
3
|
+
"version": "0.21.75",
|
|
4
4
|
"description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
|
|
5
5
|
"author": "Jake Liu",
|
|
6
6
|
"license": "AGPL-3.0-only",
|