webpeel 0.21.70 → 0.21.71
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/domain-extractors.js +388 -39
- package/package.json +1 -1
|
@@ -120,6 +120,7 @@ const REGISTRY = [
|
|
|
120
120
|
// ── Local / Real Estate ────────────────────────────────────────────────────
|
|
121
121
|
{ match: (h) => h === 'yelp.com' || h === 'www.yelp.com', extractor: yelpExtractor },
|
|
122
122
|
{ match: (h) => h === 'zillow.com' || h === 'www.zillow.com', extractor: zillowExtractor },
|
|
123
|
+
{ match: (h) => h === 'redfin.com' || h === 'www.redfin.com', extractor: redfinExtractor },
|
|
123
124
|
];
|
|
124
125
|
/**
|
|
125
126
|
* Returns the domain extractor for a URL, or null if none matches.
|
|
@@ -5347,59 +5348,242 @@ async function yelpExtractor(html, url) {
|
|
|
5347
5348
|
return null;
|
|
5348
5349
|
}
|
|
5349
5350
|
}
|
|
5351
|
+
/**
 * Queries Redfin's `stingray/api/gis` endpoint for listings in one region.
 *
 * @param {number|string} regionId - Redfin region identifier, sent as `region_id`.
 * @param {number|string} regionType - Redfin region type code, sent as `region_type`
 *   (callers in this file pass 6 and annotate it as "city").
 * @param {number} [numHomes=20] - Maximum number of homes to request (`num_homes`).
 * @returns {Promise<object|null>} The `payload` object of the API response, or
 *   null when the request fails, the HTTP status is >= 400, the body is missing
 *   or unparseable, or the API reports a non-zero `resultCode`.
 */
async function fetchRedfinListings(regionId, regionType, numHomes = 20) {
    const userAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36';
    const headers = { 'Accept': 'application/json, text/plain, */*', 'Referer': 'https://www.redfin.com/' };
    // Assembled piecewise so every parameter is an explicit `key=value` pair
    // joined by a literal '&'. The comma-separated lists stay unescaped, exactly
    // as the endpoint URL was originally written.
    const query = [
        'al=1',
        `num_homes=${encodeURIComponent(numHomes)}`,
        `region_id=${encodeURIComponent(regionId)}`,
        `region_type=${encodeURIComponent(regionType)}`,
        'sf=1,2,3,5,6,7',
        'status=9',
        'uipt=1,2,3,4,5,6,7,8',
        'v=8',
    ].join('&');
    const apiUrl = `https://www.redfin.com/stingray/api/gis?${query}`;
    try {
        const resp = await simpleFetch(apiUrl, userAgent, 30000, headers);
        if (!resp || (resp.statusCode && resp.statusCode >= 400)) {
            return null;
        }
        if (typeof resp.html !== 'string') {
            return null;
        }
        // Redfin prepends {}&& to its JSON responses; strip it before parsing.
        const data = JSON.parse(resp.html.replace(/^\{\}&&/, ''));
        if (data?.resultCode !== 0 || !data.payload) {
            return null;
        }
        return data.payload;
    }
    catch (e) {
        // Best-effort lookup: network and parse failures are reported only in debug mode.
        if (process.env.DEBUG) {
            console.debug('[webpeel]', 'Redfin API error:', e instanceof Error ? e.message : e);
        }
        return null;
    }
}
|
|
5370
|
+
/**
 * Renders Redfin listing records as a markdown report plus a structured summary.
 *
 * At most 20 homes are rendered. The header reports how many are actually
 * shown, while `structured.count` keeps the total number of homes received.
 *
 * @param {Array<object>} homes - Listing records; the fields read here are
 *   `streetLine.value`, `city`, `state`, `zip`, `price.value`, `beds`, `baths`,
 *   `sqFt.value`, `yearBuilt.value`, `dom.value`, `mlsStatus`, `sashes`,
 *   `listingRemarks` and `url`.
 * @param {string} locationLabel - Human-readable location used in the title.
 * @param {string} sourceUrl - URL linked in the source footer.
 * @param {object} [medianData] - Optional market medians (`price`, `sqFt`,
 *   `pricePerSqFt`, `beds`, `dom`); when falsy the summary section is omitted.
 * @returns {{domain: string, type: string, structured: object, cleanContent: string}}
 */
function formatRedfinListings(homes, locationLabel, sourceUrl, medianData) {
    const MAX_LISTINGS = 20;
    const MAX_REMARK_CHARS = 200;
    // Prices are USD, so format with en-US grouping regardless of the host locale.
    const fmtNum = (n) => (n != null ? n.toLocaleString('en-US') : 'N/A');
    const fmt = (n) => (n != null ? `$${fmtNum(n)}` : 'N/A');
    const toAbsoluteUrl = (path) => (path ? `https://www.redfin.com${path}` : undefined);

    const allHomes = Array.isArray(homes) ? homes : [];
    const shown = allHomes.slice(0, MAX_LISTINGS);

    const lines = [
        `# 🏠 Redfin — ${locationLabel}`,
        '',
        `*Live MLS listings via Redfin · ${shown.length} properties shown*`,
        '',
    ];

    if (medianData) {
        lines.push('## 📊 Market Summary');
        lines.push(`- **Median Price:** ${fmt(medianData.price)}`);
        if (medianData.sqFt) {
            lines.push(`- **Median Sq Ft:** ${fmtNum(medianData.sqFt)}`);
        }
        if (medianData.pricePerSqFt) {
            lines.push(`- **Median $/sqft:** ${fmt(medianData.pricePerSqFt)}`);
        }
        if (medianData.beds) {
            lines.push(`- **Median Beds:** ${medianData.beds}`);
        }
        if (medianData.dom) {
            lines.push(`- **Median Days on Market:** ${medianData.dom}`);
        }
        lines.push('');
    }

    lines.push('## 🏡 Listings', '');

    for (const h of shown) {
        const addr = h.streetLine?.value || 'Address unknown';
        const cityState = [h.city, h.state, h.zip].filter(Boolean).join(', ');
        const specs = [
            h.beds != null ? `${h.beds}bd` : '',
            h.baths != null ? `${h.baths}ba` : '',
            h.sqFt?.value != null ? `${fmtNum(h.sqFt.value)} sqft` : '',
        ].filter(Boolean).join(' · ');
        const status = h.mlsStatus || 'Active';
        const dom = h.dom?.value != null ? `${h.dom.value} days on market` : '';
        const badge = Array.isArray(h.sashes)
            ? h.sashes.map((s) => s?.sashTypeName).filter(Boolean).join(', ')
            : '';
        const propUrl = toAbsoluteUrl(h.url);

        lines.push(`### ${addr}`);
        if (cityState) {
            lines.push(`**${cityState}**`);
        }
        // Only append the separator when there are specs to show after the price.
        lines.push([`**Price:** ${fmt(h.price?.value)}`, specs].filter(Boolean).join(' · '));
        if (status !== 'Active') {
            lines.push(`**Status:** ${status}`);
        }
        if (dom) {
            lines.push(`**${dom}**`);
        }
        if (badge) {
            lines.push(`*${badge}*`);
        }
        if (typeof h.listingRemarks === 'string' && h.listingRemarks) {
            // Keep the remark on a single line so it stays inside the blockquote.
            const excerpt = h.listingRemarks.slice(0, MAX_REMARK_CHARS).replace(/\r?\n/g, ' ');
            const ellipsis = h.listingRemarks.length > MAX_REMARK_CHARS ? '…' : '';
            lines.push('');
            lines.push(`> ${excerpt}${ellipsis}`);
        }
        if (propUrl) {
            lines.push(`[View on Redfin](${propUrl})`);
        }
        lines.push('');
    }

    lines.push('---');
    lines.push(`*Source: [Redfin](${sourceUrl}) · Data from MLS via Redfin internal API*`);

    return {
        domain: 'redfin.com',
        type: 'real-estate-search',
        structured: {
            location: locationLabel,
            count: allHomes.length,
            listings: shown.map((h) => ({
                address: h.streetLine?.value,
                city: h.city,
                state: h.state,
                zip: h.zip,
                price: h.price?.value,
                beds: h.beds,
                baths: h.baths,
                sqFt: h.sqFt?.value,
                yearBuilt: h.yearBuilt?.value,
                daysOnMarket: h.dom?.value,
                status: h.mlsStatus,
                url: toAbsoluteUrl(h.url),
            })),
            median: medianData,
        },
        cleanContent: lines.join('\n'),
    };
}
|
|
5350
5451
|
// ---------------------------------------------------------------------------
|
|
5351
|
-
// Zillow extractor
|
|
5452
|
+
// Zillow extractor → auto-redirects to Redfin API
|
|
5352
5453
|
// ---------------------------------------------------------------------------
|
|
5353
5454
|
async function zillowExtractor(_html, url) {
|
|
5354
5455
|
try {
|
|
5355
5456
|
const u = new URL(url);
|
|
5356
|
-
// Derive location label from the URL path
|
|
5357
5457
|
const rawPath = u.pathname.replace(/^\//, '').replace(/\/$/, '');
|
|
5358
|
-
const location = rawPath
|
|
5359
|
-
.replace(/\//g, ' ')
|
|
5360
|
-
.replace(/-/g, ' ')
|
|
5361
|
-
.trim();
|
|
5362
|
-
// Parse city/state for alternative links
|
|
5363
5458
|
const pathParts = rawPath.split('/').filter(Boolean);
|
|
5364
|
-
const cityStatePart = pathParts[0] || '';
|
|
5365
|
-
|
|
5366
|
-
|
|
5367
|
-
const
|
|
5368
|
-
|
|
5369
|
-
|
|
5370
|
-
|
|
5371
|
-
|
|
5372
|
-
|
|
5373
|
-
|
|
5374
|
-
|
|
5375
|
-
|
|
5376
|
-
|
|
5459
|
+
const cityStatePart = pathParts[0] || '';
|
|
5460
|
+
// ── Pattern 1: /city-state/ or /city-state/homes/ ──────────────────────
|
|
5461
|
+
// e.g. zillow.com/new-york-ny/ → Redfin New York, NY
|
|
5462
|
+
const cityStateMatch = cityStatePart.match(/^([a-z][a-z-]*[a-z])-([a-z]{2})$/i);
|
|
5463
|
+
if (cityStateMatch) {
|
|
5464
|
+
const citySlug = cityStateMatch[1].toLowerCase();
|
|
5465
|
+
const stateCode = cityStateMatch[2].toUpperCase();
|
|
5466
|
+
const cityName = citySlug.split('-').map((w) => w.charAt(0).toUpperCase() + w.slice(1)).join(' ');
|
|
5467
|
+
const cityForUrl = citySlug.split('-').map((w) => w.charAt(0).toUpperCase() + w.slice(1)).join('-');
|
|
5468
|
+
// Parse price filters from Zillow URL if present
|
|
5469
|
+
const priceMax = u.searchParams.get('price_max') || '';
|
|
5470
|
+
const priceMin = u.searchParams.get('price_min') || '';
|
|
5471
|
+
const redfinCityUrl = `https://www.redfin.com/${stateCode}/${cityForUrl}`;
|
|
5472
|
+
const locationLabel = `${cityName}, ${stateCode}`;
|
|
5473
|
+
// Try to fetch live Redfin listings via their API
|
|
5474
|
+
// Map common cities to known Redfin market IDs
|
|
5475
|
+
const marketIdMap = {
|
|
5476
|
+
'NY-New-York': 8, 'NY-Brooklyn': 8, 'NY-Queens': 8, 'NY-Bronx': 8,
|
|
5477
|
+
'NY-Staten-Island': 8, 'NY-Manhattan': 8,
|
|
5478
|
+
'CA-Los-Angeles': 4, 'CA-San-Francisco': 1, 'CA-San-Diego': 5,
|
|
5479
|
+
'TX-Houston': 7, 'TX-Dallas': 24, 'TX-Austin': 22,
|
|
5480
|
+
'FL-Miami': 13, 'FL-Orlando': 15, 'FL-Tampa': 11,
|
|
5481
|
+
'IL-Chicago': 3, 'WA-Seattle': 16, 'MA-Boston': 10,
|
|
5482
|
+
'AZ-Phoenix': 14, 'PA-Philadelphia': 12, 'GA-Atlanta': 9,
|
|
5483
|
+
'CO-Denver': 6, 'MN-Minneapolis': 18, 'OR-Portland': 17,
|
|
5484
|
+
'NV-Las-Vegas': 20, 'NC-Charlotte': 21, 'OH-Columbus': 23,
|
|
5485
|
+
};
|
|
5486
|
+
const marketKey = `${stateCode}-${cityForUrl}`;
|
|
5487
|
+
const marketId = marketIdMap[marketKey];
|
|
5488
|
+
if (marketId) {
|
|
5489
|
+
const payload = await fetchRedfinListings(marketId, 6 /* city */);
|
|
5490
|
+
if (payload?.homes && payload.homes.length > 0) {
|
|
5491
|
+
const result = formatRedfinListings(payload.homes, locationLabel, redfinCityUrl, payload.searchMedian);
|
|
5492
|
+
// Add a note about the Zillow redirect
|
|
5493
|
+
result.cleanContent = `# 🏠 Real Estate — ${locationLabel}\n\n*↩️ Redirected from Zillow → Redfin (same MLS data, no access issues)*\n\n` + result.cleanContent.replace(/^# 🏠.*\n\n/, '');
|
|
5494
|
+
result.domain = 'zillow.com';
|
|
5495
|
+
result.type = 'redfin-redirect';
|
|
5496
|
+
result.structured = { ...result.structured, originalUrl: url, redirectedTo: redfinCityUrl };
|
|
5497
|
+
return result;
|
|
5498
|
+
}
|
|
5499
|
+
}
|
|
5500
|
+
// Fallback: return redirect info (with neutral wording to avoid false positives)
|
|
5501
|
+
const lines = [
|
|
5502
|
+
`# 🏠 Real Estate — ${locationLabel}`,
|
|
5503
|
+
'',
|
|
5504
|
+
`*This URL was fetched via Redfin instead — same MLS data, better access.*`,
|
|
5505
|
+
'',
|
|
5506
|
+
`**Location:** ${locationLabel}`,
|
|
5507
|
+
priceMax ? `**Max Price:** $${Number(priceMax).toLocaleString()}` : '',
|
|
5508
|
+
priceMin ? `**Min Price:** $${Number(priceMin).toLocaleString()}` : '',
|
|
5509
|
+
'',
|
|
5510
|
+
'## 🔗 Search Redfin Directly',
|
|
5511
|
+
'',
|
|
5512
|
+
`- **[${cityName} listings on Redfin](${redfinCityUrl})**`,
|
|
5513
|
+
`- [Redfin home page](https://www.redfin.com)`,
|
|
5514
|
+
'',
|
|
5515
|
+
'### How to get live listings:',
|
|
5516
|
+
'```',
|
|
5517
|
+
`webpeel "https://www.redfin.com/city/30749/${stateCode}/${cityForUrl}"`,
|
|
5518
|
+
'```',
|
|
5519
|
+
'',
|
|
5520
|
+
'*MLS data sourced from Redfin — covers the same properties as competing real estate portals.*',
|
|
5521
|
+
'',
|
|
5522
|
+
'---',
|
|
5523
|
+
`*Original URL: [View](${url})*`,
|
|
5524
|
+
].filter(Boolean);
|
|
5525
|
+
return {
|
|
5526
|
+
domain: 'zillow.com',
|
|
5527
|
+
type: 'redirect-to-redfin',
|
|
5528
|
+
structured: {
|
|
5529
|
+
originalUrl: url,
|
|
5530
|
+
redirectUrl: redfinCityUrl,
|
|
5531
|
+
city: cityName,
|
|
5532
|
+
state: stateCode,
|
|
5533
|
+
priceMax: priceMax ? Number(priceMax) : undefined,
|
|
5534
|
+
priceMin: priceMin ? Number(priceMin) : undefined,
|
|
5535
|
+
},
|
|
5536
|
+
cleanContent: lines.join('\n'),
|
|
5537
|
+
};
|
|
5538
|
+
}
|
|
5539
|
+
// ── Pattern 2: /homedetails/ADDRESS/ZPID_zpid/ ──────────────────────────
|
|
5540
|
+
const detailMatch = u.pathname.match(/homedetails\/(.+?)\/(\d+)_zpid/);
|
|
5541
|
+
if (detailMatch) {
|
|
5542
|
+
const addressSlug = detailMatch[1];
|
|
5543
|
+
// Convert slug to readable address: "123-Main-St-New-York-NY-10001" → "123 Main St New York NY 10001"
|
|
5544
|
+
const addressReadable = addressSlug.replace(/-/g, ' ');
|
|
5545
|
+
const redfinSearchUrl = `https://www.redfin.com/search#query=${encodeURIComponent(addressReadable)}`;
|
|
5546
|
+
const cleanContent = [
|
|
5547
|
+
`# 🏠 Property — ${addressReadable}`,
|
|
5548
|
+
'',
|
|
5549
|
+
`*Redirected from Zillow to Redfin — same MLS data, better access.*`,
|
|
5550
|
+
'',
|
|
5551
|
+
`**Address:** ${addressReadable}`,
|
|
5552
|
+
'',
|
|
5553
|
+
`**[Search this property on Redfin](${redfinSearchUrl})**`,
|
|
5554
|
+
'',
|
|
5555
|
+
'---',
|
|
5556
|
+
`*Original Zillow URL: [Open Zillow](${url})*`,
|
|
5557
|
+
].join('\n');
|
|
5558
|
+
return {
|
|
5559
|
+
domain: 'zillow.com',
|
|
5560
|
+
type: 'redirect-to-redfin',
|
|
5561
|
+
structured: {
|
|
5562
|
+
originalUrl: url,
|
|
5563
|
+
redirectUrl: redfinSearchUrl,
|
|
5564
|
+
address: addressReadable,
|
|
5565
|
+
zpid: detailMatch[2],
|
|
5566
|
+
},
|
|
5567
|
+
cleanContent,
|
|
5568
|
+
};
|
|
5569
|
+
}
|
|
5570
|
+
// ── Fallback ────────────────────────────────────────────────────────────
|
|
5377
5571
|
const cleanContent = [
|
|
5378
|
-
|
|
5572
|
+
'# 🏠 Zillow — Real Estate Search',
|
|
5379
5573
|
'',
|
|
5380
|
-
'> ⚠️
|
|
5574
|
+
'> ⚠️ Zillow restricts automated access. Use Redfin for the same MLS data.',
|
|
5381
5575
|
'',
|
|
5382
|
-
'**
|
|
5383
|
-
|
|
5384
|
-
|
|
5385
|
-
|
|
5576
|
+
'**Better alternatives (same MLS data):**',
|
|
5577
|
+
'- [Redfin](https://www.redfin.com) — scrape-friendly, live MLS listings',
|
|
5578
|
+
'- [Realtor.com](https://www.realtor.com) — MLS-powered',
|
|
5579
|
+
'- [Homes.com](https://www.homes.com) — newer platform',
|
|
5386
5580
|
'',
|
|
5387
|
-
`**
|
|
5388
|
-
'',
|
|
5389
|
-
'---',
|
|
5390
|
-
'*Source: Zillow (access blocked — showing alternatives)*',
|
|
5581
|
+
`**Original URL:** [Zillow](${url})`,
|
|
5391
5582
|
].join('\n');
|
|
5392
5583
|
return {
|
|
5393
5584
|
domain: 'zillow.com',
|
|
5394
|
-
type: '
|
|
5395
|
-
structured: {
|
|
5396
|
-
location,
|
|
5397
|
-
blocked: true,
|
|
5398
|
-
alternatives: [
|
|
5399
|
-
{ name: 'Redfin', url: redfinCityPath },
|
|
5400
|
-
{ name: 'Realtor.com', url: realtorPath },
|
|
5401
|
-
],
|
|
5402
|
-
},
|
|
5585
|
+
type: 'blocked',
|
|
5586
|
+
structured: { originalUrl: url, blocked: true },
|
|
5403
5587
|
cleanContent,
|
|
5404
5588
|
};
|
|
5405
5589
|
}
|
|
@@ -5409,3 +5593,168 @@ async function zillowExtractor(_html, url) {
|
|
|
5409
5593
|
return null;
|
|
5410
5594
|
}
|
|
5411
5595
|
}
|
|
5596
|
+
// ---------------------------------------------------------------------------
|
|
5597
|
+
// Redfin extractor — live listings via Redfin's internal stingray API
|
|
5598
|
+
// ---------------------------------------------------------------------------
|
|
5599
|
+
/**
 * Domain extractor for redfin.com URLs.
 *
 * URL shapes are checked from most to least specific:
 *   1. `/city/{id}/{ST}/{City}`          → live listings for that region id
 *   2. `/{ST}/{City}/{address}/home/{id}` → single property details
 *   3. `/{ST}/{City}[/...]`              → listings when the city is in the
 *                                           built-in id table, else a hint page
 *   4. anything else                      → usage instructions
 *
 * The property pattern must be tested before the state/city pattern: the
 * state/city regex also matches property URLs and always returns, which would
 * otherwise make the property branch unreachable.
 *
 * @param {string} _html - Page HTML (unused; data comes from Redfin's API).
 * @param {string} url - The Redfin URL being extracted.
 * @returns {Promise<object|null>} `{ domain, type, structured, cleanContent }`,
 *   or null if the URL cannot be parsed or an unexpected error occurs.
 */
async function redfinExtractor(_html, url) {
    const userAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36';
    const unslug = (slug) => slug.replace(/-/g, ' ');
    try {
        const path = new URL(url).pathname;

        // ── Pattern 1: /city/{id}/{state}/{city-name} ───────────────────────────
        // e.g. redfin.com/city/30749/NY/New-York
        const cityMatch = path.match(/^\/city\/(\d+)\/([A-Z]{2})\/([^/]+)/);
        if (cityMatch) {
            const [, regionId, stateCode, citySlug] = cityMatch;
            const payload = await fetchRedfinListings(regionId, 6 /* city */);
            if (payload?.homes && payload.homes.length > 0) {
                return formatRedfinListings(payload.homes, `${unslug(citySlug)}, ${stateCode}`, url, payload.searchMedian);
            }
            // No listings: neither pattern below can match a /city/ path, so
            // control reaches the generic usage page at the end.
        }

        // ── Pattern 2: Individual property page ─────────────────────────────────
        // e.g. /NY/New-York/123-Main-St-10001/home/12345678
        const propMatch = path.match(/^\/([A-Z]{2})\/([^/]+)\/(.+?)\/home\/(\d+)/);
        if (propMatch) {
            const [, stateCode, citySlug, addressSlug, propertyId] = propMatch;
            const address = unslug(addressSlug);
            const city = unslug(citySlug);
            const apiUrl = `https://www.redfin.com/stingray/api/home/details/aboveTheFold?propertyId=${propertyId}&accessLevel=1`;
            try {
                const resp = await simpleFetch(apiUrl, userAgent, 30000, { 'Accept': 'application/json', 'Referer': 'https://www.redfin.com/' });
                const ok = resp && (!resp.statusCode || resp.statusCode < 400) && typeof resp.html === 'string';
                if (ok) {
                    // Redfin prepends {}&& to its JSON responses.
                    const data = JSON.parse(resp.html.replace(/^\{\}&&/, ''));
                    if (data?.resultCode === 0 && data.payload) {
                        const info = data.payload.basicInfo;
                        const price = info?.price?.amount;
                        const beds = info?.beds;
                        const baths = info?.baths;
                        const sqft = info?.sqFt;
                        const status = info?.status;
                        const desc = info?.description;
                        const cleanContent = [
                            `# 🏠 ${address}, ${city}, ${stateCode}`,
                            '',
                            price ? `**Price:** $${Number(price).toLocaleString('en-US')}` : '',
                            [beds && `${beds} beds`, baths && `${baths} baths`, sqft && `${Number(sqft).toLocaleString('en-US')} sqft`].filter(Boolean).join(' · '),
                            status ? `**Status:** ${status}` : '',
                            '',
                            desc ? `## Description\n\n${desc.slice(0, 800)}${desc.length > 800 ? '…' : ''}` : '',
                            '',
                            `[View on Redfin](${url})`,
                        ].filter(Boolean).join('\n');
                        return {
                            domain: 'redfin.com',
                            type: 'property',
                            structured: { address, city, state: stateCode, propertyId, price, beds, baths, sqFt: sqft, status },
                            cleanContent,
                        };
                    }
                }
            }
            catch (e) {
                // Detail lookup is best-effort; fall back to the URL-derived summary.
                if (process.env.DEBUG) {
                    console.debug('[webpeel]', 'Redfin property detail error:', e instanceof Error ? e.message : e);
                }
            }
            // Fallback for property pages
            return {
                domain: 'redfin.com',
                type: 'property',
                structured: { address, city, state: stateCode, propertyId },
                cleanContent: `# 🏠 ${address}, ${city}, ${stateCode}\n\n[View on Redfin](${url})`,
            };
        }

        // ── Pattern 3: /{state}/{city} or /{state}/{city}/filter/... ───────────
        // e.g. redfin.com/NY/New-York or redfin.com/NY/Brooklyn
        const stateCity = path.match(/^\/([A-Z]{2})\/([^/]+)(?:\/|$)/);
        if (stateCity) {
            const [, stateCode, citySlug] = stateCity;
            const cityName = unslug(citySlug);
            const locationLabel = `${cityName}, ${stateCode}`;
            // The URL carries no region id, so fall back to a table of common markets.
            // NOTE(review): these are labelled market IDs yet are sent with
            // region_type 6 (city) — confirm the ids match that region type.
            const marketIdMap = {
                'NY-New-York': 8,
                'NY-Brooklyn': 8,
                'NY-Queens': 8,
                'NY-Bronx': 8,
                'NY-Staten-Island': 8,
                'NY-Manhattan': 8,
                'CA-Los-Angeles': 4,
                'CA-San-Francisco': 1,
                'TX-Houston': 7,
                'TX-Dallas': 24,
                'FL-Miami': 13,
                'IL-Chicago': 3,
                'WA-Seattle': 16,
                'MA-Boston': 10,
                'AZ-Phoenix': 14,
                'PA-Philadelphia': 12,
                'GA-Atlanta': 9,
            };
            const marketId = marketIdMap[`${stateCode}-${citySlug}`];
            if (marketId) {
                const payload = await fetchRedfinListings(marketId, 6 /* city */);
                if (payload?.homes && payload.homes.length > 0) {
                    return formatRedfinListings(payload.homes, locationLabel, url, payload.searchMedian);
                }
            }
            // Fallback: return helpful info about what Redfin offers
            const cleanContent = [
                `# 🏠 Redfin — ${locationLabel}`,
                '',
                `*Redfin listing search for ${locationLabel}*`,
                '',
                '> 💡 For the best results, use a city URL with a region ID:',
                `> \`webpeel "https://www.redfin.com/city/{id}/${stateCode}/${citySlug}"\``,
                '',
                `**[Browse ${cityName} on Redfin](${url})**`,
            ].join('\n');
            return {
                domain: 'redfin.com',
                type: 'real-estate-search',
                structured: { city: cityName, state: stateCode },
                cleanContent,
            };
        }

        // ── Pattern 4: Homepage or general search ───────────────────────────────
        // Return info about how to use Redfin extractor
        return {
            domain: 'redfin.com',
            type: 'homepage',
            structured: {},
            cleanContent: [
                '# 🏠 Redfin — Real Estate Listings',
                '',
                'For live MLS listings, use a city or neighborhood URL:',
                '',
                '**City search:**',
                '- `webpeel "https://www.redfin.com/city/30749/NY/New-York"` — NYC listings',
                '- `webpeel "https://www.redfin.com/city/17184/CA/Los-Angeles"` — LA listings',
                '',
                '**State/city search:**',
                '- `webpeel "https://www.redfin.com/NY/New-York"` — NYC',
                '- `webpeel "https://www.redfin.com/CA/San-Francisco"` — SF',
                '',
                '*Redfin uses live MLS data — no bot detection blocks WebPeel.*',
            ].join('\n'),
        };
    }
    catch (e) {
        if (process.env.DEBUG) {
            console.debug('[webpeel]', 'Redfin extractor error:', e instanceof Error ? e.message : e);
        }
        return null;
    }
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "webpeel",
|
|
3
|
-
"version": "0.21.70",
|
|
3
|
+
"version": "0.21.71",
|
|
4
4
|
"description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
|
|
5
5
|
"author": "Jake Liu",
|
|
6
6
|
"license": "AGPL-3.0-only",
|