@rent-scraper/scrape-listings 1.0.23 → 1.0.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1 +1 @@
|
|
|
1
|
-
import{parseError as t}from"@rent-scraper/utils";import{r as i}from"../shared/scrape-listings.
|
|
1
|
+
import{parseError as t}from"@rent-scraper/utils";import{r as i}from"../shared/scrape-listings.CuQhPGu5.mjs";import{log as m}from"@clack/prompts";import"minimist";import"dayjs";import"path";import"axios";import"fs/promises";import"@rent-scraper/api";import"@rent-scraper/api/config";import"node:timers/promises";import"@rent-scraper/utils/config";import"picocolors";i().then(()=>{process.exit(0)}).catch(r=>{const{message:o}=t(r);m.error(o),process.exit(1)});
|
package/dist/index.d.mts
CHANGED
|
@@ -9,11 +9,12 @@ declare const fetchListingHtmlByUrlAndExport: (source: ListingsSource, url: stri
|
|
|
9
9
|
interface ScrapeListingHtmlOptions extends ZillowListingHtmlOptions {
|
|
10
10
|
run?: number;
|
|
11
11
|
reruns?: number;
|
|
12
|
+
skipBotCheck?: boolean;
|
|
12
13
|
}
|
|
13
14
|
declare const scrapeListingHtmlByZipCodes: (source: ListingsSource, zipCodes: number[], inputDirectory: string, outputDirectory?: string, options?: ScrapeListingHtmlOptions) => Promise<void>;
|
|
14
15
|
declare const scrapeListingHtmlByZipCodesAndListingDetails: (source: ListingsSource, zipCodes: number[], inputDirectory: string, options: ScrapeListingHtmlOptions) => Promise<void>;
|
|
15
|
-
declare const scrapeListingHtmlByInputDirectory: (source: ListingsSource, inputDirectory: string, outputDirectory?: string) => Promise<void>;
|
|
16
|
-
declare const scrapeListingHtmlByIds: (source: ListingsSource, ids: string[], outputDirectory: string) => Promise<void>;
|
|
16
|
+
declare const scrapeListingHtmlByInputDirectory: (source: ListingsSource, inputDirectory: string, outputDirectory?: string, options?: Pick<ScrapeListingHtmlOptions, "skipBotCheck">) => Promise<void>;
|
|
17
|
+
declare const scrapeListingHtmlByIds: (source: ListingsSource, ids: string[], outputDirectory: string, options?: Pick<ScrapeListingHtmlOptions, "skipBotCheck">) => Promise<void>;
|
|
17
18
|
|
|
18
19
|
declare function runScrapeListings(): Promise<void>;
|
|
19
20
|
|
package/dist/index.mjs
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export{f as fetchListingHtmlByUrlAndExport,r as runScrapeListings,s as scrapeListingDetailsFromHtmlByFilePaths,a as scrapeListingDetailsFromHtmlByZipCodes,e as scrapeListingHtmlByIds,d as scrapeListingHtmlByInputDirectory,b as scrapeListingHtmlByZipCodes,c as scrapeListingHtmlByZipCodesAndListingDetails,g as scrapeRedfinListingResultsByZipCodes,h as scrapeZillowListingResultsByZipCodes}from"./shared/scrape-listings.
|
|
1
|
+
export{f as fetchListingHtmlByUrlAndExport,r as runScrapeListings,s as scrapeListingDetailsFromHtmlByFilePaths,a as scrapeListingDetailsFromHtmlByZipCodes,e as scrapeListingHtmlByIds,d as scrapeListingHtmlByInputDirectory,b as scrapeListingHtmlByZipCodes,c as scrapeListingHtmlByZipCodesAndListingDetails,g as scrapeRedfinListingResultsByZipCodes,h as scrapeZillowListingResultsByZipCodes}from"./shared/scrape-listings.CuQhPGu5.mjs";import"minimist";import"dayjs";import"path";import"axios";import"fs/promises";import"@rent-scraper/api";import"@rent-scraper/utils";import"@rent-scraper/api/config";import"@clack/prompts";import"node:timers/promises";import"@rent-scraper/utils/config";import"picocolors";
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import ae from"minimist";import I from"dayjs";import n from"path";import A from"axios";import{mkdir as M,writeFile as D,readFile as E,unlink as G}from"fs/promises";import{checkForZillowBotFiltering as H,fetchHtmlFromRedfinListingUrl as ne,fetchHtmlFromZillowListingUrl as oe,scrapeDataFromRedfinListingHtml as le,scrapeDataFromZillowListingHtml as ce,getZillowListingResults as we,getRedfinListingResults as ue,waitForSolvedZillowCaptcha as de}from"@rent-scraper/api";import{ErrorLog as S,throwError as Z,checkForFile as j,parseJsonFile as ge,parseError as k,readFilesInDirectory as Y,roundValue as T,chunkArray as me,parsePercentage as pe,compareArrays as K}from"@rent-scraper/utils";import{getZillowOutputPath as F,getRedfinOutputPath as U,getZillowDaysListed as fe,getRedfinDaysListed as he,getRedfinZipCodes as $e,getZillowZipCodes as ye}from"@rent-scraper/api/config";import{spinner as _,log as g,progress as Le,intro as ve,confirm as je,isCancel as ze,cancel as Me,outro as Ze}from"@clack/prompts";import{setTimeout as V}from"node:timers/promises";import{checkBrowserServer as Q}from"@rent-scraper/utils/config";import be from"picocolors";const P=process.env.DEBUG,O=async(e,s,t,i)=>{const{timeoutMs:l}=i??{};if(await j(t))P&&g.warn(`${t} exists, skipping`);else try{P&&g.info(`writing ${t}`);const a=e==="redfin"?await ne(s):await oe(s,{timeoutMs:l});await D(`${t}`,a)}catch(a){Z(`error fetching html for ${s}`,a)}},Ce=async(e,{timeoutMs:s})=>{if(await j(e)){const t=JSON.parse(await E(e,"utf8"))||{},{hdpUrl:i}=t||{};if(i){const l=`https://www.zillow.com${i}`,a=e.replace(".json",".html");await j(a)?P&&g.warn(`file already exists, ${a}`):await O("zillow",l,a,{timeoutMs:s})}else Z(`file is empty, ${e}`)}else P&&g.warn(`file does not exist at this path, ${e}, skipping`)},W=async(e,s)=>{const{timeoutMs:t}=s??{};await Promise.all(e.map(async i=>await Ce(i,{timeoutMs:t})))},xe=(e,s="zillow")=>{if(s==="zillow"){const{zpid:t,detailUrl:i}=e||{};return{id:t,url:i}}else if(s==="redfin"){const{propertyId:t,url:i}=e?.homeData||{};return{id:t,url:`https://www.redfin.com${i}`}}},q=async(e,s,t,i=t,l)=>{const{timeoutMs:a,run:r=1,reruns:c=0,skipBotCheck:h=!1}=l??{},o=new S;t||Z("inputDirectory is required"),e==="zillow"&&!h&&await H();const L=[],f=_();f.start("Downloading listings html files");for(let p=1;p<=c+1;p++){c>0&&p>1&&o.add(`rerun ${p-1} of ${c}`);const $=L.length;(p===1||$)&&await Promise.all(($?L:s).map(async y=>{const v=`${t}/${y}.json`;if(await j(v))try{const{results:z}=await ge(v)||{};if(!z)o.add(`empty file, ${v}`);else{const b=i?`${i}/${y}`:`${t}/${y}`;await M(b,{recursive:!0}),z?.length?await Promise.all(z.map(async R=>{try{const{id:x,url:B}=xe(R,e)??{};if(!B)return o.add(`url missing for ${x}`);const ie=`${x}.html`,re=`${b}/${ie}`;await O(e,B,re,{timeoutMs:a})}catch(x){const{message:B}=k(x);o.add("scrape listing html error: "+(B??`error fetching listing for id, ${x}`))}})):o.add(`no results for file, ${v}`)}}catch(z){L.push(y);const{message:b}=k(z);o.add("scrape listing html error: "+(b??`error reading json data, ${v}, ${z}`))}else P&&g.warn(`file does not exist, ${v}, skipping`)}))}f.stop("Listings HTML files have been saved to:");const w=await F(),m=await U(),u=e==="zillow"?w:m;g.message(n.join(u,e,"listings",n.basename(t)));const d=n.join(u,e,"logs");if(await M(d,{recursive:!0}),o.get().filter(p=>!p.includes("rerun ")).length>c){const p=`${n.basename(t)}-html-errors-${r}.txt`,$=n.join(d,p);await o.write($,[...new Set(o.get())].join(`
|
|
2
|
+
`)),P&&g.error(`There were errors during processing, see ${n.resolve($)}`)}},De=async(e,s,t,i)=>{const{timeoutMs:l,run:a=1,reruns:r=0,skipBotCheck:c=!1}=i??{},h=new S;t||Z("inputDirectory is required"),e==="zillow"&&!c&&await H();const o=[];for(let u=1;u<=r+1;u++){r>0&&u>1&&h.add(`rerun ${u-1} of ${r}`);const d=o.length;(u===1||d)&&await Promise.all((d?o:s).map(async p=>{const $=`${t}/${p}`;if(await j($))try{const y=await Y($,{extension:".json",prependDirectory:!0});await W(y,{timeoutMs:l})}catch(y){o.push(p);const{message:v}=k(y);h.add("scrape listing html error: "+(v??`Error during fetch for ${p}, ${y}`))}else h.add(`listing directory does not exist, ${$}`)}))}const L=await F(),f=await U(),w=e==="zillow"?L:f,m=n.join(w,e,"logs");if(await M(m,{recursive:!0}),h.get().filter(u=>!u.includes("rerun ")).length>r){const u=`${n.basename(t)}-html-errors-${a}.txt`,d=n.join(m,u);await h.write(d,[...new Set(h.get())].join(`
|
|
3
|
+
`)),P&&g.error(`There were errors during processing, see ${n.resolve(d)}`)}},Be=async(e,s,t=s,i)=>{const{skipBotCheck:l=!1}=i??{},a=new S;if(e==="zillow"&&!l&&await H(),s||Z("inputDirectory is required"),await M(t,{recursive:!0}),await j(s))try{const w=await Y(s,{extension:".json",prependDirectory:!0});await W(w)}catch(w){const{message:m}=k(w);a.add("scrape listing html error: "+(m??`Error during fetch for ${s}, ${w}`))}else a.add(`inputDirectory does not exist, ${s}`);const r=await F(),c=await U(),h=e==="zillow"?r:c,o=n.join(h,e,"logs");await M(o,{recursive:!0});const L=`${n.basename(s)}-html-errors.txt`,f=n.join(o,L);a.get().length>0&&(await a.write(f,[...new Set(a.get())].join(`
|
|
4
|
+
`)),P&&g.error(`There were errors during processing, see ${n.resolve(f)}`))},ke=(e,s)=>e==="redfin"?`https://www.redfin.com/home/${s}`:e==="zillow"?`https://www.zillow.com/homedetails/${s}_zpid`:null,Pe=async(e,s,t,i)=>{const{skipBotCheck:l=!1}=i??{};e==="zillow"&&!l&&await H(),t||Z("outputDirectory is required"),await M(t,{recursive:!0}),await Promise.all(s.map(async a=>{const r=ke(e,a),c=`${t}/${a}.html`;r&&await O(e,r,c)}))},C=process.env.DEBUG,X=async e=>{if(await j(e)){const s=e.replace(".html",".json");if(await j(s))C&&g.warning(`file already exists, ${s}`);else{C&&g.message(`scraping data for ${e}`);const t=(await E(e)).toString(),i=await ce(t);if(!i?.priceHistory&&i?.bestMatchedUnit?.hdpUrl){C&&g.warning(`rescraping ${e} - https://www.zillow.com${i?.bestMatchedUnit?.hdpUrl}`),await j(e)&&(C&&g.warning(`deleting ${e}`),await G(e)),await j(s)&&(C&&g.warning(`deleting ${s}`),await G(s));const l=`https://www.zillow.com${i?.bestMatchedUnit?.hdpUrl}`;try{await O("zillow",l,e),await X(e)}catch(a){const{message:r}=k(a);g.error(r)}}else i?(C&&g.info(`writing ${s}`),await D(`${s}`,JSON.stringify(i))):Z(`problem scraping data for ${e}`)}}},He=async e=>{if(await j(e)){const s=e.replace(".html",".json");if(await j(s))C&&g.warning(`file already exists, ${s}`);else{C&&g.message(`scraping data for ${e}`);const t=(await E(e)).toString(),i=le(t);i?(C&&g.warning(`writing ${s}`),await D(`${s}`,JSON.stringify(i))):Z(`problem scraping data for ${e}`)}}},ee=async(e,s)=>{await Promise.all(s.map(async t=>e==="redfin"?await He(t):await X(t)))},N=async(e,s,t)=>{const i=new S;let l=0;t||Z("inputDirectory is required");const a=Le({style:"heavy",max:100,size:50});a.start("Scraping listings data");const r=s.length,c=r<20?r+1:20,h=r>c?T(r/c,1):r,o=me(s,h);for(const[u,d]of o.entries()){const p=T((Number(u)+1)/c*100,1);a.advance(T(1/c*100,1),`Scraping listings (${pe(p)})`),await Promise.all(d.map(async $=>{C&&g.message(`processing files for ${$}`);const y=`${t}/${$}`;if(await j(y)){const v=await Y(y,{extension:".html",prependDirectory:!0});l=l+v.length,await ee(e,v)}else i.add(`listing directory does not exist, ${y}`)}))}const L=await F(),f=await U(),w=e==="zillow"?L:f;a.stop("Listings data has been saved to:"),g.message(n.join(w,e,"listings",n.basename(t)));const m=n.join(w,e,"logs");if(await M(m,{recursive:!0}),i.get().length>0){const u=`${n.basename(t)}-listing-errors.txt`,d=n.join(m,u);await i.write(d,[...new Set(i.get())].join(`
|
|
5
|
+
`)),g.error(`There were errors during processing, see ${n.resolve(d)}`)}return{numListings:l}},J=process.env.DEBUG,Se=async(e,s,t,i)=>{const{daysOnZillow:l,timeoutMs:a}=i??{};if(await j(s))J&&g.warning(`${e} exists, skipping`),t.push(e);else{const r=await we({zipCode:e,daysOnZillow:l,mergePageResults:!0,timeoutMs:a});r?(J&&g.info(`writing ${s}`),await D(s,JSON.stringify(r)),t.push(e)):J&&g.warning(`no results for ${e}`)}},se=async(e,s,t)=>{const{daysListed:i,timeoutMs:l,run:a=1,reruns:r=0,fetchListings:c=!1,skipBotCheck:h=!1}=t??{},o=new S;h||await H({fetchListings:c});const L=[],f=[];await M(s,{recursive:!0});const w=_();w.start("Scraping Zillow search results");for(let d=1;d<=r+1;d++){r>0&&d>1&&o.add(`rerun ${d-1} of ${r}`);const p=f.length;(d===1||p)&&await Promise.all((p?f:e).map(async $=>{const y=`${$}.json`,v=`${s}/${y}`;try{await Se($,v,L,{daysOnZillow:i,timeoutMs:l})}catch(z){f.push($);const{message:b}=k(z);o.add("scrape listing results error: "+(b??`Error during fetch for ${$}, ${z}`))}}))}w.stop("Zillow search results have been saved to:");const m=await F();g.message(n.join(m,"zillow","results",n.basename(s)));const u=n.join(m,"zillow","logs");if(await M(u,{recursive:!0}),o.get().filter(d=>!d.includes("rerun ")).length>r){const d=`${n.basename(s)}-results-errors-${a}.txt`,p=n.join(u,d);await o.write(p,[...new Set(o.get())].join(`
|
|
6
|
+
`)),g.error(`There were errors during processing, see ${n.resolve(p)}`)}return{validZipCodes:L}},Fe=async(e,s,t,i)=>{const{daysListed:l,timeoutMs:a}=i??{};if(await j(s))console.log(`${e} exists, skipping`),t.push(e);else{const r=await ue({zipCode:e,daysListed:l,timeoutMs:a});r?(console.log(`writing ${s}`),await D(s,JSON.stringify(r)),t.push(e)):console.log(`no results for ${e}`)}},te=async(e,s,t)=>{const{daysListed:i,timeoutMs:l,run:a=1,reruns:r=0}=t??{},c=new S,h=[],o=[];await M(s,{recursive:!0});for(let w=1;w<=r+1;w++){r>0&&w>1&&c.add(`rerun ${w-1} of ${r}`);const m=o.length;(w===1||m)&&await Promise.all((m?o:e).map(async u=>{const d=`${u}.json`,p=`${s}/${d}`;try{await Fe(u,p,h,{daysListed:i,timeoutMs:l})}catch($){o.push(u);const{message:y}=k($);c.add("scrape listing results error: "+(y??`Error during fetch for ${u}, ${$}`))}}))}const L=await U(),f=n.join(L,"redfin","logs");if(await M(f,{recursive:!0}),c.get().filter(w=>!w.includes("rerun ")).length>r){const w=`${n.basename(s)}-results-errors-${a}.txt`,m=n.join(f,w);await c.write(m,[...new Set(c.get())].join(`
|
|
7
|
+
`)),console.log(`\x1B[41m
|
|
8
|
+
%s\x1B[0m`,`There were errors during processing, see ${n.resolve(m)}`)}return{validZipCodes:h}},Ue=async()=>{await A.post("http://localhost:8082/browser/close")},Re=async()=>{await A.post("http://localhost:8082/server/shutdown")},Oe=async(e,s,t,{daysListed:i,timeoutMs:l,run:a,reruns:r})=>{try{await H()}catch(o){const{message:L}=k(o);g.error(L);const f=await je({message:"You need to complete a captcha in your browser. Press Return to launch your browser and continue.",active:"OK",inactive:"Cancel"});if(ze(f)||!f)return Me("Create config canceled. Please try again."),process.exit(1);await V(1e3),Ze("Browser Launching..."),await V(1e3),await de()}await Ue();const{validZipCodes:c}=await se(e,s,{daysListed:i,timeoutMs:l,run:a,reruns:r});await q("zillow",c,s,t,{timeoutMs:l,run:a,reruns:r});const{numListings:h}=await N("zillow",c,t);return{numListings:h,validZipCodes:c}},Ee=async(e,s,t,{daysListed:i,timeoutMs:l,run:a,reruns:r})=>{const{validZipCodes:c}=await te(e,s,{daysListed:i,timeoutMs:l,run:a,reruns:r});await q("redfin",c,s,t,{timeoutMs:l,run:a,reruns:r});const{numListings:h}=await N("redfin",c,t);return{numListings:h,validZipCodes:c}};async function Ye(){ve(be.inverse(" scrape listings "));const e=ae(process.argv.slice(2)),s=e.source??"zillow";s==="zillow"&&(await Q()||Z("Please launch the browser server before scraping."));const t=await F(),i=await U(),l=s==="zillow"?t:i,a=e["days-listed"]??(s==="zillow"?await fe():await he())??1,r=e.runs??1,c=e.reruns??0,h=e["timeout-ms"]??6e4,o=e["results-directory"]??n.join(l,s,"results",`${I().format("YYYY-MM-DD-HHmm")}`),L=e["listings-directory"]??n.join(l,s,"listings",`${I().format("YYYY-MM-DD-HHmm")}`),f=e["logs-directory"]??n.join(l,s,"logs");if(s==="redfin"){const w=await $e();for(let m=1;m<=r;m++){w||Z("zip codes required, please run the createConfig script");const u=w,{numListings:d,validZipCodes:p}=await Ee(u,o,L,{daysListed:a,timeoutMs:h,run:m,reruns:c});if(m===r){const $=Object.entries({numListings:d}).map(([x,B])=>`${x}: ${B}`).join(`
|
|
9
|
+
`),y=K(u,p).join(`
|
|
10
|
+
`);await M(f,{recursive:!0});const v=`${n.basename(o)}-scraping-results.txt`,z=n.join(f,v),b=`${n.basename(o)}-invalid-zipcodes.txt`,R=n.join(f,b);await D(z,$),await D(R,y)}}}else if(s==="zillow"){const w=await ye();for(let m=1;m<=r;m++){w||Z("zip codes required, please run the createConfig script");const u=w,{numListings:d,validZipCodes:p}=await Oe(u,o,L,{daysListed:a,timeoutMs:h,run:m,reruns:c});if(m===r){const $=Object.entries({numListings:d}).map(([x,B])=>`${x}: ${B}`).join(`
|
|
11
|
+
`),y=K(u,p).join(`
|
|
12
|
+
`);await M(f,{recursive:!0});const v=`${n.basename(o)}-scraping-results.txt`,z=n.join(f,v),b=`${n.basename(o)}-invalid-zipcodes.txt`,R=n.join(f,b);await D(z,$),await D(R,y)}}}g.success("Scraping complete!"),(s==="zillow"||s==="redfin")&&await Q()&&await Re()}export{N as a,q as b,De as c,Be as d,Pe as e,O as f,te as g,se as h,Ye as r,ee as s};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@rent-scraper/scrape-listings",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.25",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"exports": {
|
|
6
6
|
".": {
|
|
@@ -30,8 +30,8 @@
|
|
|
30
30
|
"dayjs": "^1.11.13",
|
|
31
31
|
"minimist": "^1.2.8",
|
|
32
32
|
"picocolors": "^1.1.1",
|
|
33
|
-
"@rent-scraper/api": "1.0.
|
|
34
|
-
"@rent-scraper/utils": "1.0.
|
|
33
|
+
"@rent-scraper/api": "1.0.25",
|
|
34
|
+
"@rent-scraper/utils": "1.0.25"
|
|
35
35
|
},
|
|
36
36
|
"devDependencies": {
|
|
37
37
|
"@types/minimist": "^1.2.5",
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
import re from"minimist";import A from"dayjs";import a from"path";import G from"axios";import{mkdir as z,writeFile as D,readFile as E,unlink as I}from"fs/promises";import{checkForZillowBotFiltering as H,fetchHtmlFromRedfinListingUrl as ae,fetchHtmlFromZillowListingUrl as ne,scrapeDataFromRedfinListingHtml as oe,scrapeDataFromZillowListingHtml as le,getZillowListingResults as ce,getRedfinListingResults as we,waitForSolvedZillowCaptcha as ue}from"@rent-scraper/api";import{ErrorLog as P,throwError as M,checkForFile as j,parseJsonFile as de,parseError as S,readFilesInDirectory as Y,roundValue as T,chunkArray as ge,parsePercentage as me,compareArrays as _}from"@rent-scraper/utils";import{getZillowOutputPath as O,getRedfinOutputPath as k,getZillowDaysListed as pe,getRedfinDaysListed as fe,getRedfinZipCodes as he,getZillowZipCodes as $e}from"@rent-scraper/api/config";import{spinner as K,log as g,progress as ye,intro as Le,confirm as ve,isCancel as je,cancel as ze,outro as Me}from"@clack/prompts";import{setTimeout as V}from"node:timers/promises";import{checkBrowserServer as Q}from"@rent-scraper/utils/config";import Ze from"picocolors";const B=process.env.DEBUG,U=async(e,t,s,i)=>{const{timeoutMs:l}=i??{};if(await j(s))B&&g.warn(`${s} exists, skipping`);else try{B&&g.info(`writing ${s}`);const n=e==="redfin"?await ae(t):await ne(t,{timeoutMs:l});await D(`${s}`,n)}catch(n){M(`error fetching html for ${t}`,n)}},be=async(e,{timeoutMs:t})=>{if(await j(e)){const s=JSON.parse(await E(e,"utf8"))||{},{hdpUrl:i}=s||{};if(i){const l=`https://www.zillow.com${i}`,n=e.replace(".json",".html");await j(n)?B&&g.warn(`file already exists, ${n}`):await U("zillow",l,n,{timeoutMs:t})}else M(`file is empty, ${e}`)}else B&&g.warn(`file does not exist at this path, ${e}, skipping`)},W=async(e,t)=>{const{timeoutMs:s}=t??{};await Promise.all(e.map(async i=>await be(i,{timeoutMs:s})))},xe=(e,t="zillow")=>{if(t==="zillow"){const{zpid:s,detailUrl:i}=e||{};return{id:s,url:i}}else if(t==="redfin"){const{propertyId:s,url:i}=e?.homeData||{};return{id:s,url:`https://www.redfin.com${i}`}}},q=async(e,t,s,i=s,l)=>{const{timeoutMs:n,run:r=1,reruns:o=0}=l??{},u=new P;s||M("inputDirectory is required"),e==="zillow"&&await H();const d=[],y=K();y.start("Downloading listings html files");for(let w=1;w<=o+1;w++){o>0&&w>1&&u.add(`rerun ${w-1} of ${o}`);const $=d.length;(w===1||$)&&await Promise.all(($?d:t).map(async h=>{const L=`${s}/${h}.json`;if(await j(L))try{const{results:v}=await de(L)||{};if(!v)u.add(`empty file, ${L}`);else{const Z=i?`${i}/${h}`:`${s}/${h}`;await z(Z,{recursive:!0}),v?.length?await Promise.all(v.map(async F=>{try{const{id:x,url:C}=xe(F,e)??{};if(!C)return u.add(`url missing for ${x}`);const R=`${x}.html`,ie=`${Z}/${R}`;await U(e,C,ie,{timeoutMs:n})}catch(x){const{message:C}=S(x);u.add("scrape listing html error: "+(C??`error fetching listing for id, ${x}`))}})):u.add(`no results for file, ${L}`)}}catch(v){d.push(h);const{message:Z}=S(v);u.add("scrape listing html error: "+(Z??`error reading json data, ${L}, ${v}`))}else B&&g.warn(`file does not exist, ${L}, skipping`)}))}y.stop("Listings HTML files have been saved to:");const f=await O(),m=await k(),c=e==="zillow"?f:m;g.message(a.join(c,e,"listings",a.basename(s)));const p=a.join(c,e,"logs");if(await z(p,{recursive:!0}),u.get().filter(w=>!w.includes("rerun ")).length>o){const w=`${a.basename(s)}-html-errors-${r}.txt`,$=a.join(p,w);await u.write($,[...new Set(u.get())].join(`
|
|
2
|
-
`)),B&&g.error(`There were errors during processing, see ${a.resolve($)}`)}},De=async(e,t,s,i)=>{const{timeoutMs:l,run:n=1,reruns:r=0}=i??{},o=new P;s||M("inputDirectory is required"),e==="zillow"&&await H();const u=[];for(let c=1;c<=r+1;c++){r>0&&c>1&&o.add(`rerun ${c-1} of ${r}`);const p=u.length;(c===1||p)&&await Promise.all((p?u:t).map(async w=>{const $=`${s}/${w}`;if(await j($))try{const h=await Y($,{extension:".json",prependDirectory:!0});await W(h,{timeoutMs:l})}catch(h){u.push(w);const{message:L}=S(h);o.add("scrape listing html error: "+(L??`Error during fetch for ${w}, ${h}`))}else o.add(`listing directory does not exist, ${$}`)}))}const d=await O(),y=await k(),f=e==="zillow"?d:y,m=a.join(f,e,"logs");if(await z(m,{recursive:!0}),o.get().filter(c=>!c.includes("rerun ")).length>r){const c=`${a.basename(s)}-html-errors-${n}.txt`,p=a.join(m,c);await o.write(p,[...new Set(o.get())].join(`
|
|
3
|
-
`)),B&&g.error(`There were errors during processing, see ${a.resolve(p)}`)}},Ce=async(e,t,s=t)=>{const i=new P;if(e==="zillow"&&await H(),t||M("inputDirectory is required"),await z(s,{recursive:!0}),await j(t))try{const y=await Y(t,{extension:".json",prependDirectory:!0});await W(y)}catch(y){const{message:f}=S(y);i.add("scrape listing html error: "+(f??`Error during fetch for ${t}, ${y}`))}else i.add(`inputDirectory does not exist, ${t}`);const l=await O(),n=await k(),r=e==="zillow"?l:n,o=a.join(r,e,"logs");await z(o,{recursive:!0});const u=`${a.basename(t)}-html-errors.txt`,d=a.join(o,u);i.get().length>0&&(await i.write(d,[...new Set(i.get())].join(`
|
|
4
|
-
`)),B&&g.error(`There were errors during processing, see ${a.resolve(d)}`))},Se=(e,t)=>e==="redfin"?`https://www.redfin.com/home/${t}`:e==="zillow"?`https://www.zillow.com/homedetails/${t}_zpid`:null,Be=async(e,t,s)=>{e==="zillow"&&await H(),s||M("outputDirectory is required"),await z(s,{recursive:!0}),await Promise.all(t.map(async i=>{const l=Se(e,i),n=`${s}/${i}.html`;l&&await U(e,l,n)}))},b=process.env.DEBUG,X=async e=>{if(await j(e)){const t=e.replace(".html",".json");if(await j(t))b&&g.warning(`file already exists, ${t}`);else{b&&g.message(`scraping data for ${e}`);const s=(await E(e)).toString(),i=await le(s);if(!i?.priceHistory&&i?.bestMatchedUnit?.hdpUrl){b&&g.warning(`rescraping ${e} - https://www.zillow.com${i?.bestMatchedUnit?.hdpUrl}`),await j(e)&&(b&&g.warning(`deleting ${e}`),await I(e)),await j(t)&&(b&&g.warning(`deleting ${t}`),await I(t));const l=`https://www.zillow.com${i?.bestMatchedUnit?.hdpUrl}`;try{await U("zillow",l,e),await X(e)}catch(n){const{message:r}=S(n);g.error(r)}}else i?(b&&g.info(`writing ${t}`),await D(`${t}`,JSON.stringify(i))):M(`problem scraping data for ${e}`)}}},Fe=async e=>{if(await j(e)){const t=e.replace(".html",".json");if(await j(t))b&&g.warning(`file already exists, ${t}`);else{b&&g.message(`scraping data for ${e}`);const s=(await E(e)).toString(),i=oe(s);i?(b&&g.warning(`writing ${t}`),await D(`${t}`,JSON.stringify(i))):M(`problem scraping data for ${e}`)}}},ee=async(e,t)=>{await Promise.all(t.map(async s=>e==="redfin"?await Fe(s):await X(s)))},J=async(e,t,s)=>{const i=new P;let l=0;s||M("inputDirectory is required");const n=ye({style:"heavy",max:100,size:50});n.start("Scraping listings data");const r=t.length,o=r<20?r+1:20,u=r>o?T(r/o,1):r,d=ge(t,u);for(const[p,w]of d.entries()){const $=T((Number(p)+1)/o*100,1);n.advance(T(1/o*100,1),`Scraping listings (${me($)})`),await Promise.all(w.map(async h=>{b&&g.message(`processing files for ${h}`);const L=`${s}/${h}`;if(await j(L)){const v=await Y(L,{extension:".html",prependDirectory:!0});l=l+v.length,await ee(e,v)}else i.add(`listing directory does not exist, ${L}`)}))}const y=await O(),f=await k(),m=e==="zillow"?y:f;n.stop("Listings data has been saved to:"),g.message(a.join(m,e,"listings",a.basename(s)));const c=a.join(m,e,"logs");if(await z(c,{recursive:!0}),i.get().length>0){const p=`${a.basename(s)}-listing-errors.txt`,w=a.join(c,p);await i.write(w,[...new Set(i.get())].join(`
|
|
5
|
-
`)),g.error(`There were errors during processing, see ${a.resolve(w)}`)}return{numListings:l}},N=process.env.DEBUG,He=async(e,t,s,i)=>{const{daysOnZillow:l,timeoutMs:n}=i??{};if(await j(t))N&&g.warning(`${e} exists, skipping`),s.push(e);else{const r=await ce({zipCode:e,daysOnZillow:l,mergePageResults:!0,timeoutMs:n});r?(N&&g.info(`writing ${t}`),await D(t,JSON.stringify(r)),s.push(e)):N&&g.warning(`no results for ${e}`)}},te=async(e,t,s)=>{const{daysListed:i,timeoutMs:l,run:n=1,reruns:r=0,fetchListings:o=!1,skipBotCheck:u=!1}=s??{},d=new P;u||await H({fetchListings:o});const y=[],f=[];await z(t,{recursive:!0});const m=K();m.start("Scraping Zillow search results");for(let w=1;w<=r+1;w++){r>0&&w>1&&d.add(`rerun ${w-1} of ${r}`);const $=f.length;(w===1||$)&&await Promise.all(($?f:e).map(async h=>{const L=`${h}.json`,v=`${t}/${L}`;try{await He(h,v,y,{daysOnZillow:i,timeoutMs:l})}catch(Z){f.push(h);const{message:F}=S(Z);d.add("scrape listing results error: "+(F??`Error during fetch for ${h}, ${Z}`))}}))}m.stop("Zillow search results have been saved to:");const c=await O();g.message(a.join(c,"zillow","results",a.basename(t)));const p=a.join(c,"zillow","logs");if(await z(p,{recursive:!0}),d.get().filter(w=>!w.includes("rerun ")).length>r){const w=`${a.basename(t)}-results-errors-${n}.txt`,$=a.join(p,w);await d.write($,[...new Set(d.get())].join(`
|
|
6
|
-
`)),g.error(`There were errors during processing, see ${a.resolve($)}`)}return{validZipCodes:y}},Pe=async(e,t,s,i)=>{const{daysListed:l,timeoutMs:n}=i??{};if(await j(t))console.log(`${e} exists, skipping`),s.push(e);else{const r=await we({zipCode:e,daysListed:l,timeoutMs:n});r?(console.log(`writing ${t}`),await D(t,JSON.stringify(r)),s.push(e)):console.log(`no results for ${e}`)}},se=async(e,t,s)=>{const{daysListed:i,timeoutMs:l,run:n=1,reruns:r=0}=s??{},o=new P,u=[],d=[];await z(t,{recursive:!0});for(let m=1;m<=r+1;m++){r>0&&m>1&&o.add(`rerun ${m-1} of ${r}`);const c=d.length;(m===1||c)&&await Promise.all((c?d:e).map(async p=>{const w=`${p}.json`,$=`${t}/${w}`;try{await Pe(p,$,u,{daysListed:i,timeoutMs:l})}catch(h){d.push(p);const{message:L}=S(h);o.add("scrape listing results error: "+(L??`Error during fetch for ${p}, ${h}`))}}))}const y=await k(),f=a.join(y,"redfin","logs");if(await z(f,{recursive:!0}),o.get().filter(m=>!m.includes("rerun ")).length>r){const m=`${a.basename(t)}-results-errors-${n}.txt`,c=a.join(f,m);await o.write(c,[...new Set(o.get())].join(`
|
|
7
|
-
`)),console.log(`\x1B[41m
|
|
8
|
-
%s\x1B[0m`,`There were errors during processing, see ${a.resolve(c)}`)}return{validZipCodes:u}},Oe=async()=>{await G.post("http://localhost:8082/browser/close")},ke=async()=>{await G.post("http://localhost:8082/server/shutdown")},Re=async(e,t,s,{daysListed:i,timeoutMs:l,run:n,reruns:r})=>{try{await H()}catch(d){const{message:y}=S(d);g.error(y);const f=await ve({message:"You need to complete a captcha in your browser. Press Return to launch your browser and continue.",active:"OK",inactive:"Cancel"});if(je(f)||!f)return ze("Create config canceled. Please try again."),process.exit(1);await V(1e3),Me("Browser Launching..."),await V(1e3),await ue()}await Oe();const{validZipCodes:o}=await te(e,t,{daysListed:i,timeoutMs:l,run:n,reruns:r});await q("zillow",o,t,s,{timeoutMs:l,run:n,reruns:r});const{numListings:u}=await J("zillow",o,s);return{numListings:u,validZipCodes:o}},Ue=async(e,t,s,{daysListed:i,timeoutMs:l,run:n,reruns:r})=>{const{validZipCodes:o}=await se(e,t,{daysListed:i,timeoutMs:l,run:n,reruns:r});await q("redfin",o,t,s,{timeoutMs:l,run:n,reruns:r});const{numListings:u}=await J("redfin",o,s);return{numListings:u,validZipCodes:o}};async function Ee(){Le(Ze.inverse(" scrape listings "));const e=re(process.argv.slice(2)),t=e.source??"zillow";t==="zillow"&&(await Q()||M("Please launch the browser server before scraping."));const s=await O(),i=await k(),l=t==="zillow"?s:i,n=e["days-listed"]??(t==="zillow"?await pe():await fe())??1,r=e.runs??1,o=e.reruns??0,u=e["timeout-ms"]??6e4,d=e["results-directory"]??a.join(l,t,"results",`${A().format("YYYY-MM-DD-HHmm")}`),y=e["listings-directory"]??a.join(l,t,"listings",`${A().format("YYYY-MM-DD-HHmm")}`),f=e["logs-directory"]??a.join(l,t,"logs");if(t==="redfin"){const m=await he();for(let c=1;c<=r;c++){m||M("zip codes required, please run the createConfig script");const p=m,{numListings:w,validZipCodes:$}=await Ue(p,d,y,{daysListed:n,timeoutMs:u,run:c,reruns:o});if(c===r){const h=Object.entries({numListings:w}).map(([C,R])=>`${C}: ${R}`).join(`
|
|
9
|
-
`),L=_(p,$).join(`
|
|
10
|
-
`);await z(f,{recursive:!0});const v=`${a.basename(d)}-scraping-results.txt`,Z=a.join(f,v),F=`${a.basename(d)}-invalid-zipcodes.txt`,x=a.join(f,F);await D(Z,h),await D(x,L)}}}else if(t==="zillow"){const m=await $e();for(let c=1;c<=r;c++){m||M("zip codes required, please run the createConfig script");const p=m,{numListings:w,validZipCodes:$}=await Re(p,d,y,{daysListed:n,timeoutMs:u,run:c,reruns:o});if(c===r){const h=Object.entries({numListings:w}).map(([C,R])=>`${C}: ${R}`).join(`
|
|
11
|
-
`),L=_(p,$).join(`
|
|
12
|
-
`);await z(f,{recursive:!0});const v=`${a.basename(d)}-scraping-results.txt`,Z=a.join(f,v),F=`${a.basename(d)}-invalid-zipcodes.txt`,x=a.join(f,F);await D(Z,h),await D(x,L)}}}g.success("Scraping complete!"),(t==="zillow"||t==="redfin")&&await Q()&&await ke()}export{J as a,q as b,De as c,Ce as d,Be as e,U as f,se as g,te as h,Ee as r,ee as s};
|