@rent-scraper/scrape-listings 1.0.11 → 1.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- import{parseError as t}from"@rent-scraper/utils";import{r as i}from"../shared/scrape-listings.B6jI3DWZ.mjs";import{log as m}from"@clack/prompts";import"minimist";import"dayjs";import"path";import"axios";import"fs/promises";import"@rent-scraper/api";import"@rent-scraper/api/config";import"node:timers/promises";import"@rent-scraper/utils/config";import"picocolors";i().then(()=>{process.exit(0)}).catch(r=>{const{message:o}=t(r);m.error(o),process.exit(1)});
1
+ import{parseError as t}from"@rent-scraper/utils";import{r as i}from"../shared/scrape-listings.mP1uY07i.mjs";import{log as m}from"@clack/prompts";import"minimist";import"dayjs";import"path";import"axios";import"fs/promises";import"@rent-scraper/api";import"@rent-scraper/api/config";import"node:timers/promises";import"@rent-scraper/utils/config";import"picocolors";i().then(()=>{process.exit(0)}).catch(r=>{const{message:o}=t(r);m.error(o),process.exit(1)});
package/dist/index.mjs CHANGED
@@ -1 +1 @@
1
- export{f as fetchListingHtmlByUrlAndExport,r as runScrapeListings,s as scrapeListingDetailsFromHtmlByFilePaths,a as scrapeListingDetailsFromHtmlByZipCodes,e as scrapeListingHtmlByIds,d as scrapeListingHtmlByInputDirectory,b as scrapeListingHtmlByZipCodes,c as scrapeListingHtmlByZipCodesAndListingDetails,g as scrapeRedfinListingResultsByZipCodes,h as scrapeZillowListingResultsByZipCodes}from"./shared/scrape-listings.B6jI3DWZ.mjs";import"minimist";import"dayjs";import"path";import"axios";import"fs/promises";import"@rent-scraper/api";import"@rent-scraper/utils";import"@rent-scraper/api/config";import"@clack/prompts";import"node:timers/promises";import"@rent-scraper/utils/config";import"picocolors";
1
+ export{f as fetchListingHtmlByUrlAndExport,r as runScrapeListings,s as scrapeListingDetailsFromHtmlByFilePaths,a as scrapeListingDetailsFromHtmlByZipCodes,e as scrapeListingHtmlByIds,d as scrapeListingHtmlByInputDirectory,b as scrapeListingHtmlByZipCodes,c as scrapeListingHtmlByZipCodesAndListingDetails,g as scrapeRedfinListingResultsByZipCodes,h as scrapeZillowListingResultsByZipCodes}from"./shared/scrape-listings.mP1uY07i.mjs";import"minimist";import"dayjs";import"path";import"axios";import"fs/promises";import"@rent-scraper/api";import"@rent-scraper/utils";import"@rent-scraper/api/config";import"@clack/prompts";import"node:timers/promises";import"@rent-scraper/utils/config";import"picocolors";
@@ -1,12 +1,12 @@
1
- import re from"minimist";import I from"dayjs";import a from"path";import A from"axios";import{mkdir as z,writeFile as D,readFile as E,unlink as G}from"fs/promises";import{checkForZillowBotFiltering as H,fetchHtmlFromRedfinListingUrl as ne,fetchHtmlFromZillowListingUrl as oe,scrapeDataFromRedfinListingHtml as le,scrapeDataFromZillowListingHtml as ce,getZillowListingResults as we,getRedfinListingResults as de,waitForSolvedZillowCaptcha as ue}from"@rent-scraper/api";import{ErrorLog as B,throwError as M,checkForFile as m,parseJsonFile as ge,parseError as S,readFilesInDirectory as Y,roundValue as T,chunkArray as me,parsePercentage as pe,compareArrays as _}from"@rent-scraper/utils";import{getZillowOutputPath as F,getRedfinOutputPath as R,getZillowDaysListed as fe,getRedfinDaysListed as $e,getRedfinZipCodes as he,getZillowZipCodes as ye}from"@rent-scraper/api/config";import{spinner as K,log as g,progress as Le,intro as ve,confirm as je,isCancel as ze,cancel as Me,outro as Ze}from"@clack/prompts";import{setTimeout as V}from"node:timers/promises";import{checkBrowserServer as be}from"@rent-scraper/utils/config";import xe from"picocolors";const P=process.env.DEBUG,k=async(e,t,i,s)=>{const{timeoutMs:o}=s??{};if(await m(i))P&&g.warn(`${i} exists, skipping`);else try{P&&g.info(`writing ${i}`);const r=e==="redfin"?await ne(t):await oe(t,{timeoutMs:o});await D(`${i}`,r)}catch(r){M(`error fetching html for ${t}`,r)}},De=async(e,{timeoutMs:t})=>{if(await m(e)){const i=JSON.parse(await E(e,"utf8"))||{},{hdpUrl:s}=i||{};if(s){const o=`https://www.zillow.com${s}`,r=e.replace(".json",".html");await m(r)?P&&g.warn(`file already exists, ${r}`):await k("zillow",o,r,{timeoutMs:t})}else M(`file is empty, ${e}`)}else P&&g.warn(`file does not exist at this path, ${e}, skipping`)},Q=async(e,t)=>{const{timeoutMs:i}=t??{};await Promise.all(e.map(async s=>await De(s,{timeoutMs:i})))},Ce=(e,t="zillow")=>{if(t==="zillow"){const{zpid:i,detailUrl:s}=e||{};return{id:i,url:s}}else if(t==="redfin"){const{propertyId:i,url:s}=e?.homeData||{};return{id:i,url:`https://www.redfin.com${s}`}}},q=async(e,t,i,s=i,o)=>{const{timeoutMs:r,run:c=1,reruns:n=0}=o??{},l=new B;i||M("inputDirectory is required"),e==="zillow"&&await H();const p=[],L=p.length,f=K();f.start("Downloading listings html files");for(let h=1;h<=n+1;h++)l.add(`rerun ${h} of ${n}`),(h===1||L)&&await Promise.all((L?p:t).map(async y=>{const v=`${i}/${y}.json`;if(await m(v))try{const{results:j}=await ge(v)||{};if(!j)l.add(`empty file, ${v}`);else{const b=s?`${s}/${y}`:`${i}/${y}`;await m(b)||await z(b,{recursive:!0}),j?.length?await Promise.all(await j.map(async O=>{try{const{id:x,url:C}=Ce(O,e)??{};if(!C)return l.add(`url missing for ${x}`);const U=`${x}.html`,ae=`${b}/${U}`;await k(e,C,ae,{timeoutMs:r})}catch(x){const{message:C}=S(x);l.add(C??`error fetching listing for id, ${x}`)}})):l.add(`no results for file, ${v}`)}}catch(j){p.push(y);const{message:b}=S(j);l.add(b??`error reading json data, ${v}, ${j}`)}else P&&g.warn(`file does not exist, ${v}, skipping`)}));f.stop("Listings HTML files have been saved to:");const $=await F(),d=await R(),w=e==="zillow"?$:d;g.message(a.join(w,e,"listings",a.basename(i)));const u=a.join(w,e,"logs");if(await m(u)||await z(u,{recursive:!0}),l.get().filter(h=>!h.includes("rerun ")).length>n){const h=`${a.basename(i)}-html-errors-${c}.txt`,y=a.join(u,h);await l.write(y,[...new Set(l.get())].join(`
2
- `)),P&&g.error(`There were errors during processing, see ${a.resolve(y)}`)}},Se=async(e,t,i,s)=>{const{timeoutMs:o,run:r=1,reruns:c=0}=s??{},n=new B;i||M("inputDirectory is required"),e==="zillow"&&await H();const l=[],p=l.length;for(let w=1;w<=c+1;w++)n.add(`rerun ${w} of ${c}`),(w===1||p)&&await Promise.all((p?l:t).map(async u=>{const h=`${i}/${u}`;if(await m(h))try{const y=await Y(h,{extension:".json",prependDirectory:!0});await Q(y,{timeoutMs:o})}catch(y){l.push(u);const{message:v}=S(y);n.add(v??`Error during fetch for ${u}, ${y}`)}else n.add(`listing directory does not exist, ${h}`)}));const L=await F(),f=await R(),$=e==="zillow"?L:f,d=a.join($,e,"logs");if(await m(d)||await z(d,{recursive:!0}),n.get().filter(w=>!w.includes("rerun ")).length>c){const w=`${a.basename(i)}-html-errors-${r}.txt`,u=a.join(d,w);await n.write(u,[...new Set(n.get())].join(`
3
- `)),P&&g.error(`There were errors during processing, see ${a.resolve(u)}`)}},Pe=async(e,t,i=t)=>{const s=new B;if(e==="zillow"&&await H(),t||M("inputDirectory is required"),await m(i)||await z(i,{recursive:!0}),await m(t))try{const L=await Y(t,{extension:".json",prependDirectory:!0});await Q(L)}catch(L){const{message:f}=S(L);s.add(f??`Error during fetch for ${t}, ${L}`)}else s.add(`inputDirectory does not exist, ${t}`);const o=await F(),r=await R(),c=e==="zillow"?o:r,n=a.join(c,e,"logs");await m(n)||await z(n,{recursive:!0});const l=`${a.basename(t)}-html-errors.txt`,p=a.join(n,l);s.get().length>0&&(await s.write(p,[...new Set(s.get())].join(`
4
- `)),P&&g.error(`There were errors during processing, see ${a.resolve(p)}`))},He=(e,t)=>e==="redfin"?`https://www.redfin.com/home/${t}`:e==="zillow"?`https://www.zillow.com/homedetails/${t}_zpid`:null,Be=async(e,t,i)=>{e==="zillow"&&await H(),i||M("outputDirectory is required"),await m(i)||await z(i,{recursive:!0}),await Promise.all(t.map(async s=>{const o=He(e,s),r=`${i}/${s}.html`;o&&await k(e,o,r)}))},Z=process.env.DEBUG,W=async e=>{if(await m(e)){const t=e.replace(".html",".json");if(await m(t))Z&&g.warning(`file already exists, ${t}`);else{Z&&g.message(`scraping data for ${e}`);const i=(await E(e)).toString(),s=await ce(i);if(!s?.priceHistory&&s?.bestMatchedUnit?.hdpUrl){Z&&g.warning(`rescraping ${e} - https://www.zillow.com${s?.bestMatchedUnit?.hdpUrl}`),await m(e)&&(Z&&g.warning(`deleting ${e}`),await G(e)),await m(t)&&(Z&&g.warning(`deleting ${t}`),await G(t));const o=`https://www.zillow.com${s?.bestMatchedUnit?.hdpUrl}`;try{await k("zillow",o,e),await W(e)}catch(r){const{message:c}=S(r);g.error(c)}}else s?(Z&&g.info(`writing ${t}`),await D(`${t}`,JSON.stringify(s))):M(`problem scraping data for ${e}`)}}},Fe=async e=>{if(await m(e)){const t=e.replace(".html",".json");if(await m(t))Z&&g.warning(`file already exists, ${t}`);else{Z&&g.message(`scraping data for ${e}`);const i=(await E(e)).toString(),s=le(i);s?(Z&&g.warning(`writing ${t}`),await D(`${t}`,JSON.stringify(s))):M(`problem scraping data for ${e}`)}}},X=async(e,t)=>{await Promise.all(t.map(async i=>e==="redfin"?await Fe(i):await W(i)))},J=async(e,t,i)=>{const s=new B;let o=0;i||M("inputDirectory is required");const r=Le({style:"heavy",max:100,size:50});r.start("Scraping listings data");const c=20,n=t.length,l=n>c?T(n/c,1):n,p=me(t,l);for(const[w,u]of p.entries()){const h=T((Number(w)+1)/c*100,1);r.advance(T(1/c*100,1),`Scraping listings (${pe(h)})`),await Promise.all(u.map(async y=>{Z&&g.message(`processing files for ${y}`);const v=`${i}/${y}`;if(await m(v)){const j=await Y(v,{extension:".html",prependDirectory:!0});o=o+j.length,await X(e,j)}else s.add(`listing directory does not exist, ${v}`)}))}const L=await F(),f=await R(),$=e==="zillow"?L:f;r.stop("Listings data has been saved to:"),g.message(a.join($,e,"listings",a.basename(i)));const d=a.join($,e,"logs");if(await m(d)||await z(d,{recursive:!0}),s.get().length>0){const w=`${a.basename(i)}-listing-errors.txt`,u=a.join(d,w);await s.write(u,[...new Set(s.get())].join(`
5
- `)),g.error(`There were errors during processing, see ${a.resolve(u)}`)}return{numListings:o}},ee=[],N=process.env.DEBUG,Re=async(e,t,i)=>{const{daysOnZillow:s,timeoutMs:o}=i??{};if(await m(t))N&&g.warning(`${e} exists, skipping`);else{const r=await we({zipCode:e,daysOnZillow:s,mergePageResults:!0,timeoutMs:o});r?(N&&g.info(`writing ${t}`),await D(t,JSON.stringify(r)),ee.push(e)):N&&g.warning(`no results for ${e}`)}},te=async(e,t,i)=>{const{daysListed:s,timeoutMs:o,run:r=1,reruns:c=0,fetchListings:n=!1}=i??{},l=new B;await H({fetchListings:n});const p=[],L=p.length;await m(t)||await z(t,{recursive:!0});const f=K();f.start("Scraping Zillow search results");for(let w=1;w<=c+1;w++)l.add(`rerun ${w} of ${c}`),(w===1||L)&&await Promise.all((L?p:e).map(async u=>{const h=`${u}.json`,y=`${t}/${h}`;try{await Re(u,y,{daysOnZillow:s,timeoutMs:o})}catch(v){p.push(u);const{message:j}=S(v);l.add(j??`Error during fetch for ${u}, ${v}`)}}));f.stop("Zillow search results have been saved to:");const $=await F();g.message(a.join($,"zillow","results",a.basename(t)));const d=a.join($,"zillow","logs");if(await m(d)||await z(d,{recursive:!0}),l.get().filter(w=>!w.includes("rerun ")).length>c){const w=`${a.basename(t)}-results-errors-${r}.txt`,u=a.join(d,w);await l.write(u,[...new Set(l.get())].join(`
6
- `)),g.error(`There were errors during processing, see ${a.resolve(u)}`)}return{validZipCodes:ee}},ie=[],Oe=async(e,t,i)=>{const{daysListed:s,timeoutMs:o}=i??{};if(await m(t))console.log(`${e} exists, skipping`);else{const r=await de({zipCode:e,daysListed:s,timeoutMs:o});r?(console.log(`writing ${t}`),await D(t,JSON.stringify(r)),ie.push(e)):console.log(`no results for ${e}`)}},se=async(e,t,i)=>{const{daysListed:s,timeoutMs:o,run:r=1,reruns:c=0}=i??{},n=new B,l=[],p=l.length;await m(t)||await z(t,{recursive:!0});for(let $=1;$<=c+1;$++)n.add(`rerun ${$} of ${c}`),($===1||p)&&await Promise.all((p?l:e).map(async d=>{const w=`${d}.json`,u=`${t}/${w}`;try{await Oe(d,u,{daysListed:s,timeoutMs:o})}catch(h){l.push(d);const{message:y}=S(h);n.add(y??`Error during fetch for ${d}, ${h}`)}}));const L=await R(),f=a.join(L,"redfin","logs");if(await m(f)||await z(f,{recursive:!0}),n.get().filter($=>!$.includes("rerun ")).length>c){const $=`${a.basename(t)}-results-errors-${r}.txt`,d=a.join(f,$);await n.write(d,[...new Set(n.get())].join(`
1
+ import re from"minimist";import I from"dayjs";import a from"path";import A from"axios";import{mkdir as z,writeFile as D,readFile as E,unlink as G}from"fs/promises";import{checkForZillowBotFiltering as H,fetchHtmlFromRedfinListingUrl as ne,fetchHtmlFromZillowListingUrl as oe,scrapeDataFromRedfinListingHtml as le,scrapeDataFromZillowListingHtml as ce,getZillowListingResults as we,getRedfinListingResults as de,waitForSolvedZillowCaptcha as ue}from"@rent-scraper/api";import{ErrorLog as B,throwError as M,checkForFile as m,parseJsonFile as ge,parseError as S,readFilesInDirectory as Y,roundValue as T,chunkArray as me,parsePercentage as pe,compareArrays as _}from"@rent-scraper/utils";import{getZillowOutputPath as F,getRedfinOutputPath as R,getZillowDaysListed as fe,getRedfinDaysListed as $e,getRedfinZipCodes as he,getZillowZipCodes as ye}from"@rent-scraper/api/config";import{spinner as K,log as g,progress as Le,intro as ve,confirm as je,isCancel as ze,cancel as Me,outro as Ze}from"@clack/prompts";import{setTimeout as V}from"node:timers/promises";import{checkBrowserServer as be}from"@rent-scraper/utils/config";import xe from"picocolors";const P=process.env.DEBUG,k=async(e,t,i,s)=>{const{timeoutMs:o}=s??{};if(await m(i))P&&g.warn(`${i} exists, skipping`);else try{P&&g.info(`writing ${i}`);const r=e==="redfin"?await ne(t):await oe(t,{timeoutMs:o});await D(`${i}`,r)}catch(r){M(`error fetching html for ${t}`,r)}},De=async(e,{timeoutMs:t})=>{if(await m(e)){const i=JSON.parse(await E(e,"utf8"))||{},{hdpUrl:s}=i||{};if(s){const o=`https://www.zillow.com${s}`,r=e.replace(".json",".html");await m(r)?P&&g.warn(`file already exists, ${r}`):await k("zillow",o,r,{timeoutMs:t})}else M(`file is empty, ${e}`)}else P&&g.warn(`file does not exist at this path, ${e}, skipping`)},Q=async(e,t)=>{const{timeoutMs:i}=t??{};await Promise.all(e.map(async s=>await De(s,{timeoutMs:i})))},Ce=(e,t="zillow")=>{if(t==="zillow"){const{zpid:i,detailUrl:s}=e||{};return{id:i,url:s}}else if(t==="redfin"){const{propertyId:i,url:s}=e?.homeData||{};return{id:i,url:`https://www.redfin.com${s}`}}},q=async(e,t,i,s=i,o)=>{const{timeoutMs:r,run:l=1,reruns:n=0}=o??{},c=new B;i||M("inputDirectory is required"),e==="zillow"&&await H();const p=[],L=p.length,f=K();f.start("Downloading listings html files");for(let h=1;h<=n+1;h++)c.add(`rerun ${h} of ${n}`),(h===1||L)&&await Promise.all((L?p:t).map(async y=>{const v=`${i}/${y}.json`;if(await m(v))try{const{results:j}=await ge(v)||{};if(!j)c.add(`empty file, ${v}`);else{const b=s?`${s}/${y}`:`${i}/${y}`;await m(b)||await z(b,{recursive:!0}),j?.length?await Promise.all(await j.map(async O=>{try{const{id:x,url:C}=Ce(O,e)??{};if(!C)return c.add(`url missing for ${x}`);const U=`${x}.html`,ae=`${b}/${U}`;await k(e,C,ae,{timeoutMs:r})}catch(x){const{message:C}=S(x);c.add(C??`error fetching listing for id, ${x}`)}})):c.add(`no results for file, ${v}`)}}catch(j){p.push(y);const{message:b}=S(j);c.add(b??`error reading json data, ${v}, ${j}`)}else P&&g.warn(`file does not exist, ${v}, skipping`)}));f.stop("Listings HTML files have been saved to:");const $=await F(),d=await R(),w=e==="zillow"?$:d;g.message(a.join(w,e,"listings",a.basename(i)));const u=a.join(w,e,"logs");if(await m(u)||await z(u,{recursive:!0}),c.get().filter(h=>!h.includes("rerun ")).length>n){const h=`${a.basename(i)}-html-errors-${l}.txt`,y=a.join(u,h);await c.write(y,[...new Set(c.get())].join(`
2
+ `)),P&&g.error(`There were errors during processing, see ${a.resolve(y)}`)}},Se=async(e,t,i,s)=>{const{timeoutMs:o,run:r=1,reruns:l=0}=s??{},n=new B;i||M("inputDirectory is required"),e==="zillow"&&await H();const c=[],p=c.length;for(let w=1;w<=l+1;w++)n.add(`rerun ${w} of ${l}`),(w===1||p)&&await Promise.all((p?c:t).map(async u=>{const h=`${i}/${u}`;if(await m(h))try{const y=await Y(h,{extension:".json",prependDirectory:!0});await Q(y,{timeoutMs:o})}catch(y){c.push(u);const{message:v}=S(y);n.add(v??`Error during fetch for ${u}, ${y}`)}else n.add(`listing directory does not exist, ${h}`)}));const L=await F(),f=await R(),$=e==="zillow"?L:f,d=a.join($,e,"logs");if(await m(d)||await z(d,{recursive:!0}),n.get().filter(w=>!w.includes("rerun ")).length>l){const w=`${a.basename(i)}-html-errors-${r}.txt`,u=a.join(d,w);await n.write(u,[...new Set(n.get())].join(`
3
+ `)),P&&g.error(`There were errors during processing, see ${a.resolve(u)}`)}},Pe=async(e,t,i=t)=>{const s=new B;if(e==="zillow"&&await H(),t||M("inputDirectory is required"),await m(i)||await z(i,{recursive:!0}),await m(t))try{const L=await Y(t,{extension:".json",prependDirectory:!0});await Q(L)}catch(L){const{message:f}=S(L);s.add(f??`Error during fetch for ${t}, ${L}`)}else s.add(`inputDirectory does not exist, ${t}`);const o=await F(),r=await R(),l=e==="zillow"?o:r,n=a.join(l,e,"logs");await m(n)||await z(n,{recursive:!0});const c=`${a.basename(t)}-html-errors.txt`,p=a.join(n,c);s.get().length>0&&(await s.write(p,[...new Set(s.get())].join(`
4
+ `)),P&&g.error(`There were errors during processing, see ${a.resolve(p)}`))},He=(e,t)=>e==="redfin"?`https://www.redfin.com/home/${t}`:e==="zillow"?`https://www.zillow.com/homedetails/${t}_zpid`:null,Be=async(e,t,i)=>{e==="zillow"&&await H(),i||M("outputDirectory is required"),await m(i)||await z(i,{recursive:!0}),await Promise.all(t.map(async s=>{const o=He(e,s),r=`${i}/${s}.html`;o&&await k(e,o,r)}))},Z=process.env.DEBUG,W=async e=>{if(await m(e)){const t=e.replace(".html",".json");if(await m(t))Z&&g.warning(`file already exists, ${t}`);else{Z&&g.message(`scraping data for ${e}`);const i=(await E(e)).toString(),s=await ce(i);if(!s?.priceHistory&&s?.bestMatchedUnit?.hdpUrl){Z&&g.warning(`rescraping ${e} - https://www.zillow.com${s?.bestMatchedUnit?.hdpUrl}`),await m(e)&&(Z&&g.warning(`deleting ${e}`),await G(e)),await m(t)&&(Z&&g.warning(`deleting ${t}`),await G(t));const o=`https://www.zillow.com${s?.bestMatchedUnit?.hdpUrl}`;try{await k("zillow",o,e),await W(e)}catch(r){const{message:l}=S(r);g.error(l)}}else s?(Z&&g.info(`writing ${t}`),await D(`${t}`,JSON.stringify(s))):M(`problem scraping data for ${e}`)}}},Fe=async e=>{if(await m(e)){const t=e.replace(".html",".json");if(await m(t))Z&&g.warning(`file already exists, ${t}`);else{Z&&g.message(`scraping data for ${e}`);const i=(await E(e)).toString(),s=le(i);s?(Z&&g.warning(`writing ${t}`),await D(`${t}`,JSON.stringify(s))):M(`problem scraping data for ${e}`)}}},X=async(e,t)=>{await Promise.all(t.map(async i=>e==="redfin"?await Fe(i):await W(i)))},J=async(e,t,i)=>{const s=new B;let o=0;i||M("inputDirectory is required");const r=Le({style:"heavy",max:100,size:50});r.start("Scraping listings data");const l=t.length,n=l<20?l+1:20,c=l>n?T(l/n,1):l,p=me(t,c);for(const[w,u]of p.entries()){const h=T((Number(w)+1)/n*100,1);r.advance(T(1/n*100,1),`Scraping listings (${pe(h)})`),await Promise.all(u.map(async y=>{Z&&g.message(`processing files for ${y}`);const v=`${i}/${y}`;if(await m(v)){const j=await Y(v,{extension:".html",prependDirectory:!0});o=o+j.length,await X(e,j)}else s.add(`listing directory does not exist, ${v}`)}))}const L=await F(),f=await R(),$=e==="zillow"?L:f;r.stop("Listings data has been saved to:"),g.message(a.join($,e,"listings",a.basename(i)));const d=a.join($,e,"logs");if(await m(d)||await z(d,{recursive:!0}),s.get().length>0){const w=`${a.basename(i)}-listing-errors.txt`,u=a.join(d,w);await s.write(u,[...new Set(s.get())].join(`
5
+ `)),g.error(`There were errors during processing, see ${a.resolve(u)}`)}return{numListings:o}},ee=[],N=process.env.DEBUG,Re=async(e,t,i)=>{const{daysOnZillow:s,timeoutMs:o}=i??{};if(await m(t))N&&g.warning(`${e} exists, skipping`);else{const r=await we({zipCode:e,daysOnZillow:s,mergePageResults:!0,timeoutMs:o});r?(N&&g.info(`writing ${t}`),await D(t,JSON.stringify(r)),ee.push(e)):N&&g.warning(`no results for ${e}`)}},te=async(e,t,i)=>{const{daysListed:s,timeoutMs:o,run:r=1,reruns:l=0,fetchListings:n=!1}=i??{},c=new B;await H({fetchListings:n});const p=[],L=p.length;await m(t)||await z(t,{recursive:!0});const f=K();f.start("Scraping Zillow search results");for(let w=1;w<=l+1;w++)c.add(`rerun ${w} of ${l}`),(w===1||L)&&await Promise.all((L?p:e).map(async u=>{const h=`${u}.json`,y=`${t}/${h}`;try{await Re(u,y,{daysOnZillow:s,timeoutMs:o})}catch(v){p.push(u);const{message:j}=S(v);c.add(j??`Error during fetch for ${u}, ${v}`)}}));f.stop("Zillow search results have been saved to:");const $=await F();g.message(a.join($,"zillow","results",a.basename(t)));const d=a.join($,"zillow","logs");if(await m(d)||await z(d,{recursive:!0}),c.get().filter(w=>!w.includes("rerun ")).length>l){const w=`${a.basename(t)}-results-errors-${r}.txt`,u=a.join(d,w);await c.write(u,[...new Set(c.get())].join(`
6
+ `)),g.error(`There were errors during processing, see ${a.resolve(u)}`)}return{validZipCodes:ee}},ie=[],Oe=async(e,t,i)=>{const{daysListed:s,timeoutMs:o}=i??{};if(await m(t))console.log(`${e} exists, skipping`);else{const r=await de({zipCode:e,daysListed:s,timeoutMs:o});r?(console.log(`writing ${t}`),await D(t,JSON.stringify(r)),ie.push(e)):console.log(`no results for ${e}`)}},se=async(e,t,i)=>{const{daysListed:s,timeoutMs:o,run:r=1,reruns:l=0}=i??{},n=new B,c=[],p=c.length;await m(t)||await z(t,{recursive:!0});for(let $=1;$<=l+1;$++)n.add(`rerun ${$} of ${l}`),($===1||p)&&await Promise.all((p?c:e).map(async d=>{const w=`${d}.json`,u=`${t}/${w}`;try{await Oe(d,u,{daysListed:s,timeoutMs:o})}catch(h){c.push(d);const{message:y}=S(h);n.add(y??`Error during fetch for ${d}, ${h}`)}}));const L=await R(),f=a.join(L,"redfin","logs");if(await m(f)||await z(f,{recursive:!0}),n.get().filter($=>!$.includes("rerun ")).length>l){const $=`${a.basename(t)}-results-errors-${r}.txt`,d=a.join(f,$);await n.write(d,[...new Set(n.get())].join(`
7
7
  `)),console.log(`\x1B[41m
8
- %s\x1B[0m`,`There were errors during processing, see ${a.resolve(d)}`)}return{validZipCodes:ie}},Ue=async()=>{await A.post("http://localhost:8082/browser/close")},ke=async()=>{await A.post("http://localhost:8082/server/shutdown")},Ee=async(e,t,i,{daysListed:s,timeoutMs:o,run:r,reruns:c})=>{try{await H()}catch(p){const{message:L}=S(p);g.error(L);const f=await je({message:"You need to complete a captcha in your browser. Press Return to launch your browser and continue.",active:"OK",inactive:"Cancel"});if(ze(f)||!f)return Me("Create config canceled. Please try again."),process.exit(1);await V(1e3),Ze("Browser Launching..."),await V(1e3),await ue()}await Ue();const{validZipCodes:n}=await te(e,t,{daysListed:s,timeoutMs:o,run:r,reruns:c});await q("zillow",n,t,i,{timeoutMs:o,run:r,reruns:c});const{numListings:l}=await J("zillow",n,i);return{numListings:l,validZipCodes:n}},Ye=async(e,t,i,{daysListed:s,timeoutMs:o,run:r,reruns:c})=>{const{validZipCodes:n}=await se(e,t,{daysListed:s,timeoutMs:o,run:r,reruns:c});await q("redfin",n,t,i,{timeoutMs:o,run:r,reruns:c});const{numListings:l}=await J("redfin",n,i);return{numListings:l,validZipCodes:n}};async function Te(){ve(xe.inverse(" scrape listings "));const e=re(process.argv.slice(2)),t=e.source??"zillow";t==="zillow"&&(await be()||M("Please launch the browser server before scraping."));const i=await F(),s=await R(),o=t==="zillow"?i:s,r=e["days-listed"]??(t==="zillow"?await fe():await $e())??1,c=e.runs??1,n=e.reruns??0,l=e["timeout-ms"]??6e4,p=e["results-directory"]??a.join(o,t,"results",`${I().format("YYYY-MM-DD-HHmm")}`),L=e["listings-directory"]??a.join(o,t,"listings",`${I().format("YYYY-MM-DD-HHmm")}`),f=e["logs-directory"]??a.join(o,t,"logs");if(t==="redfin"){const $=await he();for(let d=1;d<=c;d++){if(!$)return M("zip codes required, please run the createConfig script");const w=$,{numListings:u,validZipCodes:h}=await Ye(w,p,L,{daysListed:r,timeoutMs:l,run:d,reruns:n});if(d===c){const y=Object.entries({numListings:u}).map(([C,U])=>`${C}: ${U}`).join(`
8
+ %s\x1B[0m`,`There were errors during processing, see ${a.resolve(d)}`)}return{validZipCodes:ie}},Ue=async()=>{await A.post("http://localhost:8082/browser/close")},ke=async()=>{await A.post("http://localhost:8082/server/shutdown")},Ee=async(e,t,i,{daysListed:s,timeoutMs:o,run:r,reruns:l})=>{try{await H()}catch(p){const{message:L}=S(p);g.error(L);const f=await je({message:"You need to complete a captcha in your browser. Press Return to launch your browser and continue.",active:"OK",inactive:"Cancel"});if(ze(f)||!f)return Me("Create config canceled. Please try again."),process.exit(1);await V(1e3),Ze("Browser Launching..."),await V(1e3),await ue()}await Ue();const{validZipCodes:n}=await te(e,t,{daysListed:s,timeoutMs:o,run:r,reruns:l});await q("zillow",n,t,i,{timeoutMs:o,run:r,reruns:l});const{numListings:c}=await J("zillow",n,i);return{numListings:c,validZipCodes:n}},Ye=async(e,t,i,{daysListed:s,timeoutMs:o,run:r,reruns:l})=>{const{validZipCodes:n}=await se(e,t,{daysListed:s,timeoutMs:o,run:r,reruns:l});await q("redfin",n,t,i,{timeoutMs:o,run:r,reruns:l});const{numListings:c}=await J("redfin",n,i);return{numListings:c,validZipCodes:n}};async function Te(){ve(xe.inverse(" scrape listings "));const e=re(process.argv.slice(2)),t=e.source??"zillow";t==="zillow"&&(await be()||M("Please launch the browser server before scraping."));const i=await F(),s=await R(),o=t==="zillow"?i:s,r=e["days-listed"]??(t==="zillow"?await fe():await $e())??1,l=e.runs??1,n=e.reruns??0,c=e["timeout-ms"]??6e4,p=e["results-directory"]??a.join(o,t,"results",`${I().format("YYYY-MM-DD-HHmm")}`),L=e["listings-directory"]??a.join(o,t,"listings",`${I().format("YYYY-MM-DD-HHmm")}`),f=e["logs-directory"]??a.join(o,t,"logs");if(t==="redfin"){const $=await he();for(let d=1;d<=l;d++){if(!$)return M("zip codes required, please run the createConfig script");const w=$,{numListings:u,validZipCodes:h}=await Ye(w,p,L,{daysListed:r,timeoutMs:c,run:d,reruns:n});if(d===l){const y=Object.entries({numListings:u}).map(([C,U])=>`${C}: ${U}`).join(`
9
9
  `),v=_(w,h).join(`
10
- `);await m(f)||await z(f,{recursive:!0});const j=`${a.basename(p)}-scraping-results.txt`,b=a.join(f,j),O=`${a.basename(p)}-invalid-zipcodes.txt`,x=a.join(f,O);await D(b,y),await D(x,v)}}}else if(t==="zillow"){const $=await ye();for(let d=1;d<=c;d++){if(!$)return M("zip codes required, please run the createConfig script");const w=$,{numListings:u,validZipCodes:h}=await Ee(w,p,L,{daysListed:r,timeoutMs:l,run:d,reruns:n});if(d===c){const y=Object.entries({numListings:u}).map(([C,U])=>`${C}: ${U}`).join(`
10
+ `);await m(f)||await z(f,{recursive:!0});const j=`${a.basename(p)}-scraping-results.txt`,b=a.join(f,j),O=`${a.basename(p)}-invalid-zipcodes.txt`,x=a.join(f,O);await D(b,y),await D(x,v)}}}else if(t==="zillow"){const $=await ye();for(let d=1;d<=l;d++){if(!$)return M("zip codes required, please run the createConfig script");const w=$,{numListings:u,validZipCodes:h}=await Ee(w,p,L,{daysListed:r,timeoutMs:c,run:d,reruns:n});if(d===l){const y=Object.entries({numListings:u}).map(([C,U])=>`${C}: ${U}`).join(`
11
11
  `),v=_(w,h).join(`
12
12
  `);await m(f)||await z(f,{recursive:!0});const j=`${a.basename(p)}-scraping-results.txt`,b=a.join(f,j),O=`${a.basename(p)}-invalid-zipcodes.txt`,x=a.join(f,O);await D(b,y),await D(x,v)}}}g.success("Scraping complete!"),t==="zillow"&&await ke()}export{J as a,q as b,Se as c,Pe as d,Be as e,k as f,se as g,te as h,Te as r,X as s};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rent-scraper/scrape-listings",
3
- "version": "1.0.11",
3
+ "version": "1.0.12",
4
4
  "type": "module",
5
5
  "exports": {
6
6
  ".": {
@@ -30,8 +30,8 @@
30
30
  "dayjs": "^1.11.13",
31
31
  "minimist": "^1.2.8",
32
32
  "picocolors": "^1.1.1",
33
- "@rent-scraper/api": "1.0.11",
34
- "@rent-scraper/utils": "1.0.11"
33
+ "@rent-scraper/api": "1.0.12",
34
+ "@rent-scraper/utils": "1.0.12"
35
35
  },
36
36
  "devDependencies": {
37
37
  "@types/minimist": "^1.2.5",