@rent-scraper/browser-server 1.0.28 → 1.0.30

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions exactly as they appear in their respective public registries.
@@ -1,2 +1,2 @@
1
1
  #!/usr/bin/env node
2
- import{r}from"../shared/browser-server.C-q4WCe-.mjs";import"express";import"puppeteer";import"fs/promises";import"child_process";import"os";import"path";import"@rent-scraper/utils/config";import"@rent-scraper/utils";r();
2
+ import{h as r}from"../shared/browser-server.Be38x_p5.mjs";import"express";import"puppeteer";import"fs/promises";import"child_process";import"os";import"path";import"@rent-scraper/utils/config";import"@rent-scraper/utils";r();
package/dist/index.d.mts CHANGED
@@ -13,6 +13,9 @@ declare const closeBrowser: () => Promise<void>;
13
13
  declare const launchBrowser: (source?: ListingsSource) => Promise<{
14
14
  status: string;
15
15
  }>;
16
+ declare const getBrowserStatus: () => Promise<{
17
+ status: string;
18
+ }>;
16
19
  declare const openBrowser: (url: string) => Promise<{
17
20
  status: string;
18
21
  }>;
@@ -22,14 +25,11 @@ declare const shutdownBrowser: () => Promise<{
22
25
 
23
26
  declare function runConfirmBrowserLaunch(): Promise<undefined>;
24
27
 
25
- declare const getZillowCookie: (attempt?: number, options?: {
26
- onCaptcha?: () => Promise<void>;
27
- }) => Promise<string | undefined>;
28
+ declare const getZillowCookie: (attempt?: number) => Promise<string | undefined>;
28
29
  declare const getRedfinCookie: (attempt?: number) => Promise<string | undefined>;
29
30
  declare const saveRedfinCookie: () => Promise<void>;
30
- declare const saveZillowCookie: (options?: {
31
- onCaptcha?: () => Promise<void>;
32
- }) => Promise<void>;
31
+ declare const refreshZillowCookie: () => Promise<string | undefined>;
32
+ declare const saveZillowCookie: () => Promise<void>;
33
33
 
34
34
  // This extracts the core definitions from express to prevent a circular dependency between express and serve-static
35
35
 
@@ -1254,4 +1254,4 @@ interface Express extends Application {
1254
1254
 
1255
1255
  declare function runBrowserServer(source?: ListingsSource): Express;
1256
1256
 
1257
- export { closeBrowser, getBrowser, getRedfinCookie, getZillowCookie, launchBrowser, openBrowser, runBrowserServer, runConfirmBrowserLaunch, saveRedfinCookie, saveZillowCookie, shutdownBrowser, waitForBrowser };
1257
+ export { closeBrowser, getBrowser, getBrowserStatus, getRedfinCookie, getZillowCookie, launchBrowser, openBrowser, refreshZillowCookie, runBrowserServer, runConfirmBrowserLaunch, saveRedfinCookie, saveZillowCookie, shutdownBrowser, waitForBrowser };
package/dist/index.mjs CHANGED
@@ -1 +1 @@
1
- export{c as closeBrowser,g as getBrowser,b as getRedfinCookie,a as getZillowCookie,l as launchBrowser,o as openBrowser,r as runBrowserServer,d as saveRedfinCookie,e as saveZillowCookie,s as shutdownBrowser,w as waitForBrowser}from"./shared/browser-server.C-q4WCe-.mjs";import{confirm as r,isCancel as a,cancel as i,outro as s}from"@clack/prompts";import{setTimeout as o}from"node:timers/promises";import"express";import"puppeteer";import"fs/promises";import"child_process";import"os";import"path";import"@rent-scraper/utils/config";import"@rent-scraper/utils";async function t(){const e=await r({message:"We need to launch your browser to continue",active:"OK",inactive:"Cancel"});if(a(e)||!e)return i("Create config canceled. Please try again."),process.exit(1);await o(1e3),s("Browser Launching..."),await o(1e3)}export{t as runConfirmBrowserLaunch};
1
+ export{c as closeBrowser,g as getBrowser,a as getBrowserStatus,d as getRedfinCookie,b as getZillowCookie,l as launchBrowser,o as openBrowser,r as refreshZillowCookie,h as runBrowserServer,e as saveRedfinCookie,f as saveZillowCookie,s as shutdownBrowser,w as waitForBrowser}from"./shared/browser-server.Be38x_p5.mjs";import{confirm as r,isCancel as a,cancel as s,outro as i}from"@clack/prompts";import{setTimeout as o}from"node:timers/promises";import"express";import"puppeteer";import"fs/promises";import"child_process";import"os";import"path";import"@rent-scraper/utils/config";import"@rent-scraper/utils";async function t(){const e=await r({message:"We need to launch your browser to continue",active:"OK",inactive:"Cancel"});if(a(e)||!e)return s("Create config canceled. Please try again."),process.exit(1);await o(1e3),i("Browser Launching..."),await o(1e3)}export{t as runConfirmBrowserLaunch};
@@ -0,0 +1 @@
1
+ import j from"express";import E from"puppeteer";import{mkdtemp as k,access as ee}from"fs/promises";import{exec as te}from"child_process";import{tmpdir as oe}from"os";import ae from"path";import{checkForConfigFile as ne,waitForConfigFile as se,getValueFromConfigFile as ie,updateConfigFile as M}from"@rent-scraper/utils/config";import{parseError as L}from"@rent-scraper/utils";const H={darwin:["/Applications/Brave Browser.app/Contents/MacOS/Brave Browser","/Applications/Google Chrome.app/Contents/MacOS/Google Chrome","/Applications/Chromium.app/Contents/MacOS/Chromium","/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge"],linux:["/usr/bin/brave-browser","/usr/bin/google-chrome","/usr/bin/google-chrome-stable","/usr/bin/chromium-browser","/usr/bin/chromium","/snap/bin/chromium"],win32:["C:\\Program Files\\BraveSoftware\\Brave-Browser\\Application\\brave.exe","C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe","C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe"]},V=async()=>{const t=H[process.platform]??[];for(const e of t)try{return await ee(e),e}catch{}return null},W="http://127.0.0.1:9222/json/version",re=async(t=15e3)=>{const e=Date.now();for(;Date.now()-e<t;)try{await E.connect({browserURL:W});return}catch{await new Promise(c=>setTimeout(c,500))}throw new Error("timed out waiting for browser")},$=async()=>{try{return await E.connect({browserURL:W})}catch{return null}},S=async()=>{const t=await(await $())?.pages();t&&await Promise.all(t.map(e=>e.close()))},N=async(t="zillow")=>{if(await $())return{status:"already launched"};await ne(t)||await se(t);const e=await k(ae.join(oe(),"chrome-remote-")),c=await ie(t,"browser"),o=c?(H[process.platform]??[]).find(s=>s.toLowerCase().includes(c.toLowerCase()))??await V():await V();if(console.log(`launching browser: ${o??"bundled chromium"}`),o)te(`"${o}" --remote-debugging-port=9222 --no-first-run --no-default-browser-check --user-data-dir="${e}"`,s=>{s&&console.error(`Error: 
${s.message}`)});else{const s=["--remote-debugging-port=9222","--no-first-run","--no-default-browser-check",`--user-data-dir=${e}`,...process.platform==="linux"?["--no-sandbox","--disable-setuid-sandbox"]:[]];await E.launch({headless:!1,args:s,dumpio:!1})}return{status:"launched"}},J=async()=>{const t=await $();if(t){const e=(await t.pages())?.[0];return e?(await e.title()).includes("denied")?{status:"captcha"}:{status:"navigated"}:{status:"navigated"}}else return{status:"not connected"}},_=async t=>{const e=await $();if(e){const c=t,o=(await e.pages())?.[0]??await e.newPage();return(await o.title()).includes("denied")?{status:"captcha"}:(await o.goto(c,{waitUntil:"load"}),(await o.title()).includes("denied")?{status:"captcha"}:{status:"navigated"})}else return{status:"not connected"}},x=async()=>{const t=await $();if(t){try{await(await t.target().createCDPSession()).send("Browser.close")}catch{await t.disconnect()}return{status:"closed"}}else return{status:"not connected"}},G="http://127.0.0.1:9222/json/version",T=async(t=0)=>{const e=await E.connect({browserURL:G});t===0&&await _("https://www.zillow.com/homes/for_rent/");const c=await e.cookies(),[o]=c.filter(n=>n.name==="_pxvid"),[s]=c.filter(n=>n.name==="_px3");return o?(await S(),[o,s].filter(Boolean).map(n=>`${n.name}=${n.value}`).join("; ")):(console.log("refetching zillow cookie"),await new Promise(n=>setTimeout(n,2e3)),await T(t+1))},I=async(t=0)=>{const e=await E.connect({browserURL:G});t===0&&(await _("https://www.redfin.com"),await new Promise(n=>setTimeout(n,3e3)));const c=await e.pages(),o=c?.[0]?await c[0].title():"",s=(await e.cookies()).filter(n=>n.domain.includes("redfin.com"));return s.some(n=>n.name==="aws-waf-token")&&o.includes("Redfin")?(await S(),s.map(n=>`${n.name}=${n.value}`).join("; ")):(console.log("refetching redfin cookie"),await new Promise(n=>setTimeout(n,2e3)),await I(t+1))},U=async()=>{try{const t=await I();t&&await 
M("redfin",{redfinCookie:t})}catch(t){const{status:e,message:c}=L(t);console.error(e,c)}},Q=async()=>{try{const t=await(await E.connect({browserURL:G})).cookies(),[e]=t.filter(o=>o.name==="_pxvid"),[c]=t.filter(o=>o.name==="_px3");if(e){const o=[e,c].filter(Boolean).map(s=>`${s.name}=${s.value}`).join("; ");return await M("zillow",{zillowCookie:o}),o}}catch(t){const{status:e,message:c}=L(t);console.error(e,c)}},Z=async()=>{try{const t=await T();t&&await M("zillow",{zillowCookie:t})}catch(t){const{status:e,message:c}=L(t);console.error(e,c)}};function ce(t){return t&&t.__esModule&&Object.prototype.hasOwnProperty.call(t,"default")?t.default:t}var D,X;function le(){if(X)return D;X=1;function t(o,s){var n=o;s.slice(0,-1).forEach(function(f){n=n[f]||{}});var p=s[s.length-1];return p in n}function e(o){return typeof o=="number"||/^0x[0-9a-f]+$/i.test(o)?!0:/^[-+]?(?:\d+(?:\.\d*)?|\.\d+)(e[-+]?\d+)?$/.test(o)}function c(o,s){return s==="constructor"&&typeof o[s]=="function"||s==="__proto__"}return D=function(o,s){s||(s={});var n={bools:{},strings:{},unknownFn:null};typeof s.unknown=="function"&&(n.unknownFn=s.unknown),typeof s.boolean=="boolean"&&s.boolean?n.allBools=!0:[].concat(s.boolean).filter(Boolean).forEach(function(r){n.bools[r]=!0});var p={};function f(r){return p[r].some(function(d){return n.bools[d]})}Object.keys(s.alias||{}).forEach(function(r){p[r]=[].concat(s.alias[r]),p[r].forEach(function(d){p[d]=[r].concat(p[r].filter(function(C){return d!==C}))})}),[].concat(s.string).filter(Boolean).forEach(function(r){n.strings[r]=!0,p[r]&&[].concat(p[r]).forEach(function(d){n.strings[d]=!0})});var i=s.default||{},a={_:[]};function A(r,d){return n.allBools&&/^--[^=]+$/.test(d)||n.strings[r]||n.bools[r]||p[r]}function P(r,d,C){for(var u=r,F=0;F<d.length-1;F++){var y=d[F];if(c(u,y))return;u[y]===void 0&&(u[y]={}),(u[y]===Object.prototype||u[y]===Number.prototype||u[y]===String.prototype)&&(u[y]={}),u[y]===Array.prototype&&(u[y]=[]),u=u[y]}var 
B=d[d.length-1];c(u,B)||((u===Object.prototype||u===Number.prototype||u===String.prototype)&&(u={}),u===Array.prototype&&(u=[]),u[B]===void 0||n.bools[B]||typeof u[B]=="boolean"?u[B]=C:Array.isArray(u[B])?u[B].push(C):u[B]=[u[B],C])}function h(r,d,C){if(!(C&&n.unknownFn&&!A(r,C)&&n.unknownFn(C)===!1)){var u=!n.strings[r]&&e(d)?Number(d):d;P(a,r.split("."),u),(p[r]||[]).forEach(function(F){P(a,F.split("."),u)})}}Object.keys(n.bools).forEach(function(r){h(r,i[r]===void 0?!1:i[r])});var z=[];o.indexOf("--")!==-1&&(z=o.slice(o.indexOf("--")+1),o=o.slice(0,o.indexOf("--")));for(var m=0;m<o.length;m++){var l=o[m],w,v;if(/^--.+=/.test(l)){var q=l.match(/^--([^=]+)=([\s\S]*)$/);w=q[1];var R=q[2];n.bools[w]&&(R=R!=="false"),h(w,R,l)}else if(/^--no-.+/.test(l))w=l.match(/^--no-(.+)/)[1],h(w,!1,l);else if(/^--.+/.test(l))w=l.match(/^--(.+)/)[1],v=o[m+1],v!==void 0&&!/^(-|--)[^-]/.test(v)&&!n.bools[w]&&!n.allBools&&(!p[w]||!f(w))?(h(w,v,l),m+=1):/^(true|false)$/.test(v)?(h(w,v==="true",l),m+=1):h(w,n.strings[w]?"":!0,l);else if(/^-[^-]+/.test(l)){for(var g=l.slice(1,-1).split(""),O=!1,b=0;b<g.length;b++){if(v=l.slice(b+2),v==="-"){h(g[b],v,l);continue}if(/[A-Za-z]/.test(g[b])&&v[0]==="="){h(g[b],v.slice(1),l),O=!0;break}if(/[A-Za-z]/.test(g[b])&&/-?\d+(\.\d*)?(e-?\d+)?$/.test(v)){h(g[b],v,l),O=!0;break}if(g[b+1]&&g[b+1].match(/\W/)){h(g[b],l.slice(b+2),l),O=!0;break}else h(g[b],n.strings[g[b]]?"":!0,l)}w=l.slice(-1)[0],!O&&w!=="-"&&(o[m+1]&&!/^(-|--)[^-]/.test(o[m+1])&&!n.bools[w]&&(!p[w]||!f(w))?(h(w,o[m+1],l),m+=1):o[m+1]&&/^(true|false)$/.test(o[m+1])?(h(w,o[m+1]==="true",l),m+=1):h(w,n.strings[w]?"":!0,l))}else if((!n.unknownFn||n.unknownFn(l)!==!1)&&a._.push(n.strings._||!e(l)?l:Number(l)),s.stopEarly){a._.push.apply(a._,o.slice(m+1));break}}return Object.keys(i).forEach(function(r){t(a,r.split("."))||(P(a,r.split("."),i[r]),(p[r]||[]).forEach(function(d){P(a,d.split("."),i[r])}))}),s["--"]?a["--"]=z.slice():z.forEach(function(r){a._.push(r)}),a},D}var ue=le();const 
we=ce(ue),Y=1e4,K=async t=>{await t.keyboard.press("Escape"),await new Promise(e=>setTimeout(e,500)),await t.keyboard.press("Tab"),await new Promise(e=>setTimeout(e,200)),await t.keyboard.down("Space"),await new Promise(e=>setTimeout(e,Y)),await t.keyboard.up("Space")},fe=async()=>{try{const t=await $();if(!t)return!1;const e=(await t.pages())?.[0];if(!e)return!1;if(!(await e.title()).includes("denied"))return!0;await e.deleteCookie({name:"pxcts",domain:".zillow.com"});for(let c=0;c<5;c++){c>0&&await new Promise(o=>setTimeout(o,5e3));try{const o=await e.$("#px-captcha");if(o){const s=await o.boundingBox();s?(await e.mouse.move(s.x+s.width/2,s.y+s.height/2),await e.mouse.down(),await new Promise(n=>setTimeout(n,Y)),await e.mouse.up()):await K(e)}else await K(e)}catch{await K(e)}if(await new Promise(o=>setTimeout(o,1e3)),!(await e.title()).includes("denied"))return!0}return!1}catch{return!1}};function de(t="zillow"){const e=j(),c=process.env.HOST??"127.0.0.1",o=process.env.PORT??8082;e.use(j.json());const s=we(process.argv.slice(2)).debug,n=e.listen(Number(o),async()=>{await N(t);const f=setInterval(async()=>{(await $())?.connected&&(clearInterval(f),s&&console.log("Browser listening at 127.0.0.1:9222"),t==="zillow"?await Z():t==="redfin"&&(await U(),await x()))},1e3);s&&console.log(`Server listening at ${c}:${o}`)}),p=async()=>(await x(),n.close(f=>{s&&console.log("server closed"),process.exit(f?1:0)}),{status:"shutdown"});return e.get("/server",(f,i)=>{try{i.send({running:!0})}catch(a){i.send(a)}}),e.post("/browser/launch",async(f,i)=>{try{const a=await N();i.send({browser:a})}catch(a){i.send(a)}}),e.post("/browser/status",async(f,i)=>{try{const a=await J();i.send({browser:a})}catch(a){i.send(a)}}),e.post("/browser/open",async(f,i)=>{try{const{url:a}=f?.body??{},A=await _(a);i.send({browser:A})}catch(a){i.send(a)}}),e.get("/cookie",async(f,i)=>{try{const a=await T();i.send({cookie:a})}catch(a){i.send(a)}}),e.post("/cookie/refresh",async(f,i)=>{try{const a=await 
Q();i.send({cookie:a})}catch(a){i.send(a)}}),e.post("/cookie/save",async(f,i)=>{try{const a=await Z();i.send({cookie:a})}catch(a){i.send(a)}}),e.post("/cookie/redfin/save",async(f,i)=>{try{const a=await U();i.send({cookie:a})}catch(a){i.send(a)}}),e.post("/captcha/solve",async(f,i)=>{try{const a=await fe();i.send({solved:a})}catch(a){i.send(a)}}),e.post("/browser/close",async(f,i)=>{try{const a=await S();i.send({browser:a})}catch(a){i.send(a)}}),e.post("/server/shutdown",(f,i)=>{try{const a=p();i.send({server:a})}catch(a){i.send(a)}}),process.on("SIGINT",p),process.on("SIGTERM",p),e}export{J as a,T as b,S as c,I as d,U as e,Z as f,$ as g,de as h,N as l,_ as o,Q as r,x as s,re as w};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rent-scraper/browser-server",
3
- "version": "1.0.28",
3
+ "version": "1.0.30",
4
4
  "type": "module",
5
5
  "exports": {
6
6
  ".": {
@@ -26,8 +26,8 @@
26
26
  "express": "^4.21.2",
27
27
  "picocolors": "^1.1.1",
28
28
  "puppeteer": "^24.11.2",
29
- "@rent-scraper/utils": "1.0.28",
30
- "@rent-scraper/api": "1.0.28"
29
+ "@rent-scraper/utils": "1.0.30",
30
+ "@rent-scraper/api": "1.0.30"
31
31
  },
32
32
  "devDependencies": {
33
33
  "@types/express": "^5.0.3",
package/src/browser.ts CHANGED
@@ -3,7 +3,7 @@ import { access, mkdtemp } from 'fs/promises'
3
3
  import { exec } from 'child_process'
4
4
  import { tmpdir } from 'os'
5
5
  import path from 'path'
6
- import { checkForConfigFile, waitForConfigFile } from '@rent-scraper/utils/config'
6
+ import { checkForConfigFile, waitForConfigFile, getValueFromConfigFile } from '@rent-scraper/utils/config'
7
7
  import type { ListingsSource } from '@rent-scraper/api'
8
8
 
9
9
  const nativeBrowserPaths: Partial<Record<NodeJS.Platform, string[]>> = {
@@ -82,7 +82,10 @@ export const launchBrowser = async (source = 'zillow' as ListingsSource) => {
82
82
  await waitForConfigFile(source)
83
83
  }
84
84
  const userDataDir = await mkdtemp(path.join(tmpdir(), 'chrome-remote-'))
85
- const nativePath = await findNativeBrowser()
85
+ const browserPref = await getValueFromConfigFile(source, 'browser') as string | null
86
+ const nativePath = browserPref
87
+ ? (nativeBrowserPaths[process.platform] ?? []).find(p => p.toLowerCase().includes(browserPref.toLowerCase())) ?? await findNativeBrowser()
88
+ : await findNativeBrowser()
86
89
  console.log(`launching browser: ${nativePath ?? 'bundled chromium'}`)
87
90
  if (nativePath) {
88
91
  exec(`"${nativePath}" --remote-debugging-port=9222 --no-first-run --no-default-browser-check --user-data-dir="${userDataDir}"`, (error) => {
@@ -101,6 +104,22 @@ export const launchBrowser = async (source = 'zillow' as ListingsSource) => {
101
104
  return { status: 'launched' }
102
105
  }
103
106
 
107
+ export const getBrowserStatus = async () => {
108
+ const browser = await getBrowser()
109
+ if (browser) {
110
+ const pages = await browser.pages()
111
+ const page = pages?.[0]
112
+ if (!page) return { status: 'navigated' }
113
+ const pageTitle = await page.title()
114
+ if (pageTitle.includes('denied')) {
115
+ return { status: 'captcha' }
116
+ }
117
+ return { status: 'navigated' }
118
+ } else {
119
+ return { status: 'not connected' }
120
+ }
121
+ }
122
+
104
123
  export const openBrowser = async (url: string) => {
105
124
  const browser = await getBrowser()
106
125
  if (browser) {
@@ -109,12 +128,18 @@ export const openBrowser = async (url: string) => {
109
128
  const page = pages?.[0] ?? await browser.newPage()
110
129
  const pageTitle = await page.title()
111
130
  // do not change page if captcha is showing
112
- if (!pageTitle.includes('denied')) {
113
- await page.goto(pageUrl, {
114
- waitUntil: 'load',
115
- })
131
+ if (pageTitle.includes('denied')) {
132
+ return { status: 'captcha' }
133
+ }
134
+ await page.goto(pageUrl, {
135
+ waitUntil: 'load',
136
+ })
137
+ // check title after navigation — zillow may have shown a captcha as a result of the request
138
+ const titleAfterNav = await page.title()
139
+ if (titleAfterNav.includes('denied')) {
140
+ return { status: 'captcha' }
116
141
  }
117
- return { status: 'opened' }
142
+ return { status: 'navigated' }
118
143
  } else {
119
144
  return { status: 'not connected' }
120
145
  }
@@ -123,7 +148,15 @@ export const openBrowser = async (url: string) => {
123
148
  export const shutdownBrowser = async () => {
124
149
  const browser = await getBrowser()
125
150
  if (browser) {
126
- await browser.close()
151
+ try {
152
+ // send Browser.close via CDP to actually terminate the browser process
153
+ // (browser.close() on a connected browser only disconnects the session)
154
+ const target = browser.target()
155
+ const session = await target.createCDPSession()
156
+ await session.send('Browser.close')
157
+ } catch {
158
+ await browser.disconnect()
159
+ }
127
160
  return { status: 'closed' }
128
161
  } else {
129
162
  return { status: 'not connected' }
package/src/cookie.ts CHANGED
@@ -5,29 +5,21 @@ import { parseError } from '@rent-scraper/utils'
5
5
 
6
6
  const wsChromeEndpointurl = 'http://127.0.0.1:9222/json/version'
7
7
 
8
- export const getZillowCookie = async (attempt = 0, options?: { onCaptcha?: () => Promise<void> }): Promise<string | undefined> => {
8
+ export const getZillowCookie = async (attempt = 0): Promise<string | undefined> => {
9
9
  const browser = await puppeteer.connect({
10
10
  browserURL: wsChromeEndpointurl,
11
11
  })
12
12
  if (attempt === 0) {
13
13
  await openBrowser('https://www.zillow.com/homes/for_rent/')
14
14
  }
15
- const pages = await browser.pages()
16
- const title = pages?.[0] ? await pages[0].title() : ''
17
- if (title.includes('Access to this page has been denied')) {
18
- if (options?.onCaptcha) {
19
- await options.onCaptcha()
20
- } else {
21
- await new Promise(resolve => setTimeout(resolve, 2000))
22
- }
23
- return await getZillowCookie(attempt + 1, options)
24
- }
15
+
25
16
  const allCookies = await browser.cookies()
26
- const zillowCookies = allCookies.filter(c => c.domain?.includes('zillow.com'))
27
- const hasPxvid = zillowCookies.some(c => c.name === '_pxvid')
28
- if (hasPxvid) {
17
+ const [pxvid] = (allCookies).filter(cookie => cookie.name === '_pxvid')
18
+ const [px3] = (allCookies).filter(cookie => cookie.name === '_px3')
19
+
20
+ if (pxvid) {
29
21
  await closeBrowser()
30
- return zillowCookies.map(c => `${c.name}=${c.value}`).join('; ')
22
+ return [pxvid, px3].filter(Boolean).map(c => `${c.name}=${c.value}`).join('; ')
31
23
  } else {
32
24
  console.log('refetching zillow cookie')
33
25
  await new Promise(resolve => setTimeout(resolve, 2000))
@@ -68,9 +60,26 @@ export const saveRedfinCookie = async () => {
68
60
  }
69
61
  }
70
62
 
71
- export const saveZillowCookie = async (options?: { onCaptcha?: () => Promise<void> }) => {
63
+ export const refreshZillowCookie = async () => {
64
+ try {
65
+ const browser = await puppeteer.connect({ browserURL: wsChromeEndpointurl })
66
+ const allCookies = await browser.cookies()
67
+ const [pxvid] = allCookies.filter(c => c.name === '_pxvid')
68
+ const [px3] = allCookies.filter(c => c.name === '_px3')
69
+ if (pxvid) {
70
+ const zillowCookie = [pxvid, px3].filter(Boolean).map(c => `${c.name}=${c.value}`).join('; ')
71
+ await updateConfigFile('zillow', { zillowCookie })
72
+ return zillowCookie
73
+ }
74
+ } catch (error: any) {
75
+ const { status, message } = parseError(error)
76
+ console.error(status, message)
77
+ }
78
+ }
79
+
80
+ export const saveZillowCookie = async () => {
72
81
  try {
73
- const zillowCookie = await getZillowCookie(0, options)
82
+ const zillowCookie = await getZillowCookie()
74
83
  if (zillowCookie) {
75
84
  await updateConfigFile('zillow', { zillowCookie })
76
85
  }
package/src/server.ts CHANGED
@@ -1,7 +1,8 @@
1
1
  import express from 'express'
2
2
  import minimist from 'minimist'
3
- import { getZillowCookie, saveZillowCookie, saveRedfinCookie } from './cookie.js'
4
- import { launchBrowser, closeBrowser, getBrowser, shutdownBrowser, openBrowser } from './browser.js'
3
+ import { getZillowCookie, saveZillowCookie, saveRedfinCookie, refreshZillowCookie } from './cookie.js'
4
+ import { launchBrowser, closeBrowser, getBrowser, shutdownBrowser, openBrowser, getBrowserStatus } from './browser.js'
5
+ import { solveZillowCaptcha } from './solve-captcha.js'
5
6
  import type { ListingsSource } from '@rent-scraper/api'
6
7
 
7
8
  export function runBrowserServer(source: ListingsSource = 'zillow') {
@@ -35,13 +36,10 @@ export function runBrowserServer(source: ListingsSource = 'zillow') {
35
36
  }
36
37
  })
37
38
 
38
- const shutdownServer = () => {
39
- server.close(async (err) => {
40
- const browser = await shutdownBrowser()
41
- if (debug) {
42
- console.log(browser)
43
- console.log('server closed')
44
- }
39
+ const shutdownServer = async () => {
40
+ await shutdownBrowser()
41
+ server.close((err) => {
42
+ if (debug) console.log('server closed')
45
43
  process.exit(err ? 1 : 0)
46
44
  })
47
45
  return { status: 'shutdown' }
@@ -64,6 +62,15 @@ export function runBrowserServer(source: ListingsSource = 'zillow') {
64
62
  }
65
63
  })
66
64
 
65
+ app.post('/browser/status', async (_req, res) => {
66
+ try {
67
+ const browser = await getBrowserStatus()
68
+ res.send({ browser })
69
+ } catch (error) {
70
+ res.send(error)
71
+ }
72
+ })
73
+
67
74
  app.post('/browser/open', async (req, res) => {
68
75
  try {
69
76
  const { url } = req?.body ?? {}
@@ -83,6 +90,15 @@ export function runBrowserServer(source: ListingsSource = 'zillow') {
83
90
  }
84
91
  })
85
92
 
93
+ app.post('/cookie/refresh', async (_req, res) => {
94
+ try {
95
+ const cookie = await refreshZillowCookie()
96
+ res.send({ cookie })
97
+ } catch (error) {
98
+ res.send(error)
99
+ }
100
+ })
101
+
86
102
  app.post('/cookie/save', async (_req, res) => {
87
103
  try {
88
104
  const cookie = await saveZillowCookie()
@@ -101,6 +117,15 @@ export function runBrowserServer(source: ListingsSource = 'zillow') {
101
117
  }
102
118
  })
103
119
 
120
+ app.post('/captcha/solve', async (_req, res) => {
121
+ try {
122
+ const solved = await solveZillowCaptcha()
123
+ res.send({ solved })
124
+ } catch (error) {
125
+ res.send(error)
126
+ }
127
+ })
128
+
104
129
  app.post('/browser/close', async (_req, res) => {
105
130
  try {
106
131
  const browser = await closeBrowser()
@@ -0,0 +1,70 @@
1
+ import type { Page } from 'puppeteer'
2
+ import { getBrowser } from './browser.js'
3
+
4
+ const HOLD_DURATION_MS = 10000 // PerimeterX typically requires ~8-10s hold
5
+
6
+ const pressAndHoldSpace = async (page: Page) => {
7
+ await page.keyboard.press('Escape')
8
+ await new Promise(resolve => setTimeout(resolve, 500))
9
+ await page.keyboard.press('Tab')
10
+ await new Promise(resolve => setTimeout(resolve, 200))
11
+ await page.keyboard.down('Space')
12
+ await new Promise(resolve => setTimeout(resolve, HOLD_DURATION_MS))
13
+ await page.keyboard.up('Space')
14
+ }
15
+
16
+ export const solveZillowCaptcha = async (): Promise<boolean> => {
17
+ try {
18
+ const browser = await getBrowser()
19
+ if (!browser) return false
20
+
21
+ const pages = await browser.pages()
22
+ const page = pages?.[0]
23
+ if (!page) return false
24
+
25
+ const title = await page.title()
26
+ if (!title.includes('denied')) return true // no captcha present
27
+
28
+ // Delete pxcts cookie — resets the challenge state so a fresh attempt starts clean
29
+ await page.deleteCookie({ name: 'pxcts', domain: '.zillow.com' })
30
+
31
+ for (let attempt = 0; attempt < 5; attempt++) {
32
+ if (attempt > 0) {
33
+ // wait between attempts
34
+ await new Promise(resolve => setTimeout(resolve, 5000))
35
+ }
36
+
37
+ try {
38
+ // Try mouse hold on the captcha element first
39
+ const captchaEl = await page.$('#px-captcha')
40
+ if (captchaEl) {
41
+ const box = await captchaEl.boundingBox()
42
+ if (box) {
43
+ await page.mouse.move(box.x + box.width / 2, box.y + box.height / 2)
44
+ await page.mouse.down()
45
+ await new Promise(resolve => setTimeout(resolve, HOLD_DURATION_MS))
46
+ await page.mouse.up()
47
+ } else {
48
+ // element found but no bounding box — fall back to keyboard
49
+ await pressAndHoldSpace(page)
50
+ }
51
+ } else {
52
+ // no element found — fall back to keyboard Tab + Space
53
+ await pressAndHoldSpace(page)
54
+ }
55
+ } catch {
56
+ // any puppeteer error — try keyboard as last resort
57
+ await pressAndHoldSpace(page)
58
+ }
59
+
60
+ // give the page a moment to react
61
+ await new Promise(resolve => setTimeout(resolve, 1000))
62
+ const currentTitle = await page.title()
63
+ if (!currentTitle.includes('denied')) return true
64
+ }
65
+
66
+ return false
67
+ } catch {
68
+ return false
69
+ }
70
+ }
@@ -1 +0,0 @@
1
// Minified ES-module bundle (removed in this diff). Overview of the bindings below;
// the letter in parentheses is the name each one is exported under.
//
//   oo / eo  - per-platform list of browser executable paths; eo resolves to the
//              first path that fs `access` accepts, or null when none does.
//   H, W     - both "http://127.0.0.1:9222/json/version", used as the puppeteer browserURL.
//   to (w)   - retries puppeteer.connect every 500 ms until it succeeds; throws
//              "timed out waiting for browser" after `s` ms (default 15000).
//   E  (g)   - puppeteer.connect, resolving to null instead of throwing.
//   P  (c)   - closes every page of the connected browser, if any.
//   T  (l)   - returns {status:"already launched"} when a browser is reachable;
//              otherwise waits for the source's config file, creates a temp
//              user-data dir and starts the located executable via `exec` (or
//              puppeteer.launch with headless:false when none was found).
//   S  (o)   - navigates the first page (or a new one) to the URL unless the
//              current title includes "denied".
//   N  (s)   - closes the connected browser.
//   A  (a)   - Zillow cookie string; recurses while the title reports access
//              denied or no `_pxvid` cookie exists.
//              NOTE(review): no attempt limit is visible, and the `_pxvid`
//              retry calls A(s+1) without forwarding the options argument.
//   G  (b)   - Redfin cookie string; needs an `aws-waf-token` cookie and a
//              title including "Redfin". NOTE(review): recursion is also unbounded.
//   I  (d), U (e) - store the Redfin / Zillow cookie through updateConfigFile,
//              logging parseError(status, message) on failure; neither returns
//              a value, so the /cookie/save routes send {cookie: undefined}.
//   no, so, ro, ao - CommonJS interop plus a bundled argv parser (presumably
//              minimist — confirm against the package's dependencies).
//   io (r)   - builds the express app: listens on PORT (default 8082), launches
//              the browser, polls once per second until connected, then saves
//              the cookie for the source; registers the /server, /browser/*,
//              /cookie* and /server/shutdown routes and SIGINT/SIGTERM handlers.
//              NOTE(review): HOST is only used in the log line; listen() is
//              called with the port alone.
- import Z from"express";import O from"puppeteer";import{mkdtemp as J,access as Q}from"fs/promises";import{exec as V}from"child_process";import{tmpdir as X}from"os";import Y from"path";import{checkForConfigFile as x,waitForConfigFile as k,updateConfigFile as K}from"@rent-scraper/utils/config";import{parseError as q}from"@rent-scraper/utils";const oo={darwin:["/Applications/Brave Browser.app/Contents/MacOS/Brave Browser","/Applications/Google Chrome.app/Contents/MacOS/Google Chrome","/Applications/Chromium.app/Contents/MacOS/Chromium","/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge"],linux:["/usr/bin/brave-browser","/usr/bin/google-chrome","/usr/bin/google-chrome-stable","/usr/bin/chromium-browser","/usr/bin/chromium","/snap/bin/chromium"],win32:["C:\\Program Files\\BraveSoftware\\Brave-Browser\\Application\\brave.exe","C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe","C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe"]},eo=async()=>{const s=oo[process.platform]??[];for(const e of s)try{return await Q(e),e}catch{}return null},H="http://127.0.0.1:9222/json/version",to=async(s=15e3)=>{const e=Date.now();for(;Date.now()-e<s;)try{await O.connect({browserURL:H});return}catch{await new Promise(u=>setTimeout(u,500))}throw new Error("timed out waiting for browser")},E=async()=>{try{return await O.connect({browserURL:H})}catch{return null}},P=async()=>{const s=await(await E())?.pages();s&&await Promise.all(s.map(e=>e.close()))},T=async(s="zillow")=>{if(await E())return{status:"already launched"};await x(s)||await k(s);const e=await J(Y.join(X(),"chrome-remote-")),u=await eo();if(console.log(`launching browser: ${u??"bundled chromium"}`),u)V(`"${u}" --remote-debugging-port=9222 --no-first-run --no-default-browser-check --user-data-dir="${e}"`,t=>{t&&console.error(`Error: ${t.message}`)});else{const 
t=["--remote-debugging-port=9222","--no-first-run","--no-default-browser-check",`--user-data-dir=${e}`,...process.platform==="linux"?["--no-sandbox","--disable-setuid-sandbox"]:[]];await O.launch({headless:!1,args:t,dumpio:!1})}return{status:"launched"}},S=async s=>{const e=await E();if(e){const u=s,t=(await e.pages())?.[0]??await e.newPage();return(await t.title()).includes("denied")||await t.goto(u,{waitUntil:"load"}),{status:"opened"}}else return{status:"not connected"}},N=async()=>{const s=await E();return s?(await s.close(),{status:"closed"}):{status:"not connected"}},W="http://127.0.0.1:9222/json/version",A=async(s=0,e)=>{const u=await O.connect({browserURL:W});s===0&&await S("https://www.zillow.com/homes/for_rent/");const t=await u.pages();if((t?.[0]?await t[0].title():"").includes("Access to this page has been denied"))return e?.onCaptcha?await e.onCaptcha():await new Promise(o=>setTimeout(o,2e3)),await A(s+1,e);const a=(await u.cookies()).filter(o=>o.domain?.includes("zillow.com"));return a.some(o=>o.name==="_pxvid")?(await P(),a.map(o=>`${o.name}=${o.value}`).join("; ")):(console.log("refetching zillow cookie"),await new Promise(o=>setTimeout(o,2e3)),await A(s+1))},G=async(s=0)=>{const e=await O.connect({browserURL:W});s===0&&(await S("https://www.redfin.com"),await new Promise(o=>setTimeout(o,3e3)));const u=await e.pages(),t=u?.[0]?await u[0].title():"",a=(await e.cookies()).filter(o=>o.domain.includes("redfin.com"));return a.some(o=>o.name==="aws-waf-token")&&t.includes("Redfin")?(await P(),a.map(o=>`${o.name}=${o.value}`).join("; ")):(console.log("refetching redfin cookie"),await new Promise(o=>setTimeout(o,2e3)),await G(s+1))},I=async()=>{try{const s=await G();s&&await K("redfin",{redfinCookie:s})}catch(s){const{status:e,message:u}=q(s);console.error(e,u)}},U=async s=>{try{const e=await A(0,s);e&&await K("zillow",{zillowCookie:e})}catch(e){const{status:u,message:t}=q(e);console.error(u,t)}};function no(s){return 
s&&s.__esModule&&Object.prototype.hasOwnProperty.call(s,"default")?s.default:s}var D,j;function so(){if(j)return D;j=1;function s(t,a){var o=t;a.slice(0,-1).forEach(function(p){o=o[p]||{}});var d=a[a.length-1];return d in o}function e(t){return typeof t=="number"||/^0x[0-9a-f]+$/i.test(t)?!0:/^[-+]?(?:\d+(?:\.\d*)?|\.\d+)(e[-+]?\d+)?$/.test(t)}function u(t,a){return a==="constructor"&&typeof t[a]=="function"||a==="__proto__"}return D=function(t,a){a||(a={});var o={bools:{},strings:{},unknownFn:null};typeof a.unknown=="function"&&(o.unknownFn=a.unknown),typeof a.boolean=="boolean"&&a.boolean?o.allBools=!0:[].concat(a.boolean).filter(Boolean).forEach(function(r){o.bools[r]=!0});var d={};function p(r){return d[r].some(function(w){return o.bools[w]})}Object.keys(a.alias||{}).forEach(function(r){d[r]=[].concat(a.alias[r]),d[r].forEach(function(w){d[w]=[r].concat(d[r].filter(function(B){return w!==B}))})}),[].concat(a.string).filter(Boolean).forEach(function(r){o.strings[r]=!0,d[r]&&[].concat(d[r]).forEach(function(w){o.strings[w]=!0})});var i=a.default||{},n={_:[]};function z(r,w){return o.allBools&&/^--[^=]+$/.test(w)||o.strings[r]||o.bools[r]||d[r]}function y(r,w,B){for(var l=r,F=0;F<w.length-1;F++){var C=w[F];if(u(l,C))return;l[C]===void 0&&(l[C]={}),(l[C]===Object.prototype||l[C]===Number.prototype||l[C]===String.prototype)&&(l[C]={}),l[C]===Array.prototype&&(l[C]=[]),l=l[C]}var $=w[w.length-1];u(l,$)||((l===Object.prototype||l===Number.prototype||l===String.prototype)&&(l={}),l===Array.prototype&&(l=[]),l[$]===void 0||o.bools[$]||typeof l[$]=="boolean"?l[$]=B:Array.isArray(l[$])?l[$].push(B):l[$]=[l[$],B])}function h(r,w,B){if(!(B&&o.unknownFn&&!z(r,B)&&o.unknownFn(B)===!1)){var l=!o.strings[r]&&e(w)?Number(w):w;y(n,r.split("."),l),(d[r]||[]).forEach(function(F){y(n,F.split("."),l)})}}Object.keys(o.bools).forEach(function(r){h(r,i[r]===void 0?!1:i[r])});var M=[];t.indexOf("--")!==-1&&(M=t.slice(t.indexOf("--")+1),t=t.slice(0,t.indexOf("--")));for(var 
m=0;m<t.length;m++){var c=t[m],f,v;if(/^--.+=/.test(c)){var L=c.match(/^--([^=]+)=([\s\S]*)$/);f=L[1];var R=L[2];o.bools[f]&&(R=R!=="false"),h(f,R,c)}else if(/^--no-.+/.test(c))f=c.match(/^--no-(.+)/)[1],h(f,!1,c);else if(/^--.+/.test(c))f=c.match(/^--(.+)/)[1],v=t[m+1],v!==void 0&&!/^(-|--)[^-]/.test(v)&&!o.bools[f]&&!o.allBools&&(!d[f]||!p(f))?(h(f,v,c),m+=1):/^(true|false)$/.test(v)?(h(f,v==="true",c),m+=1):h(f,o.strings[f]?"":!0,c);else if(/^-[^-]+/.test(c)){for(var g=c.slice(1,-1).split(""),_=!1,b=0;b<g.length;b++){if(v=c.slice(b+2),v==="-"){h(g[b],v,c);continue}if(/[A-Za-z]/.test(g[b])&&v[0]==="="){h(g[b],v.slice(1),c),_=!0;break}if(/[A-Za-z]/.test(g[b])&&/-?\d+(\.\d*)?(e-?\d+)?$/.test(v)){h(g[b],v,c),_=!0;break}if(g[b+1]&&g[b+1].match(/\W/)){h(g[b],c.slice(b+2),c),_=!0;break}else h(g[b],o.strings[g[b]]?"":!0,c)}f=c.slice(-1)[0],!_&&f!=="-"&&(t[m+1]&&!/^(-|--)[^-]/.test(t[m+1])&&!o.bools[f]&&(!d[f]||!p(f))?(h(f,t[m+1],c),m+=1):t[m+1]&&/^(true|false)$/.test(t[m+1])?(h(f,t[m+1]==="true",c),m+=1):h(f,o.strings[f]?"":!0,c))}else if((!o.unknownFn||o.unknownFn(c)!==!1)&&n._.push(o.strings._||!e(c)?c:Number(c)),a.stopEarly){n._.push.apply(n._,t.slice(m+1));break}}return Object.keys(i).forEach(function(r){s(n,r.split("."))||(y(n,r.split("."),i[r]),(d[r]||[]).forEach(function(w){y(n,w.split("."),i[r])}))}),a["--"]?n["--"]=M.slice():M.forEach(function(r){n._.push(r)}),n},D}var ro=so();const ao=no(ro);function io(s="zillow"){const e=Z(),u=process.env.HOST??"127.0.0.1",t=process.env.PORT??8082;e.use(Z.json());const a=ao(process.argv.slice(2)).debug,o=e.listen(Number(t),async()=>{await T(s);const p=setInterval(async()=>{(await E())?.connected&&(clearInterval(p),a&&console.log("Browser listening at 127.0.0.1:9222"),s==="zillow"?await U():s==="redfin"&&(await I(),await N()))},1e3);a&&console.log(`Server listening at ${u}:${t}`)}),d=()=>(o.close(async p=>{const i=await N();a&&(console.log(i),console.log("server closed")),process.exit(p?1:0)}),{status:"shutdown"});return 
e.get("/server",(p,i)=>{try{i.send({running:!0})}catch(n){i.send(n)}}),e.post("/browser/launch",async(p,i)=>{try{const n=await T();i.send({browser:n})}catch(n){i.send(n)}}),e.post("/browser/open",async(p,i)=>{try{const{url:n}=p?.body??{},z=await S(n);i.send({browser:z})}catch(n){i.send(n)}}),e.get("/cookie",async(p,i)=>{try{const n=await A();i.send({cookie:n})}catch(n){i.send(n)}}),e.post("/cookie/save",async(p,i)=>{try{const n=await U();i.send({cookie:n})}catch(n){i.send(n)}}),e.post("/cookie/redfin/save",async(p,i)=>{try{const n=await I();i.send({cookie:n})}catch(n){i.send(n)}}),e.post("/browser/close",async(p,i)=>{try{const n=await P();i.send({browser:n})}catch(n){i.send(n)}}),e.post("/server/shutdown",(p,i)=>{try{const n=d();i.send({server:n})}catch(n){i.send(n)}}),process.on("SIGINT",d),process.on("SIGTERM",d),e}export{A as a,G as b,P as c,I as d,U as e,E as g,T as l,S as o,io as r,N as s,to as w};