@rent-scraper/browser-server 1.0.29 → 1.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/run-browser-server.mjs +1 -1
- package/dist/index.d.mts +5 -1
- package/dist/index.mjs +1 -1
- package/dist/shared/browser-server.Be38x_p5.mjs +1 -0
- package/package.json +3 -3
- package/src/browser.ts +36 -6
- package/src/cookie.ts +20 -7
- package/src/server.ts +34 -9
- package/src/solve-captcha.ts +70 -0
- package/dist/shared/browser-server.BZ8vwfSV.mjs +0 -1
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import{r}from"../shared/browser-server.
|
|
2
|
+
import{h as r}from"../shared/browser-server.Be38x_p5.mjs";import"express";import"puppeteer";import"fs/promises";import"child_process";import"os";import"path";import"@rent-scraper/utils/config";import"@rent-scraper/utils";r();
|
package/dist/index.d.mts
CHANGED
|
@@ -13,6 +13,9 @@ declare const closeBrowser: () => Promise<void>;
|
|
|
13
13
|
declare const launchBrowser: (source?: ListingsSource) => Promise<{
|
|
14
14
|
status: string;
|
|
15
15
|
}>;
|
|
16
|
+
declare const getBrowserStatus: () => Promise<{
|
|
17
|
+
status: string;
|
|
18
|
+
}>;
|
|
16
19
|
declare const openBrowser: (url: string) => Promise<{
|
|
17
20
|
status: string;
|
|
18
21
|
}>;
|
|
@@ -25,6 +28,7 @@ declare function runConfirmBrowserLaunch(): Promise<undefined>;
|
|
|
25
28
|
declare const getZillowCookie: (attempt?: number) => Promise<string | undefined>;
|
|
26
29
|
declare const getRedfinCookie: (attempt?: number) => Promise<string | undefined>;
|
|
27
30
|
declare const saveRedfinCookie: () => Promise<void>;
|
|
31
|
+
declare const refreshZillowCookie: () => Promise<string | undefined>;
|
|
28
32
|
declare const saveZillowCookie: () => Promise<void>;
|
|
29
33
|
|
|
30
34
|
// This extracts the core definitions from express to prevent a circular dependency between express and serve-static
|
|
@@ -1250,4 +1254,4 @@ interface Express extends Application {
|
|
|
1250
1254
|
|
|
1251
1255
|
declare function runBrowserServer(source?: ListingsSource): Express;
|
|
1252
1256
|
|
|
1253
|
-
export { closeBrowser, getBrowser, getRedfinCookie, getZillowCookie, launchBrowser, openBrowser, runBrowserServer, runConfirmBrowserLaunch, saveRedfinCookie, saveZillowCookie, shutdownBrowser, waitForBrowser };
|
|
1257
|
+
export { closeBrowser, getBrowser, getBrowserStatus, getRedfinCookie, getZillowCookie, launchBrowser, openBrowser, refreshZillowCookie, runBrowserServer, runConfirmBrowserLaunch, saveRedfinCookie, saveZillowCookie, shutdownBrowser, waitForBrowser };
|
package/dist/index.mjs
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export{c as closeBrowser,g as getBrowser,
|
|
1
|
+
export{c as closeBrowser,g as getBrowser,a as getBrowserStatus,d as getRedfinCookie,b as getZillowCookie,l as launchBrowser,o as openBrowser,r as refreshZillowCookie,h as runBrowserServer,e as saveRedfinCookie,f as saveZillowCookie,s as shutdownBrowser,w as waitForBrowser}from"./shared/browser-server.Be38x_p5.mjs";import{confirm as r,isCancel as a,cancel as s,outro as i}from"@clack/prompts";import{setTimeout as o}from"node:timers/promises";import"express";import"puppeteer";import"fs/promises";import"child_process";import"os";import"path";import"@rent-scraper/utils/config";import"@rent-scraper/utils";async function t(){const e=await r({message:"We need to launch your browser to continue",active:"OK",inactive:"Cancel"});if(a(e)||!e)return s("Create config canceled. Please try again."),process.exit(1);await o(1e3),i("Browser Launching..."),await o(1e3)}export{t as runConfirmBrowserLaunch};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
import j from"express";import E from"puppeteer";import{mkdtemp as k,access as ee}from"fs/promises";import{exec as te}from"child_process";import{tmpdir as oe}from"os";import ae from"path";import{checkForConfigFile as ne,waitForConfigFile as se,getValueFromConfigFile as ie,updateConfigFile as M}from"@rent-scraper/utils/config";import{parseError as L}from"@rent-scraper/utils";const H={darwin:["/Applications/Brave Browser.app/Contents/MacOS/Brave Browser","/Applications/Google Chrome.app/Contents/MacOS/Google Chrome","/Applications/Chromium.app/Contents/MacOS/Chromium","/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge"],linux:["/usr/bin/brave-browser","/usr/bin/google-chrome","/usr/bin/google-chrome-stable","/usr/bin/chromium-browser","/usr/bin/chromium","/snap/bin/chromium"],win32:["C:\\Program Files\\BraveSoftware\\Brave-Browser\\Application\\brave.exe","C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe","C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe"]},V=async()=>{const t=H[process.platform]??[];for(const e of t)try{return await ee(e),e}catch{}return null},W="http://127.0.0.1:9222/json/version",re=async(t=15e3)=>{const e=Date.now();for(;Date.now()-e<t;)try{await E.connect({browserURL:W});return}catch{await new Promise(c=>setTimeout(c,500))}throw new Error("timed out waiting for browser")},$=async()=>{try{return await E.connect({browserURL:W})}catch{return null}},S=async()=>{const t=await(await $())?.pages();t&&await Promise.all(t.map(e=>e.close()))},N=async(t="zillow")=>{if(await $())return{status:"already launched"};await ne(t)||await se(t);const e=await k(ae.join(oe(),"chrome-remote-")),c=await ie(t,"browser"),o=c?(H[process.platform]??[]).find(s=>s.toLowerCase().includes(c.toLowerCase()))??await V():await V();if(console.log(`launching browser: ${o??"bundled chromium"}`),o)te(`"${o}" --remote-debugging-port=9222 --no-first-run --no-default-browser-check --user-data-dir="${e}"`,s=>{s&&console.error(`Error: 
${s.message}`)});else{const s=["--remote-debugging-port=9222","--no-first-run","--no-default-browser-check",`--user-data-dir=${e}`,...process.platform==="linux"?["--no-sandbox","--disable-setuid-sandbox"]:[]];await E.launch({headless:!1,args:s,dumpio:!1})}return{status:"launched"}},J=async()=>{const t=await $();if(t){const e=(await t.pages())?.[0];return e?(await e.title()).includes("denied")?{status:"captcha"}:{status:"navigated"}:{status:"navigated"}}else return{status:"not connected"}},_=async t=>{const e=await $();if(e){const c=t,o=(await e.pages())?.[0]??await e.newPage();return(await o.title()).includes("denied")?{status:"captcha"}:(await o.goto(c,{waitUntil:"load"}),(await o.title()).includes("denied")?{status:"captcha"}:{status:"navigated"})}else return{status:"not connected"}},x=async()=>{const t=await $();if(t){try{await(await t.target().createCDPSession()).send("Browser.close")}catch{await t.disconnect()}return{status:"closed"}}else return{status:"not connected"}},G="http://127.0.0.1:9222/json/version",T=async(t=0)=>{const e=await E.connect({browserURL:G});t===0&&await _("https://www.zillow.com/homes/for_rent/");const c=await e.cookies(),[o]=c.filter(n=>n.name==="_pxvid"),[s]=c.filter(n=>n.name==="_px3");return o?(await S(),[o,s].filter(Boolean).map(n=>`${n.name}=${n.value}`).join("; ")):(console.log("refetching zillow cookie"),await new Promise(n=>setTimeout(n,2e3)),await T(t+1))},I=async(t=0)=>{const e=await E.connect({browserURL:G});t===0&&(await _("https://www.redfin.com"),await new Promise(n=>setTimeout(n,3e3)));const c=await e.pages(),o=c?.[0]?await c[0].title():"",s=(await e.cookies()).filter(n=>n.domain.includes("redfin.com"));return s.some(n=>n.name==="aws-waf-token")&&o.includes("Redfin")?(await S(),s.map(n=>`${n.name}=${n.value}`).join("; ")):(console.log("refetching redfin cookie"),await new Promise(n=>setTimeout(n,2e3)),await I(t+1))},U=async()=>{try{const t=await I();t&&await 
M("redfin",{redfinCookie:t})}catch(t){const{status:e,message:c}=L(t);console.error(e,c)}},Q=async()=>{try{const t=await(await E.connect({browserURL:G})).cookies(),[e]=t.filter(o=>o.name==="_pxvid"),[c]=t.filter(o=>o.name==="_px3");if(e){const o=[e,c].filter(Boolean).map(s=>`${s.name}=${s.value}`).join("; ");return await M("zillow",{zillowCookie:o}),o}}catch(t){const{status:e,message:c}=L(t);console.error(e,c)}},Z=async()=>{try{const t=await T();t&&await M("zillow",{zillowCookie:t})}catch(t){const{status:e,message:c}=L(t);console.error(e,c)}};function ce(t){return t&&t.__esModule&&Object.prototype.hasOwnProperty.call(t,"default")?t.default:t}var D,X;function le(){if(X)return D;X=1;function t(o,s){var n=o;s.slice(0,-1).forEach(function(f){n=n[f]||{}});var p=s[s.length-1];return p in n}function e(o){return typeof o=="number"||/^0x[0-9a-f]+$/i.test(o)?!0:/^[-+]?(?:\d+(?:\.\d*)?|\.\d+)(e[-+]?\d+)?$/.test(o)}function c(o,s){return s==="constructor"&&typeof o[s]=="function"||s==="__proto__"}return D=function(o,s){s||(s={});var n={bools:{},strings:{},unknownFn:null};typeof s.unknown=="function"&&(n.unknownFn=s.unknown),typeof s.boolean=="boolean"&&s.boolean?n.allBools=!0:[].concat(s.boolean).filter(Boolean).forEach(function(r){n.bools[r]=!0});var p={};function f(r){return p[r].some(function(d){return n.bools[d]})}Object.keys(s.alias||{}).forEach(function(r){p[r]=[].concat(s.alias[r]),p[r].forEach(function(d){p[d]=[r].concat(p[r].filter(function(C){return d!==C}))})}),[].concat(s.string).filter(Boolean).forEach(function(r){n.strings[r]=!0,p[r]&&[].concat(p[r]).forEach(function(d){n.strings[d]=!0})});var i=s.default||{},a={_:[]};function A(r,d){return n.allBools&&/^--[^=]+$/.test(d)||n.strings[r]||n.bools[r]||p[r]}function P(r,d,C){for(var u=r,F=0;F<d.length-1;F++){var y=d[F];if(c(u,y))return;u[y]===void 0&&(u[y]={}),(u[y]===Object.prototype||u[y]===Number.prototype||u[y]===String.prototype)&&(u[y]={}),u[y]===Array.prototype&&(u[y]=[]),u=u[y]}var 
B=d[d.length-1];c(u,B)||((u===Object.prototype||u===Number.prototype||u===String.prototype)&&(u={}),u===Array.prototype&&(u=[]),u[B]===void 0||n.bools[B]||typeof u[B]=="boolean"?u[B]=C:Array.isArray(u[B])?u[B].push(C):u[B]=[u[B],C])}function h(r,d,C){if(!(C&&n.unknownFn&&!A(r,C)&&n.unknownFn(C)===!1)){var u=!n.strings[r]&&e(d)?Number(d):d;P(a,r.split("."),u),(p[r]||[]).forEach(function(F){P(a,F.split("."),u)})}}Object.keys(n.bools).forEach(function(r){h(r,i[r]===void 0?!1:i[r])});var z=[];o.indexOf("--")!==-1&&(z=o.slice(o.indexOf("--")+1),o=o.slice(0,o.indexOf("--")));for(var m=0;m<o.length;m++){var l=o[m],w,v;if(/^--.+=/.test(l)){var q=l.match(/^--([^=]+)=([\s\S]*)$/);w=q[1];var R=q[2];n.bools[w]&&(R=R!=="false"),h(w,R,l)}else if(/^--no-.+/.test(l))w=l.match(/^--no-(.+)/)[1],h(w,!1,l);else if(/^--.+/.test(l))w=l.match(/^--(.+)/)[1],v=o[m+1],v!==void 0&&!/^(-|--)[^-]/.test(v)&&!n.bools[w]&&!n.allBools&&(!p[w]||!f(w))?(h(w,v,l),m+=1):/^(true|false)$/.test(v)?(h(w,v==="true",l),m+=1):h(w,n.strings[w]?"":!0,l);else if(/^-[^-]+/.test(l)){for(var g=l.slice(1,-1).split(""),O=!1,b=0;b<g.length;b++){if(v=l.slice(b+2),v==="-"){h(g[b],v,l);continue}if(/[A-Za-z]/.test(g[b])&&v[0]==="="){h(g[b],v.slice(1),l),O=!0;break}if(/[A-Za-z]/.test(g[b])&&/-?\d+(\.\d*)?(e-?\d+)?$/.test(v)){h(g[b],v,l),O=!0;break}if(g[b+1]&&g[b+1].match(/\W/)){h(g[b],l.slice(b+2),l),O=!0;break}else h(g[b],n.strings[g[b]]?"":!0,l)}w=l.slice(-1)[0],!O&&w!=="-"&&(o[m+1]&&!/^(-|--)[^-]/.test(o[m+1])&&!n.bools[w]&&(!p[w]||!f(w))?(h(w,o[m+1],l),m+=1):o[m+1]&&/^(true|false)$/.test(o[m+1])?(h(w,o[m+1]==="true",l),m+=1):h(w,n.strings[w]?"":!0,l))}else if((!n.unknownFn||n.unknownFn(l)!==!1)&&a._.push(n.strings._||!e(l)?l:Number(l)),s.stopEarly){a._.push.apply(a._,o.slice(m+1));break}}return Object.keys(i).forEach(function(r){t(a,r.split("."))||(P(a,r.split("."),i[r]),(p[r]||[]).forEach(function(d){P(a,d.split("."),i[r])}))}),s["--"]?a["--"]=z.slice():z.forEach(function(r){a._.push(r)}),a},D}var ue=le();const 
we=ce(ue),Y=1e4,K=async t=>{await t.keyboard.press("Escape"),await new Promise(e=>setTimeout(e,500)),await t.keyboard.press("Tab"),await new Promise(e=>setTimeout(e,200)),await t.keyboard.down("Space"),await new Promise(e=>setTimeout(e,Y)),await t.keyboard.up("Space")},fe=async()=>{try{const t=await $();if(!t)return!1;const e=(await t.pages())?.[0];if(!e)return!1;if(!(await e.title()).includes("denied"))return!0;await e.deleteCookie({name:"pxcts",domain:".zillow.com"});for(let c=0;c<5;c++){c>0&&await new Promise(o=>setTimeout(o,5e3));try{const o=await e.$("#px-captcha");if(o){const s=await o.boundingBox();s?(await e.mouse.move(s.x+s.width/2,s.y+s.height/2),await e.mouse.down(),await new Promise(n=>setTimeout(n,Y)),await e.mouse.up()):await K(e)}else await K(e)}catch{await K(e)}if(await new Promise(o=>setTimeout(o,1e3)),!(await e.title()).includes("denied"))return!0}return!1}catch{return!1}};function de(t="zillow"){const e=j(),c=process.env.HOST??"127.0.0.1",o=process.env.PORT??8082;e.use(j.json());const s=we(process.argv.slice(2)).debug,n=e.listen(Number(o),async()=>{await N(t);const f=setInterval(async()=>{(await $())?.connected&&(clearInterval(f),s&&console.log("Browser listening at 127.0.0.1:9222"),t==="zillow"?await Z():t==="redfin"&&(await U(),await x()))},1e3);s&&console.log(`Server listening at ${c}:${o}`)}),p=async()=>(await x(),n.close(f=>{s&&console.log("server closed"),process.exit(f?1:0)}),{status:"shutdown"});return e.get("/server",(f,i)=>{try{i.send({running:!0})}catch(a){i.send(a)}}),e.post("/browser/launch",async(f,i)=>{try{const a=await N();i.send({browser:a})}catch(a){i.send(a)}}),e.post("/browser/status",async(f,i)=>{try{const a=await J();i.send({browser:a})}catch(a){i.send(a)}}),e.post("/browser/open",async(f,i)=>{try{const{url:a}=f?.body??{},A=await _(a);i.send({browser:A})}catch(a){i.send(a)}}),e.get("/cookie",async(f,i)=>{try{const a=await T();i.send({cookie:a})}catch(a){i.send(a)}}),e.post("/cookie/refresh",async(f,i)=>{try{const a=await 
Q();i.send({cookie:a})}catch(a){i.send(a)}}),e.post("/cookie/save",async(f,i)=>{try{const a=await Z();i.send({cookie:a})}catch(a){i.send(a)}}),e.post("/cookie/redfin/save",async(f,i)=>{try{const a=await U();i.send({cookie:a})}catch(a){i.send(a)}}),e.post("/captcha/solve",async(f,i)=>{try{const a=await fe();i.send({solved:a})}catch(a){i.send(a)}}),e.post("/browser/close",async(f,i)=>{try{const a=await S();i.send({browser:a})}catch(a){i.send(a)}}),e.post("/server/shutdown",(f,i)=>{try{const a=p();i.send({server:a})}catch(a){i.send(a)}}),process.on("SIGINT",p),process.on("SIGTERM",p),e}export{J as a,T as b,S as c,I as d,U as e,Z as f,$ as g,de as h,N as l,_ as o,Q as r,x as s,re as w};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@rent-scraper/browser-server",
|
|
3
|
-
"version": "1.0.29",
|
|
3
|
+
"version": "1.0.30",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"exports": {
|
|
6
6
|
".": {
|
|
@@ -26,8 +26,8 @@
|
|
|
26
26
|
"express": "^4.21.2",
|
|
27
27
|
"picocolors": "^1.1.1",
|
|
28
28
|
"puppeteer": "^24.11.2",
|
|
29
|
-
"@rent-scraper/utils": "1.0.
|
|
30
|
-
"@rent-scraper/api": "1.0.
|
|
29
|
+
"@rent-scraper/utils": "1.0.30",
|
|
30
|
+
"@rent-scraper/api": "1.0.30"
|
|
31
31
|
},
|
|
32
32
|
"devDependencies": {
|
|
33
33
|
"@types/express": "^5.0.3",
|
package/src/browser.ts
CHANGED
|
@@ -104,6 +104,22 @@ export const launchBrowser = async (source = 'zillow' as ListingsSource) => {
|
|
|
104
104
|
return { status: 'launched' }
|
|
105
105
|
}
|
|
106
106
|
|
|
107
|
+
/**
 * Reports the state of the browser behind the remote-debugging endpoint
 * without navigating anywhere.
 *
 * @returns `{ status: 'not connected' }` when `getBrowser()` yields no browser,
 *   `{ status: 'captcha' }` when the first open page's title contains "denied",
 *   and `{ status: 'navigated' }` otherwise (including when no page is open).
 */
export const getBrowserStatus = async () => {
  const browser = await getBrowser()
  if (!browser) return { status: 'not connected' }

  try {
    const [page] = await browser.pages()
    // NOTE(review): "no page open" is reported as 'navigated' — kept as-is, confirm this is intended
    if (!page) return { status: 'navigated' }

    const pageTitle = await page.title()
    return { status: pageTitle.includes('denied') ? 'captcha' : 'navigated' }
  } finally {
    // getBrowser() opens a new puppeteer connection on every call; drop it once
    // the check is done so repeated status polls do not pile up open sessions.
    // disconnect() only detaches this client — the browser process keeps running.
    await browser.disconnect().catch(() => undefined)
  }
}
|
|
122
|
+
|
|
107
123
|
export const openBrowser = async (url: string) => {
|
|
108
124
|
const browser = await getBrowser()
|
|
109
125
|
if (browser) {
|
|
@@ -112,12 +128,18 @@ export const openBrowser = async (url: string) => {
|
|
|
112
128
|
const page = pages?.[0] ?? await browser.newPage()
|
|
113
129
|
const pageTitle = await page.title()
|
|
114
130
|
// do not change page if captcha is showing
|
|
115
|
-
if (
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
131
|
+
if (pageTitle.includes('denied')) {
|
|
132
|
+
return { status: 'captcha' }
|
|
133
|
+
}
|
|
134
|
+
await page.goto(pageUrl, {
|
|
135
|
+
waitUntil: 'load',
|
|
136
|
+
})
|
|
137
|
+
// check title after navigation — zillow may have shown a captcha as a result of the request
|
|
138
|
+
const titleAfterNav = await page.title()
|
|
139
|
+
if (titleAfterNav.includes('denied')) {
|
|
140
|
+
return { status: 'captcha' }
|
|
119
141
|
}
|
|
120
|
-
return { status: 'opened' }
|
|
142
|
+
return { status: 'navigated' }
|
|
121
143
|
} else {
|
|
122
144
|
return { status: 'not connected' }
|
|
123
145
|
}
|
|
@@ -126,7 +148,15 @@ export const openBrowser = async (url: string) => {
|
|
|
126
148
|
export const shutdownBrowser = async () => {
|
|
127
149
|
const browser = await getBrowser()
|
|
128
150
|
if (browser) {
|
|
129
|
-
|
|
151
|
+
try {
|
|
152
|
+
// send Browser.close via CDP to actually terminate the browser process
|
|
153
|
+
// (browser.close() on a connected browser only disconnects the session)
|
|
154
|
+
const target = browser.target()
|
|
155
|
+
const session = await target.createCDPSession()
|
|
156
|
+
await session.send('Browser.close')
|
|
157
|
+
} catch {
|
|
158
|
+
await browser.disconnect()
|
|
159
|
+
}
|
|
130
160
|
return { status: 'closed' }
|
|
131
161
|
} else {
|
|
132
162
|
return { status: 'not connected' }
|
package/src/cookie.ts
CHANGED
|
@@ -60,16 +60,29 @@ export const saveRedfinCookie = async () => {
|
|
|
60
60
|
}
|
|
61
61
|
}
|
|
62
62
|
|
|
63
|
-
export const
|
|
63
|
+
/**
 * Re-reads Zillow's `_pxvid` / `_px3` cookies from the already-running browser
 * and saves them to the zillow config file. Unlike `getZillowCookie`, this does
 * not navigate or retry — it only snapshots whatever cookies the browser holds.
 *
 * @returns the `name=value; name=value` cookie string that was saved, or
 *   `undefined` when `_pxvid` is absent or any step fails (failures are logged,
 *   never thrown).
 */
export const refreshZillowCookie = async () => {
  let browser: Awaited<ReturnType<typeof puppeteer.connect>> | undefined
  try {
    browser = await puppeteer.connect({ browserURL: wsChromeEndpointurl })
    const allCookies = await browser.cookies()

    // `_pxvid` is required; `_px3` is appended only when present
    const pxvid = allCookies.find(c => c.name === '_pxvid')
    if (!pxvid) return undefined
    const px3 = allCookies.find(c => c.name === '_px3')

    const zillowCookie = [pxvid, px3]
      .flatMap(c => (c ? [`${c.name}=${c.value}`] : []))
      .join('; ')
    await updateConfigFile('zillow', { zillowCookie })
    return zillowCookie
  } catch (error: any) {
    const { status, message } = parseError(error)
    console.error(status, message)
    return undefined
  } finally {
    // release the session opened above; the browser itself stays running
    await browser?.disconnect().catch(() => undefined)
  }
}
|
|
70
79
|
|
|
71
|
-
|
|
72
|
-
|
|
80
|
+
export const saveZillowCookie = async () => {
|
|
81
|
+
try {
|
|
82
|
+
const zillowCookie = await getZillowCookie()
|
|
83
|
+
if (zillowCookie) {
|
|
84
|
+
await updateConfigFile('zillow', { zillowCookie })
|
|
85
|
+
}
|
|
73
86
|
} catch (error: any) {
|
|
74
87
|
const { status, message } = parseError(error)
|
|
75
88
|
console.error(status, message)
|
package/src/server.ts
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import express from 'express'
|
|
2
2
|
import minimist from 'minimist'
|
|
3
|
-
import { getZillowCookie, saveZillowCookie, saveRedfinCookie } from './cookie.js'
|
|
4
|
-
import { launchBrowser, closeBrowser, getBrowser, shutdownBrowser, openBrowser } from './browser.js'
|
|
3
|
+
import { getZillowCookie, saveZillowCookie, saveRedfinCookie, refreshZillowCookie } from './cookie.js'
|
|
4
|
+
import { launchBrowser, closeBrowser, getBrowser, shutdownBrowser, openBrowser, getBrowserStatus } from './browser.js'
|
|
5
|
+
import { solveZillowCaptcha } from './solve-captcha.js'
|
|
5
6
|
import type { ListingsSource } from '@rent-scraper/api'
|
|
6
7
|
|
|
7
8
|
export function runBrowserServer(source: ListingsSource = 'zillow') {
|
|
@@ -35,13 +36,10 @@ export function runBrowserServer(source: ListingsSource = 'zillow') {
|
|
|
35
36
|
}
|
|
36
37
|
})
|
|
37
38
|
|
|
38
|
-
const shutdownServer = () => {
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
if (debug)
|
|
42
|
-
console.log(browser)
|
|
43
|
-
console.log('server closed')
|
|
44
|
-
}
|
|
39
|
+
const shutdownServer = async () => {
|
|
40
|
+
await shutdownBrowser()
|
|
41
|
+
server.close((err) => {
|
|
42
|
+
if (debug) console.log('server closed')
|
|
45
43
|
process.exit(err ? 1 : 0)
|
|
46
44
|
})
|
|
47
45
|
return { status: 'shutdown' }
|
|
@@ -64,6 +62,15 @@ export function runBrowserServer(source: ListingsSource = 'zillow') {
|
|
|
64
62
|
}
|
|
65
63
|
})
|
|
66
64
|
|
|
65
|
+
app.post('/browser/status', async (_req, res) => {
|
|
66
|
+
try {
|
|
67
|
+
const browser = await getBrowserStatus()
|
|
68
|
+
res.send({ browser })
|
|
69
|
+
} catch (error) {
|
|
70
|
+
res.send(error)
|
|
71
|
+
}
|
|
72
|
+
})
|
|
73
|
+
|
|
67
74
|
app.post('/browser/open', async (req, res) => {
|
|
68
75
|
try {
|
|
69
76
|
const { url } = req?.body ?? {}
|
|
@@ -83,6 +90,15 @@ export function runBrowserServer(source: ListingsSource = 'zillow') {
|
|
|
83
90
|
}
|
|
84
91
|
})
|
|
85
92
|
|
|
93
|
+
app.post('/cookie/refresh', async (_req, res) => {
|
|
94
|
+
try {
|
|
95
|
+
const cookie = await refreshZillowCookie()
|
|
96
|
+
res.send({ cookie })
|
|
97
|
+
} catch (error) {
|
|
98
|
+
res.send(error)
|
|
99
|
+
}
|
|
100
|
+
})
|
|
101
|
+
|
|
86
102
|
app.post('/cookie/save', async (_req, res) => {
|
|
87
103
|
try {
|
|
88
104
|
const cookie = await saveZillowCookie()
|
|
@@ -101,6 +117,15 @@ export function runBrowserServer(source: ListingsSource = 'zillow') {
|
|
|
101
117
|
}
|
|
102
118
|
})
|
|
103
119
|
|
|
120
|
+
app.post('/captcha/solve', async (_req, res) => {
|
|
121
|
+
try {
|
|
122
|
+
const solved = await solveZillowCaptcha()
|
|
123
|
+
res.send({ solved })
|
|
124
|
+
} catch (error) {
|
|
125
|
+
res.send(error)
|
|
126
|
+
}
|
|
127
|
+
})
|
|
128
|
+
|
|
104
129
|
app.post('/browser/close', async (_req, res) => {
|
|
105
130
|
try {
|
|
106
131
|
const browser = await closeBrowser()
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import type { Page } from 'puppeteer'
|
|
2
|
+
import { getBrowser } from './browser.js'
|
|
3
|
+
|
|
4
|
+
const HOLD_DURATION_MS = 10000 // PerimeterX typically requires ~8-10s hold
|
|
5
|
+
|
|
6
|
+
/**
 * Keyboard fallback for the press-and-hold challenge.
 *
 * Sends Escape, waits 500 ms, sends Tab (presumably to move focus onto the
 * challenge control — confirm), waits 200 ms, then keeps Space held down for
 * `HOLD_DURATION_MS` before releasing it.
 *
 * @param page - the puppeteer page currently showing the challenge
 */
const pressAndHoldSpace = async (page: Page) => {
  const pause = (ms: number) => new Promise(resolve => setTimeout(resolve, ms))
  const { keyboard } = page

  await keyboard.press('Escape')
  await pause(500)

  await keyboard.press('Tab')
  await pause(200)

  // the hold itself: key down, wait out the required duration, key up
  await keyboard.down('Space')
  await pause(HOLD_DURATION_MS)
  await keyboard.up('Space')
}
|
|
15
|
+
|
|
16
|
+
/** Maximum number of press-and-hold attempts before giving up. */
const CAPTCHA_MAX_ATTEMPTS = 5

/** Resolves after `ms` milliseconds. */
const captchaPause = (ms: number) => new Promise<void>(resolve => setTimeout(resolve, ms))

/**
 * Tells whether the block page (title containing "denied") is still displayed.
 * Reading the title can reject while the page is navigating — which is exactly
 * what may happen right after a successful hold — so a failed read is reported
 * as "still showing" and the caller simply checks again later instead of
 * aborting the whole solve.
 */
const captchaStillShowing = async (page: Page): Promise<boolean> => {
  try {
    return (await page.title()).includes('denied')
  } catch {
    return true
  }
}

/**
 * Performs one hold: mouse press on the centre of `#px-captcha` when the
 * element has a bounding box, otherwise (or on any puppeteer error) the
 * keyboard fallback `pressAndHoldSpace`.
 */
const holdCaptchaOnce = async (page: Page): Promise<void> => {
  try {
    const captchaEl = await page.$('#px-captcha')
    const box = captchaEl ? await captchaEl.boundingBox() : null
    if (!box) {
      // element missing or not rendered — fall back to keyboard Tab + Space
      await pressAndHoldSpace(page)
      return
    }
    await page.mouse.move(box.x + box.width / 2, box.y + box.height / 2)
    await page.mouse.down()
    await captchaPause(HOLD_DURATION_MS)
    await page.mouse.up()
  } catch {
    // any puppeteer error — try keyboard as last resort
    await pressAndHoldSpace(page)
  }
}

/**
 * Tries to clear the press-and-hold challenge on the first open page.
 *
 * Flow: bail out early when there is no browser/page or no challenge is
 * showing; delete the `pxcts` cookie for `.zillow.com` so the attempt starts
 * from a fresh challenge state; then make up to five hold attempts, waiting
 * 5 s between attempts and 1 s after each hold before re-checking the title.
 *
 * @returns `true` when no challenge is showing (initially or after an attempt),
 *   `false` when there is no browser/page, all attempts are used up, or an
 *   unexpected error occurs. Never throws — this is a deliberate best-effort.
 */
export const solveZillowCaptcha = async (): Promise<boolean> => {
  try {
    const browser = await getBrowser()
    if (!browser) return false

    const [page] = await browser.pages()
    if (!page) return false

    const title = await page.title()
    if (!title.includes('denied')) return true // no captcha present

    // Delete pxcts cookie — resets the challenge state so a fresh attempt starts clean
    await page.deleteCookie({ name: 'pxcts', domain: '.zillow.com' })

    for (let attempt = 0; attempt < CAPTCHA_MAX_ATTEMPTS; attempt++) {
      if (attempt > 0) {
        // wait between attempts, then re-check: a navigation started by the
        // previous hold may have completed in the meantime
        await captchaPause(5000)
        if (!(await captchaStillShowing(page))) return true
      }

      await holdCaptchaOnce(page)

      // give the page a moment to react
      await captchaPause(1000)
      if (!(await captchaStillShowing(page))) return true
    }

    return false
  } catch {
    return false
  }
}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
import Z from"express";import O from"puppeteer";import{mkdtemp as J,access as Q}from"fs/promises";import{exec as X}from"child_process";import{tmpdir as Y}from"os";import k from"path";import{checkForConfigFile as oo,waitForConfigFile as eo,getValueFromConfigFile as to,updateConfigFile as K}from"@rent-scraper/utils/config";import{parseError as q}from"@rent-scraper/utils";const H={darwin:["/Applications/Brave Browser.app/Contents/MacOS/Brave Browser","/Applications/Google Chrome.app/Contents/MacOS/Google Chrome","/Applications/Chromium.app/Contents/MacOS/Chromium","/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge"],linux:["/usr/bin/brave-browser","/usr/bin/google-chrome","/usr/bin/google-chrome-stable","/usr/bin/chromium-browser","/usr/bin/chromium","/snap/bin/chromium"],win32:["C:\\Program Files\\BraveSoftware\\Brave-Browser\\Application\\brave.exe","C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe","C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe"]},V=async()=>{const e=H[process.platform]??[];for(const r of e)try{return await Q(r),r}catch{}return null},W="http://127.0.0.1:9222/json/version",no=async(e=15e3)=>{const r=Date.now();for(;Date.now()-r<e;)try{await O.connect({browserURL:W});return}catch{await new Promise(f=>setTimeout(f,500))}throw new Error("timed out waiting for browser")},E=async()=>{try{return await O.connect({browserURL:W})}catch{return null}},A=async()=>{const e=await(await E())?.pages();e&&await Promise.all(e.map(r=>r.close()))},T=async(e="zillow")=>{if(await E())return{status:"already launched"};await oo(e)||await eo(e);const r=await J(k.join(Y(),"chrome-remote-")),f=await to(e,"browser"),n=f?(H[process.platform]??[]).find(s=>s.toLowerCase().includes(f.toLowerCase()))??await V():await V();if(console.log(`launching browser: ${n??"bundled chromium"}`),n)X(`"${n}" --remote-debugging-port=9222 --no-first-run --no-default-browser-check --user-data-dir="${r}"`,s=>{s&&console.error(`Error: 
${s.message}`)});else{const s=["--remote-debugging-port=9222","--no-first-run","--no-default-browser-check",`--user-data-dir=${r}`,...process.platform==="linux"?["--no-sandbox","--disable-setuid-sandbox"]:[]];await O.launch({headless:!1,args:s,dumpio:!1})}return{status:"launched"}},S=async e=>{const r=await E();if(r){const f=e,n=(await r.pages())?.[0]??await r.newPage();return(await n.title()).includes("denied")||await n.goto(f,{waitUntil:"load"}),{status:"opened"}}else return{status:"not connected"}},N=async()=>{const e=await E();return e?(await e.close(),{status:"closed"}):{status:"not connected"}},j="http://127.0.0.1:9222/json/version",P=async(e=0)=>{const r=await O.connect({browserURL:j});e===0&&await S("https://www.zillow.com/homes/for_rent/");const f=await r.cookies(),[n]=f.filter(o=>o.name==="_pxvid"),[s]=f.filter(o=>o.name==="_px3");return n?(await A(),[n,s].filter(Boolean).map(o=>`${o.name}=${o.value}`).join("; ")):(console.log("refetching zillow cookie"),await new Promise(o=>setTimeout(o,2e3)),await P(e+1))},G=async(e=0)=>{const r=await O.connect({browserURL:j});e===0&&(await S("https://www.redfin.com"),await new Promise(o=>setTimeout(o,3e3)));const f=await r.pages(),n=f?.[0]?await f[0].title():"",s=(await r.cookies()).filter(o=>o.domain.includes("redfin.com"));return s.some(o=>o.name==="aws-waf-token")&&n.includes("Redfin")?(await A(),s.map(o=>`${o.name}=${o.value}`).join("; ")):(console.log("refetching redfin cookie"),await new Promise(o=>setTimeout(o,2e3)),await G(e+1))},I=async()=>{try{const e=await G();e&&await K("redfin",{redfinCookie:e})}catch(e){const{status:r,message:f}=q(e);console.error(r,f)}},L=async()=>{try{const e={zillowCookie:await P()??{}};await K("zillow",e)}catch(e){const{status:r,message:f}=q(e);console.error(r,f)}};function ro(e){return e&&e.__esModule&&Object.prototype.hasOwnProperty.call(e,"default")?e.default:e}var U,x;function so(){if(x)return U;x=1;function e(n,s){var o=n;s.slice(0,-1).forEach(function(d){o=o[d]||{}});var 
p=s[s.length-1];return p in o}function r(n){return typeof n=="number"||/^0x[0-9a-f]+$/i.test(n)?!0:/^[-+]?(?:\d+(?:\.\d*)?|\.\d+)(e[-+]?\d+)?$/.test(n)}function f(n,s){return s==="constructor"&&typeof n[s]=="function"||s==="__proto__"}return U=function(n,s){s||(s={});var o={bools:{},strings:{},unknownFn:null};typeof s.unknown=="function"&&(o.unknownFn=s.unknown),typeof s.boolean=="boolean"&&s.boolean?o.allBools=!0:[].concat(s.boolean).filter(Boolean).forEach(function(i){o.bools[i]=!0});var p={};function d(i){return p[i].some(function(w){return o.bools[w]})}Object.keys(s.alias||{}).forEach(function(i){p[i]=[].concat(s.alias[i]),p[i].forEach(function(w){p[w]=[i].concat(p[i].filter(function(B){return w!==B}))})}),[].concat(s.string).filter(Boolean).forEach(function(i){o.strings[i]=!0,p[i]&&[].concat(p[i]).forEach(function(w){o.strings[w]=!0})});var a=s.default||{},t={_:[]};function M(i,w){return o.allBools&&/^--[^=]+$/.test(w)||o.strings[i]||o.bools[i]||p[i]}function _(i,w,B){for(var l=i,F=0;F<w.length-1;F++){var C=w[F];if(f(l,C))return;l[C]===void 0&&(l[C]={}),(l[C]===Object.prototype||l[C]===Number.prototype||l[C]===String.prototype)&&(l[C]={}),l[C]===Array.prototype&&(l[C]=[]),l=l[C]}var $=w[w.length-1];f(l,$)||((l===Object.prototype||l===Number.prototype||l===String.prototype)&&(l={}),l===Array.prototype&&(l=[]),l[$]===void 0||o.bools[$]||typeof l[$]=="boolean"?l[$]=B:Array.isArray(l[$])?l[$].push(B):l[$]=[l[$],B])}function h(i,w,B){if(!(B&&o.unknownFn&&!M(i,B)&&o.unknownFn(B)===!1)){var l=!o.strings[i]&&r(w)?Number(w):w;_(t,i.split("."),l),(p[i]||[]).forEach(function(F){_(t,F.split("."),l)})}}Object.keys(o.bools).forEach(function(i){h(i,a[i]===void 0?!1:a[i])});var R=[];n.indexOf("--")!==-1&&(R=n.slice(n.indexOf("--")+1),n=n.slice(0,n.indexOf("--")));for(var m=0;m<n.length;m++){var c=n[m],u,v;if(/^--.+=/.test(c)){var D=c.match(/^--([^=]+)=([\s\S]*)$/);u=D[1];var z=D[2];o.bools[u]&&(z=z!=="false"),h(u,z,c)}else 
if(/^--no-.+/.test(c))u=c.match(/^--no-(.+)/)[1],h(u,!1,c);else if(/^--.+/.test(c))u=c.match(/^--(.+)/)[1],v=n[m+1],v!==void 0&&!/^(-|--)[^-]/.test(v)&&!o.bools[u]&&!o.allBools&&(!p[u]||!d(u))?(h(u,v,c),m+=1):/^(true|false)$/.test(v)?(h(u,v==="true",c),m+=1):h(u,o.strings[u]?"":!0,c);else if(/^-[^-]+/.test(c)){for(var g=c.slice(1,-1).split(""),y=!1,b=0;b<g.length;b++){if(v=c.slice(b+2),v==="-"){h(g[b],v,c);continue}if(/[A-Za-z]/.test(g[b])&&v[0]==="="){h(g[b],v.slice(1),c),y=!0;break}if(/[A-Za-z]/.test(g[b])&&/-?\d+(\.\d*)?(e-?\d+)?$/.test(v)){h(g[b],v,c),y=!0;break}if(g[b+1]&&g[b+1].match(/\W/)){h(g[b],c.slice(b+2),c),y=!0;break}else h(g[b],o.strings[g[b]]?"":!0,c)}u=c.slice(-1)[0],!y&&u!=="-"&&(n[m+1]&&!/^(-|--)[^-]/.test(n[m+1])&&!o.bools[u]&&(!p[u]||!d(u))?(h(u,n[m+1],c),m+=1):n[m+1]&&/^(true|false)$/.test(n[m+1])?(h(u,n[m+1]==="true",c),m+=1):h(u,o.strings[u]?"":!0,c))}else if((!o.unknownFn||o.unknownFn(c)!==!1)&&t._.push(o.strings._||!r(c)?c:Number(c)),s.stopEarly){t._.push.apply(t._,n.slice(m+1));break}}return Object.keys(a).forEach(function(i){e(t,i.split("."))||(_(t,i.split("."),a[i]),(p[i]||[]).forEach(function(w){_(t,w.split("."),a[i])}))}),s["--"]?t["--"]=R.slice():R.forEach(function(i){t._.push(i)}),t},U}var io=so();const ao=ro(io);function co(e="zillow"){const r=Z(),f=process.env.HOST??"127.0.0.1",n=process.env.PORT??8082;r.use(Z.json());const s=ao(process.argv.slice(2)).debug,o=r.listen(Number(n),async()=>{await T(e);const d=setInterval(async()=>{(await E())?.connected&&(clearInterval(d),s&&console.log("Browser listening at 127.0.0.1:9222"),e==="zillow"?await L():e==="redfin"&&(await I(),await N()))},1e3);s&&console.log(`Server listening at ${f}:${n}`)}),p=()=>(o.close(async d=>{const a=await N();s&&(console.log(a),console.log("server closed")),process.exit(d?1:0)}),{status:"shutdown"});return r.get("/server",(d,a)=>{try{a.send({running:!0})}catch(t){a.send(t)}}),r.post("/browser/launch",async(d,a)=>{try{const t=await 
T();a.send({browser:t})}catch(t){a.send(t)}}),r.post("/browser/open",async(d,a)=>{try{const{url:t}=d?.body??{},M=await S(t);a.send({browser:M})}catch(t){a.send(t)}}),r.get("/cookie",async(d,a)=>{try{const t=await P();a.send({cookie:t})}catch(t){a.send(t)}}),r.post("/cookie/save",async(d,a)=>{try{const t=await L();a.send({cookie:t})}catch(t){a.send(t)}}),r.post("/cookie/redfin/save",async(d,a)=>{try{const t=await I();a.send({cookie:t})}catch(t){a.send(t)}}),r.post("/browser/close",async(d,a)=>{try{const t=await A();a.send({browser:t})}catch(t){a.send(t)}}),r.post("/server/shutdown",(d,a)=>{try{const t=p();a.send({server:t})}catch(t){a.send(t)}}),process.on("SIGINT",p),process.on("SIGTERM",p),r}export{P as a,G as b,A as c,I as d,L as e,E as g,T as l,S as o,co as r,N as s,no as w};
|