@rent-scraper/browser-server 1.0.28 → 1.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/run-browser-server.mjs +1 -1
- package/dist/index.d.mts +7 -7
- package/dist/index.mjs +1 -1
- package/dist/shared/browser-server.Be38x_p5.mjs +1 -0
- package/package.json +3 -3
- package/src/browser.ts +41 -8
- package/src/cookie.ts +26 -17
- package/src/server.ts +34 -9
- package/src/solve-captcha.ts +70 -0
- package/dist/shared/browser-server.C-q4WCe-.mjs +0 -1
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
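import{r}from"../shared/browser-server.C-q4WCe-.mjs";import"express";import"puppeteer";import"fs/promises";import"child_process";import"os";import"path";import"@rent-scraper/utils/config";import"@rent-scraper/utils";r();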
|
|
2
|
+
import{h as r}from"../shared/browser-server.Be38x_p5.mjs";import"express";import"puppeteer";import"fs/promises";import"child_process";import"os";import"path";import"@rent-scraper/utils/config";import"@rent-scraper/utils";r();
|
package/dist/index.d.mts
CHANGED
|
@@ -13,6 +13,9 @@ declare const closeBrowser: () => Promise<void>;
|
|
|
13
13
|
declare const launchBrowser: (source?: ListingsSource) => Promise<{
|
|
14
14
|
status: string;
|
|
15
15
|
}>;
|
|
16
|
+
declare const getBrowserStatus: () => Promise<{
|
|
17
|
+
status: string;
|
|
18
|
+
}>;
|
|
16
19
|
declare const openBrowser: (url: string) => Promise<{
|
|
17
20
|
status: string;
|
|
18
21
|
}>;
|
|
@@ -22,14 +25,11 @@ declare const shutdownBrowser: () => Promise<{
|
|
|
22
25
|
|
|
23
26
|
declare function runConfirmBrowserLaunch(): Promise<undefined>;
|
|
24
27
|
|
|
25
|
-
declare const getZillowCookie: (attempt?: number, options?: {
|
|
26
|
-
onCaptcha?: () => Promise<void>;
|
|
27
|
-
}) => Promise<string | undefined>;
|
|
28
|
+
declare const getZillowCookie: (attempt?: number) => Promise<string | undefined>;
|
|
28
29
|
declare const getRedfinCookie: (attempt?: number) => Promise<string | undefined>;
|
|
29
30
|
declare const saveRedfinCookie: () => Promise<void>;
|
|
30
|
-
declare const
|
|
31
|
-
|
|
32
|
-
}) => Promise<void>;
|
|
31
|
+
declare const refreshZillowCookie: () => Promise<string | undefined>;
|
|
32
|
+
declare const saveZillowCookie: () => Promise<void>;
|
|
33
33
|
|
|
34
34
|
// This extracts the core definitions from express to prevent a circular dependency between express and serve-static
|
|
35
35
|
|
|
@@ -1254,4 +1254,4 @@ interface Express extends Application {
|
|
|
1254
1254
|
|
|
1255
1255
|
declare function runBrowserServer(source?: ListingsSource): Express;
|
|
1256
1256
|
|
|
1257
|
-
export { closeBrowser, getBrowser, getRedfinCookie, getZillowCookie, launchBrowser, openBrowser, runBrowserServer, runConfirmBrowserLaunch, saveRedfinCookie, saveZillowCookie, shutdownBrowser, waitForBrowser };
|
|
1257
|
+
export { closeBrowser, getBrowser, getBrowserStatus, getRedfinCookie, getZillowCookie, launchBrowser, openBrowser, refreshZillowCookie, runBrowserServer, runConfirmBrowserLaunch, saveRedfinCookie, saveZillowCookie, shutdownBrowser, waitForBrowser };
|
package/dist/index.mjs
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export{c as closeBrowser,g as getBrowser,
|
|
1
|
+
export{c as closeBrowser,g as getBrowser,a as getBrowserStatus,d as getRedfinCookie,b as getZillowCookie,l as launchBrowser,o as openBrowser,r as refreshZillowCookie,h as runBrowserServer,e as saveRedfinCookie,f as saveZillowCookie,s as shutdownBrowser,w as waitForBrowser}from"./shared/browser-server.Be38x_p5.mjs";import{confirm as r,isCancel as a,cancel as s,outro as i}from"@clack/prompts";import{setTimeout as o}from"node:timers/promises";import"express";import"puppeteer";import"fs/promises";import"child_process";import"os";import"path";import"@rent-scraper/utils/config";import"@rent-scraper/utils";async function t(){const e=await r({message:"We need to launch your browser to continue",active:"OK",inactive:"Cancel"});if(a(e)||!e)return s("Create config canceled. Please try again."),process.exit(1);await o(1e3),i("Browser Launching..."),await o(1e3)}export{t as runConfirmBrowserLaunch};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
import j from"express";import E from"puppeteer";import{mkdtemp as k,access as ee}from"fs/promises";import{exec as te}from"child_process";import{tmpdir as oe}from"os";import ae from"path";import{checkForConfigFile as ne,waitForConfigFile as se,getValueFromConfigFile as ie,updateConfigFile as M}from"@rent-scraper/utils/config";import{parseError as L}from"@rent-scraper/utils";const H={darwin:["/Applications/Brave Browser.app/Contents/MacOS/Brave Browser","/Applications/Google Chrome.app/Contents/MacOS/Google Chrome","/Applications/Chromium.app/Contents/MacOS/Chromium","/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge"],linux:["/usr/bin/brave-browser","/usr/bin/google-chrome","/usr/bin/google-chrome-stable","/usr/bin/chromium-browser","/usr/bin/chromium","/snap/bin/chromium"],win32:["C:\\Program Files\\BraveSoftware\\Brave-Browser\\Application\\brave.exe","C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe","C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe"]},V=async()=>{const t=H[process.platform]??[];for(const e of t)try{return await ee(e),e}catch{}return null},W="http://127.0.0.1:9222/json/version",re=async(t=15e3)=>{const e=Date.now();for(;Date.now()-e<t;)try{await E.connect({browserURL:W});return}catch{await new Promise(c=>setTimeout(c,500))}throw new Error("timed out waiting for browser")},$=async()=>{try{return await E.connect({browserURL:W})}catch{return null}},S=async()=>{const t=await(await $())?.pages();t&&await Promise.all(t.map(e=>e.close()))},N=async(t="zillow")=>{if(await $())return{status:"already launched"};await ne(t)||await se(t);const e=await k(ae.join(oe(),"chrome-remote-")),c=await ie(t,"browser"),o=c?(H[process.platform]??[]).find(s=>s.toLowerCase().includes(c.toLowerCase()))??await V():await V();if(console.log(`launching browser: ${o??"bundled chromium"}`),o)te(`"${o}" --remote-debugging-port=9222 --no-first-run --no-default-browser-check --user-data-dir="${e}"`,s=>{s&&console.error(`Error: 
${s.message}`)});else{const s=["--remote-debugging-port=9222","--no-first-run","--no-default-browser-check",`--user-data-dir=${e}`,...process.platform==="linux"?["--no-sandbox","--disable-setuid-sandbox"]:[]];await E.launch({headless:!1,args:s,dumpio:!1})}return{status:"launched"}},J=async()=>{const t=await $();if(t){const e=(await t.pages())?.[0];return e?(await e.title()).includes("denied")?{status:"captcha"}:{status:"navigated"}:{status:"navigated"}}else return{status:"not connected"}},_=async t=>{const e=await $();if(e){const c=t,o=(await e.pages())?.[0]??await e.newPage();return(await o.title()).includes("denied")?{status:"captcha"}:(await o.goto(c,{waitUntil:"load"}),(await o.title()).includes("denied")?{status:"captcha"}:{status:"navigated"})}else return{status:"not connected"}},x=async()=>{const t=await $();if(t){try{await(await t.target().createCDPSession()).send("Browser.close")}catch{await t.disconnect()}return{status:"closed"}}else return{status:"not connected"}},G="http://127.0.0.1:9222/json/version",T=async(t=0)=>{const e=await E.connect({browserURL:G});t===0&&await _("https://www.zillow.com/homes/for_rent/");const c=await e.cookies(),[o]=c.filter(n=>n.name==="_pxvid"),[s]=c.filter(n=>n.name==="_px3");return o?(await S(),[o,s].filter(Boolean).map(n=>`${n.name}=${n.value}`).join("; ")):(console.log("refetching zillow cookie"),await new Promise(n=>setTimeout(n,2e3)),await T(t+1))},I=async(t=0)=>{const e=await E.connect({browserURL:G});t===0&&(await _("https://www.redfin.com"),await new Promise(n=>setTimeout(n,3e3)));const c=await e.pages(),o=c?.[0]?await c[0].title():"",s=(await e.cookies()).filter(n=>n.domain.includes("redfin.com"));return s.some(n=>n.name==="aws-waf-token")&&o.includes("Redfin")?(await S(),s.map(n=>`${n.name}=${n.value}`).join("; ")):(console.log("refetching redfin cookie"),await new Promise(n=>setTimeout(n,2e3)),await I(t+1))},U=async()=>{try{const t=await I();t&&await 
M("redfin",{redfinCookie:t})}catch(t){const{status:e,message:c}=L(t);console.error(e,c)}},Q=async()=>{try{const t=await(await E.connect({browserURL:G})).cookies(),[e]=t.filter(o=>o.name==="_pxvid"),[c]=t.filter(o=>o.name==="_px3");if(e){const o=[e,c].filter(Boolean).map(s=>`${s.name}=${s.value}`).join("; ");return await M("zillow",{zillowCookie:o}),o}}catch(t){const{status:e,message:c}=L(t);console.error(e,c)}},Z=async()=>{try{const t=await T();t&&await M("zillow",{zillowCookie:t})}catch(t){const{status:e,message:c}=L(t);console.error(e,c)}};function ce(t){return t&&t.__esModule&&Object.prototype.hasOwnProperty.call(t,"default")?t.default:t}var D,X;function le(){if(X)return D;X=1;function t(o,s){var n=o;s.slice(0,-1).forEach(function(f){n=n[f]||{}});var p=s[s.length-1];return p in n}function e(o){return typeof o=="number"||/^0x[0-9a-f]+$/i.test(o)?!0:/^[-+]?(?:\d+(?:\.\d*)?|\.\d+)(e[-+]?\d+)?$/.test(o)}function c(o,s){return s==="constructor"&&typeof o[s]=="function"||s==="__proto__"}return D=function(o,s){s||(s={});var n={bools:{},strings:{},unknownFn:null};typeof s.unknown=="function"&&(n.unknownFn=s.unknown),typeof s.boolean=="boolean"&&s.boolean?n.allBools=!0:[].concat(s.boolean).filter(Boolean).forEach(function(r){n.bools[r]=!0});var p={};function f(r){return p[r].some(function(d){return n.bools[d]})}Object.keys(s.alias||{}).forEach(function(r){p[r]=[].concat(s.alias[r]),p[r].forEach(function(d){p[d]=[r].concat(p[r].filter(function(C){return d!==C}))})}),[].concat(s.string).filter(Boolean).forEach(function(r){n.strings[r]=!0,p[r]&&[].concat(p[r]).forEach(function(d){n.strings[d]=!0})});var i=s.default||{},a={_:[]};function A(r,d){return n.allBools&&/^--[^=]+$/.test(d)||n.strings[r]||n.bools[r]||p[r]}function P(r,d,C){for(var u=r,F=0;F<d.length-1;F++){var y=d[F];if(c(u,y))return;u[y]===void 0&&(u[y]={}),(u[y]===Object.prototype||u[y]===Number.prototype||u[y]===String.prototype)&&(u[y]={}),u[y]===Array.prototype&&(u[y]=[]),u=u[y]}var 
B=d[d.length-1];c(u,B)||((u===Object.prototype||u===Number.prototype||u===String.prototype)&&(u={}),u===Array.prototype&&(u=[]),u[B]===void 0||n.bools[B]||typeof u[B]=="boolean"?u[B]=C:Array.isArray(u[B])?u[B].push(C):u[B]=[u[B],C])}function h(r,d,C){if(!(C&&n.unknownFn&&!A(r,C)&&n.unknownFn(C)===!1)){var u=!n.strings[r]&&e(d)?Number(d):d;P(a,r.split("."),u),(p[r]||[]).forEach(function(F){P(a,F.split("."),u)})}}Object.keys(n.bools).forEach(function(r){h(r,i[r]===void 0?!1:i[r])});var z=[];o.indexOf("--")!==-1&&(z=o.slice(o.indexOf("--")+1),o=o.slice(0,o.indexOf("--")));for(var m=0;m<o.length;m++){var l=o[m],w,v;if(/^--.+=/.test(l)){var q=l.match(/^--([^=]+)=([\s\S]*)$/);w=q[1];var R=q[2];n.bools[w]&&(R=R!=="false"),h(w,R,l)}else if(/^--no-.+/.test(l))w=l.match(/^--no-(.+)/)[1],h(w,!1,l);else if(/^--.+/.test(l))w=l.match(/^--(.+)/)[1],v=o[m+1],v!==void 0&&!/^(-|--)[^-]/.test(v)&&!n.bools[w]&&!n.allBools&&(!p[w]||!f(w))?(h(w,v,l),m+=1):/^(true|false)$/.test(v)?(h(w,v==="true",l),m+=1):h(w,n.strings[w]?"":!0,l);else if(/^-[^-]+/.test(l)){for(var g=l.slice(1,-1).split(""),O=!1,b=0;b<g.length;b++){if(v=l.slice(b+2),v==="-"){h(g[b],v,l);continue}if(/[A-Za-z]/.test(g[b])&&v[0]==="="){h(g[b],v.slice(1),l),O=!0;break}if(/[A-Za-z]/.test(g[b])&&/-?\d+(\.\d*)?(e-?\d+)?$/.test(v)){h(g[b],v,l),O=!0;break}if(g[b+1]&&g[b+1].match(/\W/)){h(g[b],l.slice(b+2),l),O=!0;break}else h(g[b],n.strings[g[b]]?"":!0,l)}w=l.slice(-1)[0],!O&&w!=="-"&&(o[m+1]&&!/^(-|--)[^-]/.test(o[m+1])&&!n.bools[w]&&(!p[w]||!f(w))?(h(w,o[m+1],l),m+=1):o[m+1]&&/^(true|false)$/.test(o[m+1])?(h(w,o[m+1]==="true",l),m+=1):h(w,n.strings[w]?"":!0,l))}else if((!n.unknownFn||n.unknownFn(l)!==!1)&&a._.push(n.strings._||!e(l)?l:Number(l)),s.stopEarly){a._.push.apply(a._,o.slice(m+1));break}}return Object.keys(i).forEach(function(r){t(a,r.split("."))||(P(a,r.split("."),i[r]),(p[r]||[]).forEach(function(d){P(a,d.split("."),i[r])}))}),s["--"]?a["--"]=z.slice():z.forEach(function(r){a._.push(r)}),a},D}var ue=le();const 
we=ce(ue),Y=1e4,K=async t=>{await t.keyboard.press("Escape"),await new Promise(e=>setTimeout(e,500)),await t.keyboard.press("Tab"),await new Promise(e=>setTimeout(e,200)),await t.keyboard.down("Space"),await new Promise(e=>setTimeout(e,Y)),await t.keyboard.up("Space")},fe=async()=>{try{const t=await $();if(!t)return!1;const e=(await t.pages())?.[0];if(!e)return!1;if(!(await e.title()).includes("denied"))return!0;await e.deleteCookie({name:"pxcts",domain:".zillow.com"});for(let c=0;c<5;c++){c>0&&await new Promise(o=>setTimeout(o,5e3));try{const o=await e.$("#px-captcha");if(o){const s=await o.boundingBox();s?(await e.mouse.move(s.x+s.width/2,s.y+s.height/2),await e.mouse.down(),await new Promise(n=>setTimeout(n,Y)),await e.mouse.up()):await K(e)}else await K(e)}catch{await K(e)}if(await new Promise(o=>setTimeout(o,1e3)),!(await e.title()).includes("denied"))return!0}return!1}catch{return!1}};function de(t="zillow"){const e=j(),c=process.env.HOST??"127.0.0.1",o=process.env.PORT??8082;e.use(j.json());const s=we(process.argv.slice(2)).debug,n=e.listen(Number(o),async()=>{await N(t);const f=setInterval(async()=>{(await $())?.connected&&(clearInterval(f),s&&console.log("Browser listening at 127.0.0.1:9222"),t==="zillow"?await Z():t==="redfin"&&(await U(),await x()))},1e3);s&&console.log(`Server listening at ${c}:${o}`)}),p=async()=>(await x(),n.close(f=>{s&&console.log("server closed"),process.exit(f?1:0)}),{status:"shutdown"});return e.get("/server",(f,i)=>{try{i.send({running:!0})}catch(a){i.send(a)}}),e.post("/browser/launch",async(f,i)=>{try{const a=await N();i.send({browser:a})}catch(a){i.send(a)}}),e.post("/browser/status",async(f,i)=>{try{const a=await J();i.send({browser:a})}catch(a){i.send(a)}}),e.post("/browser/open",async(f,i)=>{try{const{url:a}=f?.body??{},A=await _(a);i.send({browser:A})}catch(a){i.send(a)}}),e.get("/cookie",async(f,i)=>{try{const a=await T();i.send({cookie:a})}catch(a){i.send(a)}}),e.post("/cookie/refresh",async(f,i)=>{try{const a=await 
Q();i.send({cookie:a})}catch(a){i.send(a)}}),e.post("/cookie/save",async(f,i)=>{try{const a=await Z();i.send({cookie:a})}catch(a){i.send(a)}}),e.post("/cookie/redfin/save",async(f,i)=>{try{const a=await U();i.send({cookie:a})}catch(a){i.send(a)}}),e.post("/captcha/solve",async(f,i)=>{try{const a=await fe();i.send({solved:a})}catch(a){i.send(a)}}),e.post("/browser/close",async(f,i)=>{try{const a=await S();i.send({browser:a})}catch(a){i.send(a)}}),e.post("/server/shutdown",(f,i)=>{try{const a=p();i.send({server:a})}catch(a){i.send(a)}}),process.on("SIGINT",p),process.on("SIGTERM",p),e}export{J as a,T as b,S as c,I as d,U as e,Z as f,$ as g,de as h,N as l,_ as o,Q as r,x as s,re as w};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@rent-scraper/browser-server",
|
|
3
|
-
"version": "1.0.28",
|
|
3
|
+
"version": "1.0.30",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"exports": {
|
|
6
6
|
".": {
|
|
@@ -26,8 +26,8 @@
|
|
|
26
26
|
"express": "^4.21.2",
|
|
27
27
|
"picocolors": "^1.1.1",
|
|
28
28
|
"puppeteer": "^24.11.2",
|
|
29
|
-
"@rent-scraper/utils": "1.0.28",
|
|
30
|
-
"@rent-scraper/api": "1.0.28"
|
|
29
|
+
"@rent-scraper/utils": "1.0.30",
|
|
30
|
+
"@rent-scraper/api": "1.0.30"
|
|
31
31
|
},
|
|
32
32
|
"devDependencies": {
|
|
33
33
|
"@types/express": "^5.0.3",
|
package/src/browser.ts
CHANGED
|
@@ -3,7 +3,7 @@ import { access, mkdtemp } from 'fs/promises'
|
|
|
3
3
|
import { exec } from 'child_process'
|
|
4
4
|
import { tmpdir } from 'os'
|
|
5
5
|
import path from 'path'
|
|
6
|
-
import { checkForConfigFile, waitForConfigFile } from '@rent-scraper/utils/config'
|
|
6
|
+
import { checkForConfigFile, waitForConfigFile, getValueFromConfigFile } from '@rent-scraper/utils/config'
|
|
7
7
|
import type { ListingsSource } from '@rent-scraper/api'
|
|
8
8
|
|
|
9
9
|
const nativeBrowserPaths: Partial<Record<NodeJS.Platform, string[]>> = {
|
|
@@ -82,7 +82,10 @@ export const launchBrowser = async (source = 'zillow' as ListingsSource) => {
|
|
|
82
82
|
await waitForConfigFile(source)
|
|
83
83
|
}
|
|
84
84
|
const userDataDir = await mkdtemp(path.join(tmpdir(), 'chrome-remote-'))
|
|
85
|
-
const nativePath = await findNativeBrowser()
|
|
85
|
+
const browserPref = await getValueFromConfigFile(source, 'browser') as string | null
|
|
86
|
+
const nativePath = browserPref
|
|
87
|
+
? (nativeBrowserPaths[process.platform] ?? []).find(p => p.toLowerCase().includes(browserPref.toLowerCase())) ?? await findNativeBrowser()
|
|
88
|
+
: await findNativeBrowser()
|
|
86
89
|
console.log(`launching browser: ${nativePath ?? 'bundled chromium'}`)
|
|
87
90
|
if (nativePath) {
|
|
88
91
|
exec(`"${nativePath}" --remote-debugging-port=9222 --no-first-run --no-default-browser-check --user-data-dir="${userDataDir}"`, (error) => {
|
|
@@ -101,6 +104,22 @@ export const launchBrowser = async (source = 'zillow' as ListingsSource) => {
|
|
|
101
104
|
return { status: 'launched' }
|
|
102
105
|
}
|
|
103
106
|
|
|
107
|
+
export const getBrowserStatus = async () => {
|
|
108
|
+
const browser = await getBrowser()
|
|
109
|
+
if (browser) {
|
|
110
|
+
const pages = await browser.pages()
|
|
111
|
+
const page = pages?.[0]
|
|
112
|
+
if (!page) return { status: 'navigated' }
|
|
113
|
+
const pageTitle = await page.title()
|
|
114
|
+
if (pageTitle.includes('denied')) {
|
|
115
|
+
return { status: 'captcha' }
|
|
116
|
+
}
|
|
117
|
+
return { status: 'navigated' }
|
|
118
|
+
} else {
|
|
119
|
+
return { status: 'not connected' }
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
|
|
104
123
|
export const openBrowser = async (url: string) => {
|
|
105
124
|
const browser = await getBrowser()
|
|
106
125
|
if (browser) {
|
|
@@ -109,12 +128,18 @@ export const openBrowser = async (url: string) => {
|
|
|
109
128
|
const page = pages?.[0] ?? await browser.newPage()
|
|
110
129
|
const pageTitle = await page.title()
|
|
111
130
|
// do not change page if captcha is showing
|
|
112
|
-
if (!pageTitle.includes('denied')) {
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
131
|
+
if (pageTitle.includes('denied')) {
|
|
132
|
+
return { status: 'captcha' }
|
|
133
|
+
}
|
|
134
|
+
await page.goto(pageUrl, {
|
|
135
|
+
waitUntil: 'load',
|
|
136
|
+
})
|
|
137
|
+
// check title after navigation — zillow may have shown a captcha as a result of the request
|
|
138
|
+
const titleAfterNav = await page.title()
|
|
139
|
+
if (titleAfterNav.includes('denied')) {
|
|
140
|
+
return { status: 'captcha' }
|
|
116
141
|
}
|
|
117
|
-
return { status: 'opened' }
|
|
142
|
+
return { status: 'navigated' }
|
|
118
143
|
} else {
|
|
119
144
|
return { status: 'not connected' }
|
|
120
145
|
}
|
|
@@ -123,7 +148,15 @@ export const openBrowser = async (url: string) => {
|
|
|
123
148
|
export const shutdownBrowser = async () => {
|
|
124
149
|
const browser = await getBrowser()
|
|
125
150
|
if (browser) {
|
|
126
|
-
|
|
151
|
+
try {
|
|
152
|
+
// send Browser.close via CDP to actually terminate the browser process
|
|
153
|
+
// (browser.close() on a connected browser only disconnects the session)
|
|
154
|
+
const target = browser.target()
|
|
155
|
+
const session = await target.createCDPSession()
|
|
156
|
+
await session.send('Browser.close')
|
|
157
|
+
} catch {
|
|
158
|
+
await browser.disconnect()
|
|
159
|
+
}
|
|
127
160
|
return { status: 'closed' }
|
|
128
161
|
} else {
|
|
129
162
|
return { status: 'not connected' }
|
package/src/cookie.ts
CHANGED
|
@@ -5,29 +5,21 @@ import { parseError } from '@rent-scraper/utils'
|
|
|
5
5
|
|
|
6
6
|
const wsChromeEndpointurl = 'http://127.0.0.1:9222/json/version'
|
|
7
7
|
|
|
8
|
-
export const getZillowCookie = async (attempt = 0
|
|
8
|
+
export const getZillowCookie = async (attempt = 0): Promise<string | undefined> => {
|
|
9
9
|
const browser = await puppeteer.connect({
|
|
10
10
|
browserURL: wsChromeEndpointurl,
|
|
11
11
|
})
|
|
12
12
|
if (attempt === 0) {
|
|
13
13
|
await openBrowser('https://www.zillow.com/homes/for_rent/')
|
|
14
14
|
}
|
|
15
|
-
|
|
16
|
-
const title = pages?.[0] ? await pages[0].title() : ''
|
|
17
|
-
if (title.includes('Access to this page has been denied')) {
|
|
18
|
-
if (options?.onCaptcha) {
|
|
19
|
-
await options.onCaptcha()
|
|
20
|
-
} else {
|
|
21
|
-
await new Promise(resolve => setTimeout(resolve, 2000))
|
|
22
|
-
}
|
|
23
|
-
return await getZillowCookie(attempt + 1, options)
|
|
24
|
-
}
|
|
15
|
+
|
|
25
16
|
const allCookies = await browser.cookies()
|
|
26
|
-
const
|
|
27
|
-
const
|
|
28
|
-
|
|
17
|
+
const [pxvid] = (allCookies).filter(cookie => cookie.name === '_pxvid')
|
|
18
|
+
const [px3] = (allCookies).filter(cookie => cookie.name === '_px3')
|
|
19
|
+
|
|
20
|
+
if (pxvid) {
|
|
29
21
|
await closeBrowser()
|
|
30
|
-
return
|
|
22
|
+
return [pxvid, px3].filter(Boolean).map(c => `${c.name}=${c.value}`).join('; ')
|
|
31
23
|
} else {
|
|
32
24
|
console.log('refetching zillow cookie')
|
|
33
25
|
await new Promise(resolve => setTimeout(resolve, 2000))
|
|
@@ -68,9 +60,26 @@ export const saveRedfinCookie = async () => {
|
|
|
68
60
|
}
|
|
69
61
|
}
|
|
70
62
|
|
|
71
|
-
export const
|
|
63
|
+
export const refreshZillowCookie = async () => {
|
|
64
|
+
try {
|
|
65
|
+
const browser = await puppeteer.connect({ browserURL: wsChromeEndpointurl })
|
|
66
|
+
const allCookies = await browser.cookies()
|
|
67
|
+
const [pxvid] = allCookies.filter(c => c.name === '_pxvid')
|
|
68
|
+
const [px3] = allCookies.filter(c => c.name === '_px3')
|
|
69
|
+
if (pxvid) {
|
|
70
|
+
const zillowCookie = [pxvid, px3].filter(Boolean).map(c => `${c.name}=${c.value}`).join('; ')
|
|
71
|
+
await updateConfigFile('zillow', { zillowCookie })
|
|
72
|
+
return zillowCookie
|
|
73
|
+
}
|
|
74
|
+
} catch (error: any) {
|
|
75
|
+
const { status, message } = parseError(error)
|
|
76
|
+
console.error(status, message)
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
export const saveZillowCookie = async () => {
|
|
72
81
|
try {
|
|
73
|
-
const zillowCookie = await getZillowCookie(
|
|
82
|
+
const zillowCookie = await getZillowCookie()
|
|
74
83
|
if (zillowCookie) {
|
|
75
84
|
await updateConfigFile('zillow', { zillowCookie })
|
|
76
85
|
}
|
package/src/server.ts
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import express from 'express'
|
|
2
2
|
import minimist from 'minimist'
|
|
3
|
-
import { getZillowCookie, saveZillowCookie, saveRedfinCookie } from './cookie.js'
|
|
4
|
-
import { launchBrowser, closeBrowser, getBrowser, shutdownBrowser, openBrowser } from './browser.js'
|
|
3
|
+
import { getZillowCookie, saveZillowCookie, saveRedfinCookie, refreshZillowCookie } from './cookie.js'
|
|
4
|
+
import { launchBrowser, closeBrowser, getBrowser, shutdownBrowser, openBrowser, getBrowserStatus } from './browser.js'
|
|
5
|
+
import { solveZillowCaptcha } from './solve-captcha.js'
|
|
5
6
|
import type { ListingsSource } from '@rent-scraper/api'
|
|
6
7
|
|
|
7
8
|
export function runBrowserServer(source: ListingsSource = 'zillow') {
|
|
@@ -35,13 +36,10 @@ export function runBrowserServer(source: ListingsSource = 'zillow') {
|
|
|
35
36
|
}
|
|
36
37
|
})
|
|
37
38
|
|
|
38
|
-
const shutdownServer = () => {
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
if (debug)
|
|
42
|
-
console.log(browser)
|
|
43
|
-
console.log('server closed')
|
|
44
|
-
}
|
|
39
|
+
const shutdownServer = async () => {
|
|
40
|
+
await shutdownBrowser()
|
|
41
|
+
server.close((err) => {
|
|
42
|
+
if (debug) console.log('server closed')
|
|
45
43
|
process.exit(err ? 1 : 0)
|
|
46
44
|
})
|
|
47
45
|
return { status: 'shutdown' }
|
|
@@ -64,6 +62,15 @@ export function runBrowserServer(source: ListingsSource = 'zillow') {
|
|
|
64
62
|
}
|
|
65
63
|
})
|
|
66
64
|
|
|
65
|
+
app.post('/browser/status', async (_req, res) => {
|
|
66
|
+
try {
|
|
67
|
+
const browser = await getBrowserStatus()
|
|
68
|
+
res.send({ browser })
|
|
69
|
+
} catch (error) {
|
|
70
|
+
res.send(error)
|
|
71
|
+
}
|
|
72
|
+
})
|
|
73
|
+
|
|
67
74
|
app.post('/browser/open', async (req, res) => {
|
|
68
75
|
try {
|
|
69
76
|
const { url } = req?.body ?? {}
|
|
@@ -83,6 +90,15 @@ export function runBrowserServer(source: ListingsSource = 'zillow') {
|
|
|
83
90
|
}
|
|
84
91
|
})
|
|
85
92
|
|
|
93
|
+
app.post('/cookie/refresh', async (_req, res) => {
|
|
94
|
+
try {
|
|
95
|
+
const cookie = await refreshZillowCookie()
|
|
96
|
+
res.send({ cookie })
|
|
97
|
+
} catch (error) {
|
|
98
|
+
res.send(error)
|
|
99
|
+
}
|
|
100
|
+
})
|
|
101
|
+
|
|
86
102
|
app.post('/cookie/save', async (_req, res) => {
|
|
87
103
|
try {
|
|
88
104
|
const cookie = await saveZillowCookie()
|
|
@@ -101,6 +117,15 @@ export function runBrowserServer(source: ListingsSource = 'zillow') {
|
|
|
101
117
|
}
|
|
102
118
|
})
|
|
103
119
|
|
|
120
|
+
app.post('/captcha/solve', async (_req, res) => {
|
|
121
|
+
try {
|
|
122
|
+
const solved = await solveZillowCaptcha()
|
|
123
|
+
res.send({ solved })
|
|
124
|
+
} catch (error) {
|
|
125
|
+
res.send(error)
|
|
126
|
+
}
|
|
127
|
+
})
|
|
128
|
+
|
|
104
129
|
app.post('/browser/close', async (_req, res) => {
|
|
105
130
|
try {
|
|
106
131
|
const browser = await closeBrowser()
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import type { Page } from 'puppeteer'
|
|
2
|
+
import { getBrowser } from './browser.js'
|
|
3
|
+
|
|
4
|
+
const HOLD_DURATION_MS = 10000 // PerimeterX typically requires ~8-10s hold
|
|
5
|
+
|
|
6
|
+
const pressAndHoldSpace = async (page: Page) => {
|
|
7
|
+
await page.keyboard.press('Escape')
|
|
8
|
+
await new Promise(resolve => setTimeout(resolve, 500))
|
|
9
|
+
await page.keyboard.press('Tab')
|
|
10
|
+
await new Promise(resolve => setTimeout(resolve, 200))
|
|
11
|
+
await page.keyboard.down('Space')
|
|
12
|
+
await new Promise(resolve => setTimeout(resolve, HOLD_DURATION_MS))
|
|
13
|
+
await page.keyboard.up('Space')
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
export const solveZillowCaptcha = async (): Promise<boolean> => {
|
|
17
|
+
try {
|
|
18
|
+
const browser = await getBrowser()
|
|
19
|
+
if (!browser) return false
|
|
20
|
+
|
|
21
|
+
const pages = await browser.pages()
|
|
22
|
+
const page = pages?.[0]
|
|
23
|
+
if (!page) return false
|
|
24
|
+
|
|
25
|
+
const title = await page.title()
|
|
26
|
+
if (!title.includes('denied')) return true // no captcha present
|
|
27
|
+
|
|
28
|
+
// Delete pxcts cookie — resets the challenge state so a fresh attempt starts clean
|
|
29
|
+
await page.deleteCookie({ name: 'pxcts', domain: '.zillow.com' })
|
|
30
|
+
|
|
31
|
+
for (let attempt = 0; attempt < 5; attempt++) {
|
|
32
|
+
if (attempt > 0) {
|
|
33
|
+
// wait between attempts
|
|
34
|
+
await new Promise(resolve => setTimeout(resolve, 5000))
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
try {
|
|
38
|
+
// Try mouse hold on the captcha element first
|
|
39
|
+
const captchaEl = await page.$('#px-captcha')
|
|
40
|
+
if (captchaEl) {
|
|
41
|
+
const box = await captchaEl.boundingBox()
|
|
42
|
+
if (box) {
|
|
43
|
+
await page.mouse.move(box.x + box.width / 2, box.y + box.height / 2)
|
|
44
|
+
await page.mouse.down()
|
|
45
|
+
await new Promise(resolve => setTimeout(resolve, HOLD_DURATION_MS))
|
|
46
|
+
await page.mouse.up()
|
|
47
|
+
} else {
|
|
48
|
+
// element found but no bounding box — fall back to keyboard
|
|
49
|
+
await pressAndHoldSpace(page)
|
|
50
|
+
}
|
|
51
|
+
} else {
|
|
52
|
+
// no element found — fall back to keyboard Tab + Space
|
|
53
|
+
await pressAndHoldSpace(page)
|
|
54
|
+
}
|
|
55
|
+
} catch {
|
|
56
|
+
// any puppeteer error — try keyboard as last resort
|
|
57
|
+
await pressAndHoldSpace(page)
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// give the page a moment to react
|
|
61
|
+
await new Promise(resolve => setTimeout(resolve, 1000))
|
|
62
|
+
const currentTitle = await page.title()
|
|
63
|
+
if (!currentTitle.includes('denied')) return true
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
return false
|
|
67
|
+
} catch {
|
|
68
|
+
return false
|
|
69
|
+
}
|
|
70
|
+
}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
/*
 * Minified browser-server bundle (ES module).
 *
 * Contents, in order:
 *   oo / eo   - per-platform list of browser executables and a probe that returns the first accessible one
 *   H, W      - DevTools endpoint used for puppeteer.connect (127.0.0.1:9222)
 *   to, E     - wait-for-browser poller and a "connect or null" helper
 *   P, T, S, N - close pages, launch browser, open URL, close browser
 *   A, G      - read the Zillow / Redfin cookie string from the connected browser
 *   I, U      - fetch a cookie and persist it through updateConfigFile
 *   no/so/ao  - CommonJS default-export interop plus an inlined argv parser
 *   io        - builds the Express app and wires routes and signal handlers
 *
 * Export map: A as a, G as b, P as c, I as d, U as e, E as g, T as l, S as o, io as r, N as s, to as w.
 *
 * NOTE: physical line breaks must never directly follow a `return` keyword;
 * automatic semicolon insertion would turn `return <newline> expr` into `return;`.
 */
import Z from"express";import O from"puppeteer";import{mkdtemp as J,access as Q}from"fs/promises";import{exec as V}from"child_process";import{tmpdir as X}from"os";import Y from"path";import{checkForConfigFile as x,waitForConfigFile as k,updateConfigFile as K}from"@rent-scraper/utils/config";import{parseError as q}from"@rent-scraper/utils";
// oo: candidate browser executable paths keyed by process.platform.
const oo={darwin:["/Applications/Brave Browser.app/Contents/MacOS/Brave Browser","/Applications/Google Chrome.app/Contents/MacOS/Google Chrome","/Applications/Chromium.app/Contents/MacOS/Chromium","/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge"],linux:["/usr/bin/brave-browser","/usr/bin/google-chrome","/usr/bin/google-chrome-stable","/usr/bin/chromium-browser","/usr/bin/chromium","/snap/bin/chromium"],win32:["C:\\Program Files\\BraveSoftware\\Brave-Browser\\Application\\brave.exe","C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe","C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe"]},
// eo: resolves to the first candidate path that fs access() accepts, or null when none is accessible.
eo=async()=>{const s=oo[process.platform]??[];for(const e of s)try{return await Q(e),e}catch{}return null},
// H: DevTools endpoint passed to puppeteer.connect as browserURL.
H="http://127.0.0.1:9222/json/version",
// to: retries puppeteer.connect every 500 ms until it succeeds; throws once `s` ms (default 15000) have elapsed.
to=async(s=15e3)=>{const e=Date.now();for(;Date.now()-e<s;)try{await O.connect({browserURL:H});return}catch{await new Promise(u=>setTimeout(u,500))}throw new Error("timed out waiting for browser")},
// E: resolves to a connected puppeteer Browser, or null when the connection attempt throws.
E=async()=>{try{return await O.connect({browserURL:H})}catch{return null}},
// P: closes every open page of the connected browser; does nothing when no browser is reachable.
P=async()=>{const s=await(await E())?.pages();s&&await Promise.all(s.map(e=>e.close()))},
// T: launches a browser with remote debugging on port 9222 and a fresh temp profile directory.
//    Returns {status:"already launched"} when a browser is already reachable. Waits for the config
//    file of source `s` if it is not present. Uses a discovered executable via exec, otherwise puppeteer.launch.
T=async(s="zillow")=>{if(await E())return{status:"already launched"};await x(s)||await k(s);const e=await J(Y.join(X(),"chrome-remote-")),u=await eo();if(console.log(`launching browser: ${u??"bundled chromium"}`),u)V(`"${u}" --remote-debugging-port=9222 --no-first-run --no-default-browser-check --user-data-dir="${e}"`,t=>{t&&console.error(`Error: ${t.message}`)});else{const t=["--remote-debugging-port=9222","--no-first-run","--no-default-browser-check",`--user-data-dir=${e}`,...process.platform==="linux"?["--no-sandbox","--disable-setuid-sandbox"]:[]];await O.launch({headless:!1,args:t,dumpio:!1})}return{status:"launched"}},
// S: navigates the first page (or a new page) to URL `s` unless the current title contains "denied".
//    Returns {status:"opened"}, or {status:"not connected"} when no browser is reachable.
S=async s=>{const e=await E();if(e){const u=s,t=(await e.pages())?.[0]??await e.newPage();return(await t.title()).includes("denied")||await t.goto(u,{waitUntil:"load"}),{status:"opened"}}else return{status:"not connected"}},
// N: closes the connected browser; returns {status:"closed"} or {status:"not connected"}.
N=async()=>{const s=await E();return s?(await s.close(),{status:"closed"}):{status:"not connected"}},
// W: same DevTools endpoint as H, used by the cookie helpers below.
W="http://127.0.0.1:9222/json/version",
// A: returns the zillow.com cookies as a "name=value; ..." string once a `_pxvid` cookie is present.
//    s = attempt counter (the Zillow rentals page is opened only on attempt 0); e = optional { onCaptcha }.
//    On the "access denied" page it awaits e.onCaptcha() (or sleeps 2 s) and retries; when `_pxvid`
//    is missing it sleeps 2 s and retries. Both retries forward `e` so the captcha callback is kept.
//    NOTE(review): the recursion has no attempt cap - confirm callers tolerate an unbounded wait.
A=async(s=0,e)=>{const u=await O.connect({browserURL:W});s===0&&await S("https://www.zillow.com/homes/for_rent/");const t=await u.pages();if((t?.[0]?await t[0].title():"").includes("Access to this page has been denied"))return e?.onCaptcha?await e.onCaptcha():await new Promise(o=>setTimeout(o,2e3)),await A(s+1,e);const a=(await u.cookies()).filter(o=>o.domain?.includes("zillow.com"));return a.some(o=>o.name==="_pxvid")?(await P(),a.map(o=>`${o.name}=${o.value}`).join("; ")):(console.log("refetching zillow cookie"),await new Promise(o=>setTimeout(o,2e3)),await A(s+1,e))},
// G: returns the redfin.com cookies as a "name=value; ..." string once an `aws-waf-token` cookie exists
//    and the first page title contains "Redfin"; otherwise sleeps 2 s and retries.
G=async(s=0)=>{const e=await O.connect({browserURL:W});s===0&&(await S("https://www.redfin.com"),await new Promise(o=>setTimeout(o,3e3)));const u=await e.pages(),t=u?.[0]?await u[0].title():"",a=(await e.cookies()).filter(o=>o.domain.includes("redfin.com"));return a.some(o=>o.name==="aws-waf-token")&&t.includes("Redfin")?(await P(),a.map(o=>`${o.name}=${o.value}`).join("; ")):(console.log("refetching redfin cookie"),await new Promise(o=>setTimeout(o,2e3)),await G(s+1))},
// I: fetches the Redfin cookie and stores it via updateConfigFile("redfin", ...); errors are logged, not thrown.
I=async()=>{try{const s=await G();s&&await K("redfin",{redfinCookie:s})}catch(s){const{status:e,message:u}=q(s);console.error(e,u)}},
// U: fetches the Zillow cookie (forwarding options `s`) and stores it via updateConfigFile("zillow", ...); errors are logged, not thrown.
U=async s=>{try{const e=await A(0,s);e&&await K("zillow",{zillowCookie:e})}catch(e){const{status:u,message:t}=q(e);console.error(u,t)}};
// no: CommonJS interop - returns s.default for __esModule objects that own a "default" key, else s itself.
function no(s){return s&&s.__esModule&&Object.prototype.hasOwnProperty.call(s,"default")?s.default:s}
// so: lazily builds (once, guarded by `j`) and returns the inlined argv parser `D`.
// NOTE(review): the parser appears to be a bundled copy of minimist - confirm against the package lockfile.
var D,j;function so(){if(j)return D;j=1;function s(t,a){var o=t;a.slice(0,-1).forEach(function(p){o=o[p]||{}});var d=a[a.length-1];return d in o}function e(t){return typeof t=="number"||/^0x[0-9a-f]+$/i.test(t)?!0:/^[-+]?(?:\d+(?:\.\d*)?|\.\d+)(e[-+]?\d+)?$/.test(t)}function u(t,a){return a==="constructor"&&typeof t[a]=="function"||a==="__proto__"}return D=function(t,a){a||(a={});var o={bools:{},strings:{},unknownFn:null};typeof a.unknown=="function"&&(o.unknownFn=a.unknown),typeof a.boolean=="boolean"&&a.boolean?o.allBools=!0:[].concat(a.boolean).filter(Boolean).forEach(function(r){o.bools[r]=!0});var d={};function p(r){return d[r].some(function(w){return o.bools[w]})}Object.keys(a.alias||{}).forEach(function(r){d[r]=[].concat(a.alias[r]),d[r].forEach(function(w){d[w]=[r].concat(d[r].filter(function(B){return w!==B}))})}),[].concat(a.string).filter(Boolean).forEach(function(r){o.strings[r]=!0,d[r]&&[].concat(d[r]).forEach(function(w){o.strings[w]=!0})});var i=a.default||{},n={_:[]};function z(r,w){return o.allBools&&/^--[^=]+$/.test(w)||o.strings[r]||o.bools[r]||d[r]}function y(r,w,B){for(var l=r,F=0;F<w.length-1;F++){var C=w[F];if(u(l,C))return;l[C]===void 0&&(l[C]={}),(l[C]===Object.prototype||l[C]===Number.prototype||l[C]===String.prototype)&&(l[C]={}),l[C]===Array.prototype&&(l[C]=[]),l=l[C]}var $=w[w.length-1];u(l,$)||((l===Object.prototype||l===Number.prototype||l===String.prototype)&&(l={}),l===Array.prototype&&(l=[]),l[$]===void 0||o.bools[$]||typeof l[$]=="boolean"?l[$]=B:Array.isArray(l[$])?l[$].push(B):l[$]=[l[$],B])}function h(r,w,B){if(!(B&&o.unknownFn&&!z(r,B)&&o.unknownFn(B)===!1)){var l=!o.strings[r]&&e(w)?Number(w):w;y(n,r.split("."),l),(d[r]||[]).forEach(function(F){y(n,F.split("."),l)})}}Object.keys(o.bools).forEach(function(r){h(r,i[r]===void 0?!1:i[r])});var M=[];t.indexOf("--")!==-1&&(M=t.slice(t.indexOf("--")+1),t=t.slice(0,t.indexOf("--")));for(var m=0;m<t.length;m++){var c=t[m],f,v;if(/^--.+=/.test(c)){var L=c.match(/^--([^=]+)=([\s\S]*)$/);f=L[1];var R=L[2];o.bools[f]&&(R=R!=="false"),h(f,R,c)}else if(/^--no-.+/.test(c))f=c.match(/^--no-(.+)/)[1],h(f,!1,c);else if(/^--.+/.test(c))f=c.match(/^--(.+)/)[1],v=t[m+1],v!==void 0&&!/^(-|--)[^-]/.test(v)&&!o.bools[f]&&!o.allBools&&(!d[f]||!p(f))?(h(f,v,c),m+=1):/^(true|false)$/.test(v)?(h(f,v==="true",c),m+=1):h(f,o.strings[f]?"":!0,c);else if(/^-[^-]+/.test(c)){for(var g=c.slice(1,-1).split(""),_=!1,b=0;b<g.length;b++){if(v=c.slice(b+2),v==="-"){h(g[b],v,c);continue}if(/[A-Za-z]/.test(g[b])&&v[0]==="="){h(g[b],v.slice(1),c),_=!0;break}if(/[A-Za-z]/.test(g[b])&&/-?\d+(\.\d*)?(e-?\d+)?$/.test(v)){h(g[b],v,c),_=!0;break}if(g[b+1]&&g[b+1].match(/\W/)){h(g[b],c.slice(b+2),c),_=!0;break}else h(g[b],o.strings[g[b]]?"":!0,c)}f=c.slice(-1)[0],!_&&f!=="-"&&(t[m+1]&&!/^(-|--)[^-]/.test(t[m+1])&&!o.bools[f]&&(!d[f]||!p(f))?(h(f,t[m+1],c),m+=1):t[m+1]&&/^(true|false)$/.test(t[m+1])?(h(f,t[m+1]==="true",c),m+=1):h(f,o.strings[f]?"":!0,c))}else if((!o.unknownFn||o.unknownFn(c)!==!1)&&n._.push(o.strings._||!e(c)?c:Number(c)),a.stopEarly){n._.push.apply(n._,t.slice(m+1));break}}return Object.keys(i).forEach(function(r){s(n,r.split("."))||(y(n,r.split("."),i[r]),(d[r]||[]).forEach(function(w){y(n,w.split("."),i[r])}))}),a["--"]?n["--"]=M.slice():M.forEach(function(r){n._.push(r)}),n},D}
// ao: the argv parser, unwrapped through the interop helper.
var ro=so();const ao=no(ro);
// io: builds and returns the Express app for source `s` ("zillow" by default).
//   - listens on PORT (default 8082); HOST is only used in the debug log message
//   - after listen: launches the browser, then polls once per second until it is connected and
//     saves the Zillow cookie, or saves the Redfin cookie and closes the browser
//   - `--debug` on argv enables the console.log output
//   - SIGINT / SIGTERM and POST /server/shutdown close the server, close the browser and exit the process
function io(s="zillow"){const e=Z(),u=process.env.HOST??"127.0.0.1",t=process.env.PORT??8082;e.use(Z.json());const a=ao(process.argv.slice(2)).debug,o=e.listen(Number(t),async()=>{await T(s);const p=setInterval(async()=>{(await E())?.connected&&(clearInterval(p),a&&console.log("Browser listening at 127.0.0.1:9222"),s==="zillow"?await U():s==="redfin"&&(await I(),await N()))},1e3);a&&console.log(`Server listening at ${u}:${t}`)}),d=()=>(o.close(async p=>{const i=await N();a&&(console.log(i),console.log("server closed")),process.exit(p?1:0)}),{status:"shutdown"});
// Route registration; the comma expression ends in `e`, so the app instance is the return value.
return e.get("/server",(p,i)=>{try{i.send({running:!0})}catch(n){i.send(n)}}),e.post("/browser/launch",async(p,i)=>{try{const n=await T();i.send({browser:n})}catch(n){i.send(n)}}),e.post("/browser/open",async(p,i)=>{try{const{url:n}=p?.body??{},z=await S(n);i.send({browser:z})}catch(n){i.send(n)}}),e.get("/cookie",async(p,i)=>{try{const n=await A();i.send({cookie:n})}catch(n){i.send(n)}}),e.post("/cookie/save",async(p,i)=>{try{const n=await U();i.send({cookie:n})}catch(n){i.send(n)}}),e.post("/cookie/redfin/save",async(p,i)=>{try{const n=await I();i.send({cookie:n})}catch(n){i.send(n)}}),e.post("/browser/close",async(p,i)=>{try{const n=await P();i.send({browser:n})}catch(n){i.send(n)}}),e.post("/server/shutdown",(p,i)=>{try{const n=d();i.send({server:n})}catch(n){i.send(n)}}),process.on("SIGINT",d),process.on("SIGTERM",d),e}
export{A as a,G as b,P as c,I as d,U as e,E as g,T as l,S as o,io as r,N as s,to as w};
|