@rent-scraper/browser-server 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs ADDED
@@ -0,0 +1 @@
1
+ export{c as closeBrowser,g as getBrowser,a as getZillowCookie,l as launchBrowser,o as openBrowser,r as runBrowserServer,b as saveZillowCookie,s as shutdownBrowser}from"./shared/browser-server.CTHRk1um.mjs";import{confirm as r,isCancel as a,cancel as s,outro as t}from"@clack/prompts";import{setTimeout as o}from"node:timers/promises";import"express";import"puppeteer";import"child_process";import"@rent-scraper/utils/config";import"@rent-scraper/utils";async function i(){const e=await r({message:"We need to launch your browser to continue",active:"OK",inactive:"Cancel"});if(a(e)||!e)return s("Create config canceled. Please try again."),process.exit(1);await o(1e3),t("Browser Launching..."),await o(1e3)}export{i as runConfirmBrowserLaunch};
@@ -0,0 +1 @@
1
+ import G from"express";import P from"puppeteer";import{exec as D}from"child_process";import{checkForConfigFile as Z,waitForConfigFile as L,getValueFromConfigFile as H,updateConfigFile as V}from"@rent-scraper/utils/config";import{parseError as W}from"@rent-scraper/utils";const J="http://127.0.0.1:9222/json/version",F=async()=>{try{return await P.connect({browserURL:J})}catch{return null}},y=async()=>{(await(await F())?.pages())?.forEach(async o=>o.close())},I=async(o="zillow")=>{if(await F())return{status:"already launched"};{await Z(o)||await L(o),await Z(o);const w=await H(o,"browser")==="brave"?"/Applications/Brave\\ Browser.app/Contents/MacOS/Brave\\ Browser":"/Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome";return D(`${w} --remote-debugging-port=9222 --no-first-run --no-default-browser-check --user-data-dir=$(mktemp -d -t 'chrome-remote_data_dir')`,(d,n,c)=>{if(d){console.error(`Error: ${d.message}`);return}if(c){console.error(`Stderr: ${c}`);return}console.log(`Stdout: ${n}`)}),{status:"launched"}}},M=async(o="https://zillow.com")=>{const w=await F();if(w){const d=o,n=(await w.pages())?.[0]??await w.newPage();return(await n.title()).includes("denied")||await n.goto(d,{waitUntil:"load"}),{status:"opened"}}else return{status:"not connected"}},K=async()=>{const o=await F();return o?(await o.close(),await o.disconnect(),{status:"closed"}):{status:"not connected"}},S=async()=>{const o=await P.connect({browserURL:"http://127.0.0.1:9222/json/version"});await M("https://www.zillow.com/homes/for_rent/");const[w]=(await o.cookies()).filter(d=>d.name==="_pxvid");if(w)return await y(),w;console.log("refetching cookie"),setTimeout(async()=>{await S()},2e3)},N=async()=>{try{const{name:o,value:w}=await S()??{},d={zillowCookie:`${o}=${w}`};await V("zillow",d)}catch(o){const{status:w,message:d}=W(o);console.error(w,d)}};function Q(o){return o&&o.__esModule&&Object.prototype.hasOwnProperty.call(o,"default")?o.default:o}var R,U;function X(){if(U)return 
R;U=1;function o(n,c){var e=n;c.slice(0,-1).forEach(function(a){e=e[a]||{}});var l=c[c.length-1];return l in e}function w(n){return typeof n=="number"||/^0x[0-9a-f]+$/i.test(n)?!0:/^[-+]?(?:\d+(?:\.\d*)?|\.\d+)(e[-+]?\d+)?$/.test(n)}function d(n,c){return c==="constructor"&&typeof n[c]=="function"||c==="__proto__"}return R=function(n,c){c||(c={});var e={bools:{},strings:{},unknownFn:null};typeof c.unknown=="function"&&(e.unknownFn=c.unknown),typeof c.boolean=="boolean"&&c.boolean?e.allBools=!0:[].concat(c.boolean).filter(Boolean).forEach(function(t){e.bools[t]=!0});var l={};function a(t){return l[t].some(function(f){return e.bools[f]})}Object.keys(c.alias||{}).forEach(function(t){l[t]=[].concat(c.alias[t]),l[t].forEach(function(f){l[f]=[t].concat(l[t].filter(function($){return f!==$}))})}),[].concat(c.string).filter(Boolean).forEach(function(t){e.strings[t]=!0,l[t]&&[].concat(l[t]).forEach(function(f){e.strings[f]=!0})});var r=c.default||{},b={_:[]};function q(t,f){return e.allBools&&/^--[^=]+$/.test(f)||e.strings[t]||e.bools[t]||l[t]}function g(t,f,$){for(var i=t,O=0;O<f.length-1;O++){var B=f[O];if(d(i,B))return;i[B]===void 0&&(i[B]={}),(i[B]===Object.prototype||i[B]===Number.prototype||i[B]===String.prototype)&&(i[B]={}),i[B]===Array.prototype&&(i[B]=[]),i=i[B]}var _=f[f.length-1];d(i,_)||((i===Object.prototype||i===Number.prototype||i===String.prototype)&&(i={}),i===Array.prototype&&(i=[]),i[_]===void 0||e.bools[_]||typeof i[_]=="boolean"?i[_]=$:Array.isArray(i[_])?i[_].push($):i[_]=[i[_],$])}function h(t,f,$){if(!($&&e.unknownFn&&!q(t,$)&&e.unknownFn($)===!1)){var i=!e.strings[t]&&w(f)?Number(f):f;g(b,t.split("."),i),(l[t]||[]).forEach(function(O){g(b,O.split("."),i)})}}Object.keys(e.bools).forEach(function(t){h(t,r[t]===void 0?!1:r[t])});var A=[];n.indexOf("--")!==-1&&(A=n.slice(n.indexOf("--")+1),n=n.slice(0,n.indexOf("--")));for(var p=0;p<n.length;p++){var s=n[p],u,v;if(/^--.+=/.test(s)){var T=s.match(/^--([^=]+)=([\s\S]*)$/);u=T[1];var 
z=T[2];e.bools[u]&&(z=z!=="false"),h(u,z,s)}else if(/^--no-.+/.test(s))u=s.match(/^--no-(.+)/)[1],h(u,!1,s);else if(/^--.+/.test(s))u=s.match(/^--(.+)/)[1],v=n[p+1],v!==void 0&&!/^(-|--)[^-]/.test(v)&&!e.bools[u]&&!e.allBools&&(!l[u]||!a(u))?(h(u,v,s),p+=1):/^(true|false)$/.test(v)?(h(u,v==="true",s),p+=1):h(u,e.strings[u]?"":!0,s);else if(/^-[^-]+/.test(s)){for(var E=s.slice(1,-1).split(""),C=!1,m=0;m<E.length;m++){if(v=s.slice(m+2),v==="-"){h(E[m],v,s);continue}if(/[A-Za-z]/.test(E[m])&&v[0]==="="){h(E[m],v.slice(1),s),C=!0;break}if(/[A-Za-z]/.test(E[m])&&/-?\d+(\.\d*)?(e-?\d+)?$/.test(v)){h(E[m],v,s),C=!0;break}if(E[m+1]&&E[m+1].match(/\W/)){h(E[m],s.slice(m+2),s),C=!0;break}else h(E[m],e.strings[E[m]]?"":!0,s)}u=s.slice(-1)[0],!C&&u!=="-"&&(n[p+1]&&!/^(-|--)[^-]/.test(n[p+1])&&!e.bools[u]&&(!l[u]||!a(u))?(h(u,n[p+1],s),p+=1):n[p+1]&&/^(true|false)$/.test(n[p+1])?(h(u,n[p+1]==="true",s),p+=1):h(u,e.strings[u]?"":!0,s))}else if((!e.unknownFn||e.unknownFn(s)!==!1)&&b._.push(e.strings._||!w(s)?s:Number(s)),c.stopEarly){b._.push.apply(b._,n.slice(p+1));break}}return Object.keys(r).forEach(function(t){o(b,t.split("."))||(g(b,t.split("."),r[t]),(l[t]||[]).forEach(function(f){g(b,f.split("."),r[t])}))}),c["--"]?b["--"]=A.slice():A.forEach(function(t){b._.push(t)}),b},R}var Y=X();const j=Q(Y);function x(){const o=G(),w=process.env.HOST??"127.0.0.1",d=process.env.PORT??8082;o.use(G.json());const n=j(process.argv.slice(2)).debug,c=o.listen(8082,async()=>{await I();const l=setInterval(async()=>{(await F())?.connected&&(clearInterval(l),n&&console.log("Browser listening at 127.0.0.1:9222"),await N())},1e3);n&&console.log(`Server listening at ${w}:${d}`)}),e=()=>(c.close(async l=>{const a=await K();n&&(console.log(a),console.log("server closed")),process.exit(l?1:0)}),{status:"shutdown"});return o.get("/server",(l,a)=>{try{a.send({running:!0})}catch(r){a.send(r)}}),o.post("/browser/launch",async(l,a)=>{try{const r=await 
I();a.send({browser:r})}catch(r){a.send(r)}}),o.post("/browser/open",async(l,a)=>{try{const{url:r}=l?.body??{},b=await M(r);a.send({browser:b})}catch(r){a.send(r)}}),o.get("/cookie",async(l,a)=>{try{const r=await S();a.send({cookie:r})}catch(r){a.send(r)}}),o.get("/cookie/save",async(l,a)=>{try{const r=await N();a.send({cookie:r})}catch(r){a.send(r)}}),o.post("/browser/close",async(l,a)=>{try{const r=await y();a.send({browser:r})}catch(r){a.send(r)}}),o.post("/server/shutdown",(l,a)=>{try{const r=e();a.send({server:r})}catch(r){a.send(r)}}),process.on("SIGINT",e),process.on("SIGTERM",e),o}export{S as a,N as b,y as c,F as g,I as l,M as o,x as r,K as s};
@@ -0,0 +1,2 @@
1
+ import config from '../../eslint.config.js'
2
+ export default config
package/package.json ADDED
@@ -0,0 +1,54 @@
1
+ {
2
+ "name": "@rent-scraper/browser-server",
3
+ "version": "1.0.0",
4
+ "type": "module",
5
+ "exports": {
6
+ ".": {
7
+ "types": "./dist/index.d.mts",
8
+ "default": "./dist/index.mjs"
9
+ },
10
+ "./bin/run-browser-server": {
11
+ "types": "./dist/bin/run-browser-server.d.mts",
12
+ "default": "./dist/bin/run-browser-server.mjs"
13
+ }
14
+ },
15
+ "author": "Max Stein <maxwell.stein@gmail.com> (https://maxstein.net)",
16
+ "license": "MIT",
17
+ "description": "Local browser server to help scrape data for rental listings",
18
+ "scripts": {
19
+ "dev": "tsx ./src/bin/run-browser-server.ts",
20
+ "build": "unbuild",
21
+ "start": "node ./dist/bin/run-browser-server.mjs",
22
+ "cli:dev": "tsx src/cli.ts",
23
+ "cli": "node dist/cli.mjs",
24
+ "typecheck": "tsc --noEmit",
25
+ "lint": "eslint .",
26
+ "lint:fix": "eslint --fix .",
27
+ "prepublish": "npm run lint && npm run build"
28
+ },
29
+ "repository": {
30
+ "type": "git",
31
+ "url": "git+https://github.com/rent-brigade/rent-scraper.git",
32
+ "directory": "packages/browser-server"
33
+ },
34
+ "dependencies": {
35
+ "@clack/prompts": "alpha",
36
+ "@rent-scraper/api": "workspace:*",
37
+ "@rent-scraper/utils": "workspace:*",
38
+ "bumpp": "^10.2.3",
39
+ "express": "^4.21.2",
40
+ "picocolors": "^1.1.1",
41
+ "puppeteer": "^24.11.2"
42
+ },
43
+ "devDependencies": {
44
+ "@types/express": "^5.0.3",
45
+ "@types/qs": "^6.14.0",
46
+ "@types/range-parser": "^1.2.7",
47
+ "@types/send": "^0.17.5",
48
+ "tsx": "^4.20.3",
49
+ "unbuild": "^3.5.0"
50
+ },
51
+ "engines": {
52
+ "node": "22.x"
53
+ }
54
+ }
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env node
2
+ import { runBrowserServer } from '../server.js'
3
+
4
+ runBrowserServer()
package/src/browser.ts ADDED
@@ -0,0 +1,79 @@
1
+ import puppeteer from 'puppeteer'
2
+ import { exec } from 'child_process'
3
+ import { checkForConfigFile, getValueFromConfigFile, waitForConfigFile } from '@rent-scraper/utils/config'
4
+ import type { ListingsSource } from '@rent-scraper/api'
5
+
6
// HTTP endpoint of the browser's remote-debugging interface (port 9222, the
// same port launchBrowser passes via --remote-debugging-port).
const wsChromeEndpointurl = 'http://127.0.0.1:9222/json/version'

/**
 * Attach Puppeteer to the browser reachable at the remote-debugging endpoint.
 *
 * @returns The connected browser, or `null` when `puppeteer.connect` fails.
 *          The failure reason is intentionally discarded so callers can use
 *          the result as a simple "is a browser reachable?" probe.
 */
export const getBrowser = async () => {
  const connectOptions = { browserURL: wsChromeEndpointurl }
  try {
    return await puppeteer.connect(connectOptions)
  } catch {
    return null
  }
}
18
+
19
/**
 * Close every open page of the browser reachable through `getBrowser()`.
 *
 * Only the pages are closed; `browser.close()` is not called here. When no
 * browser can be reached the function resolves without doing anything.
 *
 * @returns Resolves (with no value) once all pages have finished closing.
 * @throws Rejects if listing the pages or closing any page fails.
 */
export const closeBrowser = async () => {
  const browser = await getBrowser()
  if (!browser) {
    return
  }

  try {
    const pages = await browser.pages()
    // Await every close so callers only resume once the pages are gone and a
    // failed close rejects here instead of surfacing as an unhandled rejection.
    await Promise.all(pages.map(page => page.close()))
  } finally {
    // getBrowser() opens a fresh connection on every call; release this one
    // so connections do not pile up.
    await browser.disconnect()
  }
}
24
+
25
/**
 * Start a local browser with remote debugging enabled on port 9222, unless
 * one is already reachable.
 *
 * The browser binary is chosen from the `browser` value in the config file of
 * `source`: `'brave'` selects Brave, anything else selects Google Chrome
 * (both paths are macOS application paths). If the config file is not there
 * yet, the function waits for it first.
 *
 * The command is started with `exec` and not awaited, so `'launched'` only
 * means the command was issued, not that the browser is ready to accept
 * connections.
 *
 * @param source Listings source whose config file is consulted.
 * @returns `{ status: 'already launched' }` when a browser was reachable,
 *          otherwise `{ status: 'launched' }`.
 */
export const launchBrowser = async (source = 'zillow' as ListingsSource) => {
  if (await getBrowser()) {
    return { status: 'already launched' }
  }

  const hasConfigFile = await checkForConfigFile(source)
  if (!hasConfigFile) {
    await waitForConfigFile(source)
  }
  await checkForConfigFile(source)

  const configuredBrowser = await getValueFromConfigFile(source, 'browser')
  const useBrave = configuredBrowser === 'brave'
  // Spaces are backslash-escaped because the path is interpolated into a shell command.
  const browserPath = useBrave
    ? '/Applications/Brave\\ Browser.app/Contents/MacOS/Brave\\ Browser'
    : '/Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome'
  const command = `${browserPath} --remote-debugging-port=9222 --no-first-run --no-default-browser-check --user-data-dir=$(mktemp -d -t 'chrome-remote_data_dir')`

  // Report exactly one of: the exec error, the stderr output, or the stdout output.
  exec(command, (error, stdout, stderr) => {
    if (error) {
      console.error(`Error: ${error.message}`)
    } else if (stderr) {
      console.error(`Stderr: ${stderr}`)
    } else {
      console.log(`Stdout: ${stdout}`)
    }
  })

  return { status: 'launched' }
}
50
+
51
/**
 * Navigate the browser's first page (or a newly created one when there is
 * none) to `url` and wait for its `load` event.
 *
 * Navigation is skipped when the current page title contains `'denied'`, so
 * a page that is showing a captcha is left untouched.
 *
 * @param url Address to open.
 * @returns `{ status: 'opened' }` when a browser was reachable (even if the
 *          navigation was skipped), otherwise `{ status: 'not connected' }`.
 */
export const openBrowser = async (url = 'https://zillow.com') => {
  const browser = await getBrowser()
  if (!browser) {
    return { status: 'not connected' }
  }

  const openPages = await browser.pages()
  const page = openPages?.[0] ?? await browser.newPage()

  const title = await page.title()
  const captchaShowing = title.includes('denied')
  if (!captchaShowing) {
    await page.goto(url, { waitUntil: 'load' })
  }

  return { status: 'opened' }
}
69
+
70
/**
 * Close the browser reachable through `getBrowser()` and drop the Puppeteer
 * connection to it.
 *
 * @returns `{ status: 'closed' }` after the browser was closed, or
 *          `{ status: 'not connected' }` when no browser could be reached.
 */
export const shutdownBrowser = async () => {
  const browser = await getBrowser()
  if (!browser) {
    return { status: 'not connected' }
  }

  await browser.close()
  await browser.disconnect()
  return { status: 'closed' }
}
@@ -0,0 +1,26 @@
1
+ import {
2
+ outro,
3
+ confirm,
4
+ isCancel,
5
+ cancel,
6
+ } from '@clack/prompts'
7
+ import { setTimeout as sleep } from 'node:timers/promises'
8
+
9
/**
 * Ask the user to confirm that a browser may be launched.
 *
 * If the prompt is cancelled or answered with "Cancel", a cancellation
 * message is shown and the process exits with code 1. Otherwise the function
 * pauses for a second, prints the "Browser Launching..." outro, pauses for
 * another second and resolves.
 */
export async function runConfirmBrowserLaunch() {
  const answer = await confirm({
    message: 'We need to launch your browser to continue',
    active: 'OK',
    inactive: 'Cancel',
  })

  const declined = isCancel(answer) || !answer
  if (declined) {
    cancel('Create config canceled. Please try again.')
    return process.exit(1)
  }

  // Short pauses on either side of the outro give the user time to read it.
  await sleep(1000)
  outro('Browser Launching...')
  await sleep(1000)
}
package/src/cookie.ts ADDED
@@ -0,0 +1,39 @@
1
+ import puppeteer from 'puppeteer'
2
+ import { closeBrowser, openBrowser } from './browser.js'
3
+ import { updateConfigFile } from '@rent-scraper/utils/config'
4
+ import { parseError } from '@rent-scraper/utils'
5
+
6
/**
 * Load the Zillow rentals page in the remote-debugging browser and return the
 * browser cookie named `_pxvid`.
 *
 * While the cookie is absent the page is re-opened every `retryDelayMs`
 * milliseconds, and the promise stays pending until the cookie shows up or
 * `maxAttempts` is exhausted. This way the caller actually receives the
 * cookie found by a later attempt. Once the cookie is found, the browser's
 * pages are closed via `closeBrowser()`.
 *
 * @param options.retryDelayMs Pause between attempts, in milliseconds.
 * @param options.maxAttempts  Maximum number of attempts; the default
 *                             (`Infinity`) retries until the cookie exists.
 * @returns The `_pxvid` cookie, or `undefined` if `maxAttempts` ran out.
 * @throws Rejects if Puppeteer cannot connect to the browser or a browser
 *         operation fails.
 */
export const getZillowCookie = async ({ retryDelayMs = 2000, maxAttempts = Infinity } = {}) => {
  const wsChromeEndpointurl = 'http://127.0.0.1:9222/json/version'
  const browser = await puppeteer.connect({
    browserURL: wsChromeEndpointurl,
  })

  try {
    for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
      await openBrowser('https://www.zillow.com/homes/for_rent/')
      const cookies = await browser.cookies()
      const cookie = cookies.find(({ name }) => name === '_pxvid')
      if (cookie) {
        await closeBrowser()
        return cookie
      }

      console.log('refetching cookie')
      if (attempt < maxAttempts) {
        await new Promise(resolve => setTimeout(resolve, retryDelayMs))
      }
    }
    return undefined
  } finally {
    // One connection is reused for all attempts; always release it.
    await browser.disconnect()
  }
}
23
+
24
/**
 * Fetch the Zillow cookie and store it in the `zillow` config file as
 * `zillowCookie`, formatted as `name=value`.
 *
 * Nothing is written when no cookie could be obtained, so the config file
 * never ends up with the literal text `undefined=undefined`.
 * Failures are logged through `parseError` and not rethrown.
 *
 * @returns The stored `name=value` string, or `undefined` when no cookie was
 *          available or an error occurred.
 */
export const saveZillowCookie = async () => {
  try {
    const cookie = await getZillowCookie()
    if (!cookie) {
      // Nothing to persist; leave the existing config value untouched.
      return undefined
    }

    const zillowCookie = `${cookie.name}=${cookie.value}`

    // update config file
    await updateConfigFile('zillow', { zillowCookie })
    return zillowCookie
  } catch (error: any) {
    const { status, message } = parseError(error)
    console.error(status, message)
    return undefined
  }
}
package/src/index.ts ADDED
@@ -0,0 +1,4 @@
1
+ export * from './browser.js'
2
+ export * from './confirm-browser-launch.js'
3
+ export * from './cookie.js'
4
+ export * from './server.js'
package/src/server.ts ADDED
@@ -0,0 +1,111 @@
1
+ import express from 'express'
2
+ import minimist from 'minimist'
3
+ import { getZillowCookie, saveZillowCookie } from './cookie.js'
4
+ import { launchBrowser, closeBrowser, getBrowser, shutdownBrowser, openBrowser } from './browser.js'
5
+
6
/**
 * Create and start the local HTTP server that controls the scraping browser.
 *
 * The server listens on `HOST` / `PORT` from the environment (defaults
 * `127.0.0.1` and `8082`). Once listening it launches the browser, polls
 * once per second until Puppeteer can connect to it, and then saves the
 * Zillow cookie. Passing `--debug` on the command line enables progress
 * logging.
 *
 * Routes:
 * - `GET  /server`          -> `{ running: true }`
 * - `POST /browser/launch`  -> `{ browser: <launchBrowser result> }`
 * - `POST /browser/open`    -> `{ browser: <openBrowser result> }`; reads `url` from the JSON body
 * - `GET  /cookie`          -> `{ cookie: <getZillowCookie result> }`
 * - `GET  /cookie/save`     -> `{ cookie: <saveZillowCookie result> }`
 * - `POST /browser/close`   -> `{ browser: <closeBrowser result> }`
 * - `POST /server/shutdown` -> `{ server: { status: 'shutdown' } }`
 *
 * SIGINT and SIGTERM trigger the same shutdown as `/server/shutdown`: the
 * HTTP server is closed, the browser is shut down and the process exits.
 *
 * @returns The Express application.
 */
export function runBrowserServer() {
  const app = express()
  const host = process.env.HOST ?? '127.0.0.1'
  // PORT arrives as a string; fall back to 8082 when it is unset or not a number.
  const parsedPort = Number.parseInt(process.env.PORT ?? '', 10)
  const port = Number.isNaN(parsedPort) ? 8082 : parsedPort
  app.use(express.json())

  const args = minimist(process.argv.slice(2))
  const debug = args.debug

  // Bind to the configured host and port so the address reported in the
  // debug log is the one actually in use, and the server is only reachable
  // on the intended interface.
  const server = app.listen(port, host, async () => {
    await launchBrowser()

    // Guards against overlapping ticks when a connection attempt takes
    // longer than the polling interval.
    let checking = false
    const connecting = setInterval(async () => {
      if (checking) {
        return
      }
      checking = true
      try {
        const browser = await getBrowser()
        if (browser?.connected) {
          clearInterval(connecting)
          if (debug) {
            console.log(`Browser listening at 127.0.0.1:9222`)
          }
          await saveZillowCookie()
        }
      } finally {
        checking = false
      }
    }, 1000)

    if (debug) {
      console.log(`Server listening at ${host}:${port}`)
    }
  })

  // A second signal or shutdown request must not call server.close() again:
  // closing an already-closing server reports an error and would turn a
  // clean exit into exit code 1.
  let shuttingDown = false
  const shutdownServer = () => {
    if (!shuttingDown) {
      shuttingDown = true
      server.close(async (err) => {
        const browser = await shutdownBrowser()
        if (debug) {
          console.log(browser)
          console.log('server closed')
        }
        process.exit(err ? 1 : 0)
      })
    }
    return { status: 'shutdown' }
  }

  app.get('/server', (_req, res) => {
    try {
      res.send({ running: true })
    } catch (error) {
      res.send(error)
    }
  })

  app.post('/browser/launch', async (_req, res) => {
    try {
      const browser = await launchBrowser()
      res.send({ browser })
    } catch (error) {
      res.send(error)
    }
  })

  app.post('/browser/open', async (req, res) => {
    try {
      const { url } = req?.body ?? {}
      const browser = await openBrowser(url)
      res.send({ browser })
    } catch (error) {
      res.send(error)
    }
  })

  app.get('/cookie', async (_req, res) => {
    try {
      const cookie = await getZillowCookie()
      res.send({ cookie })
    } catch (error) {
      res.send(error)
    }
  })

  app.get('/cookie/save', async (_req, res) => {
    try {
      const cookie = await saveZillowCookie()
      res.send({ cookie })
    } catch (error) {
      res.send(error)
    }
  })

  app.post('/browser/close', async (_req, res) => {
    try {
      const browser = await closeBrowser()
      res.send({ browser })
    } catch (error) {
      res.send(error)
    }
  })

  app.post('/server/shutdown', (_req, res) => {
    try {
      const result = shutdownServer()
      res.send({ server: result })
    } catch (error) {
      res.send(error)
    }
  })

  process.on('SIGINT', shutdownServer)
  process.on('SIGTERM', shutdownServer)

  return app
}
package/tsconfig.json ADDED
@@ -0,0 +1,4 @@
1
+ {
2
+ "extends": "../../tsconfig.json",
3
+ "include": ["**/*.ts"]
4
+ }