@browserless.io/browserless 2.1.0 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/CHANGELOG.md +19 -2
  2. package/bin/browserless.js +49 -13
  3. package/build/browserless.js +9 -2
  4. package/build/browsers/cdp-chromium.d.ts +1 -1
  5. package/build/browsers/cdp-chromium.js +3 -3
  6. package/build/browsers/index.d.ts +1 -0
  7. package/build/browsers/index.js +20 -0
  8. package/build/browsers/playwright-chromium.d.ts +1 -1
  9. package/build/browsers/playwright-firefox.d.ts +1 -1
  10. package/build/browsers/playwright-webkit.d.ts +1 -1
  11. package/build/constants.d.ts +1 -0
  12. package/build/constants.js +1 -0
  13. package/build/data/classes.json +1 -1
  14. package/build/data/selectors.json +1 -1
  15. package/build/http.d.ts +3 -0
  16. package/build/http.js +3 -0
  17. package/build/routes/chromium/http/content-post.body.json +8 -8
  18. package/build/routes/chromium/http/json-list.d.ts +15 -0
  19. package/build/routes/chromium/http/json-list.js +23 -0
  20. package/build/routes/chromium/http/json-list.response.json +52 -0
  21. package/build/routes/chromium/http/json-new.d.ts +15 -0
  22. package/build/routes/chromium/http/json-new.js +23 -0
  23. package/build/routes/chromium/http/json-new.response.json +44 -0
  24. package/build/routes/chromium/http/json-protocol-get.d.ts +15 -0
  25. package/build/routes/chromium/http/json-protocol-get.js +20 -0
  26. package/build/routes/chromium/http/json-protocol-get.response.json +6 -0
  27. package/build/routes/chromium/http/json-version-get.d.ts +1 -1
  28. package/build/routes/chromium/http/json-version-get.js +1 -1
  29. package/build/routes/chromium/http/pdf-post.body.json +12 -8
  30. package/build/routes/chromium/http/scrape-post.body.json +8 -8
  31. package/build/routes/chromium/http/screenshot-post.body.json +8 -8
  32. package/build/routes/chromium/utils/cdp.d.ts +2 -0
  33. package/build/routes/chromium/utils/cdp.js +14 -0
  34. package/build/types.d.ts +31 -0
  35. package/build/utils.d.ts +9 -0
  36. package/build/utils.js +17 -2
  37. package/package.json +11 -13
  38. package/src/browserless.ts +12 -1
  39. package/src/browsers/cdp-chromium.ts +5 -7
  40. package/src/browsers/index.ts +25 -0
  41. package/src/browsers/playwright-chromium.ts +1 -1
  42. package/src/browsers/playwright-firefox.ts +1 -1
  43. package/src/browsers/playwright-webkit.ts +1 -1
  44. package/src/constants.ts +1 -0
  45. package/src/http.ts +3 -0
  46. package/src/routes/chromium/http/json-list.ts +50 -0
  47. package/src/routes/chromium/http/json-new.ts +50 -0
  48. package/src/routes/chromium/http/json-protocol-get.ts +38 -0
  49. package/src/routes/chromium/http/json-version-get.ts +1 -1
  50. package/src/routes/chromium/utils/cdp.ts +19 -0
  51. package/src/types.ts +38 -0
  52. package/src/utils.ts +26 -4
  53. package/static/docs/swagger.json +315 -10
  54. package/static/function/client.js +2328 -1975
  55. package/browser.json +0 -7
  56. package/scripts/install-cdp-json.js +0 -37
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@browserless.io/browserless",
3
- "version": "2.1.0",
3
+ "version": "2.1.1",
4
4
  "license": "SSPL",
5
5
  "description": "The browserless platform",
6
6
  "author": "browserless.io",
@@ -25,8 +25,7 @@
25
25
  "dev": "npm run build:dev && env-cmd -f .env node build",
26
26
  "install:adblock": "node scripts/install-adblock.js",
27
27
  "install:browsers": "npx --yes playwright install chromium firefox webkit",
28
- "install:cdp-json": "node scripts/install-cdp-json.js",
29
- "install:dev": "npm run install:browsers && npm run install:cdp-json",
28
+ "install:dev": "npm run install:browsers",
30
29
  "lint": "eslint . --ext .ts --fix",
31
30
  "prepack": "npm run build:dev",
32
31
  "prettier": "prettier '{src,functions,scripts,bin,external,bin}/**/*.{js,ts,json}' --log-level error --write",
@@ -43,7 +42,6 @@
43
42
  "scripts/*",
44
43
  "src/*",
45
44
  "static/*",
46
- "browser.json",
47
45
  "CHANGELOG.md",
48
46
  "tsconfig.json"
49
47
  ],
@@ -57,8 +55,8 @@
57
55
  "http-proxy": "^1.18.1",
58
56
  "lighthouse": "^11.1.0",
59
57
  "micromatch": "^4.0.4",
60
- "playwright-core": "^1.41.1",
61
- "puppeteer-core": "^21.6.1",
58
+ "playwright-core": "^1.41.2",
59
+ "puppeteer-core": "^21.10.0",
62
60
  "puppeteer-extra": "^3.3.6",
63
61
  "puppeteer-extra-plugin-stealth": "^2.11.2",
64
62
  "queue": "^7.0.0",
@@ -71,24 +69,24 @@
71
69
  "@types/http-proxy": "^1.17.14",
72
70
  "@types/micromatch": "^4.0.6",
73
71
  "@types/mocha": "^10.0.6",
74
- "@types/node": "^20.11.5",
72
+ "@types/node": "^20.11.16",
75
73
  "@types/sinon": "^17.0.3",
76
- "@typescript-eslint/eslint-plugin": "^6.19.1",
77
- "@typescript-eslint/parser": "^6.19.1",
74
+ "@typescript-eslint/eslint-plugin": "^6.20.0",
75
+ "@typescript-eslint/parser": "^6.21.0",
78
76
  "assert": "^2.0.0",
79
- "chai": "^5.0.0",
77
+ "chai": "^5.0.3",
80
78
  "cross-env": "^7.0.3",
81
79
  "env-cmd": "^10.1.0",
82
- "esbuild": "^0.19.11",
80
+ "esbuild": "^0.20.0",
83
81
  "esbuild-plugin-polyfill-node": "^0.3.0",
84
82
  "eslint": "^8.56.0",
85
83
  "eslint-plugin-import": "^2.29.1",
86
84
  "eslint-plugin-typescript-sort-keys": "^3.1.0",
87
85
  "extract-zip": "^2.0.1",
88
- "marked": "^11.1.0",
86
+ "marked": "^12.0.0",
89
87
  "mocha": "^10.0.0",
90
88
  "move-file": "^3.1.0",
91
- "prettier": "^3.2.4",
89
+ "prettier": "^3.2.5",
92
90
  "sinon": "^17.0.1",
93
91
  "ts-node": "^10.9.2",
94
92
  "typescript": "^5.3.3",
@@ -3,6 +3,7 @@ import {
3
3
  BrowserHTTPRoute,
4
4
  BrowserManager,
5
5
  BrowserWebsocketRoute,
6
+ CDPChromium,
6
7
  Config,
7
8
  FileSystem,
8
9
  HTTPRoute,
@@ -11,6 +12,9 @@ import {
11
12
  Limiter,
12
13
  Metrics,
13
14
  Monitoring,
15
+ PlaywrightChromium,
16
+ PlaywrightFirefox,
17
+ PlaywrightWebkit,
14
18
  Router,
15
19
  Token,
16
20
  WebHooks,
@@ -158,6 +162,12 @@ export class Browserless {
158
162
  public async start() {
159
163
  const httpRoutes: Array<HTTPRoute | BrowserHTTPRoute> = [];
160
164
  const wsRoutes: Array<WebSocketRoute | BrowserWebsocketRoute> = [];
165
+ const internalBrowsers = [
166
+ CDPChromium,
167
+ PlaywrightFirefox,
168
+ PlaywrightChromium,
169
+ PlaywrightWebkit,
170
+ ];
161
171
 
162
172
  const [[httpRouteFiles, wsRouteFiles], installedBrowsers] =
163
173
  await Promise.all([getRouteFiles(this.config), availableBrowsers]);
@@ -254,11 +264,12 @@ export class Browserless {
254
264
  }
255
265
  }
256
266
 
257
- // Validate that browsers are installed and route paths are unique
267
+ // Validate that we have the browsers they are asking for
258
268
  [...httpRoutes, ...wsRoutes].forEach((route) => {
259
269
  if (
260
270
  'browser' in route &&
261
271
  route.browser &&
272
+ internalBrowsers.includes(route.browser) &&
262
273
  !installedBrowsers.some((b) => b.name === route.browser?.name)
263
274
  ) {
264
275
  throw new Error(
@@ -314,7 +314,7 @@ export class CDPChromium extends EventEmitter {
314
314
 
315
315
  public wsEndpoint = (): string | null => this.browserWSEndpoint;
316
316
 
317
- public publicWSEndpoint = (token: string): string | null => {
317
+ public publicWSEndpoint = (token: string | null): string | null => {
318
318
  if (!this.browserWSEndpoint) {
319
319
  return null;
320
320
  }
@@ -344,16 +344,14 @@ export class CDPChromium extends EventEmitter {
344
344
  }
345
345
  socket.once('close', resolve);
346
346
 
347
- this.debug(
348
- `Proxying ${req.parsed.href} to browser ${this.browserWSEndpoint}`,
349
- );
350
-
351
347
  const [page] = await this.browser.pages();
352
348
  const pageLocation = `/devtools/page/${this.getPageId(page)}`;
353
349
 
354
350
  this.debug(`Proxying ${req.parsed.href} to page "${pageLocation}"`);
355
351
 
356
- req.url = pageLocation;
352
+ const target = new URL(pageLocation, this.browserWSEndpoint).href;
353
+
354
+ req.url = '';
357
355
 
358
356
  this.proxy.ws(
359
357
  req,
@@ -361,7 +359,7 @@ export class CDPChromium extends EventEmitter {
361
359
  head,
362
360
  {
363
361
  changeOrigin: true,
364
- target: this.browserWSEndpoint,
362
+ target,
365
363
  },
366
364
  (error) => {
367
365
  this.debug(`Error proxying session: ${error}`);
@@ -82,6 +82,30 @@ export class BrowserManager {
82
82
  return dataDirPath;
83
83
  };
84
84
 
85
/**
 * Launches a throw-away Chrome instance and returns the body of its DevTools
 * `/json/protocol` HTTP endpoint.
 *
 * Once the browser has launched it is always closed before this method
 * settles, including when the HTTP request or JSON parsing fails, so a failed
 * lookup does not leave a Chrome process running.
 *
 * @returns The parsed `/json/protocol` response body.
 * @throws If the launched browser reports no WebSocket endpoint, or if the
 * endpoint answers with a non-2xx status.
 */
public getProtocolJSON = async (): Promise<object> => {
  this.debug(`Launching Chrome to generate /json/protocol results`);
  const browser = new CDPChromium({
    blockAds: false,
    config: this.config,
    record: false,
    userDataDir: null,
  });
  await browser.launch();

  try {
    const wsEndpoint = browser.wsEndpoint();

    if (!wsEndpoint) {
      throw new Error('There was an error launching the browser');
    }

    // The DevTools HTTP API listens on the same port as the WebSocket endpoint.
    const { port } = new URL(wsEndpoint);
    const res = await fetch(`http://127.0.0.1:${port}/json/protocol`);

    if (!res.ok) {
      throw new Error(
        `Chrome responded with HTTP ${res.status} for /json/protocol`,
      );
    }

    const protocolJSON = await res.json();

    return protocolJSON;
  } finally {
    // Runs on success and failure alike so the temporary browser never leaks.
    await browser.close();
  }
};
108
+
85
109
  public getVersionJSON = async (): Promise<{
86
110
  Browser: string;
87
111
  'Debugger-Version': string;
@@ -91,6 +115,7 @@ export class BrowserManager {
91
115
  'WebKit-Version': string;
92
116
  webSocketDebuggerUrl: string;
93
117
  }> => {
118
+ this.debug(`Launching Chrome to generate /json/version results`);
94
119
  const browser = new CDPChromium({
95
120
  blockAds: false,
96
121
  config: this.config,
@@ -106,7 +106,7 @@ export class PlaywrightChromium extends EventEmitter {
106
106
 
107
107
  public wsEndpoint = (): string | null => this.browserWSEndpoint;
108
108
 
109
- public publicWSEndpoint = (token: string): string | null => {
109
+ public publicWSEndpoint = (token: string | null): string | null => {
110
110
  if (!this.browserWSEndpoint) {
111
111
  return null;
112
112
  }
@@ -99,7 +99,7 @@ export class PlaywrightFirefox extends EventEmitter {
99
99
 
100
100
  public wsEndpoint = (): string | null => this.browserWSEndpoint;
101
101
 
102
- public publicWSEndpoint = (token: string): string | null => {
102
+ public publicWSEndpoint = (token: string | null): string | null => {
103
103
  if (!this.browserWSEndpoint) {
104
104
  return null;
105
105
  }
@@ -99,7 +99,7 @@ export class PlaywrightWebkit extends EventEmitter {
99
99
 
100
100
  public wsEndpoint = (): string | null => this.browserWSEndpoint;
101
101
 
102
- public publicWSEndpoint = (token: string): string | null => {
102
+ public publicWSEndpoint = (token: string | null): string | null => {
103
103
  if (!this.browserWSEndpoint) {
104
104
  return null;
105
105
  }
package/src/constants.ts CHANGED
@@ -2,3 +2,4 @@ export const encryptionAlgo = 'aes-192-cbc';
2
2
  export const encryptionSep = '.';
3
3
  export const liveURLSep = ':';
4
4
  export const keyLength = 24;
5
+ export const BLESS_PAGE_IDENTIFIER = 'BLESS';
package/src/http.ts CHANGED
@@ -92,6 +92,9 @@ export enum HTTPRoutes {
92
92
  content = '/content',
93
93
  download = '/download',
94
94
  function = '/function',
95
+ jsonList = '/json/list',
96
+ jsonNew = '/json/new',
97
+ jsonProtocol = '/json/protocol',
95
98
  jsonVersion = '/json/version',
96
99
  pdf = '/pdf',
97
100
  performance = '/performance',
@@ -0,0 +1,50 @@
1
+ import {
2
+ APITags,
3
+ HTTPRoute,
4
+ HTTPRoutes,
5
+ Methods,
6
+ Request,
7
+ Response,
8
+ contentTypes,
9
+ dedent,
10
+ jsonResponse,
11
+ } from '@browserless.io/browserless';
12
+ import { getCDPJSONPayload } from '../utils/cdp.js';
13
+
14
+ /*
15
+ Example Payload from Chrome:
16
+ [{
17
+ "description": "",
18
+ "devtoolsFrontendUrl": "/devtools/inspector.html?ws=localhost:9222/devtools/page/6CA38A3E207BA534C674D1057B19E9CC",
19
+ "id": "6CA38A3E207BA534C674D1057B19E9CC",
20
+ "title": "New Tab",
21
+ "type": "page",
22
+ "url": "http://localhost:9222/json/list",
23
+ "webSocketDebuggerUrl": "ws://localhost:9222/devtools/page/6CA38A3E207BA534C674D1057B19E9CC"
24
+ }]
25
+ */
26
+ export type ResponseSchema = Array<ReturnType<typeof getCDPJSONPayload>>;
27
+
28
/**
 * HTTP route for `GET /json/list`.
 *
 * Responds with a single-element array holding a mocked DevTools target
 * entry built by `getCDPJSONPayload` from the configured external address.
 * The route itself declares no browser (`browser = null`); per the route
 * description, a browser is only started once a client connects to the
 * returned "webSocketDebuggerUrl".
 */
export default class GetJSONList extends HTTPRoute {
  accepts = [contentTypes.any];
  auth = true;
  browser = null;
  concurrency = false;
  contentTypes = [contentTypes.json];
  description = dedent(`
    Returns a JSON payload that acts as a pass-through to the DevTools /json/list HTTP API in Chromium.
    Browserless mocks this payload so that remote clients can connect to the underlying "webSocketDebuggerUrl"
    which will cause Browserless to start the browser and proxy that request into a blank page.
  `);
  method = Methods.get;
  path = HTTPRoutes.jsonList;
  tags = [APITags.browserAPI];

  /**
   * Writes the mocked target list as a 200 JSON response.
   *
   * @param _req Incoming request (unused).
   * @param res Response the JSON payload is written to.
   */
  handler = async (_req: Request, res: Response): Promise<void> => {
    const externalAddress = this.config().getExternalAddress();

    // Declared (not asserted) as ResponseSchema so the compiler checks the shape.
    const targets: ResponseSchema = [getCDPJSONPayload(externalAddress)];

    return jsonResponse(res, 200, targets);
  };
}
@@ -0,0 +1,50 @@
1
+ import {
2
+ APITags,
3
+ HTTPRoute,
4
+ HTTPRoutes,
5
+ Methods,
6
+ Request,
7
+ Response,
8
+ contentTypes,
9
+ dedent,
10
+ jsonResponse,
11
+ } from '@browserless.io/browserless';
12
+ import { getCDPJSONPayload } from '../utils/cdp.js';
13
+
14
+ /*
15
+ Example Payload from Chrome:
16
+ {
17
+ "description": "",
18
+ "devtoolsFrontendUrl": "/devtools/inspector.html?ws=localhost:9222/devtools/page/2F76525C32A916DF30C4F37A4970B8BF",
19
+ "id": "2F76525C32A916DF30C4F37A4970B8BF",
20
+ "title": "",
21
+ "type": "page",
22
+ "url": "about:blank",
23
+ "webSocketDebuggerUrl": "ws://localhost:9222/devtools/page/2F76525C32A916DF30C4F37A4970B8BF"
24
+ }
25
+ */
26
+ export type ResponseSchema = ReturnType<typeof getCDPJSONPayload>;
27
+
28
/**
 * HTTP route for `PUT /json/new`.
 *
 * Responds with one mocked DevTools target entry built by `getCDPJSONPayload`
 * from the configured external address. The route itself declares no browser
 * (`browser = null`); per the route description, a browser is only started
 * once a client connects to the returned "webSocketDebuggerUrl".
 *
 * NOTE(review): the class shares the name `GetJSONList` with the /json/list
 * route. It is kept as-is here because it is the module's default export;
 * consider renaming once all consumers are confirmed to import it by default.
 */
export default class GetJSONList extends HTTPRoute {
  accepts = [contentTypes.any];
  auth = true;
  browser = null;
  concurrency = false;
  contentTypes = [contentTypes.json];
  // The description previously named /json/list; this route serves /json/new.
  description = dedent(`
    Returns a JSON payload that acts as a pass-through to the DevTools /json/new HTTP API in Chromium.
    Browserless mocks this payload so that remote clients can connect to the underlying "webSocketDebuggerUrl"
    which will cause Browserless to start the browser and proxy that request into a blank page.
  `);
  method = Methods.put;
  path = HTTPRoutes.jsonNew;
  tags = [APITags.browserAPI];

  /**
   * Writes the mocked target entry as a 200 JSON response.
   *
   * @param _req Incoming request (unused).
   * @param res Response the JSON payload is written to.
   */
  handler = async (_req: Request, res: Response): Promise<void> => {
    const config = this.config();
    const externalAddress = config.getExternalAddress();
    const payload = getCDPJSONPayload(externalAddress);

    return jsonResponse(res, 200, payload);
  };
}
@@ -0,0 +1,38 @@
1
+ import {
2
+ APITags,
3
+ HTTPRoute,
4
+ HTTPRoutes,
5
+ Methods,
6
+ Request,
7
+ Response,
8
+ contentTypes,
9
+ jsonResponse,
10
+ } from '@browserless.io/browserless';
11
+
12
+ // @TODO Figure out how to parse the Protocol JSON into a TS definition
13
+ // for our openapi docs.
14
+ export type ResponseSchema = object;
15
+
16
/**
 * HTTP route for `GET /json/protocol`.
 *
 * The protocol description is obtained from
 * `browserManager.getProtocolJSON()` on first use and then served from an
 * in-memory cache for the lifetime of this route instance.
 *
 * NOTE(review): the class is named `GetJSONVersion` although it serves
 * /json/protocol. The name is kept because it is the module's default export.
 */
export default class GetJSONVersion extends HTTPRoute {
  accepts = [contentTypes.any];
  auth = true;
  browser = null;
  concurrency = false;
  contentTypes = [contentTypes.json];
  description = `Returns Protocol JSON meta-data that Chrome comes with.`;
  method = Methods.get;
  path = HTTPRoutes.jsonProtocol;
  tags = [APITags.browserAPI];

  /** Resolved protocol JSON, kept after the first successful lookup. */
  private cachedProtocol: object | undefined;

  /** Lookup currently in flight, shared by requests that arrive meanwhile. */
  private pendingProtocol: Promise<object> | undefined;

  /**
   * Writes the cached protocol JSON as a 200 response, fetching it first when
   * the cache is empty.
   *
   * Requests arriving while a lookup is running await that same lookup rather
   * than each starting their own. A failed lookup is not cached, so the next
   * request retries.
   *
   * @param _req Incoming request (unused).
   * @param res Response the JSON payload is written to.
   */
  handler = async (_req: Request, res: Response): Promise<void> => {
    if (!this.cachedProtocol) {
      if (!this.pendingProtocol) {
        this.pendingProtocol = this.browserManager().getProtocolJSON();
      }

      try {
        this.cachedProtocol = await this.pendingProtocol;
      } finally {
        // Clear on success and on failure; failures must not block retries.
        this.pendingProtocol = undefined;
      }
    }

    return jsonResponse(res, 200, this.cachedProtocol);
  };
}
@@ -1,8 +1,8 @@
1
1
  import {
2
2
  APITags,
3
3
  BrowserManager,
4
- HTTPRoutes,
5
4
  HTTPRoute,
5
+ HTTPRoutes,
6
6
  Methods,
7
7
  Request,
8
8
  Response,
@@ -0,0 +1,19 @@
1
+ import { CDPJSONPayload, pageID } from '@browserless.io/browserless';
2
+
3
/**
 * Builds a mocked DevTools target entry (the shape Chrome returns from its
 * /json/list and /json/new HTTP endpoints) that points at `externalAddress`.
 *
 * A fresh page ID is generated on every call. Because clients dial
 * `webSocketDebuggerUrl` as a WebSocket, an `http:` or `https:` external
 * address is mapped to `ws:` or `wss:` respectively; any other scheme is
 * passed through untouched.
 *
 * @param externalAddress Absolute base URL that clients use to reach this
 * server. Only its scheme and host are kept; the path is replaced with
 * `/devtools/page/<id>`.
 * @returns The target entry describing a blank page.
 * @throws TypeError if `externalAddress` is not a valid absolute URL.
 */
export const getCDPJSONPayload = (externalAddress: string): CDPJSONPayload => {
  const id = pageID();
  const target = new URL(`/devtools/page/${id}`, externalAddress);

  if (target.protocol === 'http:') {
    target.protocol = 'ws:';
  } else if (target.protocol === 'https:') {
    target.protocol = 'wss:';
  }

  const { protocol, host, pathname, href } = target;

  return {
    description: '',
    // The inspector takes the socket location as "<scheme>=<host><path>".
    devtoolsFrontendUrl: `/devtools/inspector.html?${protocol.replace(':', '')}=${host}${pathname}`,
    id,
    title: 'New Tab',
    type: 'page',
    url: 'about:blank',
    webSocketDebuggerUrl: href,
  };
};
package/src/types.ts CHANGED
@@ -556,3 +556,41 @@ export interface IBrowserlessStats {
556
556
  unhealthy: number;
557
557
  units: number;
558
558
  }
559
+
560
+ export interface CDPJSONPayload {
561
+ /**
562
+ * The description of the target. Generally the page's title.
563
+ */
564
+ description: string;
565
+
566
+ /**
567
+ * The URL of the DevTools inspector app for this target. This may be a
+ * host-relative path, e.g. "/devtools/inspector.html?ws=<host>/devtools/page/<id>".
568
+ */
569
+ devtoolsFrontendUrl: string;
570
+
571
+ /**
572
+ * A unique ID for the underlying target.
573
+ */
574
+ id: string;
575
+
576
+ /**
577
+ * The title of the target. For pages this is the page's title.
578
+ */
579
+ title: string;
580
+
581
+ /**
582
+ * The type of target, generally "page" or "background_page".
583
+ */
584
+ type: string;
585
+
586
+ /**
587
+ * The URL the target is currently visiting.
588
+ */
589
+ url: string;
590
+
591
+ /**
592
+ * The target or page's WebSocket Debugger URL. Primarily used by legacy
593
+ * libraries to connect to, inspect, or remotely automate this target.
594
+ */
595
+ webSocketDebuggerUrl: string;
596
+ }
package/src/utils.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import * as fs from 'fs/promises';
2
2
  import {
3
+ BLESS_PAGE_IDENTIFIER,
3
4
  CDPChromium,
4
5
  Config,
5
6
  PlaywrightChromium,
@@ -52,6 +53,23 @@ export const jsExtension = '.js';
52
53
 
53
54
  export const id = (): string => crypto.randomUUID();
54
55
 
56
/**
 * Creates a page ID that is 32 characters long in total: the Browserless
 * marker (`BLESS_PAGE_IDENTIFIER`) followed by random characters drawn from
 * `0-9` and `A-Z`.
 *
 * The leading marker lets other parts of the system recognise the ID as one
 * that Browserless generated and handle it accordingly.
 *
 * @returns A random page ID starting with the Browserless marker.
 */
export const pageID = (): string => {
  const alphabet = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ';
  const suffixLength = 32 - BLESS_PAGE_IDENTIFIER.length;
  let suffix = '';

  for (let i = 0; i < suffixLength; i++) {
    const pick = Math.floor(Math.random() * alphabet.length);
    suffix += alphabet.charAt(pick);
  }

  return `${BLESS_PAGE_IDENTIFIER}${suffix}`;
};
72
+
55
73
  export const createLogger = (domain: string): debug.Debugger => {
56
74
  return debug(`browserless.io:${domain}`);
57
75
  };
@@ -224,10 +242,14 @@ export const removeNullStringify = (
224
242
  json: unknown,
225
243
  allowNull = true,
226
244
  ): string => {
227
- return JSON.stringify(json, (_key, value) => {
228
- if (allowNull) return value;
229
- if (value !== null) return value;
230
- });
245
+ return JSON.stringify(
246
+ json,
247
+ (_key, value) => {
248
+ if (allowNull) return value;
249
+ if (value !== null) return value;
250
+ },
251
+ ' ',
252
+ );
231
253
  };
232
254
 
233
255
  export const jsonOrString = (maybeJson: string): unknown | string =>