@browserless.io/browserless 2.0.0-beta-7 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/README.md +10 -6
  2. package/bin/browserless.js +4 -4
  3. package/bin/scaffold/README.md +7 -7
  4. package/bin/scaffold/tsconfig.json +1 -1
  5. package/browser.json +3 -3
  6. package/build/browsers/cdp-chromium.js +1 -2
  7. package/build/browsers/index.d.ts +9 -0
  8. package/build/browsers/index.js +24 -0
  9. package/build/browsers/playwright-chromium.js +1 -2
  10. package/build/browsers/playwright-firefox.js +1 -2
  11. package/build/browsers/playwright-webkit.js +1 -2
  12. package/build/config.d.ts +9 -0
  13. package/build/config.js +15 -0
  14. package/build/data/selectors.json +1 -1
  15. package/build/file-system.spec.js +1 -1
  16. package/build/http.d.ts +1 -0
  17. package/build/http.js +1 -0
  18. package/build/routes/chromium/http/content-post.body.json +8 -8
  19. package/build/routes/chromium/http/json-version-get.d.ts +15 -0
  20. package/build/routes/chromium/http/json-version-get.js +30 -0
  21. package/build/routes/chromium/http/json-version-get.response.json +37 -0
  22. package/build/routes/chromium/http/pdf-post.body.json +8 -8
  23. package/build/routes/chromium/http/scrape-post.body.json +8 -8
  24. package/build/routes/chromium/http/screenshot-post.body.json +8 -8
  25. package/build/routes/chromium/tests/json-version.spec.d.ts +1 -0
  26. package/build/routes/chromium/tests/json-version.spec.js +37 -0
  27. package/build/utils.js +1 -1
  28. package/package.json +10 -11
  29. package/src/browsers/cdp-chromium.ts +2 -3
  30. package/src/browsers/index.ts +40 -2
  31. package/src/browsers/playwright-chromium.ts +2 -3
  32. package/src/browsers/playwright-firefox.ts +2 -3
  33. package/src/browsers/playwright-webkit.ts +2 -3
  34. package/src/config.ts +17 -0
  35. package/src/file-system.spec.ts +1 -1
  36. package/src/http.ts +1 -0
  37. package/src/routes/chromium/http/json-version-get.ts +55 -0
  38. package/src/routes/chromium/tests/json-version.spec.ts +52 -0
  39. package/src/utils.ts +1 -1
  40. package/static/docs/swagger.json +99 -10
package/README.md CHANGED
@@ -8,8 +8,7 @@
8
8
  ![Multi CI](https://github.com/browserless/chrome/actions/workflows/docker-multi.yml/badge.svg)
9
9
 
10
10
 
11
- > [Looking for v2.x.x of browserless? Check it out here](https://github.com/browserless/browserless).
12
- > NOTE: Version 1 is the version we currently still have running on browserless' hosted services, we would recommend self-hosted users migrate to v2.
11
+ > [Looking for v1.x.x of browserless? You can find it here](https://github.com/browserless/chrome/tree/v1), although we recommend migrating to v2.
13
12
 
14
13
  Browserless allows remote clients to connect and execute headless work, all inside of docker. It supports the standard, unforked Puppeteer and Playwright libraries, as well as offering REST-based APIs for common actions like data collection, PDF generation and more.
15
14
 
@@ -24,14 +23,15 @@ If you've been struggling to deploy headless browsers without running into issue
24
23
  4. [Hosting](#hosting-providers)
25
24
  5. [Puppeteer](#puppeteer)
26
25
  6. [Playwright](#playwright)
27
- 7. [Licensing](#licensing)
28
- 8. [Changelog](https://github.com/browserless/chrome/blob/master/CHANGELOG.md)
26
+ 7. [Extending with NodeJS SDK](#extending-nodejs-skd)
27
+ 8. [Licensing](#licensing)
28
+ 9. [Changelog](https://github.com/browserless/chrome/blob/master/CHANGELOG.md)
29
29
 
30
30
  ## External links
31
31
 
32
32
  1. [Full documentation site](https://www.browserless.io/docs/start)
33
33
  2. [Live Debugger (using browserless.io)](https://chrome.browserless.io/)
34
- 3. [Docker](https://github.com/browserless/chrome/pkgs/container/basic)
34
+ 3. [Docker](https://github.com/browserless/browserless/pkgs/container/base)
35
35
  4. [Slack](https://join.slack.com/t/browserless/shared_invite/enQtMzA3OTMwNjA3MzY1LTRmMWU5NjQ0MTQ2YTE2YmU3MzdjNmVlMmU4MThjM2UxODNmNzNlZjVkY2U2NjdkMzYyNTgyZTBiMmE3Nzg0MzY)
36
36
 
37
37
  # Features
@@ -57,7 +57,7 @@ You still execute the script itself which gives you total control over what libr
57
57
 
58
58
  1. `docker run -p 3000:3000 ghcr.io/browserless/chrome`
59
59
  2. Visit `http://localhost:3000/docs` to see the documentation site.
60
- 3. See more at our [docker package](https://github.com/browserless/chrome/pkgs/container/basic).
60
+ 3. See more at our [docker package](https://github.com/browserless/browserless/pkgs/container/base).
61
61
 
62
62
  # Hosting Providers
63
63
 
@@ -102,6 +102,10 @@ const browser = await pw.chromium.connectOverCDP('ws://localhost:3000');
102
102
 
103
103
  After that, the rest of your code remains the same with no other changes required.
104
104
 
105
+ # Extending (NodeJS SKD)
106
+
107
+ Browserless comes with built-in extension capabilities, and allows for extending nearly any aspect of the system (for Version 2+). For more details on how to write your own routes, build docker images, and more, [see our SDK README.md](/bin/scaffold/README.md) or simply run "npx @browserless.io/browserless create" in a terminal and follow the onscreen prompts.
108
+
105
109
  # Usage with other libraries
106
110
 
107
111
  Most libraries allow you to specify a remote instance of Chrome to interact with. They are either looking for a websocket endpoint, a host and port, or some address. Browserless supports these by default, however if you're having issues please make an issue in this project and we'll try and work with the library authors to get them integrated with browserless. Please note that in V2 we no longer support selenium or webdriver integrations.
@@ -1,8 +1,6 @@
1
1
  #!/usr/bin/env node
2
2
  /* eslint-disable no-undef */
3
3
  'use strict';
4
- process.env.DEBUG = process.env.DEBUG || 'browserless*';
5
-
6
4
  import { readFile, writeFile } from 'fs/promises';
7
5
  import { Browserless } from '@browserless.io/browserless';
8
6
  import buildOpenAPI from '../scripts/build-open-api.js';
@@ -16,8 +14,10 @@ import fs from 'fs/promises';
16
14
  import path from 'path';
17
15
  import { spawn } from 'child_process';
18
16
 
19
- const log = debug('browserless:sdk:log');
20
- const promptLog = debug('browserless:prompt');
17
+ debug.enable('browserless*');
18
+
19
+ const log = debug('browserless.io:sdk:log');
20
+ const promptLog = debug('browserless.io:prompt');
21
21
 
22
22
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
23
23
  const cmd = process.argv[2];
@@ -106,16 +106,16 @@ By default most commands are non-interactive, such as the `build` and `dev` comm
106
106
 
107
107
  ## Routing
108
108
 
109
- Routing is, simply, a plain-old JavaScript object with certain expected properties. Many of the features of Browserless are exposed as options on routes so you can define many types of functionality with just a route definition.
109
+ Routing is based upon the JavaScript `class` fundamentals, and extends core classes inside of Browserless. Many of the features of Browserless are exposed as options on routes so you can define many types of functionality with just a simple route definition.
110
110
 
111
- Browserless has 4 different types of routes:
111
+ Browserless has 4 different types of primitive routes:
112
112
 
113
- - HTTP Routes that don't need a browser to run.
114
- - HTTP Routes that do need a browser to run.
115
- - WebSocket routes that don't a browser.
116
- - WebSockets that need a browser.
113
+ - HTTP Routes.
114
+ - HTTP Routes that require a browser.
115
+ - WebSocket Routes.
116
+ - WebSocket Routes that require a browser.
117
117
 
118
- We use this same semantic in our own codebase, so feel free to see how those work in our open-source projects. All routes are TypeScript and all our modules are documented, so you should be able to effectively write routes and modules with your code editor and not necessarily need these examples open. Below are a few examples:
118
+ Internally, we use this same class-based system, so feel free to see how those work in our open-source repositories. All routes are TypeScript-based and all our modules are documented, so you should be able to effectively write routes and modules with your code editor and not necessarily need these examples open. Below are a few examples:
119
119
 
120
120
  ### Basic HTTP Route
121
121
  ```ts
@@ -1,4 +1,4 @@
1
1
  {
2
2
  "extends": "./node_modules/@browserless.io/browserless/tsconfig.json",
3
- "include": ["./src/**/*"]
3
+ "include": ["./src/**/*"],
4
4
  }
package/browser.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
- "Browser": "HeadlessChrome/120.0.6099.28",
2
+ "Browser": "HeadlessChrome/121.0.6167.57",
3
3
  "Protocol-Version": "1.3",
4
- "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/120.0.6099.28 Safari/537.36",
5
- "WebKit-Version": "537.36 (@f4095e9665f7d7a2531edefcea119d45d899d95b)",
4
+ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/121.0.6167.57 Safari/537.36",
5
+ "WebKit-Version": "537.36 (@add6d6ffbc3a1c7e78cc15e6ba2dcb15208bedd5)",
6
6
  "webSocketDebuggerUrl": "ws://localhost:3000"
7
7
  }
@@ -229,11 +229,10 @@ export class CDPChromium extends EventEmitter {
229
229
  if (!this.browserWSEndpoint) {
230
230
  return null;
231
231
  }
232
+ const serverURL = new URL(this.config.getExternalWebSocketAddress());
232
233
  const wsURL = new URL(this.browserWSEndpoint);
233
- const serverURL = new URL(this.config.getExternalAddress());
234
234
  wsURL.hostname = serverURL.hostname;
235
235
  wsURL.port = serverURL.port;
236
- wsURL.protocol = serverURL.protocol === 'https' ? 'wss' : 'ws';
237
236
  if (token) {
238
237
  wsURL.searchParams.set('token', token);
239
238
  }
@@ -18,6 +18,15 @@ export declare class BrowserManager {
18
18
  * @returns Promise<string> of the fully-qualified path of the directory
19
19
  */
20
20
  protected generateDataDir: (sessionId?: string) => Promise<string>;
21
+ getVersionJSON: () => Promise<{
22
+ Browser: string;
23
+ 'Debugger-Version': string;
24
+ 'Protocol-Version': string;
25
+ 'User-Agent': string;
26
+ 'V8-Version': string;
27
+ 'WebKit-Version': string;
28
+ webSocketDebuggerUrl: string;
29
+ }>;
21
30
  private generateSessionJson;
22
31
  close: (browser: BrowserInstance, session: BrowserlessSession) => Promise<void>;
23
32
  getAllSessions: () => Promise<BrowserlessSessionJSON[]>;
@@ -41,6 +41,30 @@ export class BrowserManager {
41
41
  });
42
42
  return dataDirPath;
43
43
  };
44
+ getVersionJSON = async () => {
45
+ const browser = new CDPChromium({
46
+ blockAds: false,
47
+ config: this.config,
48
+ record: false,
49
+ userDataDir: null,
50
+ });
51
+ await browser.launch();
52
+ const wsEndpoint = browser.wsEndpoint();
53
+ if (!wsEndpoint) {
54
+ throw new Error('There was an error launching the browser');
55
+ }
56
+ const { port } = new URL(wsEndpoint);
57
+ const res = await fetch(`http://127.0.0.1:${port}/json/version`);
58
+ const meta = await res.json();
59
+ browser.close();
60
+ const { 'WebKit-Version': webkitVersion } = meta;
61
+ const debuggerVersion = webkitVersion.match(/\s\(@(\b[0-9a-f]{5,40}\b)/)[1];
62
+ return {
63
+ ...meta,
64
+ 'Debugger-Version': debuggerVersion,
65
+ webSocketDebuggerUrl: this.config.getExternalWebSocketAddress(),
66
+ };
67
+ };
44
68
  generateSessionJson = async (browser, session) => {
45
69
  const serverAddress = this.config.getExternalAddress();
46
70
  const sessions = [
@@ -72,11 +72,10 @@ export class PlaywrightChromium extends EventEmitter {
72
72
  if (!this.browserWSEndpoint) {
73
73
  return null;
74
74
  }
75
+ const serverURL = new URL(this.config.getExternalWebSocketAddress());
75
76
  const wsURL = new URL(this.browserWSEndpoint);
76
- const serverURL = new URL(this.config.getExternalAddress());
77
77
  wsURL.hostname = serverURL.hostname;
78
78
  wsURL.port = serverURL.port;
79
- wsURL.protocol = serverURL.protocol === 'https' ? 'wss' : 'ws';
80
79
  if (token) {
81
80
  wsURL.searchParams.set('token', token);
82
81
  }
@@ -67,11 +67,10 @@ export class PlaywrightFirefox extends EventEmitter {
67
67
  if (!this.browserWSEndpoint) {
68
68
  return null;
69
69
  }
70
+ const serverURL = new URL(this.config.getExternalWebSocketAddress());
70
71
  const wsURL = new URL(this.browserWSEndpoint);
71
- const serverURL = new URL(this.config.getExternalAddress());
72
72
  wsURL.hostname = serverURL.hostname;
73
73
  wsURL.port = serverURL.port;
74
- wsURL.protocol = serverURL.protocol === 'https' ? 'wss' : 'ws';
75
74
  if (token) {
76
75
  wsURL.searchParams.set('token', token);
77
76
  }
@@ -67,11 +67,10 @@ export class PlaywrightWebkit extends EventEmitter {
67
67
  if (!this.browserWSEndpoint) {
68
68
  return null;
69
69
  }
70
+ const serverURL = new URL(this.config.getExternalWebSocketAddress());
70
71
  const wsURL = new URL(this.browserWSEndpoint);
71
- const serverURL = new URL(this.config.getExternalAddress());
72
72
  wsURL.hostname = serverURL.hostname;
73
73
  wsURL.port = serverURL.port;
74
- wsURL.protocol = serverURL.protocol === 'https' ? 'wss' : 'ws';
75
74
  if (token) {
76
75
  wsURL.searchParams.set('token', token);
77
76
  }
package/build/config.d.ts CHANGED
@@ -124,6 +124,15 @@ export declare class Config extends EventEmitter {
124
124
  * @returns {string} The URL to reach the server
125
125
  */
126
126
  getExternalAddress: () => string;
127
+ /**
128
+ * Returns the fully-qualified WebSocket URL for the
129
+ * external address that browserless might be
130
+ * running behind *or* the server address if
131
+ * no external URL is provided.
132
+ *
133
+ * @returns {string} The URL to reach the server
134
+ */
135
+ getExternalWebSocketAddress: () => string;
127
136
  /**
128
137
  * When CORS is enabled, returns relevant CORS headers
129
138
  * to requests and for the OPTIONS call. Values can be
package/build/config.js CHANGED
@@ -335,6 +335,21 @@ export class Config extends EventEmitter {
335
335
  * @returns {string} The URL to reach the server
336
336
  */
337
337
  getExternalAddress = () => this.external ?? this.getServerAddress();
338
+ /**
339
+ * Returns the fully-qualified WebSocket URL for the
340
+ * external address that browserless might be
341
+ * running behind *or* the server address if
342
+ * no external URL is provided.
343
+ *
344
+ * @returns {string} The URL to reach the server
345
+ */
346
+ getExternalWebSocketAddress = () => {
347
+ const httpAddress = new URL(this.external ?? this.getServerAddress());
348
+ httpAddress.protocol = httpAddress.protocol.startsWith('https')
349
+ ? 'wss:'
350
+ : 'ws:';
351
+ return httpAddress.href;
352
+ };
338
353
  /**
339
354
  * When CORS is enabled, returns relevant CORS headers
340
355
  * to requests and for the OPTIONS call. Values can be