@crawlee/playwright 3.0.0-alpha.2 → 3.0.0-alpha.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/LICENSE.md +201 -0
- package/{README.md → dist/README.md} +0 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +9 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +73 -0
- package/dist/internals/playwright-crawler.d.ts +223 -0
- package/dist/internals/playwright-crawler.d.ts.map +1 -0
- package/dist/internals/playwright-crawler.js +108 -0
- package/dist/internals/playwright-crawler.js.map +1 -0
- package/dist/internals/playwright-launcher.d.ts +102 -0
- package/dist/internals/playwright-launcher.d.ts.map +1 -0
- package/dist/internals/playwright-launcher.js +97 -0
- package/dist/internals/playwright-launcher.js.map +1 -0
- package/dist/internals/utils/playwright-utils.d.ts +57 -0
- package/dist/internals/utils/playwright-utils.d.ts.map +1 -0
- package/dist/internals/utils/playwright-utils.js +84 -0
- package/dist/internals/utils/playwright-utils.js.map +1 -0
- package/dist/package.json +65 -0
- package/dist/tsconfig.build.tsbuildinfo +1 -0
- package/package.json +8 -7
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { Browser, BrowserType, LaunchOptions } from 'playwright';
|
|
2
|
+
import { PlaywrightPlugin } from '@crawlee/browser-pool';
|
|
3
|
+
import { BrowserLaunchContext, BrowserLauncher } from '@crawlee/browser';
|
|
4
|
+
/**
|
|
5
|
+
* Apify extends the launch options of Playwright.
|
|
6
|
+
* You can use any of the Playwright compatible
|
|
7
|
+
* [`LaunchOptions`](https://playwright.dev/docs/api/class-browsertype#browsertypelaunchoptions)
|
|
8
|
+
* options by providing the `launchOptions` property.
|
|
9
|
+
*
|
|
10
|
+
* **Example:**
|
|
11
|
+
* ```js
|
|
12
|
+
* // launch a headless Chrome (not Chromium)
|
|
13
|
+
* const launchContext = {
|
|
14
|
+
* // Apify helpers
|
|
15
|
+
* useChrome: true,
|
|
16
|
+
* proxyUrl: 'http://user:password@some.proxy.com',
|
|
17
|
+
* // Native Playwright options
|
|
18
|
+
* launchOptions: {
|
|
19
|
+
* headless: true,
|
|
20
|
+
* args: ['--some-flag'],
|
|
21
|
+
* }
|
|
22
|
+
* }
|
|
23
|
+
* ```
|
|
24
|
+
*/
|
|
25
|
+
export interface PlaywrightLaunchContext extends BrowserLaunchContext<LaunchOptions, BrowserType> {
|
|
26
|
+
/** `browserType.launch` [options](https://playwright.dev/docs/api/class-browsertype?_highlight=launch#browsertypelaunchoptions) */
|
|
27
|
+
launchOptions?: LaunchOptions;
|
|
28
|
+
/**
|
|
29
|
+
* URL to an HTTP proxy server. It must define the port number,
|
|
30
|
+
* and it may also contain proxy username and password.
|
|
31
|
+
*
|
|
32
|
+
* Example: `http://bob:pass123@proxy.example.com:1234`.
|
|
33
|
+
*/
|
|
34
|
+
proxyUrl?: string;
|
|
35
|
+
/**
|
|
36
|
+
* If `true` and `executablePath` is not set,
|
|
37
|
+
* Playwright will launch full Google Chrome browser available on the machine
|
|
38
|
+
* rather than the bundled Chromium. The path to Chrome executable
|
|
39
|
+
* is taken from the `APIFY_CHROME_EXECUTABLE_PATH` environment variable if provided,
|
|
40
|
+
* or defaults to the typical Google Chrome executable location specific for the operating system.
|
|
41
|
+
* By default, this option is `false`.
|
|
42
|
+
* @default false
|
|
43
|
+
*/
|
|
44
|
+
useChrome?: boolean;
|
|
45
|
+
/**
|
|
46
|
+
* By default this function uses `require("playwright").chromium`.
|
|
47
|
+
* If you want to use a different browser you can pass it by this property as e.g. `require("playwright").firefox`
|
|
48
|
+
*/
|
|
49
|
+
launcher?: BrowserType;
|
|
50
|
+
}
|
|
51
|
+
/**
|
|
52
|
+
* `PlaywrightLauncher` is based on the `BrowserLauncher`. It launches `playwright` browser instance.
|
|
53
|
+
* @ignore
|
|
54
|
+
*/
|
|
55
|
+
export declare class PlaywrightLauncher extends BrowserLauncher<PlaywrightPlugin> {
|
|
56
|
+
protected static optionsShape: {
|
|
57
|
+
launcher: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
|
|
58
|
+
proxyUrl: import("ow").StringPredicate & import("ow").BasePredicate<string | undefined>;
|
|
59
|
+
useChrome: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
|
|
60
|
+
useIncognitoPages: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
|
|
61
|
+
userDataDir: import("ow").StringPredicate & import("ow").BasePredicate<string | undefined>;
|
|
62
|
+
launchOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
|
|
63
|
+
};
|
|
64
|
+
/**
|
|
65
|
+
* All `PlaywrightLauncher` parameters are passed via this launchContext object.
|
|
66
|
+
*/
|
|
67
|
+
constructor(launchContext?: PlaywrightLaunchContext);
|
|
68
|
+
}
|
|
69
|
+
/**
|
|
70
|
+
* Launches headless browsers using Playwright pre-configured to work within the Apify platform.
|
|
71
|
+
* The function has the same return value as `browserType.launch()`.
|
|
72
|
+
* See [Playwright documentation](https://playwright.dev/docs/api/class-browsertype) for more details.
|
|
73
|
+
*
|
|
74
|
+
* The `launchPlaywright()` function alters the following Playwright options:
|
|
75
|
+
*
|
|
76
|
+
* - Passes the setting from the `APIFY_HEADLESS` environment variable to the `headless` option,
|
|
77
|
+
* unless it was already defined by the caller or `APIFY_XVFB` environment variable is set to `1`.
|
|
78
|
+
* Note that Apify Actor cloud platform automatically sets `APIFY_HEADLESS=1` to all running actors.
|
|
79
|
+
* - Takes the `proxyUrl` option, validates it and adds it to `launchOptions` in a proper format.
|
|
80
|
+
* The proxy URL must define a port number and have one of the following schemes: `http://`,
|
|
81
|
+
* `https://`, `socks4://` or `socks5://`.
|
|
82
|
+
* If the proxy is HTTP (i.e. has the `http://` scheme) and contains username or password,
|
|
83
|
+
* the `launchPlaywright` function sets up an anonymous HTTP proxy
|
|
84
|
+
* to make the proxy work with headless Chrome. For more information, read the
|
|
85
|
+
* [blog post about proxy-chain library](https://blog.apify.com/how-to-make-headless-chrome-and-puppeteer-use-a-proxy-server-with-authentication-249a21a79212).
|
|
86
|
+
*
|
|
87
|
+
* To use this function, you need to have the [Playwright](https://www.npmjs.com/package/playwright)
|
|
88
|
+
* NPM package installed in your project.
|
|
89
|
+
* When running on the Apify Platform, you can achieve that simply
|
|
90
|
+
* by using the `apify/actor-node-playwright-*` base Docker image for your actor - see
|
|
91
|
+
* [Apify Actor documentation](https://docs.apify.com/actor/build#base-images)
|
|
92
|
+
* for details.
|
|
93
|
+
*
|
|
94
|
+
* @param [launchContext]
|
|
95
|
+
* Optional settings passed to `browserType.launch()`. In addition to
|
|
96
|
+
* [Playwright's options](https://playwright.dev/docs/api/class-browsertype?_highlight=launch#browsertypelaunchoptions)
|
|
97
|
+
* the object may contain our own {@link PlaywrightLaunchContext} options that enable additional features.
|
|
98
|
+
* @returns
|
|
99
|
+
* Promise that resolves to Playwright's `Browser` instance.
|
|
100
|
+
*/
|
|
101
|
+
export declare function launchPlaywright(launchContext?: PlaywrightLaunchContext): Promise<Browser>;
|
|
102
|
+
//# sourceMappingURL=playwright-launcher.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"playwright-launcher.d.ts","sourceRoot":"","sources":["../../src/internals/playwright-launcher.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AACjE,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AACzD,OAAO,EAAE,oBAAoB,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AAEzE;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,MAAM,WAAW,uBAAwB,SAAQ,oBAAoB,CAAC,aAAa,EAAE,WAAW,CAAC;IAC7F,mIAAmI;IACnI,aAAa,CAAC,EAAE,aAAa,CAAC;IAE9B;;;;;OAKG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB;;;;;;;;OAQG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB;;;OAGG;IACH,QAAQ,CAAC,EAAE,WAAW,CAAC;CAC1B;AAED;;;GAGG;AACH,qBAAa,kBAAmB,SAAQ,eAAe,CAAC,gBAAgB,CAAC;IACrE,iBAA0B,YAAY;;;;;;;MAGpC;IAEF;;OAEG;gBACS,aAAa,GAAE,uBAA4B;CAoB1D;AA0BD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACH,wBAAsB,gBAAgB,CAAC,aAAa,CAAC,EAAE,uBAAuB,GAAG,OAAO,CAAC,OAAO,CAAC,CAIhG"}
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.launchPlaywright = exports.PlaywrightLauncher = void 0;
|
|
4
|
+
const tslib_1 = require("tslib");
|
|
5
|
+
const ow_1 = tslib_1.__importDefault(require("ow"));
|
|
6
|
+
const browser_pool_1 = require("@crawlee/browser-pool");
|
|
7
|
+
const browser_1 = require("@crawlee/browser");
|
|
8
|
+
/**
|
|
9
|
+
* `PlaywrightLauncher` is based on the `BrowserLauncher`. It launches `playwright` browser instance.
|
|
10
|
+
* @ignore
|
|
11
|
+
*/
|
|
12
|
+
class PlaywrightLauncher extends browser_1.BrowserLauncher {
|
|
13
|
+
/**
|
|
14
|
+
* All `PlaywrightLauncher` parameters are passed via this launchContext object.
|
|
15
|
+
*/
|
|
16
|
+
constructor(launchContext = {}) {
|
|
17
|
+
(0, ow_1.default)(launchContext, 'PlaywrightLauncherOptions', ow_1.default.object.exactShape(PlaywrightLauncher.optionsShape));
|
|
18
|
+
const { launcher = browser_1.BrowserLauncher.requireLauncherOrThrow('playwright', 'apify/actor-node-playwright-*').chromium, } = launchContext;
|
|
19
|
+
const { launchOptions = {}, ...rest } = launchContext;
|
|
20
|
+
super({
|
|
21
|
+
...rest,
|
|
22
|
+
launchOptions: {
|
|
23
|
+
...launchOptions,
|
|
24
|
+
executablePath: getDefaultExecutablePath(launchContext),
|
|
25
|
+
},
|
|
26
|
+
launcher,
|
|
27
|
+
});
|
|
28
|
+
this.Plugin = browser_pool_1.PlaywrightPlugin;
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
exports.PlaywrightLauncher = PlaywrightLauncher;
|
|
32
|
+
Object.defineProperty(PlaywrightLauncher, "optionsShape", {
|
|
33
|
+
enumerable: true,
|
|
34
|
+
configurable: true,
|
|
35
|
+
writable: true,
|
|
36
|
+
value: {
|
|
37
|
+
...browser_1.BrowserLauncher.optionsShape,
|
|
38
|
+
launcher: ow_1.default.optional.object,
|
|
39
|
+
}
|
|
40
|
+
});
|
|
41
|
+
/**
|
|
42
|
+
* @returns {string | undefined} default path to browser.
|
|
43
|
+
* If actor-node-playwright-* image is used the APIFY_DEFAULT_BROWSER_PATH is considered as default.
|
|
44
|
+
* @ignore
|
|
45
|
+
*/
|
|
46
|
+
function getDefaultExecutablePath(launchContext) {
|
|
47
|
+
const pathFromPlaywrightImage = process.env.APIFY_DEFAULT_BROWSER_PATH;
|
|
48
|
+
const { launchOptions = {} } = launchContext;
|
|
49
|
+
if (launchOptions.executablePath) {
|
|
50
|
+
return launchOptions.executablePath;
|
|
51
|
+
}
|
|
52
|
+
if (launchContext.useChrome) {
|
|
53
|
+
return undefined;
|
|
54
|
+
}
|
|
55
|
+
if (pathFromPlaywrightImage) {
|
|
56
|
+
return pathFromPlaywrightImage;
|
|
57
|
+
}
|
|
58
|
+
return undefined;
|
|
59
|
+
}
|
|
60
|
+
/**
|
|
61
|
+
* Launches headless browsers using Playwright pre-configured to work within the Apify platform.
|
|
62
|
+
* The function has the same return value as `browserType.launch()`.
|
|
63
|
+
* See [Playwright documentation](https://playwright.dev/docs/api/class-browsertype) for more details.
|
|
64
|
+
*
|
|
65
|
+
* The `launchPlaywright()` function alters the following Playwright options:
|
|
66
|
+
*
|
|
67
|
+
* - Passes the setting from the `APIFY_HEADLESS` environment variable to the `headless` option,
|
|
68
|
+
* unless it was already defined by the caller or `APIFY_XVFB` environment variable is set to `1`.
|
|
69
|
+
* Note that Apify Actor cloud platform automatically sets `APIFY_HEADLESS=1` to all running actors.
|
|
70
|
+
* - Takes the `proxyUrl` option, validates it and adds it to `launchOptions` in a proper format.
|
|
71
|
+
* The proxy URL must define a port number and have one of the following schemes: `http://`,
|
|
72
|
+
* `https://`, `socks4://` or `socks5://`.
|
|
73
|
+
* If the proxy is HTTP (i.e. has the `http://` scheme) and contains username or password,
|
|
74
|
+
* the `launchPlaywright` function sets up an anonymous HTTP proxy
|
|
75
|
+
* to make the proxy work with headless Chrome. For more information, read the
|
|
76
|
+
* [blog post about proxy-chain library](https://blog.apify.com/how-to-make-headless-chrome-and-puppeteer-use-a-proxy-server-with-authentication-249a21a79212).
|
|
77
|
+
*
|
|
78
|
+
* To use this function, you need to have the [Playwright](https://www.npmjs.com/package/playwright)
|
|
79
|
+
* NPM package installed in your project.
|
|
80
|
+
* When running on the Apify Platform, you can achieve that simply
|
|
81
|
+
* by using the `apify/actor-node-playwright-*` base Docker image for your actor - see
|
|
82
|
+
* [Apify Actor documentation](https://docs.apify.com/actor/build#base-images)
|
|
83
|
+
* for details.
|
|
84
|
+
*
|
|
85
|
+
* @param [launchContext]
|
|
86
|
+
* Optional settings passed to `browserType.launch()`. In addition to
|
|
87
|
+
* [Playwright's options](https://playwright.dev/docs/api/class-browsertype?_highlight=launch#browsertypelaunchoptions)
|
|
88
|
+
* the object may contain our own {@link PlaywrightLaunchContext} options that enable additional features.
|
|
89
|
+
* @returns
|
|
90
|
+
* Promise that resolves to Playwright's `Browser` instance.
|
|
91
|
+
*/
|
|
92
|
+
async function launchPlaywright(launchContext) {
|
|
93
|
+
const playwrightLauncher = new PlaywrightLauncher(launchContext);
|
|
94
|
+
return playwrightLauncher.launch();
|
|
95
|
+
}
|
|
96
|
+
exports.launchPlaywright = launchPlaywright;
|
|
97
|
+
//# sourceMappingURL=playwright-launcher.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"playwright-launcher.js","sourceRoot":"","sources":["../../src/internals/playwright-launcher.ts"],"names":[],"mappings":";;;;AAAA,oDAAoB;AAEpB,wDAAyD;AACzD,8CAAyE;AAqDzE;;;GAGG;AACH,MAAa,kBAAmB,SAAQ,yBAAiC;IAMrE;;OAEG;IACH,YAAY,gBAAyC,EAAE;QACnD,IAAA,YAAE,EAAC,aAAa,EAAE,2BAA2B,EAAE,YAAE,CAAC,MAAM,CAAC,UAAU,CAAC,kBAAkB,CAAC,YAAY,CAAC,CAAC,CAAC;QAEtG,MAAM,EACF,QAAQ,GAAG,yBAAe,CAAC,sBAAsB,CAA8B,YAAY,EAAE,+BAA+B,CAAC,CAAC,QAAQ,GACzI,GAAG,aAAa,CAAC;QAElB,MAAM,EAAE,aAAa,GAAG,EAAE,EAAE,GAAG,IAAI,EAAE,GAAG,aAAa,CAAC;QAEtD,KAAK,CAAC;YACF,GAAG,IAAI;YACP,aAAa,EAAE;gBACX,GAAG,aAAa;gBAChB,cAAc,EAAE,wBAAwB,CAAC,aAAa,CAAC;aAC1D;YACD,QAAQ;SACX,CAAC,CAAC;QAEH,IAAI,CAAC,MAAM,GAAG,+BAAgB,CAAC;IACnC,CAAC;;AA5BL,gDA6BC;AA5BG;;;;WAAyC;QACrC,GAAG,yBAAe,CAAC,YAAY;QAC/B,QAAQ,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;KAC/B;GAAC;AA2BN;;;;GAIG;AACH,SAAS,wBAAwB,CAAC,aAAsC;IACpE,MAAM,uBAAuB,GAAG,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC;IACvE,MAAM,EAAE,aAAa,GAAG,EAAE,EAAE,GAAG,aAAa,CAAC;IAE7C,IAAI,aAAa,CAAC,cAAc,EAAE;QAC9B,OAAO,aAAa,CAAC,cAAc,CAAC;KACvC;IAED,IAAI,aAAa,CAAC,SAAS,EAAE;QACzB,OAAO,SAAS,CAAC;KACpB;IAED,IAAI,uBAAuB,EAAE;QACzB,OAAO,uBAAuB,CAAC;KAClC;IAED,OAAO,SAAS,CAAC;AACrB,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACI,KAAK,UAAU,gBAAgB,CAAC,aAAuC;IAC1E,MAAM,kBAAkB,GAAG,IAAI,kBAAkB,CAAC,aAAa,CAAC,CAAC;IAEjE,OAAO,kBAAkB,CAAC,MAAM,EAAE,CAAC;AACvC,CAAC;AAJD,4CAIC"}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* A namespace that contains various utilities for
|
|
3
|
+
* [Playwright](https://github.com/microsoft/playwright) - a Node.js library for automating Chromium, Firefox and WebKit browsers.
|
|
4
|
+
*
|
|
5
|
+
* **Example usage:**
|
|
6
|
+
*
|
|
7
|
+
* ```javascript
|
|
8
|
+
* const { Actor } = require('apify');
|
|
9
|
+
* const { playwright } = Actor.utils;
|
|
10
|
+
*
|
|
11
|
+
* // Navigate to https://www.example.com in Playwright with a POST request
|
|
12
|
+
* const browser = await Actor.launchPlaywright();
|
|
13
|
+
* const page = await browser.newPage();
|
|
14
|
+
* await playwright.gotoExtended(page, {
|
|
15
|
+
* url: 'https://example.com',
|
|
16
|
+
* method: 'POST',
|
|
17
|
+
* });
|
|
18
|
+
* ```
|
|
19
|
+
* @module playwrightUtils
|
|
20
|
+
*/
|
|
21
|
+
import { Page, Response } from 'playwright';
|
|
22
|
+
import { Request } from '@crawlee/core';
|
|
23
|
+
export interface DirectNavigationOptions {
|
|
24
|
+
/**
|
|
25
|
+
* Maximum operation time in milliseconds, defaults to 30 seconds, pass `0` to disable timeout. The
|
|
26
|
+
* default value can be changed by using the browserContext.setDefaultNavigationTimeout(timeout),
|
|
27
|
+
* browserContext.setDefaultTimeout(timeout), page.setDefaultNavigationTimeout(timeout) or
|
|
28
|
+
* page.setDefaultTimeout(timeout) methods.
|
|
29
|
+
*/
|
|
30
|
+
timeout?: number;
|
|
31
|
+
/**
|
|
32
|
+
* When to consider operation succeeded, defaults to `load`. Events can be either:
|
|
33
|
+
* - `'domcontentloaded'` - consider operation to be finished when the `DOMContentLoaded` event is fired.
|
|
34
|
+
* - `'load'` - consider operation to be finished when the `load` event is fired.
|
|
35
|
+
* - `'networkidle'` - consider operation to be finished when there are no network connections for at least `500` ms.
|
|
36
|
+
*/
|
|
37
|
+
waitUntil?: 'domcontentloaded' | 'load' | 'networkidle';
|
|
38
|
+
/**
|
|
39
|
+
* Referer header value. If provided it will take preference over the referer header value set by page.setExtraHTTPHeaders(headers).
|
|
40
|
+
*/
|
|
41
|
+
referer?: string;
|
|
42
|
+
}
|
|
43
|
+
/**
|
|
44
|
+
* Extended version of Playwright's `page.goto()` allowing to perform requests with HTTP method other than GET,
|
|
45
|
+
* with custom headers and POST payload. URL, method, headers and payload are taken from
|
|
46
|
+
* request parameter that must be an instance of Request class.
|
|
47
|
+
*
|
|
48
|
+
* *NOTE:* In recent versions of Playwright using requests other than GET, overriding headers and adding payloads disables
|
|
49
|
+
* browser cache which degrades performance.
|
|
50
|
+
*
|
|
51
|
+
* @param page
|
|
52
|
+
* Playwright [`Page`](https://playwright.dev/docs/api/class-page) object.
|
|
53
|
+
* @param request
|
|
54
|
+
* @param [gotoOptions] Custom options for `page.goto()`.
|
|
55
|
+
*/
|
|
56
|
+
export declare function gotoExtended(page: Page, request: Request, gotoOptions?: DirectNavigationOptions): Promise<Response | null>;
|
|
57
|
+
//# sourceMappingURL=playwright-utils.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"playwright-utils.d.ts","sourceRoot":"","sources":["../../../src/internals/utils/playwright-utils.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAGH,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAS,MAAM,YAAY,CAAC;AAEnD,OAAO,EAAc,OAAO,EAAE,MAAM,eAAe,CAAC;AAKpD,MAAM,WAAW,uBAAuB;IACpC;;;;;OAKG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;;;;OAKG;IACH,SAAS,CAAC,EAAE,kBAAkB,GAAG,MAAM,GAAG,aAAa,CAAC;IAExD;;OAEG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;;;;;;;;;;;GAYG;AACH,wBAAsB,YAAY,CAAC,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,GAAE,uBAA4B,GAAG,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC,CA0CpI"}
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* A namespace that contains various utilities for
|
|
4
|
+
* [Playwright](https://github.com/microsoft/playwright) - a Node.js library for automating Chromium, Firefox and WebKit browsers.
|
|
5
|
+
*
|
|
6
|
+
* **Example usage:**
|
|
7
|
+
*
|
|
8
|
+
* ```javascript
|
|
9
|
+
* const { Actor } = require('apify');
|
|
10
|
+
* const { playwright } = Actor.utils;
|
|
11
|
+
*
|
|
12
|
+
* // Navigate to https://www.example.com in Playwright with a POST request
|
|
13
|
+
* const browser = await Actor.launchPlaywright();
|
|
14
|
+
* const page = await browser.newPage();
|
|
15
|
+
* await playwright.gotoExtended(page, {
|
|
16
|
+
* url: 'https://example.com',
|
|
17
|
+
* method: 'POST',
|
|
18
|
+
* });
|
|
19
|
+
* ```
|
|
20
|
+
* @module playwrightUtils
|
|
21
|
+
*/
|
|
22
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
23
|
+
exports.gotoExtended = void 0;
|
|
24
|
+
const tslib_1 = require("tslib");
|
|
25
|
+
const ow_1 = tslib_1.__importDefault(require("ow"));
|
|
26
|
+
const log_1 = tslib_1.__importDefault(require("@apify/log"));
|
|
27
|
+
const core_1 = require("@crawlee/core");
|
|
28
|
+
const log = log_1.default.child({ prefix: 'Playwright Utils' });
|
|
29
|
+
/**
|
|
30
|
+
* Extended version of Playwright's `page.goto()` allowing to perform requests with HTTP method other than GET,
|
|
31
|
+
* with custom headers and POST payload. URL, method, headers and payload are taken from
|
|
32
|
+
* request parameter that must be an instance of Request class.
|
|
33
|
+
*
|
|
34
|
+
* *NOTE:* In recent versions of Playwright using requests other than GET, overriding headers and adding payloads disables
|
|
35
|
+
* browser cache which degrades performance.
|
|
36
|
+
*
|
|
37
|
+
* @param page
|
|
38
|
+
* Playwright [`Page`](https://playwright.dev/docs/api/class-page) object.
|
|
39
|
+
* @param request
|
|
40
|
+
* @param [gotoOptions] Custom options for `page.goto()`.
|
|
41
|
+
*/
|
|
42
|
+
async function gotoExtended(page, request, gotoOptions = {}) {
|
|
43
|
+
(0, ow_1.default)(page, ow_1.default.object.validate(core_1.validators.browserPage));
|
|
44
|
+
(0, ow_1.default)(request, ow_1.default.object.partialShape({
|
|
45
|
+
url: ow_1.default.string.url,
|
|
46
|
+
method: ow_1.default.optional.string,
|
|
47
|
+
headers: ow_1.default.optional.object,
|
|
48
|
+
payload: ow_1.default.optional.any(ow_1.default.string, ow_1.default.buffer),
|
|
49
|
+
}));
|
|
50
|
+
(0, ow_1.default)(gotoOptions, ow_1.default.object);
|
|
51
|
+
const { url, method, headers, payload } = request;
|
|
52
|
+
const isEmpty = (o) => !o || Object.keys(o).length === 0;
|
|
53
|
+
if (method !== 'GET' || payload || !isEmpty(headers)) {
|
|
54
|
+
// This is not deprecated, we use it to log only once.
|
|
55
|
+
log.deprecated('Using other request methods than GET, rewriting headers and adding payloads has a high impact on performance '
|
|
56
|
+
+ 'in recent versions of Playwright. Use only when necessary.');
|
|
57
|
+
let wasCalled = false;
|
|
58
|
+
const interceptRequestHandler = async (route) => {
|
|
59
|
+
try {
|
|
60
|
+
// We want to ensure that this won't get executed again in a case that there is a subsequent request
|
|
61
|
+
// for example for some asset file link from main HTML.
|
|
62
|
+
if (wasCalled) {
|
|
63
|
+
return await route.continue();
|
|
64
|
+
}
|
|
65
|
+
wasCalled = true;
|
|
66
|
+
const overrides = {};
|
|
67
|
+
if (method !== 'GET')
|
|
68
|
+
overrides.method = method;
|
|
69
|
+
if (payload)
|
|
70
|
+
overrides.postData = payload;
|
|
71
|
+
if (!isEmpty(headers))
|
|
72
|
+
overrides.headers = headers;
|
|
73
|
+
await route.continue(overrides);
|
|
74
|
+
}
|
|
75
|
+
catch (error) {
|
|
76
|
+
log.debug('Error inside request interceptor', { error });
|
|
77
|
+
}
|
|
78
|
+
};
|
|
79
|
+
await page.route('**/*', interceptRequestHandler);
|
|
80
|
+
}
|
|
81
|
+
return page.goto(url, gotoOptions);
|
|
82
|
+
}
|
|
83
|
+
exports.gotoExtended = gotoExtended;
|
|
84
|
+
//# sourceMappingURL=playwright-utils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"playwright-utils.js","sourceRoot":"","sources":["../../../src/internals/utils/playwright-utils.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;GAmBG;;;;AAEH,oDAAoB;AAEpB,6DAA8B;AAC9B,wCAAoD;AAGpD,MAAM,GAAG,GAAG,aAAI,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,kBAAkB,EAAE,CAAC,CAAC;AAyBvD;;;;;;;;;;;;GAYG;AACI,KAAK,UAAU,YAAY,CAAC,IAAU,EAAE,OAAgB,EAAE,cAAuC,EAAE;IACtG,IAAA,YAAE,EAAC,IAAI,EAAE,YAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,iBAAU,CAAC,WAAW,CAAC,CAAC,CAAC;IACrD,IAAA,YAAE,EAAC,OAAO,EAAE,YAAE,CAAC,MAAM,CAAC,YAAY,CAAC;QAC/B,GAAG,EAAE,YAAE,CAAC,MAAM,CAAC,GAAG;QAClB,MAAM,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;QAC1B,OAAO,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;QAC3B,OAAO,EAAE,YAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,YAAE,CAAC,MAAM,EAAE,YAAE,CAAC,MAAM,CAAC;KACjD,CAAC,CAAC,CAAC;IACJ,IAAA,YAAE,EAAC,WAAW,EAAE,YAAE,CAAC,MAAM,CAAC,CAAC;IAE3B,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,GAAG,OAAO,CAAC;IAClD,MAAM,OAAO,GAAG,CAAC,CAAU,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC;IAElE,IAAI,MAAM,KAAK,KAAK,IAAI,OAAO,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE;QAClD,sDAAsD;QACtD,GAAG,CAAC,UAAU,CAAC,+GAA+G;cACxH,4DAA4D,CAAC,CAAC;QACpE,IAAI,SAAS,GAAG,KAAK,CAAC;QACtB,MAAM,uBAAuB,GAAG,KAAK,EAAE,KAAY,EAAE,EAAE;YACnD,IAAI;gBACA,oGAAoG;gBACpG,uDAAuD;gBACvD,IAAI,SAAS,EAAE;oBACX,OAAO,MAAM,KAAK,CAAC,QAAQ,EAAE,CAAC;iBACjC;gBAED,SAAS,GAAG,IAAI,CAAC;gBACjB,MAAM,SAAS,GAAe,EAAE,CAAC;gBAEjC,IAAI,MAAM,KAAK,KAAK;oBAAE,SAAS,CAAC,MAAM,GAAG,MAAM,CAAC;gBAChD,IAAI,OAAO;oBAAE,SAAS,CAAC,QAAQ,GAAG,OAAO,CAAC;gBAC1C,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC;oBAAE,SAAS,CAAC,OAAO,GAAG,OAAO,CAAC;gBACnD,MAAM,KAAK,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;aACnC;YAAC,OAAO,KAAK,EAAE;gBACZ,GAAG,CAAC,KAAK,CAAC,kCAAkC,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC;aAC5D;QACL,CAAC,CAAC;QAEF,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,uBAAuB,CAAC,CAAC;KACrD;IAED,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,WAAW,CAAC,CAAC;AACvC,CAAC;AA1CD,oCA0CC"}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@crawlee/playwright",
|
|
3
|
+
"version": "3.0.0-alpha.3",
|
|
4
|
+
"description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
|
|
5
|
+
"engines": {
|
|
6
|
+
"node": ">=16.0.0"
|
|
7
|
+
},
|
|
8
|
+
"types": "index.d.ts",
|
|
9
|
+
"exports": {
|
|
10
|
+
".": {
|
|
11
|
+
"import": "./index.mjs",
|
|
12
|
+
"require": "./index.js"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
"keywords": [
|
|
16
|
+
"apify",
|
|
17
|
+
"headless",
|
|
18
|
+
"chrome",
|
|
19
|
+
"puppeteer",
|
|
20
|
+
"crawler",
|
|
21
|
+
"scraper"
|
|
22
|
+
],
|
|
23
|
+
"author": {
|
|
24
|
+
"name": "Apify",
|
|
25
|
+
"email": "support@apify.com",
|
|
26
|
+
"url": "https://apify.com"
|
|
27
|
+
},
|
|
28
|
+
"contributors": [
|
|
29
|
+
"Jan Curn <jan@apify.com>",
|
|
30
|
+
"Marek Trunkat <marek@apify.com>",
|
|
31
|
+
"Ondra Urban <ondra@apify.com>"
|
|
32
|
+
],
|
|
33
|
+
"license": "Apache-2.0",
|
|
34
|
+
"repository": {
|
|
35
|
+
"type": "git",
|
|
36
|
+
"url": "git+https://github.com/apify/apify-js"
|
|
37
|
+
},
|
|
38
|
+
"bugs": {
|
|
39
|
+
"url": "https://github.com/apify/apify-js/issues"
|
|
40
|
+
},
|
|
41
|
+
"homepage": "https://sdk.apify.com/",
|
|
42
|
+
"files": [
|
|
43
|
+
"dist"
|
|
44
|
+
],
|
|
45
|
+
"scripts": {
|
|
46
|
+
"build": "npm run clean && npm run compile && npm run copy",
|
|
47
|
+
"clean": "rimraf ./dist",
|
|
48
|
+
"compile": "tsc -p tsconfig.build.json && gen-esm-wrapper ./index.js ./index.mjs",
|
|
49
|
+
"copy": "ts-node -T ../../scripts/copy.ts"
|
|
50
|
+
},
|
|
51
|
+
"publishConfig": {
|
|
52
|
+
"access": "public"
|
|
53
|
+
},
|
|
54
|
+
"dependencies": {
|
|
55
|
+
"@crawlee/browser": "^3.0.0-alpha.3"
|
|
56
|
+
},
|
|
57
|
+
"peerDependencies": {
|
|
58
|
+
"playwright": "^1.11.0"
|
|
59
|
+
},
|
|
60
|
+
"peerDependenciesMeta": {
|
|
61
|
+
"playwright": {
|
|
62
|
+
"optional": true
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
}
|