@crawlee/playwright 3.0.3-beta.8 → 3.0.4-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -63,7 +63,7 @@ Additionally, the package provides various helper functions to simplify running
63
63
 
64
64
  ## Quick Start
65
65
 
66
- This short tutorial will set you up to start using Crawlee in a minute or two. If you want to learn more, proceed to the [Getting Started](https://crawlee.dev/docs/guides/getting-started) tutorial that will take you step by step through creating your first scraper.
66
+ This short tutorial will set you up to start using Crawlee in a minute or two. If you want to learn more, proceed to the [Getting Started](https://crawlee.dev/docs/introduction) tutorial that will take you step by step through creating your first scraper.
67
67
 
68
68
  ### Local stand-alone usage
69
69
 
package/index.mjs CHANGED
@@ -37,6 +37,7 @@ export const REQUESTS_PERSISTENCE_KEY = mod.REQUESTS_PERSISTENCE_KEY;
37
37
  export const Request = mod.Request;
38
38
  export const RequestList = mod.RequestList;
39
39
  export const RequestQueue = mod.RequestQueue;
40
+ export const RetryRequestError = mod.RetryRequestError;
40
41
  export const Router = mod.Router;
41
42
  export const STATE_PERSISTENCE_KEY = mod.STATE_PERSISTENCE_KEY;
42
43
  export const STORAGE_CONSISTENCY_DELAY_MILLIS = mod.STORAGE_CONSISTENCY_DELAY_MILLIS;
@@ -18,14 +18,14 @@ export interface PlaywrightCrawlerOptions extends BrowserCrawlerOptions<Playwrig
18
18
  browserPlugins: [PlaywrightPlugin];
19
19
  }> {
20
20
  /**
21
- * The same options as used by {@link launchPlaywright}.
21
+ * The same options as used by {@apilink launchPlaywright}.
22
22
  */
23
23
  launchContext?: PlaywrightLaunchContext;
24
24
  /**
25
25
  * Function that is called to process each request.
26
26
  *
27
- * The function receives the {@link PlaywrightCrawlingContext} as an argument, where:
28
- * - `request` is an instance of the {@link Request} object with details about the URL to open, HTTP method etc.
27
+ * The function receives the {@apilink PlaywrightCrawlingContext} as an argument, where:
28
+ * - `request` is an instance of the {@apilink Request} object with details about the URL to open, HTTP method etc.
29
29
  * - `page` is an instance of the `Playwright`
30
30
  * [`Page`](https://playwright.dev/docs/api/class-page)
31
31
  * - `browserController` is an instance of the
@@ -43,14 +43,14 @@ export interface PlaywrightCrawlerOptions extends BrowserCrawlerOptions<Playwrig
43
43
  * To make this work, you should **always**
44
44
  * let your function throw exceptions rather than catch them.
45
45
  * The exceptions are logged to the request using the
46
- * {@link Request.pushErrorMessage} function.
46
+ * {@apilink Request.pushErrorMessage} function.
47
47
  */
48
48
  requestHandler?: PlaywrightRequestHandler;
49
49
  /**
50
50
  * Function that is called to process each request.
51
51
  *
52
- * The function receives the {@link PlaywrightCrawlingContext} as an argument, where:
53
- * - `request` is an instance of the {@link Request} object with details about the URL to open, HTTP method etc.
52
+ * The function receives the {@apilink PlaywrightCrawlingContext} as an argument, where:
53
+ * - `request` is an instance of the {@apilink Request} object with details about the URL to open, HTTP method etc.
54
54
  * - `page` is an instance of the `Playwright`
55
55
  * [`Page`](https://playwright.dev/docs/api/class-page)
56
56
  * - `browserController` is an instance of the
@@ -68,7 +68,7 @@ export interface PlaywrightCrawlerOptions extends BrowserCrawlerOptions<Playwrig
68
68
  * To make this work, you should **always**
69
69
  * let your function throw exceptions rather than catch them.
70
70
  * The exceptions are logged to the request using the
71
- * {@link Request.pushErrorMessage} function.
71
+ * {@apilink Request.pushErrorMessage} function.
72
72
  *
73
73
  * @deprecated `handlePageFunction` has been renamed to `requestHandler` and will be removed in a future version.
74
74
  * @ignore
@@ -113,27 +113,27 @@ export interface PlaywrightCrawlerOptions extends BrowserCrawlerOptions<Playwrig
113
113
  *
114
114
  * Since `Playwright` uses a headless browser to download web pages and extract data,
115
115
  * it is useful for crawling websites that require JavaScript execution.
116
- * If the target website doesn't need JavaScript, consider using {@link CheerioCrawler},
116
+ * If the target website doesn't need JavaScript, consider using {@apilink CheerioCrawler},
117
117
  * which downloads the pages using raw HTTP requests and is about 10x faster.
118
118
  *
119
- * The source URLs are represented using {@link Request} objects that are fed from
120
- * {@link RequestList} or {@link RequestQueue} instances provided by the {@link PlaywrightCrawlerOptions.requestList}
121
- * or {@link PlaywrightCrawlerOptions.requestQueue} constructor options, respectively.
119
+ * The source URLs are represented using {@apilink Request} objects that are fed from
120
+ * {@apilink RequestList} or {@apilink RequestQueue} instances provided by the {@apilink PlaywrightCrawlerOptions.requestList}
121
+ * or {@apilink PlaywrightCrawlerOptions.requestQueue} constructor options, respectively.
122
122
  *
123
- * If both {@link PlaywrightCrawlerOptions.requestList} and {@link PlaywrightCrawlerOptions.requestQueue} are used,
124
- * the instance first processes URLs from the {@link RequestList} and automatically enqueues all of them
125
- * to {@link RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
123
+ * If both {@apilink PlaywrightCrawlerOptions.requestList} and {@apilink PlaywrightCrawlerOptions.requestQueue} are used,
124
+ * the instance first processes URLs from the {@apilink RequestList} and automatically enqueues all of them
125
+ * to {@apilink RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
126
126
  *
127
- * The crawler finishes when there are no more {@link Request} objects to crawl.
127
+ * The crawler finishes when there are no more {@apilink Request} objects to crawl.
128
128
  *
129
- * `PlaywrightCrawler` opens a new Chrome page (i.e. tab) for each {@link Request} object to crawl
130
- * and then calls the function provided by user as the {@link PlaywrightCrawlerOptions.requestHandler} option.
129
+ * `PlaywrightCrawler` opens a new Chrome page (i.e. tab) for each {@apilink Request} object to crawl
130
+ * and then calls the function provided by user as the {@apilink PlaywrightCrawlerOptions.requestHandler} option.
131
131
  *
132
132
  * New pages are only opened when there is enough free CPU and memory available,
133
- * using the functionality provided by the {@link AutoscaledPool} class.
134
- * All {@link AutoscaledPool} configuration options can be passed to the {@link PlaywrightCrawlerOptions.autoscaledPoolOptions}
133
+ * using the functionality provided by the {@apilink AutoscaledPool} class.
134
+ * All {@apilink AutoscaledPool} configuration options can be passed to the {@apilink PlaywrightCrawlerOptions.autoscaledPoolOptions}
135
135
  * parameter of the `PlaywrightCrawler` constructor. For user convenience, the `minConcurrency` and `maxConcurrency`
136
- * {@link AutoscaledPoolOptions} are available directly in the `PlaywrightCrawler` constructor.
136
+ * {@apilink AutoscaledPoolOptions} are available directly in the `PlaywrightCrawler` constructor.
137
137
  *
138
138
  * Note that the pool of Playwright instances is internally managed by the [BrowserPool](https://github.com/apify/browser-pool) class.
139
139
  *
@@ -141,7 +141,6 @@ export interface PlaywrightCrawlerOptions extends BrowserCrawlerOptions<Playwrig
141
141
  *
142
142
  * ```javascript
143
143
  * const crawler = new PlaywrightCrawler({
144
- * requestList,
145
144
  * async requestHandler({ page, request }) {
146
145
  * // This function is called to extract data from a single web page
147
146
  * // 'page' is an instance of Playwright.Page with page.goto(request.url) already called
@@ -162,7 +161,10 @@ export interface PlaywrightCrawlerOptions extends BrowserCrawlerOptions<Playwrig
162
161
  * },
163
162
  * });
164
163
  *
165
- * await crawler.run();
164
+ * await crawler.run([
165
+ * 'http://www.example.com/page-1',
166
+ * 'http://www.example.com/page-2',
167
+ * ]);
166
168
  * ```
167
169
  * @category Crawlers
168
170
  */
@@ -178,6 +180,7 @@ export declare class PlaywrightCrawler extends BrowserCrawler<{
178
180
  preNavigationHooks: import("ow").ArrayPredicate<unknown> & import("ow").BasePredicate<unknown[] | undefined>;
179
181
  postNavigationHooks: import("ow").ArrayPredicate<unknown> & import("ow").BasePredicate<unknown[] | undefined>;
180
182
  launchContext: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
183
+ headless: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
181
184
  sessionPoolOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
182
185
  persistCookiesPerSession: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
183
186
  useSessionPool: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
@@ -197,6 +200,7 @@ export declare class PlaywrightCrawler extends BrowserCrawler<{
197
200
  minConcurrency: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
198
201
  maxConcurrency: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
199
202
  maxRequestsPerMinute: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
203
+ keepAlive: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
200
204
  log: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
201
205
  };
202
206
  /**
@@ -207,9 +211,9 @@ export declare class PlaywrightCrawler extends BrowserCrawler<{
207
211
  protected _navigationHandler(crawlingContext: PlaywrightCrawlingContext, gotoOptions: DirectNavigationOptions): Promise<Response | null>;
208
212
  }
209
213
  /**
210
- * Creates new {@link Router} instance that works based on request labels.
211
- * This instance can then serve as a `requestHandler` of your {@link PlaywrightCrawler}.
212
- * Defaults to the {@link PlaywrightCrawlingContext}.
214
+ * Creates new {@apilink Router} instance that works based on request labels.
215
+ * This instance can then serve as a `requestHandler` of your {@apilink PlaywrightCrawler}.
216
+ * Defaults to the {@apilink PlaywrightCrawlingContext}.
213
217
  *
214
218
  * > Serves as a shortcut for using `Router.create<PlaywrightCrawlingContext>()`.
215
219
  *
@@ -1 +1 @@
1
- {"version":3,"file":"playwright-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/playwright-crawler.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,aAAa,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAChE,OAAO,KAAK,EAAsB,oBAAoB,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AACxG,OAAO,KAAK,EAAE,qBAAqB,EAAE,sBAAsB,EAAE,qBAAqB,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAC1H,OAAO,EAAE,cAAc,EAAE,aAAa,EAAU,MAAM,kBAAkB,CAAC;AACzE,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,KAAK,EAAE,uBAAuB,EAAE,MAAM,uBAAuB,CAAC;AAErE,OAAO,KAAK,EAAE,uBAAuB,EAAE,sBAAsB,EAAE,MAAM,0BAA0B,CAAC;AAGhG,MAAM,WAAW,yBAAyB,CAAC,QAAQ,SAAS,UAAU,GAAG,UAAU,CAAE,SACjF,sBAAsB,CAAC,IAAI,EAAE,QAAQ,EAAE,oBAAoB,EAAE,QAAQ,CAAC,EAAE,sBAAsB;CAAG;AACrG,MAAM,WAAW,cAAe,SAAQ,WAAW,CAAC,yBAAyB,EAAE,qBAAqB,CAAC;CAAG;AACxG,MAAM,WAAW,wBAAyB,SAAQ,qBAAqB,CAAC,yBAAyB,CAAC;CAAG;AACrG,oBAAY,qBAAqB,GAAG,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAEhE,MAAM,WAAW,wBAAyB,SAAQ,qBAAqB,CACnE,yBAAyB,EACzB;IAAE,cAAc,EAAE,CAAC,gBAAgB,CAAC,CAAA;CAAE,CACzC;IACG;;OAEG;IACH,aAAa,CAAC,EAAE,uBAAuB,CAAC;IAExC;;;;;;;;;;;;;;;;;;;;;;;OAuBG;IACH,cAAc,CAAC,EAAE,wBAAwB,CAAC;IAE1C;;;;;;;;;;;;;;;;;;;;;;;;;;OA0BG;IACH,kBAAkB,CAAC,EAAE,wBAAwB,CAAC;IAE9C;;;;;;;;;;;;OAYG;IACH,kBAAkB,CAAC,EAAE,cAAc,EAAE,CAAC;IAEtC;;;;;;;;;;;;;;OAcG;IACH,mBAAmB,CAAC,EAAE,cAAc,EAAE,CAAC;CAC1C;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4DG;AACH,qBAAa,iBAAkB,SAAQ,cAAc,CAAC;IAAE,cAAc,EAAE,CAAC,gBAAgB,CAAC,CAAA;CAAE,EAAE,aAAa,EAAE,yBAAyB,CAAC;aAU7D,MAAM;IAT5E,iBAA0B,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;;;MAIpC;IAEF;;OAEG;gBACS,OAAO,GAAE,wBAA6B,EAAoB,MAAM,gBAAkC;cAuBrF,kBAAkB,CAAC,OAAO,EAAE,yBAAyB;cAMrD,kBAAkB,CAAC,eAAe,EAAE,yBAAyB,EAAE,WAAW,EAAE,uBAAuB;CAG/H;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,sBAAsB,CAAC,OAAO,SAAS,yBAAyB,GAAG,yBAAyB,uDAE3G"}
1
+ {"version":3,"file":"playwright-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/playwright-crawler.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,aAAa,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAChE,OAAO,KAAK,EAAsB,oBAAoB,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AACxG,OAAO,KAAK,EAAE,qBAAqB,EAAE,sBAAsB,EAAE,qBAAqB,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAC1H,OAAO,EAAE,cAAc,EAAE,aAAa,EAAU,MAAM,kBAAkB,CAAC;AACzE,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,KAAK,EAAE,uBAAuB,EAAE,MAAM,uBAAuB,CAAC;AAErE,OAAO,KAAK,EAAE,uBAAuB,EAAE,sBAAsB,EAAE,MAAM,0BAA0B,CAAC;AAGhG,MAAM,WAAW,yBAAyB,CAAC,QAAQ,SAAS,UAAU,GAAG,UAAU,CAAE,SACjF,sBAAsB,CAAC,IAAI,EAAE,QAAQ,EAAE,oBAAoB,EAAE,QAAQ,CAAC,EAAE,sBAAsB;CAAG;AACrG,MAAM,WAAW,cAAe,SAAQ,WAAW,CAAC,yBAAyB,EAAE,qBAAqB,CAAC;CAAG;AACxG,MAAM,WAAW,wBAAyB,SAAQ,qBAAqB,CAAC,yBAAyB,CAAC;CAAG;AACrG,oBAAY,qBAAqB,GAAG,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAEhE,MAAM,WAAW,wBAAyB,SAAQ,qBAAqB,CACnE,yBAAyB,EACzB;IAAE,cAAc,EAAE,CAAC,gBAAgB,CAAC,CAAA;CAAE,CACzC;IACG;;OAEG;IACH,aAAa,CAAC,EAAE,uBAAuB,CAAC;IAExC;;;;;;;;;;;;;;;;;;;;;;;OAuBG;IACH,cAAc,CAAC,EAAE,wBAAwB,CAAC;IAE1C;;;;;;;;;;;;;;;;;;;;;;;;;;OA0BG;IACH,kBAAkB,CAAC,EAAE,wBAAwB,CAAC;IAE9C;;;;;;;;;;;;OAYG;IACH,kBAAkB,CAAC,EAAE,cAAc,EAAE,CAAC;IAEtC;;;;;;;;;;;;;;OAcG;IACH,mBAAmB,CAAC,EAAE,cAAc,EAAE,CAAC;CAC1C;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8DG;AACH,qBAAa,iBAAkB,SAAQ,cAAc,CAAC;IAAE,cAAc,EAAE,CAAC,gBAAgB,CAAC,CAAA;CAAE,EAAE,aAAa,EAAE,yBAAyB,CAAC;aAU7D,MAAM;IAT5E,iBAA0B,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;MAIpC;IAEF;;OAEG;gBACS,OAAO,GAAE,wBAA6B,EAAoB,MAAM,gBAAkC;cA6BrF,kBAAkB,CAAC,OAAO,EAAE,yBAAyB;cAMrD,kBAAkB,CAAC,eAAe,EAAE,yBAAyB,EAAE,WAAW,EAAE,uBAAuB;CAG/H;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,sBAAsB,CAAC,OAAO,SAAS,yBAAyB,GAAG,yBAAyB,uDAE3G"}
@@ -14,27 +14,27 @@ const playwright_utils_1 = require("./utils/playwright-utils");
14
14
  *
15
15
  * Since `Playwright` uses a headless browser to download web pages and extract data,
16
16
  * it is useful for crawling websites that require JavaScript execution.
17
- * If the target website doesn't need JavaScript, consider using {@link CheerioCrawler},
17
+ * If the target website doesn't need JavaScript, consider using {@apilink CheerioCrawler},
18
18
  * which downloads the pages using raw HTTP requests and is about 10x faster.
19
19
  *
20
- * The source URLs are represented using {@link Request} objects that are fed from
21
- * {@link RequestList} or {@link RequestQueue} instances provided by the {@link PlaywrightCrawlerOptions.requestList}
22
- * or {@link PlaywrightCrawlerOptions.requestQueue} constructor options, respectively.
20
+ * The source URLs are represented using {@apilink Request} objects that are fed from
21
+ * {@apilink RequestList} or {@apilink RequestQueue} instances provided by the {@apilink PlaywrightCrawlerOptions.requestList}
22
+ * or {@apilink PlaywrightCrawlerOptions.requestQueue} constructor options, respectively.
23
23
  *
24
- * If both {@link PlaywrightCrawlerOptions.requestList} and {@link PlaywrightCrawlerOptions.requestQueue} are used,
25
- * the instance first processes URLs from the {@link RequestList} and automatically enqueues all of them
26
- * to {@link RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
24
+ * If both {@apilink PlaywrightCrawlerOptions.requestList} and {@apilink PlaywrightCrawlerOptions.requestQueue} are used,
25
+ * the instance first processes URLs from the {@apilink RequestList} and automatically enqueues all of them
26
+ * to {@apilink RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
27
27
  *
28
- * The crawler finishes when there are no more {@link Request} objects to crawl.
28
+ * The crawler finishes when there are no more {@apilink Request} objects to crawl.
29
29
  *
30
- * `PlaywrightCrawler` opens a new Chrome page (i.e. tab) for each {@link Request} object to crawl
31
- * and then calls the function provided by user as the {@link PlaywrightCrawlerOptions.requestHandler} option.
30
+ * `PlaywrightCrawler` opens a new Chrome page (i.e. tab) for each {@apilink Request} object to crawl
31
+ * and then calls the function provided by user as the {@apilink PlaywrightCrawlerOptions.requestHandler} option.
32
32
  *
33
33
  * New pages are only opened when there is enough free CPU and memory available,
34
- * using the functionality provided by the {@link AutoscaledPool} class.
35
- * All {@link AutoscaledPool} configuration options can be passed to the {@link PlaywrightCrawlerOptions.autoscaledPoolOptions}
34
+ * using the functionality provided by the {@apilink AutoscaledPool} class.
35
+ * All {@apilink AutoscaledPool} configuration options can be passed to the {@apilink PlaywrightCrawlerOptions.autoscaledPoolOptions}
36
36
  * parameter of the `PlaywrightCrawler` constructor. For user convenience, the `minConcurrency` and `maxConcurrency`
37
- * {@link AutoscaledPoolOptions} are available directly in the `PlaywrightCrawler` constructor.
37
+ * {@apilink AutoscaledPoolOptions} are available directly in the `PlaywrightCrawler` constructor.
38
38
  *
39
39
  * Note that the pool of Playwright instances is internally managed by the [BrowserPool](https://github.com/apify/browser-pool) class.
40
40
  *
@@ -42,7 +42,6 @@ const playwright_utils_1 = require("./utils/playwright-utils");
42
42
  *
43
43
  * ```javascript
44
44
  * const crawler = new PlaywrightCrawler({
45
- * requestList,
46
45
  * async requestHandler({ page, request }) {
47
46
  * // This function is called to extract data from a single web page
48
47
  * // 'page' is an instance of Playwright.Page with page.goto(request.url) already called
@@ -63,7 +62,10 @@ const playwright_utils_1 = require("./utils/playwright-utils");
63
62
  * },
64
63
  * });
65
64
  *
66
- * await crawler.run();
65
+ * await crawler.run([
66
+ * 'http://www.example.com/page-1',
67
+ * 'http://www.example.com/page-2',
68
+ * ]);
67
69
  * ```
68
70
  * @category Crawlers
69
71
  */
@@ -73,11 +75,15 @@ class PlaywrightCrawler extends browser_1.BrowserCrawler {
73
75
  */
74
76
  constructor(options = {}, config = browser_1.Configuration.getGlobalConfig()) {
75
77
  (0, ow_1.default)(options, 'PlaywrightCrawlerOptions', ow_1.default.object.exactShape(PlaywrightCrawler.optionsShape));
76
- const { launchContext = {}, browserPoolOptions = {}, ...browserCrawlerOptions } = options;
78
+ const { launchContext = {}, headless, browserPoolOptions = {}, ...browserCrawlerOptions } = options;
77
79
  if (launchContext.proxyUrl) {
78
80
  throw new Error('PlaywrightCrawlerOptions.launchContext.proxyUrl is not allowed in PlaywrightCrawler.'
79
81
  + 'Use PlaywrightCrawlerOptions.proxyConfiguration');
80
82
  }
83
+ if (headless != null) {
84
+ launchContext.launchOptions ?? (launchContext.launchOptions = {});
85
+ launchContext.launchOptions.headless = headless;
86
+ }
81
87
  const playwrightLauncher = new playwright_launcher_1.PlaywrightLauncher(launchContext, config);
82
88
  browserPoolOptions.browserPlugins = [
83
89
  playwrightLauncher.createBrowserPlugin(),
@@ -111,9 +117,9 @@ Object.defineProperty(PlaywrightCrawler, "optionsShape", {
111
117
  }
112
118
  });
113
119
  /**
114
- * Creates new {@link Router} instance that works based on request labels.
115
- * This instance can then serve as a `requestHandler` of your {@link PlaywrightCrawler}.
116
- * Defaults to the {@link PlaywrightCrawlingContext}.
120
+ * Creates new {@apilink Router} instance that works based on request labels.
121
+ * This instance can then serve as a `requestHandler` of your {@apilink PlaywrightCrawler}.
122
+ * Defaults to the {@apilink PlaywrightCrawlingContext}.
117
123
  *
118
124
  * > Serves as a shortcut for using `Router.create<PlaywrightCrawlingContext>()`.
119
125
  *
@@ -1 +1 @@
1
- {"version":3,"file":"playwright-crawler.js","sourceRoot":"","sources":["../../src/internals/playwright-crawler.ts"],"names":[],"mappings":";;;;AAAA,oDAAoB;AAIpB,8CAAyE;AAGzE,+DAA2D;AAE3D,+DAAgF;AAyGhF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4DG;AACH,MAAa,iBAAkB,SAAQ,wBAAgG;IAOnI;;OAEG;IACH,YAAY,UAAoC,EAAE,EAAoB,SAAS,uBAAa,CAAC,eAAe,EAAE;QAC1G,IAAA,YAAE,EAAC,OAAO,EAAE,0BAA0B,EAAE,YAAE,CAAC,MAAM,CAAC,UAAU,CAAC,iBAAiB,CAAC,YAAY,CAAC,CAAC,CAAC;QAE9F,MAAM,EACF,aAAa,GAAG,EAAE,EAClB,kBAAkB,GAAG,EAA0C,EAC/D,GAAG,qBAAqB,EAC3B,GAAG,OAAO,CAAC;QAEZ,IAAI,aAAa,CAAC,QAAQ,EAAE;YACxB,MAAM,IAAI,KAAK,CAAC,sFAAsF;kBAChG,iDAAiD,CAAC,CAAC;SAC5D;QAED,MAAM,kBAAkB,GAAG,IAAI,wCAAkB,CAAC,aAAa,EAAE,MAAM,CAAC,CAAC;QAEzE,kBAAkB,CAAC,cAAc,GAAG;YAChC,kBAAkB,CAAC,mBAAmB,EAAE;SAC3C,CAAC;QAEF,KAAK,CAAC,EAAE,GAAG,qBAAqB,EAAE,aAAa,EAAE,kBAAkB,EAAE,EAAE,MAAM,CAAC,CAAC;;;;;mBApBb;;IAqBtE,CAAC;IAEkB,KAAK,CAAC,kBAAkB,CAAC,OAAkC;QAC1E,IAAA,yCAAsB,EAAC,OAAO,CAAC,CAAC;QAChC,gDAAgD;QAChD,MAAM,KAAK,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC;IAC5C,CAAC;IAEkB,KAAK,CAAC,kBAAkB,CAAC,eAA0C,EAAE,WAAoC;QACxH,OAAO,IAAA,+BAAY,EAAC,eAAe,CAAC,IAAI,EAAE,eAAe,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC;IACpF,CAAC;;AAzCL,8CA0CC;AAzCG;;;;WAAyC;QACrC,GAAG,wBAAc,CAAC,YAAY;QAC9B,kBAAkB,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;QACtC,QAAQ,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;KAC/B;GAAC;AAuCN;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,SAAgB,sBAAsB;IAClC,OAAO,gBAAM,CAAC,MAAM,EAAW,CAAC;AACpC,CAAC;AAFD,wDAEC"}
1
+ {"version":3,"file":"playwright-crawler.js","sourceRoot":"","sources":["../../src/internals/playwright-crawler.ts"],"names":[],"mappings":";;;;AAAA,oDAAoB;AAIpB,8CAAyE;AAGzE,+DAA2D;AAE3D,+DAAgF;AAyGhF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8DG;AACH,MAAa,iBAAkB,SAAQ,wBAAgG;IAOnI;;OAEG;IACH,YAAY,UAAoC,EAAE,EAAoB,SAAS,uBAAa,CAAC,eAAe,EAAE;QAC1G,IAAA,YAAE,EAAC,OAAO,EAAE,0BAA0B,EAAE,YAAE,CAAC,MAAM,CAAC,UAAU,CAAC,iBAAiB,CAAC,YAAY,CAAC,CAAC,CAAC;QAE9F,MAAM,EACF,aAAa,GAAG,EAAE,EAClB,QAAQ,EACR,kBAAkB,GAAG,EAA0C,EAC/D,GAAG,qBAAqB,EAC3B,GAAG,OAAO,CAAC;QAEZ,IAAI,aAAa,CAAC,QAAQ,EAAE;YACxB,MAAM,IAAI,KAAK,CAAC,sFAAsF;kBAChG,iDAAiD,CAAC,CAAC;SAC5D;QAED,IAAI,QAAQ,IAAI,IAAI,EAAE;YAClB,aAAa,CAAC,aAAa,KAA3B,aAAa,CAAC,aAAa,GAAK,EAAmB,EAAC;YACpD,aAAa,CAAC,aAAa,CAAC,QAAQ,GAAG,QAAQ,CAAC;SACnD;QAED,MAAM,kBAAkB,GAAG,IAAI,wCAAkB,CAAC,aAAa,EAAE,MAAM,CAAC,CAAC;QAEzE,kBAAkB,CAAC,cAAc,GAAG;YAChC,kBAAkB,CAAC,mBAAmB,EAAE;SAC3C,CAAC;QAEF,KAAK,CAAC,EAAE,GAAG,qBAAqB,EAAE,aAAa,EAAE,kBAAkB,EAAE,EAAE,MAAM,CAAC,CAAC;;;;;mBA1Bb;;IA2BtE,CAAC;IAEkB,KAAK,CAAC,kBAAkB,CAAC,OAAkC;QAC1E,IAAA,yCAAsB,EAAC,OAAO,CAAC,CAAC;QAChC,gDAAgD;QAChD,MAAM,KAAK,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC;IAC5C,CAAC;IAEkB,KAAK,CAAC,kBAAkB,CAAC,eAA0C,EAAE,WAAoC;QACxH,OAAO,IAAA,+BAAY,EAAC,eAAe,CAAC,IAAI,EAAE,eAAe,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC;IACpF,CAAC;;AA/CL,8CAgDC;AA/CG;;;;WAAyC;QACrC,GAAG,wBAAc,CAAC,YAAY;QAC9B,kBAAkB,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;QACtC,QAAQ,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;KAC/B;GAAC;AA6CN;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,SAAgB,sBAAsB;IAClC,OAAO,gBAAM,CAAC,MAAM,EAAW,CAAC;AACpC,CAAC;AAFD,wDAEC"}
@@ -53,6 +53,7 @@ export interface PlaywrightLaunchContext extends BrowserLaunchContext<LaunchOpti
53
53
  /**
54
54
  * @experimental
55
55
  * Like `useIncognitoPages`, but for persistent contexts, so cache is used for faster loading.
56
+ * Works best with Firefox. Unstable on Chromium.
56
57
  */
57
58
  experimentalContainers?: boolean;
58
59
  /**
@@ -116,7 +117,7 @@ export declare class PlaywrightLauncher extends BrowserLauncher<PlaywrightPlugin
116
117
  * @param [launchContext]
117
118
  * Optional settings passed to `browserType.launch()`. In addition to
118
119
  * [Playwright's options](https://playwright.dev/docs/api/class-browsertype?_highlight=launch#browsertypelaunchoptions)
119
- * the object may contain our own {@link PlaywrightLaunchContext} that enable additional features.
120
+ * the object may contain our own {@apilink PlaywrightLaunchContext} that enable additional features.
120
121
  * @param [config]
121
122
  * @returns
122
123
  * Promise that resolves to Playwright's `Browser` instance.
@@ -1 +1 @@
1
- {"version":3,"file":"playwright-launcher.d.ts","sourceRoot":"","sources":["../../src/internals/playwright-launcher.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AACtE,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AACzD,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,kBAAkB,CAAC;AAC7D,OAAO,EAAE,eAAe,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAElE;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,MAAM,WAAW,uBAAwB,SAAQ,oBAAoB,CAAC,aAAa,EAAE,WAAW,CAAC;IAC7F,4GAA4G;IAC5G,aAAa,CAAC,EAAE,aAAa,CAAC;IAE9B;;;;;OAKG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB;;;;;;;;OAQG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB;;;;MAIE;IACF,iBAAiB,CAAC,EAAE,OAAO,CAAC;IAE5B;;;MAGE;IACF,sBAAsB,CAAC,EAAE,OAAO,CAAC;IAEjC;;;;OAIG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IAErB;;;OAGG;IACH,QAAQ,CAAC,EAAE,WAAW,CAAC;CAC1B;AAED;;;GAGG;AACH,qBAAa,kBAAmB,SAAQ,eAAe,CAAC,gBAAgB,CAAC;aAW/C,MAAM;IAV5B,iBAA0B,YAAY;;;;;;;;;MAGpC;IAEF;;OAEG;gBAEC,aAAa,GAAE,uBAA4B,EACzB,MAAM,gBAAkC;CAqBjE;AA0BD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AACH,wBAAsB,gBAAgB,CAAC,aAAa,CAAC,EAAE,uBAAuB,EAAE,MAAM,gBAAkC,GAAG,OAAO,CAAC,OAAO,CAAC,CAI1I"}
1
+ {"version":3,"file":"playwright-launcher.d.ts","sourceRoot":"","sources":["../../src/internals/playwright-launcher.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AACtE,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AACzD,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,kBAAkB,CAAC;AAC7D,OAAO,EAAE,eAAe,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAElE;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,MAAM,WAAW,uBAAwB,SAAQ,oBAAoB,CAAC,aAAa,EAAE,WAAW,CAAC;IAC7F,4GAA4G;IAC5G,aAAa,CAAC,EAAE,aAAa,CAAC;IAE9B;;;;;OAKG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB;;;;;;;;OAQG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB;;;;MAIE;IACF,iBAAiB,CAAC,EAAE,OAAO,CAAC;IAE5B;;;;MAIE;IACF,sBAAsB,CAAC,EAAE,OAAO,CAAC;IAEjC;;;;OAIG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IAErB;;;OAGG;IACH,QAAQ,CAAC,EAAE,WAAW,CAAC;CAC1B;AAED;;;GAGG;AACH,qBAAa,kBAAmB,SAAQ,eAAe,CAAC,gBAAgB,CAAC;aAW/C,MAAM;IAV5B,iBAA0B,YAAY;;;;;;;;;MAGpC;IAEF;;OAEG;gBAEC,aAAa,GAAE,uBAA4B,EACzB,MAAM,gBAAkC;CAqBjE;AA0BD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AACH,wBAAsB,gBAAgB,CAAC,aAAa,CAAC,EAAE,uBAAuB,EAAE,MAAM,gBAAkC,GAAG,OAAO,CAAC,OAAO,CAAC,CAI1I"}
@@ -91,7 +91,7 @@ function getDefaultExecutablePath(launchContext, config) {
91
91
  * @param [launchContext]
92
92
  * Optional settings passed to `browserType.launch()`. In addition to
93
93
  * [Playwright's options](https://playwright.dev/docs/api/class-browsertype?_highlight=launch#browsertypelaunchoptions)
94
- * the object may contain our own {@link PlaywrightLaunchContext} that enable additional features.
94
+ * the object may contain our own {@apilink PlaywrightLaunchContext} that enable additional features.
95
95
  * @param [config]
96
96
  * @returns
97
97
  * Promise that resolves to Playwright's `Browser` instance.
@@ -1 +1 @@
1
- {"version":3,"file":"playwright-launcher.js","sourceRoot":"","sources":["../../src/internals/playwright-launcher.ts"],"names":[],"mappings":";;;;AAAA,oDAAoB;AAEpB,wDAAyD;AAEzD,8CAAkE;AAyElE;;;GAGG;AACH,MAAa,kBAAmB,SAAQ,yBAAiC;IAMrE;;OAEG;IACH,YACI,gBAAyC,EAAE,EACzB,SAAS,uBAAa,CAAC,eAAe,EAAE;QAE1D,IAAA,YAAE,EAAC,aAAa,EAAE,2BAA2B,EAAE,YAAE,CAAC,MAAM,CAAC,UAAU,CAAC,kBAAkB,CAAC,YAAY,CAAC,CAAC,CAAC;QAEtG,MAAM,EACF,QAAQ,GAAG,yBAAe,CAAC,sBAAsB,CAA8B,YAAY,EAAE,+BAA+B,CAAC,CAAC,QAAQ,GACzI,GAAG,aAAa,CAAC;QAElB,MAAM,EAAE,aAAa,GAAG,EAAE,EAAE,GAAG,IAAI,EAAE,GAAG,aAAa,CAAC;QAEtD,KAAK,CAAC;YACF,GAAG,IAAI;YACP,aAAa,EAAE;gBACX,GAAG,aAAa;gBAChB,cAAc,EAAE,wBAAwB,CAAC,aAAa,EAAE,MAAM,CAAC;aAClE;YACD,QAAQ;SACX,EAAE,MAAM,CAAC,CAAC;;;;;mBAjBO;;QAmBlB,IAAI,CAAC,MAAM,GAAG,+BAAgB,CAAC;IACnC,CAAC;;AA/BL,gDAgCC;AA/BG;;;;WAAyC;QACrC,GAAG,yBAAe,CAAC,YAAY;QAC/B,QAAQ,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;KAC/B;GAAC;AA8BN;;;;GAIG;AACH,SAAS,wBAAwB,CAAC,aAAsC,EAAE,MAAqB;IAC3F,MAAM,uBAAuB,GAAG,MAAM,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;IACjE,MAAM,EAAE,aAAa,GAAG,EAAE,EAAE,GAAG,aAAa,CAAC;IAE7C,IAAI,aAAa,CAAC,cAAc,EAAE;QAC9B,OAAO,aAAa,CAAC,cAAc,CAAC;KACvC;IAED,IAAI,aAAa,CAAC,SAAS,EAAE;QACzB,OAAO,SAAS,CAAC;KACpB;IAED,IAAI,uBAAuB,EAAE;QACzB,OAAO,uBAAuB,CAAC;KAClC;IAED,OAAO,SAAS,CAAC;AACrB,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AACI,KAAK,UAAU,gBAAgB,CAAC,aAAuC,EAAE,MAAM,GAAG,uBAAa,CAAC,eAAe,EAAE;IACpH,MAAM,kBAAkB,GAAG,IAAI,kBAAkB,CAAC,aAAa,EAAE,MAAM,CAAC,CAAC;IAEzE,OAAO,kBAAkB,CAAC,MAAM,EAAE,CAAC;AACvC,CAAC;AAJD,4CAIC"}
1
+ {"version":3,"file":"playwright-launcher.js","sourceRoot":"","sources":["../../src/internals/playwright-launcher.ts"],"names":[],"mappings":";;;;AAAA,oDAAoB;AAEpB,wDAAyD;AAEzD,8CAAkE;AA0ElE;;;GAGG;AACH,MAAa,kBAAmB,SAAQ,yBAAiC;IAMrE;;OAEG;IACH,YACI,gBAAyC,EAAE,EACzB,SAAS,uBAAa,CAAC,eAAe,EAAE;QAE1D,IAAA,YAAE,EAAC,aAAa,EAAE,2BAA2B,EAAE,YAAE,CAAC,MAAM,CAAC,UAAU,CAAC,kBAAkB,CAAC,YAAY,CAAC,CAAC,CAAC;QAEtG,MAAM,EACF,QAAQ,GAAG,yBAAe,CAAC,sBAAsB,CAA8B,YAAY,EAAE,+BAA+B,CAAC,CAAC,QAAQ,GACzI,GAAG,aAAa,CAAC;QAElB,MAAM,EAAE,aAAa,GAAG,EAAE,EAAE,GAAG,IAAI,EAAE,GAAG,aAAa,CAAC;QAEtD,KAAK,CAAC;YACF,GAAG,IAAI;YACP,aAAa,EAAE;gBACX,GAAG,aAAa;gBAChB,cAAc,EAAE,wBAAwB,CAAC,aAAa,EAAE,MAAM,CAAC;aAClE;YACD,QAAQ;SACX,EAAE,MAAM,CAAC,CAAC;;;;;mBAjBO;;QAmBlB,IAAI,CAAC,MAAM,GAAG,+BAAgB,CAAC;IACnC,CAAC;;AA/BL,gDAgCC;AA/BG;;;;WAAyC;QACrC,GAAG,yBAAe,CAAC,YAAY;QAC/B,QAAQ,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;KAC/B;GAAC;AA8BN;;;;GAIG;AACH,SAAS,wBAAwB,CAAC,aAAsC,EAAE,MAAqB;IAC3F,MAAM,uBAAuB,GAAG,MAAM,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;IACjE,MAAM,EAAE,aAAa,GAAG,EAAE,EAAE,GAAG,aAAa,CAAC;IAE7C,IAAI,aAAa,CAAC,cAAc,EAAE;QAC9B,OAAO,aAAa,CAAC,cAAc,CAAC;KACvC;IAED,IAAI,aAAa,CAAC,SAAS,EAAE;QACzB,OAAO,SAAS,CAAC;KACpB;IAED,IAAI,uBAAuB,EAAE;QACzB,OAAO,uBAAuB,CAAC;KAClC;IAED,OAAO,SAAS,CAAC;AACrB,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AACI,KAAK,UAAU,gBAAgB,CAAC,aAAuC,EAAE,MAAM,GAAG,uBAAa,CAAC,eAAe,EAAE;IACpH,MAAM,kBAAkB,GAAG,IAAI,kBAAkB,CAAC,aAAa,EAAE,MAAM,CAAC,CAAC;IAEzE,OAAO,kBAAkB,CAAC,MAAM,EAAE,CAAC;AACvC,CAAC;AAJD,4CAIC"}
@@ -30,6 +30,19 @@ export interface InjectFileOptions {
30
30
  */
31
31
  surviveNavigations?: boolean;
32
32
  }
33
+ export interface BlockRequestsOptions {
34
+ /**
35
+ * The patterns of URLs to block from being loaded by the browser.
36
+ * Only `*` can be used as a wildcard. It is also automatically added to the beginning
37
+ * and end of the pattern. This limitation is enforced by the DevTools protocol.
38
+ * `.png` is the same as `*.png*`.
39
+ */
40
+ urlPatterns?: string[];
41
+ /**
42
+ * If you just want to append to the default blocked patterns, use this property.
43
+ */
44
+ extraUrlPatterns?: string[];
45
+ }
33
46
  /**
34
47
  * Injects a JavaScript file into a Playwright page.
35
48
  * Unlike Playwright's `addScriptTag` function, this function works on pages
@@ -102,7 +115,50 @@ export interface DirectNavigationOptions {
102
115
  */
103
116
  export declare function gotoExtended(page: Page, request: Request, gotoOptions?: DirectNavigationOptions): Promise<Response | null>;
104
117
  /**
105
- * Returns Cheerio handle for `page.content()`, allowing to work with the data same way as with {@link CheerioCrawler}.
118
+ * Forces the Playwright browser tab to block loading URLs that match a provided pattern.
119
+ * This is useful to speed up crawling of websites, since it reduces the amount
120
+ * of data that needs to be downloaded from the web, but it may break some websites
121
+ * or unexpectedly prevent loading of resources.
122
+ *
123
+ * By default, the function will block all URLs that include any of the following patterns:
124
+ *
125
+ * ```json
126
+ * [".css", ".jpg", ".jpeg", ".png", ".svg", ".gif", ".woff", ".pdf", ".zip"]
127
+ * ```
128
+ *
129
+ * If you want to extend this list further, use the `extraUrlPatterns` option,
130
+ * which will keep blocking the default patterns, as well as add your custom ones.
131
+ * If you would like to block only specific patterns, use the `urlPatterns` option,
132
+ * which will override the defaults and block only URLs with your custom patterns.
133
+ *
134
+ * This function does not use Playwright's request interception and therefore does not interfere
135
+ * with browser cache. It's also faster than blocking requests using interception,
136
+ * because the blocking happens directly in the browser without the round-trip to Node.js,
137
+ * but it does not provide the extra benefits of request interception.
138
+ *
139
+ * The function will never block main document loads and their respective redirects.
140
+ *
141
+ * **Example usage**
142
+ * ```javascript
143
+ * import { launchPlaywright, playwrightUtils } from 'crawlee';
144
+ *
145
+ * const browser = await launchPlaywright();
146
+ * const page = await browser.newPage();
147
+ *
148
+ * // Block all requests to URLs that include `adsbygoogle.js` and also all defaults.
149
+ * await playwrightUtils.blockRequests(page, {
150
+ * extraUrlPatterns: ['adsbygoogle.js'],
151
+ * });
152
+ *
153
+ * await page.goto('https://cnn.com');
154
+ * ```
155
+ *
156
+ * @param page Playwright [`Page`](https://playwright.dev/docs/api/class-page) object.
157
+ * @param [options]
158
+ */
159
+ export declare function blockRequests(page: Page, options?: BlockRequestsOptions): Promise<void>;
160
+ /**
161
+ * Returns a Cheerio handle for `page.content()`, allowing you to work with the data the same way as with {@apilink CheerioCrawler}.
106
162
  *
107
163
  * **Example usage:**
108
164
  * ```javascript
@@ -116,6 +172,7 @@ export declare function parseWithCheerio(page: Page): Promise<CheerioRoot>;
116
172
  export interface PlaywrightContextUtils {
117
173
  injectFile(filePath: string, options?: InjectFileOptions): Promise<unknown>;
118
174
  injectJQuery(): Promise<unknown>;
175
+ blockRequests(options?: BlockRequestsOptions): Promise<void>;
119
176
  parseWithCheerio(): Promise<CheerioRoot>;
120
177
  }
121
178
  export declare function registerUtilsToContext(context: PlaywrightCrawlingContext): void;
@@ -124,6 +181,7 @@ export declare const playwrightUtils: {
124
181
  injectFile: typeof injectFile;
125
182
  injectJQuery: typeof injectJQuery;
126
183
  gotoExtended: typeof gotoExtended;
184
+ blockRequests: typeof blockRequests;
127
185
  parseWithCheerio: typeof parseWithCheerio;
128
186
  };
129
187
  //# sourceMappingURL=playwright-utils.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"playwright-utils.d.ts","sourceRoot":"","sources":["../../../src/internals/utils/playwright-utils.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAIH,OAAO,KAAK,EAAE,IAAI,EAAE,QAAQ,EAAS,MAAM,YAAY,CAAC;AAGxD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAE7C,OAAO,KAAK,EAAE,WAAW,EAAc,MAAM,gBAAgB,CAAC;AAE9D,OAAO,KAAK,EAAE,yBAAyB,EAAE,MAAM,uBAAuB,CAAC;AAQvE,MAAM,WAAW,iBAAiB;IAC9B;;;;OAIG;IACH,kBAAkB,CAAC,EAAE,OAAO,CAAC;CAChC;AAOD;;;;;;;;;;GAUG;AACH,wBAAsB,UAAU,CAAC,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,GAAE,iBAAsB,GAAG,OAAO,CAAC,OAAO,CAAC,CAqBhH;AAED;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,IAAI,GAAG,OAAO,CAAC,OAAO,CAAC,CAGzD;AAED,MAAM,WAAW,uBAAuB;IACpC;;;;;OAKG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;;;;OAKG;IACH,SAAS,CAAC,EAAE,kBAAkB,GAAG,MAAM,GAAG,aAAa,CAAC;IAExD;;OAEG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;;;;;;;;;;GAWG;AACH,wBAAsB,YAAY,CAAC,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,GAAE,uBAA4B,GAAG,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC,CA0CpI;AAED;;;;;;;;;;GAUG;AACH,wBAAsB,gBAAgB,CAAC,IAAI,EAAE,IAAI,GAAG,OAAO,CAAC,WAAW,CAAC,CAIvE;AAED,MAAM,WAAW,sBAAsB;IACnC,UAAU,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IAC5E,YAAY,IAAI,OAAO,CAAC,OAAO,CAAC,CAAC;IACjC,gBAAgB,IAAI,OAAO,CAAC,WAAW,CAAC,CAAC;CAC5C;AAED,wBAAgB,sBAAsB,CAAC,OAAO,EAAE,yBAAyB,GAAG,IAAI,CAI/E;AAED,gBAAgB;AAChB,eAAO,MAAM,eAAe;;;;;CAK3B,CAAC"}
1
+ {"version":3,"file":"playwright-utils.d.ts","sourceRoot":"","sources":["../../../src/internals/utils/playwright-utils.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAIH,OAAO,KAAK,EAAE,IAAI,EAAE,QAAQ,EAAS,MAAM,YAAY,CAAC;AAGxD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAE7C,OAAO,KAAK,EAAE,WAAW,EAAc,MAAM,gBAAgB,CAAC;AAE9D,OAAO,KAAK,EAAE,yBAAyB,EAAE,MAAM,uBAAuB,CAAC;AASvE,MAAM,WAAW,iBAAiB;IAC9B;;;;OAIG;IACH,kBAAkB,CAAC,EAAE,OAAO,CAAC;CAChC;AAED,MAAM,WAAW,oBAAoB;IACjC;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IAEvB;;OAEG;IACH,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;CAC/B;AAOD;;;;;;;;;;GAUG;AACH,wBAAsB,UAAU,CAAC,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,GAAE,iBAAsB,GAAG,OAAO,CAAC,OAAO,CAAC,CAqBhH;AAED;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,IAAI,GAAG,OAAO,CAAC,OAAO,CAAC,CAGzD;AAED,MAAM,WAAW,uBAAuB;IACpC;;;;;OAKG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;;;;OAKG;IACH,SAAS,CAAC,EAAE,kBAAkB,GAAG,MAAM,GAAG,aAAa,CAAC;IAExD;;OAEG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;;;;;;;;;;GAWG;AACH,wBAAsB,YAAY,CAAC,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,GAAE,uBAA4B,GAAG,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC,CA0CpI;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAyCG;AACH,wBAAsB,aAAa,CAAC,IAAI,EAAE,IAAI,EAAE,OAAO,GAAE,oBAAyB,GAAG,OAAO,CAAC,IAAI,CAAC,CAkBjG;AAED;;;;;;;;;;GAUG;AACH,wBAAsB,gBAAgB,CAAC,IAAI,EAAE,IAAI,GAAG,OAAO,CAAC,WAAW,CAAC,CAIvE;AAED,MAAM,WAAW,sBAAsB;IACnC,UAAU,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IAC5E,YAAY,IAAI,OAAO,CAAC,OAAO,CAAC,CAAC;IACjC,aAAa,CAAC,OAAO,CAAC,EAAE,oBAAoB,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC7D,gBAAgB,IAAI,OAAO,CAAC,WAAW,CAAC,CAAC;CAC5C;AAED,wBAAgB,sBAAsB,CAAC,OAAO,EAAE,yBAAyB,GAAG,IAAI,CAK/E;AAED,gBAAgB;AAChB,eAAO,MAAM,eAAe;;;;;;CAM3B,CAAC"}
@@ -19,7 +19,7 @@
19
19
  * @module playwrightUtils
20
20
  */
21
21
  Object.defineProperty(exports, "__esModule", { value: true });
22
- exports.playwrightUtils = exports.registerUtilsToContext = exports.parseWithCheerio = exports.gotoExtended = exports.injectJQuery = exports.injectFile = void 0;
22
+ exports.playwrightUtils = exports.registerUtilsToContext = exports.parseWithCheerio = exports.blockRequests = exports.gotoExtended = exports.injectJQuery = exports.injectFile = void 0;
23
23
  const tslib_1 = require("tslib");
24
24
  const promises_1 = require("node:fs/promises");
25
25
  const ow_1 = tslib_1.__importDefault(require("ow"));
@@ -30,6 +30,7 @@ const cheerio = tslib_1.__importStar(require("cheerio"));
30
30
  const log = log_1.default.child({ prefix: 'Playwright Utils' });
31
31
  const jqueryPath = require.resolve('jquery');
32
32
  const MAX_INJECT_FILE_CACHE_SIZE = 10;
33
+ const DEFAULT_BLOCK_REQUEST_URL_PATTERNS = ['.css', '.jpg', '.jpeg', '.png', '.svg', '.gif', '.woff', '.pdf', '.zip'];
33
34
  /**
34
35
  * Cache contents of previously injected files to limit file system access.
35
36
  */
@@ -149,7 +150,62 @@ async function gotoExtended(page, request, gotoOptions = {}) {
149
150
  }
150
151
  exports.gotoExtended = gotoExtended;
151
152
  /**
152
- * Returns Cheerio handle for `page.content()`, allowing to work with the data same way as with {@link CheerioCrawler}.
153
+ * Forces the Playwright browser tab to block loading URLs that match a provided pattern.
154
+ * This is useful to speed up crawling of websites, since it reduces the amount
155
+ * of data that needs to be downloaded from the web, but it may break some websites
156
+ * or unexpectedly prevent loading of resources.
157
+ *
158
+ * By default, the function will block all URLs that include any of the following patterns:
159
+ *
160
+ * ```json
161
+ * [".css", ".jpg", ".jpeg", ".png", ".svg", ".gif", ".woff", ".pdf", ".zip"]
162
+ * ```
163
+ *
164
+ * If you want to extend this list further, use the `extraUrlPatterns` option,
165
+ * which will keep blocking the default patterns, as well as add your custom ones.
166
+ * If you would like to block only specific patterns, use the `urlPatterns` option,
167
+ * which will override the defaults and block only URLs with your custom patterns.
168
+ *
169
+ * This function does not use Playwright's request interception and therefore does not interfere
170
+ * with browser cache. It's also faster than blocking requests using interception,
171
+ * because the blocking happens directly in the browser without the round-trip to Node.js,
172
+ * but it does not provide the extra benefits of request interception.
173
+ *
174
+ * The function will never block main document loads and their respective redirects.
175
+ *
176
+ * **Example usage**
177
+ * ```javascript
178
+ * import { launchPlaywright, playwrightUtils } from 'crawlee';
179
+ *
180
+ * const browser = await launchPlaywright();
181
+ * const page = await browser.newPage();
182
+ *
183
+ * // Block all requests to URLs that include `adsbygoogle.js` and also all defaults.
184
+ * await playwrightUtils.blockRequests(page, {
185
+ * extraUrlPatterns: ['adsbygoogle.js'],
186
+ * });
187
+ *
188
+ * await page.goto('https://cnn.com');
189
+ * ```
190
+ *
191
+ * @param page Playwright [`Page`](https://playwright.dev/docs/api/class-page) object.
192
+ * @param [options]
193
+ */
194
+ async function blockRequests(page, options = {}) {
195
+ (0, ow_1.default)(page, ow_1.default.object.validate(core_1.validators.browserPage));
196
+ (0, ow_1.default)(options, ow_1.default.object.exactShape({
197
+ urlPatterns: ow_1.default.optional.array.ofType(ow_1.default.string),
198
+ extraUrlPatterns: ow_1.default.optional.array.ofType(ow_1.default.string),
199
+ }));
200
+ const { urlPatterns = DEFAULT_BLOCK_REQUEST_URL_PATTERNS, extraUrlPatterns = [], } = options;
201
+ const patternsToBlock = [...urlPatterns, ...extraUrlPatterns];
202
+ const client = await page.context().newCDPSession(page);
203
+ await client.send('Network.enable');
204
+ await client.send('Network.setBlockedURLs', { urls: patternsToBlock });
205
+ }
206
+ exports.blockRequests = blockRequests;
207
+ /**
208
+ * Returns a Cheerio handle for `page.content()`, allowing you to work with the data the same way as with {@apilink CheerioCrawler}.
153
209
  *
154
210
  * **Example usage:**
155
211
  * ```javascript
@@ -168,6 +224,7 @@ exports.parseWithCheerio = parseWithCheerio;
168
224
  function registerUtilsToContext(context) {
169
225
  context.injectFile = (filePath, options) => injectFile(context.page, filePath, options);
170
226
  context.injectJQuery = () => injectJQuery(context.page);
227
+ context.blockRequests = (options) => blockRequests(context.page, options);
171
228
  context.parseWithCheerio = () => parseWithCheerio(context.page);
172
229
  }
173
230
  exports.registerUtilsToContext = registerUtilsToContext;
@@ -176,6 +233,7 @@ exports.playwrightUtils = {
176
233
  injectFile,
177
234
  injectJQuery,
178
235
  gotoExtended,
236
+ blockRequests,
179
237
  parseWithCheerio,
180
238
  };
181
239
  //# sourceMappingURL=playwright-utils.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"playwright-utils.js","sourceRoot":"","sources":["../../../src/internals/utils/playwright-utils.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;GAkBG;;;;AAEH,+CAA4C;AAC5C,oDAAoB;AAEpB,0DAAiD;AACjD,6DAA8B;AAE9B,wCAA2C;AAE3C,yDAAmC;AAGnC,MAAM,GAAG,GAAG,aAAI,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,kBAAkB,EAAE,CAAC,CAAC;AAEvD,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;AAE7C,MAAM,0BAA0B,GAAG,EAAE,CAAC;AAWtC;;GAEG;AACH,MAAM,kBAAkB,GAAG,IAAI,yBAAQ,CAAC,EAAE,SAAS,EAAE,0BAA0B,EAAE,CAAC,CAAC;AAEnF;;;;;;;;;;GAUG;AACI,KAAK,UAAU,UAAU,CAAC,IAAU,EAAE,QAAgB,EAAE,UAA6B,EAAE;IAC1F,IAAA,YAAE,EAAC,IAAI,EAAE,YAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,iBAAU,CAAC,WAAW,CAAC,CAAC,CAAC;IACrD,IAAA,YAAE,EAAC,QAAQ,EAAE,YAAE,CAAC,MAAM,CAAC,CAAC;IACxB,IAAA,YAAE,EAAC,OAAO,EAAE,YAAE,CAAC,MAAM,CAAC,UAAU,CAAC;QAC7B,kBAAkB,EAAE,YAAE,CAAC,QAAQ,CAAC,OAAO;KAC1C,CAAC,CAAC,CAAC;IAEJ,IAAI,QAAQ,GAAG,kBAAkB,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IAChD,IAAI,CAAC,QAAQ,EAAE;QACX,QAAQ,GAAG,MAAM,IAAA,mBAAQ,EAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QAC5C,kBAAkB,CAAC,GAAG,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;KAC9C;IACD,MAAM,KAAK,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAEtC,IAAI,OAAO,CAAC,kBAAkB,EAAE;QAC5B,IAAI,CAAC,EAAE,CAAC,gBAAgB,EACpB,GAAG,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;aACxB,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,GAAG,CAAC,OAAO,CAAC,gDAAgD,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC;KACxG;IAED,OAAO,KAAK,CAAC;AACjB,CAAC;AArBD,gCAqBC;AAED;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,SAAgB,YAAY,CAAC,IAAU;IACnC,IAAA,YAAE,EAAC,IAAI,EAAE,YAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,iBAAU,CAAC,WAAW,CAAC,CAAC,CAAC;IACrD,OAAO,UAAU,CAAC,IAAI,EAAE,UAAU,EAAE,EAAE,kBAAkB,EAAE,IAAI,EAAE,CAAC,CAAC;AACtE,CAAC;AAHD,oCAGC;AAyBD;;;;;;;;;;;GAWG;AACI,KAAK,UAAU,YAAY,CAAC,IAAU,EAAE,OAAgB,EAAE,cAAuC,EAAE;IACtG,IAAA,YAAE,EAAC,IAAI,EAAE,YAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,iBAAU,CAAC,WAAW,CAAC,CAAC,CAAC;IACrD,IAAA,YAAE,EAAC,OAAO,EAAE,YAAE,CAAC,MAAM,CAAC,YAAY,CAAC;QAC/B,GAAG,EAAE,YAAE,CAAC,MAAM,CAAC,GAAG;QAClB,MAAM,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;QAC1B,OAAO,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;QA
C3B,OAAO,EAAE,YAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,YAAE,CAAC,MAAM,EAAE,YAAE,CAAC,MAAM,CAAC;KACjD,CAAC,CAAC,CAAC;IACJ,IAAA,YAAE,EAAC,WAAW,EAAE,YAAE,CAAC,MAAM,CAAC,CAAC;IAE3B,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,GAAG,OAAO,CAAC;IAClD,MAAM,OAAO,GAAG,CAAC,CAAU,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC;IAElE,IAAI,MAAM,KAAK,KAAK,IAAI,OAAO,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE;QAClD,sDAAsD;QACtD,GAAG,CAAC,UAAU,CAAC,+GAA+G;cACxH,4DAA4D,CAAC,CAAC;QACpE,IAAI,SAAS,GAAG,KAAK,CAAC;QACtB,MAAM,uBAAuB,GAAG,KAAK,EAAE,KAAY,EAAE,EAAE;YACnD,IAAI;gBACA,oGAAoG;gBACpG,uDAAuD;gBACvD,IAAI,SAAS,EAAE;oBACX,OAAO,MAAM,KAAK,CAAC,QAAQ,EAAE,CAAC;iBACjC;gBAED,SAAS,GAAG,IAAI,CAAC;gBACjB,MAAM,SAAS,GAAe,EAAE,CAAC;gBAEjC,IAAI,MAAM,KAAK,KAAK;oBAAE,SAAS,CAAC,MAAM,GAAG,MAAM,CAAC;gBAChD,IAAI,OAAO;oBAAE,SAAS,CAAC,QAAQ,GAAG,OAAO,CAAC;gBAC1C,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC;oBAAE,SAAS,CAAC,OAAO,GAAG,OAAO,CAAC;gBACnD,MAAM,KAAK,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;aACnC;YAAC,OAAO,KAAK,EAAE;gBACZ,GAAG,CAAC,KAAK,CAAC,kCAAkC,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC;aAC5D;QACL,CAAC,CAAC;QAEF,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,uBAAuB,CAAC,CAAC;KACrD;IAED,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,WAAW,CAAC,CAAC;AACvC,CAAC;AA1CD,oCA0CC;AAED;;;;;;;;;;GAUG;AACI,KAAK,UAAU,gBAAgB,CAAC,IAAU;IAC7C,IAAA,YAAE,EAAC,IAAI,EAAE,YAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,iBAAU,CAAC,WAAW,CAAC,CAAC,CAAC;IACrD,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;IACzC,OAAO,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;AACrC,CAAC;AAJD,4CAIC;AAQD,SAAgB,sBAAsB,CAAC,OAAkC;IACrE,OAAO,CAAC,UAAU,GAAG,CAAC,QAAgB,EAAE,OAA2B,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;IACpH,OAAO,CAAC,YAAY,GAAG,GAAG,EAAE,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IACxD,OAAO,CAAC,gBAAgB,GAAG,GAAG,EAAE,CAAC,gBAAgB,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;AACpE,CAAC;AAJD,wDAIC;AAED,gBAAgB;AACH,QAAA,eAAe,GAAG;IAC3B,UAAU;IACV,YAAY;IACZ,YAAY;IACZ,gBAAgB;CACnB,CAAC"}
1
+ {"version":3,"file":"playwright-utils.js","sourceRoot":"","sources":["../../../src/internals/utils/playwright-utils.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;GAkBG;;;;AAEH,+CAA4C;AAC5C,oDAAoB;AAEpB,0DAAiD;AACjD,6DAA8B;AAE9B,wCAA2C;AAE3C,yDAAmC;AAGnC,MAAM,GAAG,GAAG,aAAI,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,kBAAkB,EAAE,CAAC,CAAC;AAEvD,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;AAE7C,MAAM,0BAA0B,GAAG,EAAE,CAAC;AACtC,MAAM,kCAAkC,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;AA0BtH;;GAEG;AACH,MAAM,kBAAkB,GAAG,IAAI,yBAAQ,CAAC,EAAE,SAAS,EAAE,0BAA0B,EAAE,CAAC,CAAC;AAEnF;;;;;;;;;;GAUG;AACI,KAAK,UAAU,UAAU,CAAC,IAAU,EAAE,QAAgB,EAAE,UAA6B,EAAE;IAC1F,IAAA,YAAE,EAAC,IAAI,EAAE,YAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,iBAAU,CAAC,WAAW,CAAC,CAAC,CAAC;IACrD,IAAA,YAAE,EAAC,QAAQ,EAAE,YAAE,CAAC,MAAM,CAAC,CAAC;IACxB,IAAA,YAAE,EAAC,OAAO,EAAE,YAAE,CAAC,MAAM,CAAC,UAAU,CAAC;QAC7B,kBAAkB,EAAE,YAAE,CAAC,QAAQ,CAAC,OAAO;KAC1C,CAAC,CAAC,CAAC;IAEJ,IAAI,QAAQ,GAAG,kBAAkB,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IAChD,IAAI,CAAC,QAAQ,EAAE;QACX,QAAQ,GAAG,MAAM,IAAA,mBAAQ,EAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QAC5C,kBAAkB,CAAC,GAAG,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;KAC9C;IACD,MAAM,KAAK,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAEtC,IAAI,OAAO,CAAC,kBAAkB,EAAE;QAC5B,IAAI,CAAC,EAAE,CAAC,gBAAgB,EACpB,GAAG,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;aACxB,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,GAAG,CAAC,OAAO,CAAC,gDAAgD,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC;KACxG;IAED,OAAO,KAAK,CAAC;AACjB,CAAC;AArBD,gCAqBC;AAED;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,SAAgB,YAAY,CAAC,IAAU;IACnC,IAAA,YAAE,EAAC,IAAI,EAAE,YAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,iBAAU,CAAC,WAAW,CAAC,CAAC,CAAC;IACrD,OAAO,UAAU,CAAC,IAAI,EAAE,UAAU,EAAE,EAAE,kBAAkB,EAAE,IAAI,EAAE,CAAC,CAAC;AACtE,CAAC;AAHD,oCAGC;AAyBD;;;;;;;;;;;GAWG;AACI,KAAK,UAAU,YAAY,CAAC,IAAU,EAAE,OAAgB,EAAE,cAAuC,EAAE;IACtG,IAAA,YAAE,EAAC,IAAI,EAAE,YAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,iBAAU,CAAC,WAAW,CAAC,CAAC,CAAC;IACrD,IAAA,YAAE,EAAC,OAAO,EAAE,YAAE,CAAC,MAAM,CAAC,YAAY,CAAC;Q
AC/B,GAAG,EAAE,YAAE,CAAC,MAAM,CAAC,GAAG;QAClB,MAAM,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;QAC1B,OAAO,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;QAC3B,OAAO,EAAE,YAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,YAAE,CAAC,MAAM,EAAE,YAAE,CAAC,MAAM,CAAC;KACjD,CAAC,CAAC,CAAC;IACJ,IAAA,YAAE,EAAC,WAAW,EAAE,YAAE,CAAC,MAAM,CAAC,CAAC;IAE3B,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,GAAG,OAAO,CAAC;IAClD,MAAM,OAAO,GAAG,CAAC,CAAU,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC;IAElE,IAAI,MAAM,KAAK,KAAK,IAAI,OAAO,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE;QAClD,sDAAsD;QACtD,GAAG,CAAC,UAAU,CAAC,+GAA+G;cACxH,4DAA4D,CAAC,CAAC;QACpE,IAAI,SAAS,GAAG,KAAK,CAAC;QACtB,MAAM,uBAAuB,GAAG,KAAK,EAAE,KAAY,EAAE,EAAE;YACnD,IAAI;gBACA,oGAAoG;gBACpG,uDAAuD;gBACvD,IAAI,SAAS,EAAE;oBACX,OAAO,MAAM,KAAK,CAAC,QAAQ,EAAE,CAAC;iBACjC;gBAED,SAAS,GAAG,IAAI,CAAC;gBACjB,MAAM,SAAS,GAAe,EAAE,CAAC;gBAEjC,IAAI,MAAM,KAAK,KAAK;oBAAE,SAAS,CAAC,MAAM,GAAG,MAAM,CAAC;gBAChD,IAAI,OAAO;oBAAE,SAAS,CAAC,QAAQ,GAAG,OAAO,CAAC;gBAC1C,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC;oBAAE,SAAS,CAAC,OAAO,GAAG,OAAO,CAAC;gBACnD,MAAM,KAAK,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;aACnC;YAAC,OAAO,KAAK,EAAE;gBACZ,GAAG,CAAC,KAAK,CAAC,kCAAkC,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC;aAC5D;QACL,CAAC,CAAC;QAEF,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,uBAAuB,CAAC,CAAC;KACrD;IAED,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,WAAW,CAAC,CAAC;AACvC,CAAC;AA1CD,oCA0CC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAyCG;AACI,KAAK,UAAU,aAAa,CAAC,IAAU,EAAE,UAAgC,EAAE;IAC9E,IAAA,YAAE,EAAC,IAAI,EAAE,YAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,iBAAU,CAAC,WAAW,CAAC,CAAC,CAAC;IACrD,IAAA,YAAE,EAAC,OAAO,EAAE,YAAE,CAAC,MAAM,CAAC,UAAU,CAAC;QAC7B,WAAW,EAAE,YAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,YAAE,CAAC,MAAM,CAAC;QAChD,gBAAgB,EAAE,YAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,YAAE,CAAC,MAAM,CAAC;KACxD,CAAC,CAAC,CAAC;IAEJ,MAAM,EACF,WAAW,GAAG,kCAAkC,EAChD,gBAAgB,GAAG,EAAE,GACxB,GAAG,OAAO,CAAC;IAEZ,MAAM,eAAe,GAAG,CAAC,GAAG,WAAW,EAAE,GAAG,gBAAgB,CAAC,CAAC;IAE9D,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;IAExD,MAAM,MAAM,C
AAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;IACpC,MAAM,MAAM,CAAC,IAAI,CAAC,wBAAwB,EAAE,EAAE,IAAI,EAAE,eAAe,EAAE,CAAC,CAAC;AAC3E,CAAC;AAlBD,sCAkBC;AAED;;;;;;;;;;GAUG;AACI,KAAK,UAAU,gBAAgB,CAAC,IAAU;IAC7C,IAAA,YAAE,EAAC,IAAI,EAAE,YAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,iBAAU,CAAC,WAAW,CAAC,CAAC,CAAC;IACrD,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;IACzC,OAAO,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;AACrC,CAAC;AAJD,4CAIC;AASD,SAAgB,sBAAsB,CAAC,OAAkC;IACrE,OAAO,CAAC,UAAU,GAAG,CAAC,QAAgB,EAAE,OAA2B,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;IACpH,OAAO,CAAC,YAAY,GAAG,GAAG,EAAE,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IACxD,OAAO,CAAC,aAAa,GAAG,CAAC,OAA8B,EAAE,EAAE,CAAC,aAAa,CAAC,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IACjG,OAAO,CAAC,gBAAgB,GAAG,GAAG,EAAE,CAAC,gBAAgB,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;AACpE,CAAC;AALD,wDAKC;AAED,gBAAgB;AACH,QAAA,eAAe,GAAG;IAC3B,UAAU;IACV,YAAY;IACZ,YAAY;IACZ,aAAa;IACb,gBAAgB;CACnB,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@crawlee/playwright",
3
- "version": "3.0.3-beta.8",
3
+ "version": "3.0.4-beta.0",
4
4
  "description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
5
5
  "engines": {
6
6
  "node": ">=16.0.0"
@@ -55,10 +55,10 @@
55
55
  "dependencies": {
56
56
  "@apify/log": "^2.0.0",
57
57
  "@apify/datastructures": "^2.0.0",
58
- "@crawlee/browser": "^3.0.3-beta.8",
59
- "@crawlee/browser-pool": "^3.0.3-beta.8",
60
- "@crawlee/core": "^3.0.3-beta.8",
61
- "@crawlee/utils": "^3.0.3-beta.8",
58
+ "@crawlee/browser": "^3.0.4-beta.0",
59
+ "@crawlee/browser-pool": "^3.0.4-beta.0",
60
+ "@crawlee/core": "^3.0.4-beta.0",
61
+ "@crawlee/utils": "^3.0.4-beta.0",
62
62
  "cheerio": "1.0.0-rc.12",
63
63
  "jquery": "^3.6.0",
64
64
  "ow": "^0.28.1"