@crawlee/browser 3.13.3-beta.8 → 3.13.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,15 +18,15 @@ export interface BrowserCrawlerOptions<Context extends BrowserCrawlingContext =
  /**
  * Function that is called to process each request.
  *
- * The function receives the {@apilink BrowserCrawlingContext}
+ * The function receives the {@link BrowserCrawlingContext}
  * (actual context will be enhanced with the crawler specific properties) as an argument, where:
- * - {@apilink BrowserCrawlingContext.request|`request`} is an instance of the {@apilink Request} object
+ * - {@link BrowserCrawlingContext.request|`request`} is an instance of the {@link Request} object
  * with details about the URL to open, HTTP method etc;
- * - {@apilink BrowserCrawlingContext.page|`page`} is an instance of the
+ * - {@link BrowserCrawlingContext.page|`page`} is an instance of the
  * Puppeteer [Page](https://pptr.dev/api/puppeteer.page) or
  * Playwright [Page](https://playwright.dev/docs/api/class-page);
- * - {@apilink BrowserCrawlingContext.browserController|`browserController`} is an instance of the {@apilink BrowserController};
- * - {@apilink BrowserCrawlingContext.response|`response`} is an instance of the
+ * - {@link BrowserCrawlingContext.browserController|`browserController`} is an instance of the {@link BrowserController};
+ * - {@link BrowserCrawlingContext.response|`response`} is an instance of the
  * Puppeteer [Response](https://pptr.dev/api/puppeteer.httpresponse) or
  * Playwright [Response](https://playwright.dev/docs/api/class-response),
  * which is the main resource response as returned by the respective `page.goto()` function.
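For context, a minimal `requestHandler` sketch, assuming the concrete `PlaywrightCrawler` subclass re-exported by the `crawlee` metapackage; the start URL and logged fields are illustrative:

```ts
import { PlaywrightCrawler } from 'crawlee';

const crawler = new PlaywrightCrawler({
    async requestHandler({ request, page, response }) {
        // `request` carries the URL and HTTP method, `page` is the Playwright Page,
        // and `response` is the main resource response returned by page.goto().
        const title = await page.title();
        console.log(`${request.url} (status ${response?.status()}): ${title}`);
    },
});

await crawler.run(['https://crawlee.dev']);
```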
@@ -34,27 +34,27 @@ export interface BrowserCrawlerOptions<Context extends BrowserCrawlingContext =
  * The function must return a promise, which is then awaited by the crawler.
  *
  * If the function throws an exception, the crawler will try to re-crawl the
- * request later, up to the {@apilink BrowserCrawlerOptions.maxRequestRetries|`maxRequestRetries`} times.
+ * request later, up to the {@link BrowserCrawlerOptions.maxRequestRetries|`maxRequestRetries`} times.
  * If all the retries fail, the crawler calls the function
- * provided to the {@apilink BrowserCrawlerOptions.failedRequestHandler|`failedRequestHandler`} parameter.
+ * provided to the {@link BrowserCrawlerOptions.failedRequestHandler|`failedRequestHandler`} parameter.
  * To make this work, we should **always**
  * let our function throw exceptions rather than catch them.
  * The exceptions are logged to the request using the
- * {@apilink Request.pushErrorMessage|`Request.pushErrorMessage()`} function.
+ * {@link Request.pushErrorMessage|`Request.pushErrorMessage()`} function.
  */
  requestHandler?: BrowserRequestHandler<LoadedContext<Context>>;
  /**
  * Function that is called to process each request.
  *
- * The function receives the {@apilink BrowserCrawlingContext}
+ * The function receives the {@link BrowserCrawlingContext}
  * (actual context will be enhanced with the crawler specific properties) as an argument, where:
- * - {@apilink BrowserCrawlingContext.request|`request`} is an instance of the {@apilink Request} object
+ * - {@link BrowserCrawlingContext.request|`request`} is an instance of the {@link Request} object
  * with details about the URL to open, HTTP method etc;
- * - {@apilink BrowserCrawlingContext.page|`page`} is an instance of the
+ * - {@link BrowserCrawlingContext.page|`page`} is an instance of the
  * Puppeteer [Page](https://pptr.dev/api/puppeteer.page) or
  * Playwright [Page](https://playwright.dev/docs/api/class-page);
- * - {@apilink BrowserCrawlingContext.browserController|`browserController`} is an instance of the {@apilink BrowserController};
- * - {@apilink BrowserCrawlingContext.response|`response`} is an instance of the
+ * - {@link BrowserCrawlingContext.browserController|`browserController`} is an instance of the {@link BrowserController};
+ * - {@link BrowserCrawlingContext.response|`response`} is an instance of the
  * Puppeteer [Response](https://pptr.dev/api/puppeteer.httpresponse) or
  * Playwright [Response](https://playwright.dev/docs/api/class-response),
  * which is the main resource response as returned by the respective `page.goto()` function.
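A sketch of the retry flow described above, again assuming `PlaywrightCrawler`; the selector and timeout are illustrative:

```ts
import { PlaywrightCrawler } from 'crawlee';

const crawler = new PlaywrightCrawler({
    // Retry each failing request up to 2 more times before giving up on it.
    maxRequestRetries: 2,
    async requestHandler({ page }) {
        // Let errors propagate instead of catching them: a timeout here throws,
        // the error is logged to the request and the crawler schedules a retry.
        await page.waitForSelector('h1', { timeout: 5_000 });
    },
});
```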
@@ -62,13 +62,13 @@ export interface BrowserCrawlerOptions<Context extends BrowserCrawlingContext =
  * The function must return a promise, which is then awaited by the crawler.
  *
  * If the function throws an exception, the crawler will try to re-crawl the
- * request later, up to the {@apilink BrowserCrawlerOptions.maxRequestRetries|`maxRequestRetries`} times.
+ * request later, up to the {@link BrowserCrawlerOptions.maxRequestRetries|`maxRequestRetries`} times.
  * If all the retries fail, the crawler calls the function
- * provided to the {@apilink BrowserCrawlerOptions.failedRequestHandler|`failedRequestHandler`} parameter.
+ * provided to the {@link BrowserCrawlerOptions.failedRequestHandler|`failedRequestHandler`} parameter.
  * To make this work, we should **always**
  * let our function throw exceptions rather than catch them.
  * The exceptions are logged to the request using the
- * {@apilink Request.pushErrorMessage|`Request.pushErrorMessage()`} function.
+ * {@link Request.pushErrorMessage|`Request.pushErrorMessage()`} function.
  *
  * @deprecated `handlePageFunction` has been renamed to `requestHandler` and will be removed in a future version.
  * @ignore
@@ -76,11 +76,11 @@ export interface BrowserCrawlerOptions<Context extends BrowserCrawlingContext =
  handlePageFunction?: BrowserRequestHandler<LoadedContext<Context>>;
  /**
  * User-provided function that allows modifying the request object before it gets retried by the crawler.
- * It's executed before each retry for the requests that failed less than {@apilink BrowserCrawlerOptions.maxRequestRetries|`maxRequestRetries`} times.
+ * It's executed before each retry for the requests that failed less than {@link BrowserCrawlerOptions.maxRequestRetries|`maxRequestRetries`} times.
  *
- * The function receives the {@apilink BrowserCrawlingContext}
+ * The function receives the {@link BrowserCrawlingContext}
  * (actual context will be enhanced with the crawler specific properties) as the first argument,
- * where the {@apilink BrowserCrawlingContext.request|`request`} corresponds to the request to be retried.
+ * where the {@link BrowserCrawlingContext.request|`request`} corresponds to the request to be retried.
  * Second argument is the `Error` instance that
  * represents the last error thrown during processing of the request.
  */
@@ -88,9 +88,9 @@ export interface BrowserCrawlerOptions<Context extends BrowserCrawlingContext =
  /**
  * A function to handle requests that failed more than `option.maxRequestRetries` times.
  *
- * The function receives the {@apilink BrowserCrawlingContext}
+ * The function receives the {@link BrowserCrawlingContext}
  * (actual context will be enhanced with the crawler specific properties) as the first argument,
- * where the {@apilink BrowserCrawlingContext.request|`request`} corresponds to the failed request.
+ * where the {@link BrowserCrawlingContext.request|`request`} corresponds to the failed request.
  * Second argument is the `Error` instance that
  * represents the last error thrown during processing of the request.
  */
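A combined sketch of the two hooks documented above: `errorHandler` runs before each retry, `failedRequestHandler` after the retries are exhausted. `PlaywrightCrawler`, the selector, and the `userData` key are assumptions for illustration:

```ts
import { PlaywrightCrawler } from 'crawlee';

const crawler = new PlaywrightCrawler({
    async requestHandler({ page }) {
        await page.waitForSelector('.product');
    },
    // Called before each retry; the request object may be adjusted here.
    async errorHandler({ request }, error) {
        request.userData.lastError = error.message;
    },
    // Called once the request has failed more than maxRequestRetries times.
    async failedRequestHandler({ request }, error) {
        console.error(`${request.url} failed permanently: ${error.message}`);
    },
});
```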
@@ -98,9 +98,9 @@ export interface BrowserCrawlerOptions<Context extends BrowserCrawlingContext =
  /**
  * A function to handle requests that failed more than `option.maxRequestRetries` times.
  *
- * The function receives the {@apilink BrowserCrawlingContext}
+ * The function receives the {@link BrowserCrawlingContext}
  * (actual context will be enhanced with the crawler specific properties) as the first argument,
- * where the {@apilink BrowserCrawlingContext.request|`request`} corresponds to the failed request.
+ * where the {@link BrowserCrawlingContext.request|`request`} corresponds to the failed request.
  * Second argument is the `Error` instance that
  * represents the last error thrown during processing of the request.
  *
@@ -109,7 +109,7 @@ export interface BrowserCrawlerOptions<Context extends BrowserCrawlingContext =
  */
  handleFailedRequestFunction?: BrowserErrorHandler<Context>;
  /**
- * Custom options passed to the underlying {@apilink BrowserPool} constructor.
+ * Custom options passed to the underlying {@link BrowserPool} constructor.
  * We can tweak those to fine-tune browser management.
  */
  browserPoolOptions?: Partial<BrowserPoolOptions> & Partial<BrowserPoolHooks<__BrowserControllerReturn, __LaunchContextReturn>>;
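A sketch of passing BrowserPool tuning through `browserPoolOptions`; the specific limits are illustrative, not recommended defaults:

```ts
import { PlaywrightCrawler } from 'crawlee';

const crawler = new PlaywrightCrawler({
    browserPoolOptions: {
        // Rotate browsers more aggressively than the library defaults.
        maxOpenPagesPerBrowser: 5,
        retireBrowserAfterPageCount: 50,
    },
    async requestHandler({ page }) {
        console.log(await page.title());
    },
});
```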
@@ -137,7 +137,7 @@ export interface BrowserCrawlerOptions<Context extends BrowserCrawlingContext =
  * ```
  *
  * Modyfing `pageOptions` is supported only in Playwright incognito.
- * See {@apilink PrePageCreateHook}
+ * See {@link PrePageCreateHook}
  */
  preNavigationHooks?: BrowserHook<Context>[];
  /**
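A sketch of a pre-navigation hook: it receives the crawling context plus the options later passed to `page.goto()`. The header value and timeout are illustrative:

```ts
import { PlaywrightCrawler } from 'crawlee';

const crawler = new PlaywrightCrawler({
    preNavigationHooks: [
        async ({ page }, gotoOptions) => {
            // Adjust navigation options and the page before page.goto() runs.
            if (gotoOptions) gotoOptions.timeout = 30_000;
            await page.setExtraHTTPHeaders({ 'accept-language': 'en-US' });
        },
    ],
    async requestHandler({ page }) {
        console.log(await page.title());
    },
});
```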
@@ -169,7 +169,7 @@ export interface BrowserCrawlerOptions<Context extends BrowserCrawlingContext =
  persistCookiesPerSession?: boolean;
  /**
  * Whether to run browser in headless mode. Defaults to `true`.
- * Can be also set via {@apilink Configuration}.
+ * Can be also set via {@link Configuration}.
  */
  headless?: boolean | 'new' | 'old';
  /**
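A sketch of the two ways to control headless mode; the global call assumes the default global `Configuration` instance:

```ts
import { Configuration, PlaywrightCrawler } from 'crawlee';

// Per-crawler override: run a visible (headful) browser, e.g. for local debugging.
const crawler = new PlaywrightCrawler({
    headless: false,
    async requestHandler({ page }) {
        console.log(await page.title());
    },
});

// Or set it globally; the CRAWLEE_HEADLESS environment variable works the same way.
Configuration.getGlobalConfig().set('headless', true);
```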
@@ -192,45 +192,45 @@ export interface BrowserCrawlerOptions<Context extends BrowserCrawlingContext =
  *
  * Since `BrowserCrawler` uses headless (or even headful) browsers to download web pages and extract data,
  * it is useful for crawling of websites that require to execute JavaScript.
- * If the target website doesn't need JavaScript, we should consider using the {@apilink CheerioCrawler},
+ * If the target website doesn't need JavaScript, we should consider using the {@link CheerioCrawler},
  * which downloads the pages using raw HTTP requests and is about 10x faster.
  *
- * The source URLs are represented by the {@apilink Request} objects that are fed from the {@apilink RequestList} or {@apilink RequestQueue} instances
- * provided by the {@apilink BrowserCrawlerOptions.requestList|`requestList`} or {@apilink BrowserCrawlerOptions.requestQueue|`requestQueue`}
+ * The source URLs are represented by the {@link Request} objects that are fed from the {@link RequestList} or {@link RequestQueue} instances
+ * provided by the {@link BrowserCrawlerOptions.requestList|`requestList`} or {@link BrowserCrawlerOptions.requestQueue|`requestQueue`}
  * constructor options, respectively. If neither `requestList` nor `requestQueue` options are provided,
- * the crawler will open the default request queue either when the {@apilink BrowserCrawler.addRequests|`crawler.addRequests()`} function is called,
- * or if `requests` parameter (representing the initial requests) of the {@apilink BrowserCrawler.run|`crawler.run()`} function is provided.
+ * the crawler will open the default request queue either when the {@link BrowserCrawler.addRequests|`crawler.addRequests()`} function is called,
+ * or if `requests` parameter (representing the initial requests) of the {@link BrowserCrawler.run|`crawler.run()`} function is provided.
  *
- * If both {@apilink BrowserCrawlerOptions.requestList|`requestList`} and {@apilink BrowserCrawlerOptions.requestQueue|`requestQueue`} options are used,
- * the instance first processes URLs from the {@apilink RequestList} and automatically enqueues all of them
- * to the {@apilink RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
+ * If both {@link BrowserCrawlerOptions.requestList|`requestList`} and {@link BrowserCrawlerOptions.requestQueue|`requestQueue`} options are used,
+ * the instance first processes URLs from the {@link RequestList} and automatically enqueues all of them
+ * to the {@link RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
  *
- * The crawler finishes when there are no more {@apilink Request} objects to crawl.
+ * The crawler finishes when there are no more {@link Request} objects to crawl.
  *
- * `BrowserCrawler` opens a new browser page (i.e. tab or window) for each {@apilink Request} object to crawl
- * and then calls the function provided by user as the {@apilink BrowserCrawlerOptions.requestHandler|`requestHandler`} option.
+ * `BrowserCrawler` opens a new browser page (i.e. tab or window) for each {@link Request} object to crawl
+ * and then calls the function provided by user as the {@link BrowserCrawlerOptions.requestHandler|`requestHandler`} option.
  *
  * New pages are only opened when there is enough free CPU and memory available,
- * using the functionality provided by the {@apilink AutoscaledPool} class.
- * All {@apilink AutoscaledPool} configuration options can be passed to the {@apilink BrowserCrawlerOptions.autoscaledPoolOptions|`autoscaledPoolOptions`}
+ * using the functionality provided by the {@link AutoscaledPool} class.
+ * All {@link AutoscaledPool} configuration options can be passed to the {@link BrowserCrawlerOptions.autoscaledPoolOptions|`autoscaledPoolOptions`}
  * parameter of the `BrowserCrawler` constructor.
- * For user convenience, the {@apilink AutoscaledPoolOptions.minConcurrency|`minConcurrency`} and
- * {@apilink AutoscaledPoolOptions.maxConcurrency|`maxConcurrency`} options of the
- * underlying {@apilink AutoscaledPool} constructor are available directly in the `BrowserCrawler` constructor.
+ * For user convenience, the {@link AutoscaledPoolOptions.minConcurrency|`minConcurrency`} and
+ * {@link AutoscaledPoolOptions.maxConcurrency|`maxConcurrency`} options of the
+ * underlying {@link AutoscaledPool} constructor are available directly in the `BrowserCrawler` constructor.
  *
- * > *NOTE:* the pool of browser instances is internally managed by the {@apilink BrowserPool} class.
+ * > *NOTE:* the pool of browser instances is internally managed by the {@link BrowserPool} class.
  *
  * @category Crawlers
  */
  export declare abstract class BrowserCrawler<InternalBrowserPoolOptions extends BrowserPoolOptions = BrowserPoolOptions, LaunchOptions extends Dictionary | undefined = Dictionary, Context extends BrowserCrawlingContext = BrowserCrawlingContext, GoToOptions extends Dictionary = Dictionary> extends BasicCrawler<Context> {
  readonly config: Configuration;
  /**
- * A reference to the underlying {@apilink ProxyConfiguration} class that manages the crawler's proxies.
+ * A reference to the underlying {@link ProxyConfiguration} class that manages the crawler's proxies.
  * Only available if used by the crawler.
  */
  proxyConfiguration?: ProxyConfiguration;
  /**
- * A reference to the underlying {@apilink BrowserPool} class that manages the crawler's browsers.
+ * A reference to the underlying {@link BrowserPool} class that manages the crawler's browsers.
  */
  browserPool: BrowserPool<InternalBrowserPoolOptions>;
  launchContext: BrowserLaunchContext<LaunchOptions, unknown>;
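An end-to-end sketch of the behaviour described in the class documentation above, using `PlaywrightCrawler` as the concrete subclass; the start URL and crawl limit are illustrative:

```ts
import { PlaywrightCrawler } from 'crawlee';

const crawler = new PlaywrightCrawler({
    async requestHandler({ request, page, enqueueLinks }) {
        console.log(`Title of ${request.url}: ${await page.title()}`);
        // Discovered links go into the default RequestQueue, opened on demand.
        await enqueueLinks();
    },
    maxRequestsPerCrawl: 50,
});

// Passing initial requests to run() opens the default request queue automatically.
await crawler.run(['https://crawlee.dev']);
```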
@@ -18,33 +18,33 @@ const timeout_1 = require("@apify/timeout");
  *
  * Since `BrowserCrawler` uses headless (or even headful) browsers to download web pages and extract data,
  * it is useful for crawling of websites that require to execute JavaScript.
- * If the target website doesn't need JavaScript, we should consider using the {@apilink CheerioCrawler},
+ * If the target website doesn't need JavaScript, we should consider using the {@link CheerioCrawler},
  * which downloads the pages using raw HTTP requests and is about 10x faster.
  *
- * The source URLs are represented by the {@apilink Request} objects that are fed from the {@apilink RequestList} or {@apilink RequestQueue} instances
- * provided by the {@apilink BrowserCrawlerOptions.requestList|`requestList`} or {@apilink BrowserCrawlerOptions.requestQueue|`requestQueue`}
+ * The source URLs are represented by the {@link Request} objects that are fed from the {@link RequestList} or {@link RequestQueue} instances
+ * provided by the {@link BrowserCrawlerOptions.requestList|`requestList`} or {@link BrowserCrawlerOptions.requestQueue|`requestQueue`}
  * constructor options, respectively. If neither `requestList` nor `requestQueue` options are provided,
- * the crawler will open the default request queue either when the {@apilink BrowserCrawler.addRequests|`crawler.addRequests()`} function is called,
- * or if `requests` parameter (representing the initial requests) of the {@apilink BrowserCrawler.run|`crawler.run()`} function is provided.
+ * the crawler will open the default request queue either when the {@link BrowserCrawler.addRequests|`crawler.addRequests()`} function is called,
+ * or if `requests` parameter (representing the initial requests) of the {@link BrowserCrawler.run|`crawler.run()`} function is provided.
  *
- * If both {@apilink BrowserCrawlerOptions.requestList|`requestList`} and {@apilink BrowserCrawlerOptions.requestQueue|`requestQueue`} options are used,
- * the instance first processes URLs from the {@apilink RequestList} and automatically enqueues all of them
- * to the {@apilink RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
+ * If both {@link BrowserCrawlerOptions.requestList|`requestList`} and {@link BrowserCrawlerOptions.requestQueue|`requestQueue`} options are used,
+ * the instance first processes URLs from the {@link RequestList} and automatically enqueues all of them
+ * to the {@link RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
  *
- * The crawler finishes when there are no more {@apilink Request} objects to crawl.
+ * The crawler finishes when there are no more {@link Request} objects to crawl.
  *
- * `BrowserCrawler` opens a new browser page (i.e. tab or window) for each {@apilink Request} object to crawl
- * and then calls the function provided by user as the {@apilink BrowserCrawlerOptions.requestHandler|`requestHandler`} option.
+ * `BrowserCrawler` opens a new browser page (i.e. tab or window) for each {@link Request} object to crawl
+ * and then calls the function provided by user as the {@link BrowserCrawlerOptions.requestHandler|`requestHandler`} option.
  *
  * New pages are only opened when there is enough free CPU and memory available,
- * using the functionality provided by the {@apilink AutoscaledPool} class.
- * All {@apilink AutoscaledPool} configuration options can be passed to the {@apilink BrowserCrawlerOptions.autoscaledPoolOptions|`autoscaledPoolOptions`}
+ * using the functionality provided by the {@link AutoscaledPool} class.
+ * All {@link AutoscaledPool} configuration options can be passed to the {@link BrowserCrawlerOptions.autoscaledPoolOptions|`autoscaledPoolOptions`}
  * parameter of the `BrowserCrawler` constructor.
- * For user convenience, the {@apilink AutoscaledPoolOptions.minConcurrency|`minConcurrency`} and
- * {@apilink AutoscaledPoolOptions.maxConcurrency|`maxConcurrency`} options of the
- * underlying {@apilink AutoscaledPool} constructor are available directly in the `BrowserCrawler` constructor.
+ * For user convenience, the {@link AutoscaledPoolOptions.minConcurrency|`minConcurrency`} and
+ * {@link AutoscaledPoolOptions.maxConcurrency|`maxConcurrency`} options of the
+ * underlying {@link AutoscaledPool} constructor are available directly in the `BrowserCrawler` constructor.
  *
- * > *NOTE:* the pool of browser instances is internally managed by the {@apilink BrowserPool} class.
+ * > *NOTE:* the pool of browser instances is internally managed by the {@link BrowserPool} class.
  *
  * @category Crawlers
  */
@@ -70,7 +70,7 @@ class BrowserCrawler extends basic_1.BasicCrawler {
  value: config
  });
  /**
- * A reference to the underlying {@apilink ProxyConfiguration} class that manages the crawler's proxies.
+ * A reference to the underlying {@link ProxyConfiguration} class that manages the crawler's proxies.
  * Only available if used by the crawler.
  */
  Object.defineProperty(this, "proxyConfiguration", {
@@ -80,7 +80,7 @@ class BrowserCrawler extends basic_1.BasicCrawler {
  value: void 0
  });
  /**
- * A reference to the underlying {@apilink BrowserPool} class that manages the crawler's browsers.
+ * A reference to the underlying {@link BrowserPool} class that manages the crawler's browsers.
  */
  Object.defineProperty(this, "browserPool", {
  enumerable: true,
@@ -11,7 +11,7 @@ export interface BrowserLaunchContext<TOptions, Launcher> extends BrowserPluginO
  */
  proxyUrl?: string;
  /**
- * If `true` and the `executablePath` option of {@apilink BrowserLaunchContext.launchOptions|`launchOptions`} is not set,
+ * If `true` and the `executablePath` option of {@link BrowserLaunchContext.launchOptions|`launchOptions`} is not set,
  * the launcher will launch full Google Chrome browser available on the machine
  * rather than the bundled Chromium. The path to Chrome executable
  * is taken from the `CRAWLEE_CHROME_EXECUTABLE_PATH` environment variable if provided,
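A sketch of launching the locally installed Chrome via `launchContext`; `slowMo` is just an illustrative Playwright launch option:

```ts
import { PlaywrightCrawler } from 'crawlee';

const crawler = new PlaywrightCrawler({
    launchContext: {
        // Use the machine's Google Chrome instead of the bundled Chromium;
        // the executable path is read from CRAWLEE_CHROME_EXECUTABLE_PATH when set.
        useChrome: true,
        launchOptions: {
            slowMo: 100,
        },
    },
    async requestHandler({ page }) {
        console.log(await page.title());
    },
});
```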
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@crawlee/browser",
- "version": "3.13.3-beta.8",
+ "version": "3.13.3",
  "description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
  "engines": {
  "node": ">=16.0.0"
@@ -54,10 +54,10 @@
  },
  "dependencies": {
  "@apify/timeout": "^0.3.0",
- "@crawlee/basic": "3.13.3-beta.8",
- "@crawlee/browser-pool": "3.13.3-beta.8",
- "@crawlee/types": "3.13.3-beta.8",
- "@crawlee/utils": "3.13.3-beta.8",
+ "@crawlee/basic": "3.13.3",
+ "@crawlee/browser-pool": "3.13.3",
+ "@crawlee/types": "3.13.3",
+ "@crawlee/utils": "3.13.3",
  "ow": "^0.28.1",
  "tslib": "^2.4.0",
  "type-fest": "^4.0.0"
@@ -81,5 +81,5 @@
  }
  }
  },
- "gitHead": "0bcd58dd82d533da05a2bd7a524624fa441e7a71"
+ "gitHead": "279cadbd3cd6342f36cc4d841e07b999e472420d"
  }