@crawlee/puppeteer 3.13.3-beta.9 → 3.13.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,13 +12,13 @@ export interface EnqueueLinksByClickingElementsOptions {
12
12
  */
13
13
  requestQueue: RequestProvider;
14
14
  /**
15
- * A CSS selector matching elements to be clicked on. Unlike in {@apilink enqueueLinks}, there is no default
15
+ * A CSS selector matching elements to be clicked on. Unlike in {@link enqueueLinks}, there is no default
16
16
  * value. This is to prevent suboptimal use of this function by using it too broadly.
17
17
  */
18
18
  selector: string;
19
- /** Sets {@apilink Request.userData} for newly enqueued requests. */
19
+ /** Sets {@link Request.userData} for newly enqueued requests. */
20
20
  userData?: Dictionary;
21
- /** Sets {@apilink Request.label} for newly enqueued requests. */
21
+ /** Sets {@link Request.label} for newly enqueued requests. */
22
22
  label?: string;
23
23
  /**
24
24
  * Click options for use in Puppeteer's click handler.
@@ -29,7 +29,7 @@ export interface EnqueueLinksByClickingElementsOptions {
29
29
  * containing glob pattern strings matching the URLs to be enqueued.
30
30
  *
31
31
  * The plain objects must include at least the `glob` property, which holds the glob pattern string.
32
- * All remaining keys will be used as request options for the corresponding enqueued {@apilink Request} objects.
32
+ * All remaining keys will be used as request options for the corresponding enqueued {@link Request} objects.
33
33
  *
34
34
  * The matching is always case-insensitive.
35
35
  * If you need case-sensitive matching, use `regexps` property directly.
@@ -44,7 +44,7 @@ export interface EnqueueLinksByClickingElementsOptions {
44
44
  * containing regular expressions matching the URLs to be enqueued.
45
45
  *
46
46
  * The plain objects must include at least the `regexp` property, which holds the regular expression.
47
- * All remaining keys will be used as request options for the corresponding enqueued {@apilink Request} objects.
47
+ * All remaining keys will be used as request options for the corresponding enqueued {@link Request} objects.
48
48
  *
49
49
  * If `regexps` is an empty array or `undefined`, then the function
50
50
  * enqueues all the intercepted navigation requests produced by the page
@@ -55,11 +55,11 @@ export interface EnqueueLinksByClickingElementsOptions {
55
55
  * *NOTE:* In future versions of SDK the options will be removed.
56
56
  * Please use `globs` or `regexps` instead.
57
57
  *
58
- * An array of {@apilink PseudoUrl} strings or plain objects
59
- * containing {@apilink PseudoUrl} strings matching the URLs to be enqueued.
58
+ * An array of {@link PseudoUrl} strings or plain objects
59
+ * containing {@link PseudoUrl} strings matching the URLs to be enqueued.
60
60
  *
61
61
  * The plain objects must include at least the `purl` property, which holds the pseudo-URL pattern string.
62
- * All remaining keys will be used as request options for the corresponding enqueued {@apilink Request} objects.
62
+ * All remaining keys will be used as request options for the corresponding enqueued {@link Request} objects.
63
63
  *
64
64
  * With a pseudo-URL string, the matching is always case-insensitive.
65
65
  * If you need case-sensitive matching, use `regexps` property directly.
@@ -72,7 +72,7 @@ export interface EnqueueLinksByClickingElementsOptions {
72
72
  */
73
73
  pseudoUrls?: PseudoUrlInput[];
74
74
  /**
75
- * Just before a new {@apilink Request} is constructed and enqueued to the {@apilink RequestQueue}, this function can be used
75
+ * Just before a new {@link Request} is constructed and enqueued to the {@link RequestQueue}, this function can be used
76
76
  * to remove it or modify its contents such as `userData`, `payload` or, most importantly `uniqueKey`. This is useful
77
77
  * when you need to enqueue multiple `Requests` to the queue that share the same URL, but differ in methods or payloads,
78
78
  * or to dynamically update or create `userData`.
@@ -120,7 +120,7 @@ export interface EnqueueLinksByClickingElementsOptions {
120
120
  * If set to `true`:
121
121
  * - while adding the request to the queue: the request will be added to the foremost position in the queue.
122
122
  * - while reclaiming the request: the request will be placed to the beginning of the queue, so that it's returned
123
- * in the next call to {@apilink RequestQueue.fetchNextRequest}.
123
+ * in the next call to {@link RequestQueue.fetchNextRequest}.
124
124
  * By default, it's put to the end of the queue.
125
125
  * @default false
126
126
  */
@@ -136,12 +136,12 @@ export interface EnqueueLinksByClickingElementsOptions {
136
136
  * clicks all those elements using a mouse move and a left mouse button click and intercepts
137
137
  * all the navigation requests that are subsequently produced by the page. The intercepted
138
138
  * requests, including their methods, headers and payloads are then enqueued to a provided
139
- * {@apilink RequestQueue}. This is useful to crawl JavaScript heavy pages where links are not available
139
+ * {@link RequestQueue}. This is useful to crawl JavaScript heavy pages where links are not available
140
140
  * in `href` elements, but rather navigations are triggered in click handlers.
141
- * If you're looking to find URLs in `href` attributes of the page, see {@apilink enqueueLinks}.
141
+ * If you're looking to find URLs in `href` attributes of the page, see {@link enqueueLinks}.
142
142
  *
143
- * Optionally, the function allows you to filter the target links' URLs using an array of {@apilink PseudoUrl} objects
144
- * and override settings of the enqueued {@apilink Request} objects.
143
+ * Optionally, the function allows you to filter the target links' URLs using an array of {@link PseudoUrl} objects
144
+ * and override settings of the enqueued {@link Request} objects.
145
145
  *
146
146
  * **IMPORTANT**: To be able to do this, this function uses various mutations on the page,
147
147
  * such as changing the Z-index of elements being clicked and their visibility. Therefore,
@@ -170,7 +170,7 @@ export interface EnqueueLinksByClickingElementsOptions {
170
170
  * });
171
171
  * ```
172
172
  *
173
- * @returns Promise that resolves to {@apilink BatchAddRequestsResult} object.
173
+ * @returns Promise that resolves to {@link BatchAddRequestsResult} object.
174
174
  */
175
175
  export declare function enqueueLinksByClickingElements(options: EnqueueLinksByClickingElementsOptions): Promise<BatchAddRequestsResult>;
176
176
  interface WaitForPageIdleOptions {
@@ -17,12 +17,12 @@ const log = log_1.default.child({ prefix: 'Puppeteer Click Elements' });
17
17
  * clicks all those elements using a mouse move and a left mouse button click and intercepts
18
18
  * all the navigation requests that are subsequently produced by the page. The intercepted
19
19
  * requests, including their methods, headers and payloads are then enqueued to a provided
20
- * {@apilink RequestQueue}. This is useful to crawl JavaScript heavy pages where links are not available
20
+ * {@link RequestQueue}. This is useful to crawl JavaScript heavy pages where links are not available
21
21
  * in `href` elements, but rather navigations are triggered in click handlers.
22
- * If you're looking to find URLs in `href` attributes of the page, see {@apilink enqueueLinks}.
22
+ * If you're looking to find URLs in `href` attributes of the page, see {@link enqueueLinks}.
23
23
  *
24
- * Optionally, the function allows you to filter the target links' URLs using an array of {@apilink PseudoUrl} objects
25
- * and override settings of the enqueued {@apilink Request} objects.
24
+ * Optionally, the function allows you to filter the target links' URLs using an array of {@link PseudoUrl} objects
25
+ * and override settings of the enqueued {@link Request} objects.
26
26
  *
27
27
  * **IMPORTANT**: To be able to do this, this function uses various mutations on the page,
28
28
  * such as changing the Z-index of elements being clicked and their visibility. Therefore,
@@ -51,7 +51,7 @@ const log = log_1.default.child({ prefix: 'Puppeteer Click Elements' });
51
51
  * });
52
52
  * ```
53
53
  *
54
- * @returns Promise that resolves to {@apilink BatchAddRequestsResult} object.
54
+ * @returns Promise that resolves to {@link BatchAddRequestsResult} object.
55
55
  */
56
56
  async function enqueueLinksByClickingElements(options) {
57
57
  (0, ow_1.default)(options, ow_1.default.object.exactShape({
@@ -18,7 +18,7 @@ export interface PuppeteerCrawlerOptions extends BrowserCrawlerOptions<Puppeteer
18
18
  browserPlugins: [PuppeteerPlugin];
19
19
  }> {
20
20
  /**
21
- * Options used by {@apilink launchPuppeteer} to start new Puppeteer instances.
21
+ * Options used by {@link launchPuppeteer} to start new Puppeteer instances.
22
22
  */
23
23
  launchContext?: PuppeteerLaunchContext;
24
24
  /**
@@ -36,7 +36,7 @@ export interface PuppeteerCrawlerOptions extends BrowserCrawlerOptions<Puppeteer
36
36
  * ```
37
37
  *
38
38
  * Modifying `pageOptions` is supported only in Playwright incognito.
39
- * See {@apilink PrePageCreateHook}
39
+ * See {@link PrePageCreateHook}
40
40
  */
41
41
  preNavigationHooks?: PuppeteerHook[];
42
42
  /**
@@ -64,27 +64,27 @@ export interface PuppeteerCrawlerOptions extends BrowserCrawlerOptions<Puppeteer
64
64
  *
65
65
  * Since `PuppeteerCrawler` uses headless Chrome to download web pages and extract data,
66
66
  * it is useful for crawling websites that require JavaScript execution.
67
- * If the target website doesn't need JavaScript, consider using {@apilink CheerioCrawler},
67
+ * If the target website doesn't need JavaScript, consider using {@link CheerioCrawler},
68
68
  * which downloads the pages using raw HTTP requests and is about 10x faster.
69
69
  *
70
- * The source URLs are represented using {@apilink Request} objects that are fed from
71
- * {@apilink RequestList} or {@apilink RequestQueue} instances provided by the {@apilink PuppeteerCrawlerOptions.requestList}
72
- * or {@apilink PuppeteerCrawlerOptions.requestQueue} constructor options, respectively.
70
+ * The source URLs are represented using {@link Request} objects that are fed from
71
+ * {@link RequestList} or {@link RequestQueue} instances provided by the {@link PuppeteerCrawlerOptions.requestList}
72
+ * or {@link PuppeteerCrawlerOptions.requestQueue} constructor options, respectively.
73
73
  *
74
- * If both {@apilink PuppeteerCrawlerOptions.requestList} and {@apilink PuppeteerCrawlerOptions.requestQueue} are used,
75
- * the instance first processes URLs from the {@apilink RequestList} and automatically enqueues all of them
76
- * to {@apilink RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
74
+ * If both {@link PuppeteerCrawlerOptions.requestList} and {@link PuppeteerCrawlerOptions.requestQueue} are used,
75
+ * the instance first processes URLs from the {@link RequestList} and automatically enqueues all of them
76
+ * to {@link RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
77
77
  *
78
- * The crawler finishes when there are no more {@apilink Request} objects to crawl.
78
+ * The crawler finishes when there are no more {@link Request} objects to crawl.
79
79
  *
80
- * `PuppeteerCrawler` opens a new Chrome page (i.e. tab) for each {@apilink Request} object to crawl
81
- * and then calls the function provided by user as the {@apilink PuppeteerCrawlerOptions.requestHandler} option.
80
+ * `PuppeteerCrawler` opens a new Chrome page (i.e. tab) for each {@link Request} object to crawl
81
+ * and then calls the function provided by user as the {@link PuppeteerCrawlerOptions.requestHandler} option.
82
82
  *
83
83
  * New pages are only opened when there is enough free CPU and memory available,
84
- * using the functionality provided by the {@apilink AutoscaledPool} class.
85
- * All {@apilink AutoscaledPool} configuration options can be passed to the {@apilink PuppeteerCrawlerOptions.autoscaledPoolOptions}
84
+ * using the functionality provided by the {@link AutoscaledPool} class.
85
+ * All {@link AutoscaledPool} configuration options can be passed to the {@link PuppeteerCrawlerOptions.autoscaledPoolOptions}
86
86
  * parameter of the `PuppeteerCrawler` constructor. For user convenience, the `minConcurrency` and `maxConcurrency`
87
- * {@apilink AutoscaledPoolOptions} are available directly in the `PuppeteerCrawler` constructor.
87
+ * {@link AutoscaledPoolOptions} are available directly in the `PuppeteerCrawler` constructor.
88
88
  *
89
89
  * Note that the pool of Puppeteer instances is internally managed by the [BrowserPool](https://github.com/apify/browser-pool) class.
90
90
  *
@@ -214,9 +214,9 @@ export declare class PuppeteerCrawler extends BrowserCrawler<{
214
214
  protected _navigationHandler(crawlingContext: PuppeteerCrawlingContext, gotoOptions: DirectNavigationOptions): Promise<HTTPResponse | null>;
215
215
  }
216
216
  /**
217
- * Creates new {@apilink Router} instance that works based on request labels.
218
- * This instance can then serve as a `requestHandler` of your {@apilink PuppeteerCrawler}.
219
- * Defaults to the {@apilink PuppeteerCrawlingContext}.
217
+ * Creates new {@link Router} instance that works based on request labels.
218
+ * This instance can then serve as a `requestHandler` of your {@link PuppeteerCrawler}.
219
+ * Defaults to the {@link PuppeteerCrawlingContext}.
220
220
  *
221
221
  * > Serves as a shortcut for using `Router.create<PuppeteerCrawlingContext>()`.
222
222
  *
@@ -15,27 +15,27 @@ const puppeteer_utils_1 = require("./utils/puppeteer_utils");
15
15
  *
16
16
  * Since `PuppeteerCrawler` uses headless Chrome to download web pages and extract data,
17
17
  * it is useful for crawling websites that require JavaScript execution.
18
- * If the target website doesn't need JavaScript, consider using {@apilink CheerioCrawler},
18
+ * If the target website doesn't need JavaScript, consider using {@link CheerioCrawler},
19
19
  * which downloads the pages using raw HTTP requests and is about 10x faster.
20
20
  *
21
- * The source URLs are represented using {@apilink Request} objects that are fed from
22
- * {@apilink RequestList} or {@apilink RequestQueue} instances provided by the {@apilink PuppeteerCrawlerOptions.requestList}
23
- * or {@apilink PuppeteerCrawlerOptions.requestQueue} constructor options, respectively.
21
+ * The source URLs are represented using {@link Request} objects that are fed from
22
+ * {@link RequestList} or {@link RequestQueue} instances provided by the {@link PuppeteerCrawlerOptions.requestList}
23
+ * or {@link PuppeteerCrawlerOptions.requestQueue} constructor options, respectively.
24
24
  *
25
- * If both {@apilink PuppeteerCrawlerOptions.requestList} and {@apilink PuppeteerCrawlerOptions.requestQueue} are used,
26
- * the instance first processes URLs from the {@apilink RequestList} and automatically enqueues all of them
27
- * to {@apilink RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
25
+ * If both {@link PuppeteerCrawlerOptions.requestList} and {@link PuppeteerCrawlerOptions.requestQueue} are used,
26
+ * the instance first processes URLs from the {@link RequestList} and automatically enqueues all of them
27
+ * to {@link RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
28
28
  *
29
- * The crawler finishes when there are no more {@apilink Request} objects to crawl.
29
+ * The crawler finishes when there are no more {@link Request} objects to crawl.
30
30
  *
31
- * `PuppeteerCrawler` opens a new Chrome page (i.e. tab) for each {@apilink Request} object to crawl
32
- * and then calls the function provided by user as the {@apilink PuppeteerCrawlerOptions.requestHandler} option.
31
+ * `PuppeteerCrawler` opens a new Chrome page (i.e. tab) for each {@link Request} object to crawl
32
+ * and then calls the function provided by user as the {@link PuppeteerCrawlerOptions.requestHandler} option.
33
33
  *
34
34
  * New pages are only opened when there is enough free CPU and memory available,
35
- * using the functionality provided by the {@apilink AutoscaledPool} class.
36
- * All {@apilink AutoscaledPool} configuration options can be passed to the {@apilink PuppeteerCrawlerOptions.autoscaledPoolOptions}
35
+ * using the functionality provided by the {@link AutoscaledPool} class.
36
+ * All {@link AutoscaledPool} configuration options can be passed to the {@link PuppeteerCrawlerOptions.autoscaledPoolOptions}
37
37
  * parameter of the `PuppeteerCrawler` constructor. For user convenience, the `minConcurrency` and `maxConcurrency`
38
- * {@apilink AutoscaledPoolOptions} are available directly in the `PuppeteerCrawler` constructor.
38
+ * {@link AutoscaledPoolOptions} are available directly in the `PuppeteerCrawler` constructor.
39
39
  *
40
40
  * Note that the pool of Puppeteer instances is internally managed by the [BrowserPool](https://github.com/apify/browser-pool) class.
41
41
  *
@@ -128,9 +128,9 @@ Object.defineProperty(PuppeteerCrawler, "optionsShape", {
128
128
  }
129
129
  });
130
130
  /**
131
- * Creates new {@apilink Router} instance that works based on request labels.
132
- * This instance can then serve as a `requestHandler` of your {@apilink PuppeteerCrawler}.
133
- * Defaults to the {@apilink PuppeteerCrawlingContext}.
131
+ * Creates new {@link Router} instance that works based on request labels.
132
+ * This instance can then serve as a `requestHandler` of your {@link PuppeteerCrawler}.
133
+ * Defaults to the {@link PuppeteerCrawlingContext}.
134
134
  *
135
135
  * > Serves as a shortcut for using `Router.create<PuppeteerCrawlingContext>()`.
136
136
  *
@@ -119,7 +119,7 @@ export declare function injectJQuery(page: Page, options?: {
119
119
  surviveNavigations?: boolean;
120
120
  }): Promise<unknown>;
121
121
  /**
122
- * Returns Cheerio handle for `page.content()`, allowing to work with the data same way as with {@apilink CheerioCrawler}.
122
+ * Returns Cheerio handle for `page.content()`, allowing to work with the data same way as with {@link CheerioCrawler}.
123
123
  *
124
124
  * **Example usage:**
125
125
  * ```javascript
@@ -211,7 +211,7 @@ export declare function cacheResponses(page: Page, cache: Dictionary<Partial<Res
211
211
  * }
212
212
  * ```
213
213
  * Where `page` is a Puppeteer [`Page`](https://pptr.dev/api/puppeteer.page)
214
- * and `request` is a {@apilink Request}.
214
+ * and `request` is a {@link Request}.
215
215
  *
216
216
  * The function is compiled by using the `scriptString` parameter as the function's body,
217
217
  * so any limitations to function bodies apply. Return value of the compiled function
@@ -368,7 +368,7 @@ export interface PuppeteerContextUtils {
368
368
  */
369
369
  waitForSelector(selector: string, timeoutMs?: number): Promise<void>;
370
370
  /**
371
- * Returns Cheerio handle for `page.content()`, allowing to work with the data same way as with {@apilink CheerioCrawler}.
371
+ * Returns Cheerio handle for `page.content()`, allowing to work with the data same way as with {@link CheerioCrawler}.
372
372
  * When provided with the `selector` argument, it waits for it to be available first.
373
373
  *
374
374
  * **Example usage:**
@@ -385,12 +385,12 @@ export interface PuppeteerContextUtils {
385
385
  * clicks all those elements using a mouse move and a left mouse button click and intercepts
386
386
  * all the navigation requests that are subsequently produced by the page. The intercepted
387
387
  * requests, including their methods, headers and payloads are then enqueued to a provided
388
- * {@apilink RequestQueue}. This is useful to crawl JavaScript heavy pages where links are not available
388
+ * {@link RequestQueue}. This is useful to crawl JavaScript heavy pages where links are not available
389
389
  * in `href` elements, but rather navigations are triggered in click handlers.
390
- * If you're looking to find URLs in `href` attributes of the page, see {@apilink enqueueLinks}.
390
+ * If you're looking to find URLs in `href` attributes of the page, see {@link enqueueLinks}.
391
391
  *
392
- * Optionally, the function allows you to filter the target links' URLs using an array of {@apilink PseudoUrl} objects
393
- * and override settings of the enqueued {@apilink Request} objects.
392
+ * Optionally, the function allows you to filter the target links' URLs using an array of {@link PseudoUrl} objects
393
+ * and override settings of the enqueued {@link Request} objects.
394
394
  *
395
395
  * **IMPORTANT**: To be able to do this, this function uses various mutations on the page,
396
396
  * such as changing the Z-index of elements being clicked and their visibility. Therefore,
@@ -419,7 +419,7 @@ export interface PuppeteerContextUtils {
419
419
  * });
420
420
  * ```
421
421
  *
422
- * @returns Promise that resolves to {@apilink BatchAddRequestsResult} object.
422
+ * @returns Promise that resolves to {@link BatchAddRequestsResult} object.
423
423
  */
424
424
  enqueueLinksByClickingElements(options: Omit<EnqueueLinksByClickingElementsOptions, 'page' | 'requestQueue'>): Promise<BatchAddRequestsResult>;
425
425
  /**
@@ -490,7 +490,7 @@ export interface PuppeteerContextUtils {
490
490
  * }
491
491
  * ```
492
492
  * Where `page` is a Puppeteer [`Page`](https://pptr.dev/api/puppeteer.page)
493
- * and `request` is a {@apilink Request}.
493
+ * and `request` is a {@link Request}.
494
494
  *
495
495
  * The function is compiled by using the `scriptString` parameter as the function's body,
496
496
  * so any limitations to function bodies apply. Return value of the compiled function
@@ -116,7 +116,7 @@ async function injectJQuery(page, options) {
116
116
  return injectFile(page, jqueryPath, { surviveNavigations: options?.surviveNavigations ?? true });
117
117
  }
118
118
  /**
119
- * Returns Cheerio handle for `page.content()`, allowing to work with the data same way as with {@apilink CheerioCrawler}.
119
+ * Returns Cheerio handle for `page.content()`, allowing to work with the data same way as with {@link CheerioCrawler}.
120
120
  *
121
121
  * **Example usage:**
122
122
  * ```javascript
@@ -309,7 +309,7 @@ async function cacheResponses(page, cache, responseUrlRules) {
309
309
  * }
310
310
  * ```
311
311
  * Where `page` is a Puppeteer [`Page`](https://pptr.dev/api/puppeteer.page)
312
- * and `request` is a {@apilink Request}.
312
+ * and `request` is a {@link Request}.
313
313
  *
314
314
  * The function is compiled by using the `scriptString` parameter as the function's body,
315
315
  * so any limitations to function bodies apply. Return value of the compiled function
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@crawlee/puppeteer",
3
- "version": "3.13.3-beta.9",
3
+ "version": "3.13.3",
4
4
  "description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
5
5
  "engines": {
6
6
  "node": ">=16.0.0"
@@ -55,10 +55,10 @@
55
55
  "dependencies": {
56
56
  "@apify/datastructures": "^2.0.0",
57
57
  "@apify/log": "^2.4.0",
58
- "@crawlee/browser": "3.13.3-beta.9",
59
- "@crawlee/browser-pool": "3.13.3-beta.9",
60
- "@crawlee/types": "3.13.3-beta.9",
61
- "@crawlee/utils": "3.13.3-beta.9",
58
+ "@crawlee/browser": "3.13.3",
59
+ "@crawlee/browser-pool": "3.13.3",
60
+ "@crawlee/types": "3.13.3",
61
+ "@crawlee/utils": "3.13.3",
62
62
  "cheerio": "1.0.0-rc.12",
63
63
  "devtools-protocol": "*",
64
64
  "idcac-playwright": "^0.1.2",
@@ -81,5 +81,5 @@
81
81
  }
82
82
  }
83
83
  },
84
- "gitHead": "b15fa18267747df8d3e9ef4dde29e139187ab684"
84
+ "gitHead": "279cadbd3cd6342f36cc4d841e07b999e472420d"
85
85
  }