npm - @crawlee/puppeteer - Versions diffs - 3.13.3-beta.9 → 3.13.3 - Mend

@crawlee/puppeteer 3.13.3-beta.9 → 3.13.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/internals/enqueue-links/click-elements.d.ts +15 -15
package/internals/enqueue-links/click-elements.js +5 -5
package/internals/puppeteer-crawler.d.ts +18 -18
package/internals/puppeteer-crawler.js +16 -16
package/internals/utils/puppeteer_utils.d.ts +9 -9
package/internals/utils/puppeteer_utils.js +2 -2
package/package.json +6 -6
package/tsconfig.build.tsbuildinfo +1 -1

package/internals/enqueue-links/click-elements.d.ts CHANGED Viewed

@@ -12,13 +12,13 @@ export interface EnqueueLinksByClickingElementsOptions {
      */
     requestQueue: RequestProvider;
     /**
-     * A CSS selector matching elements to be clicked on. Unlike in {@apilink enqueueLinks}, there is no default
+     * A CSS selector matching elements to be clicked on. Unlike in {@link enqueueLinks}, there is no default
      * value. This is to prevent suboptimal use of this function by using it too broadly.
      */
     selector: string;
-    /** Sets {@apilink Request.userData} for newly enqueued requests. */
+    /** Sets {@link Request.userData} for newly enqueued requests. */
     userData?: Dictionary;
-    /** Sets {@apilink Request.label} for newly enqueued requests. */
+    /** Sets {@link Request.label} for newly enqueued requests. */
     label?: string;
     /**
      * Click options for use in Puppeteer's click handler.
@@ -29,7 +29,7 @@ export interface EnqueueLinksByClickingElementsOptions {
      * containing glob pattern strings matching the URLs to be enqueued.
      *
      * The plain objects must include at least the `glob` property, which holds the glob pattern string.
-     * All remaining keys will be used as request options for the corresponding enqueued {@apilink Request} objects.
+     * All remaining keys will be used as request options for the corresponding enqueued {@link Request} objects.
      *
      * The matching is always case-insensitive.
      * If you need case-sensitive matching, use `regexps` property directly.
@@ -44,7 +44,7 @@ export interface EnqueueLinksByClickingElementsOptions {
      * containing regular expressions matching the URLs to be enqueued.
      *
      * The plain objects must include at least the `regexp` property, which holds the regular expression.
-     * All remaining keys will be used as request options for the corresponding enqueued {@apilink Request} objects.
+     * All remaining keys will be used as request options for the corresponding enqueued {@link Request} objects.
      *
      * If `regexps` is an empty array or `undefined`, then the function
      * enqueues all the intercepted navigation requests produced by the page
@@ -55,11 +55,11 @@ export interface EnqueueLinksByClickingElementsOptions {
      * *NOTE:* In future versions of SDK the options will be removed.
      * Please use `globs` or `regexps` instead.
      *
-     * An array of {@apilink PseudoUrl} strings or plain objects
-     * containing {@apilink PseudoUrl} strings matching the URLs to be enqueued.
+     * An array of {@link PseudoUrl} strings or plain objects
+     * containing {@link PseudoUrl} strings matching the URLs to be enqueued.
      *
      * The plain objects must include at least the `purl` property, which holds the pseudo-URL pattern string.
-     * All remaining keys will be used as request options for the corresponding enqueued {@apilink Request} objects.
+     * All remaining keys will be used as request options for the corresponding enqueued {@link Request} objects.
      *
      * With a pseudo-URL string, the matching is always case-insensitive.
      * If you need case-sensitive matching, use `regexps` property directly.
@@ -72,7 +72,7 @@ export interface EnqueueLinksByClickingElementsOptions {
      */
     pseudoUrls?: PseudoUrlInput[];
     /**
-     * Just before a new {@apilink Request} is constructed and enqueued to the {@apilink RequestQueue}, this function can be used
+     * Just before a new {@link Request} is constructed and enqueued to the {@link RequestQueue}, this function can be used
      * to remove it or modify its contents such as `userData`, `payload` or, most importantly `uniqueKey`. This is useful
      * when you need to enqueue multiple `Requests` to the queue that share the same URL, but differ in methods or payloads,
      * or to dynamically update or create `userData`.
@@ -120,7 +120,7 @@ export interface EnqueueLinksByClickingElementsOptions {
      * If set to `true`:
      *   - while adding the request to the queue: the request will be added to the foremost position in the queue.
      *   - while reclaiming the request: the request will be placed to the beginning of the queue, so that it's returned
-     *   in the next call to {@apilink RequestQueue.fetchNextRequest}.
+     *   in the next call to {@link RequestQueue.fetchNextRequest}.
      * By default, it's put to the end of the queue.
      * @default false
      */
@@ -136,12 +136,12 @@ export interface EnqueueLinksByClickingElementsOptions {
  * clicks all those elements using a mouse move and a left mouse button click and intercepts
  * all the navigation requests that are subsequently produced by the page. The intercepted
  * requests, including their methods, headers and payloads are then enqueued to a provided
- * {@apilink RequestQueue}. This is useful to crawl JavaScript heavy pages where links are not available
+ * {@link RequestQueue}. This is useful to crawl JavaScript heavy pages where links are not available
  * in `href` elements, but rather navigations are triggered in click handlers.
- * If you're looking to find URLs in `href` attributes of the page, see {@apilink enqueueLinks}.
+ * If you're looking to find URLs in `href` attributes of the page, see {@link enqueueLinks}.
  *
- * Optionally, the function allows you to filter the target links' URLs using an array of {@apilink PseudoUrl} objects
- * and override settings of the enqueued {@apilink Request} objects.
+ * Optionally, the function allows you to filter the target links' URLs using an array of {@link PseudoUrl} objects
+ * and override settings of the enqueued {@link Request} objects.
  *
  * **IMPORTANT**: To be able to do this, this function uses various mutations on the page,
  * such as changing the Z-index of elements being clicked and their visibility. Therefore,
@@ -170,7 +170,7 @@ export interface EnqueueLinksByClickingElementsOptions {
  * });
  * ```
  *
- * @returns Promise that resolves to {@apilink BatchAddRequestsResult} object.
+ * @returns Promise that resolves to {@link BatchAddRequestsResult} object.
  */
 export declare function enqueueLinksByClickingElements(options: EnqueueLinksByClickingElementsOptions): Promise<BatchAddRequestsResult>;
 interface WaitForPageIdleOptions {

package/internals/enqueue-links/click-elements.js CHANGED Viewed

@@ -17,12 +17,12 @@ const log = log_1.default.child({ prefix: 'Puppeteer Click Elements' });
  * clicks all those elements using a mouse move and a left mouse button click and intercepts
  * all the navigation requests that are subsequently produced by the page. The intercepted
  * requests, including their methods, headers and payloads are then enqueued to a provided
- * {@apilink RequestQueue}. This is useful to crawl JavaScript heavy pages where links are not available
+ * {@link RequestQueue}. This is useful to crawl JavaScript heavy pages where links are not available
  * in `href` elements, but rather navigations are triggered in click handlers.
- * If you're looking to find URLs in `href` attributes of the page, see {@apilink enqueueLinks}.
+ * If you're looking to find URLs in `href` attributes of the page, see {@link enqueueLinks}.
  *
- * Optionally, the function allows you to filter the target links' URLs using an array of {@apilink PseudoUrl} objects
- * and override settings of the enqueued {@apilink Request} objects.
+ * Optionally, the function allows you to filter the target links' URLs using an array of {@link PseudoUrl} objects
+ * and override settings of the enqueued {@link Request} objects.
  *
  * **IMPORTANT**: To be able to do this, this function uses various mutations on the page,
  * such as changing the Z-index of elements being clicked and their visibility. Therefore,
@@ -51,7 +51,7 @@ const log = log_1.default.child({ prefix: 'Puppeteer Click Elements' });
  * });
  * ```
  *
- * @returns Promise that resolves to {@apilink BatchAddRequestsResult} object.
+ * @returns Promise that resolves to {@link BatchAddRequestsResult} object.
  */
 async function enqueueLinksByClickingElements(options) {
     (0, ow_1.default)(options, ow_1.default.object.exactShape({

package/internals/puppeteer-crawler.d.ts CHANGED Viewed

@@ -18,7 +18,7 @@ export interface PuppeteerCrawlerOptions extends BrowserCrawlerOptions<Puppeteer
     browserPlugins: [PuppeteerPlugin];
 }> {
     /**
-     * Options used by {@apilink launchPuppeteer} to start new Puppeteer instances.
+     * Options used by {@link launchPuppeteer} to start new Puppeteer instances.
      */
     launchContext?: PuppeteerLaunchContext;
     /**
@@ -36,7 +36,7 @@ export interface PuppeteerCrawlerOptions extends BrowserCrawlerOptions<Puppeteer
      * ```
      *
      * Modifying `pageOptions` is supported only in Playwright incognito.
-     * See {@apilink PrePageCreateHook}
+     * See {@link PrePageCreateHook}
      */
     preNavigationHooks?: PuppeteerHook[];
     /**
@@ -64,27 +64,27 @@ export interface PuppeteerCrawlerOptions extends BrowserCrawlerOptions<Puppeteer
  *
  * Since `PuppeteerCrawler` uses headless Chrome to download web pages and extract data,
  * it is useful for crawling of websites that require to execute JavaScript.
- * If the target website doesn't need JavaScript, consider using {@apilink CheerioCrawler},
+ * If the target website doesn't need JavaScript, consider using {@link CheerioCrawler},
  * which downloads the pages using raw HTTP requests and is about 10x faster.
  *
- * The source URLs are represented using {@apilink Request} objects that are fed from
- * {@apilink RequestList} or {@apilink RequestQueue} instances provided by the {@apilink PuppeteerCrawlerOptions.requestList}
- * or {@apilink PuppeteerCrawlerOptions.requestQueue} constructor options, respectively.
+ * The source URLs are represented using {@link Request} objects that are fed from
+ * {@link RequestList} or {@link RequestQueue} instances provided by the {@link PuppeteerCrawlerOptions.requestList}
+ * or {@link PuppeteerCrawlerOptions.requestQueue} constructor options, respectively.
  *
- * If both {@apilink PuppeteerCrawlerOptions.requestList} and {@apilink PuppeteerCrawlerOptions.requestQueue} are used,
- * the instance first processes URLs from the {@apilink RequestList} and automatically enqueues all of them
- * to {@apilink RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
+ * If both {@link PuppeteerCrawlerOptions.requestList} and {@link PuppeteerCrawlerOptions.requestQueue} are used,
+ * the instance first processes URLs from the {@link RequestList} and automatically enqueues all of them
+ * to {@link RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
  *
- * The crawler finishes when there are no more {@apilink Request} objects to crawl.
+ * The crawler finishes when there are no more {@link Request} objects to crawl.
  *
- * `PuppeteerCrawler` opens a new Chrome page (i.e. tab) for each {@apilink Request} object to crawl
- * and then calls the function provided by user as the {@apilink PuppeteerCrawlerOptions.requestHandler} option.
+ * `PuppeteerCrawler` opens a new Chrome page (i.e. tab) for each {@link Request} object to crawl
+ * and then calls the function provided by user as the {@link PuppeteerCrawlerOptions.requestHandler} option.
  *
  * New pages are only opened when there is enough free CPU and memory available,
- * using the functionality provided by the {@apilink AutoscaledPool} class.
- * All {@apilink AutoscaledPool} configuration options can be passed to the {@apilink PuppeteerCrawlerOptions.autoscaledPoolOptions}
+ * using the functionality provided by the {@link AutoscaledPool} class.
+ * All {@link AutoscaledPool} configuration options can be passed to the {@link PuppeteerCrawlerOptions.autoscaledPoolOptions}
  * parameter of the `PuppeteerCrawler` constructor. For user convenience, the `minConcurrency` and `maxConcurrency`
- * {@apilink AutoscaledPoolOptions} are available directly in the `PuppeteerCrawler` constructor.
+ * {@link AutoscaledPoolOptions} are available directly in the `PuppeteerCrawler` constructor.
  *
  * Note that the pool of Puppeteer instances is internally managed by the [BrowserPool](https://github.com/apify/browser-pool) class.
  *
@@ -214,9 +214,9 @@ export declare class PuppeteerCrawler extends BrowserCrawler<{
     protected _navigationHandler(crawlingContext: PuppeteerCrawlingContext, gotoOptions: DirectNavigationOptions): Promise<HTTPResponse | null>;
 }
 /**
- * Creates new {@apilink Router} instance that works based on request labels.
- * This instance can then serve as a `requestHandler` of your {@apilink PuppeteerCrawler}.
- * Defaults to the {@apilink PuppeteerCrawlingContext}.
+ * Creates new {@link Router} instance that works based on request labels.
+ * This instance can then serve as a `requestHandler` of your {@link PuppeteerCrawler}.
+ * Defaults to the {@link PuppeteerCrawlingContext}.
  *
  * > Serves as a shortcut for using `Router.create<PuppeteerCrawlingContext>()`.
  *

package/internals/puppeteer-crawler.js CHANGED Viewed

@@ -15,27 +15,27 @@ const puppeteer_utils_1 = require("./utils/puppeteer_utils");
  *
  * Since `PuppeteerCrawler` uses headless Chrome to download web pages and extract data,
  * it is useful for crawling of websites that require to execute JavaScript.
- * If the target website doesn't need JavaScript, consider using {@apilink CheerioCrawler},
+ * If the target website doesn't need JavaScript, consider using {@link CheerioCrawler},
  * which downloads the pages using raw HTTP requests and is about 10x faster.
  *
- * The source URLs are represented using {@apilink Request} objects that are fed from
- * {@apilink RequestList} or {@apilink RequestQueue} instances provided by the {@apilink PuppeteerCrawlerOptions.requestList}
- * or {@apilink PuppeteerCrawlerOptions.requestQueue} constructor options, respectively.
+ * The source URLs are represented using {@link Request} objects that are fed from
+ * {@link RequestList} or {@link RequestQueue} instances provided by the {@link PuppeteerCrawlerOptions.requestList}
+ * or {@link PuppeteerCrawlerOptions.requestQueue} constructor options, respectively.
  *
- * If both {@apilink PuppeteerCrawlerOptions.requestList} and {@apilink PuppeteerCrawlerOptions.requestQueue} are used,
- * the instance first processes URLs from the {@apilink RequestList} and automatically enqueues all of them
- * to {@apilink RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
+ * If both {@link PuppeteerCrawlerOptions.requestList} and {@link PuppeteerCrawlerOptions.requestQueue} are used,
+ * the instance first processes URLs from the {@link RequestList} and automatically enqueues all of them
+ * to {@link RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
  *
- * The crawler finishes when there are no more {@apilink Request} objects to crawl.
+ * The crawler finishes when there are no more {@link Request} objects to crawl.
  *
- * `PuppeteerCrawler` opens a new Chrome page (i.e. tab) for each {@apilink Request} object to crawl
- * and then calls the function provided by user as the {@apilink PuppeteerCrawlerOptions.requestHandler} option.
+ * `PuppeteerCrawler` opens a new Chrome page (i.e. tab) for each {@link Request} object to crawl
+ * and then calls the function provided by user as the {@link PuppeteerCrawlerOptions.requestHandler} option.
  *
  * New pages are only opened when there is enough free CPU and memory available,
- * using the functionality provided by the {@apilink AutoscaledPool} class.
- * All {@apilink AutoscaledPool} configuration options can be passed to the {@apilink PuppeteerCrawlerOptions.autoscaledPoolOptions}
+ * using the functionality provided by the {@link AutoscaledPool} class.
+ * All {@link AutoscaledPool} configuration options can be passed to the {@link PuppeteerCrawlerOptions.autoscaledPoolOptions}
  * parameter of the `PuppeteerCrawler` constructor. For user convenience, the `minConcurrency` and `maxConcurrency`
- * {@apilink AutoscaledPoolOptions} are available directly in the `PuppeteerCrawler` constructor.
+ * {@link AutoscaledPoolOptions} are available directly in the `PuppeteerCrawler` constructor.
  *
  * Note that the pool of Puppeteer instances is internally managed by the [BrowserPool](https://github.com/apify/browser-pool) class.
  *
@@ -128,9 +128,9 @@ Object.defineProperty(PuppeteerCrawler, "optionsShape", {
     }
 });
 /**
- * Creates new {@apilink Router} instance that works based on request labels.
- * This instance can then serve as a `requestHandler` of your {@apilink PuppeteerCrawler}.
- * Defaults to the {@apilink PuppeteerCrawlingContext}.
+ * Creates new {@link Router} instance that works based on request labels.
+ * This instance can then serve as a `requestHandler` of your {@link PuppeteerCrawler}.
+ * Defaults to the {@link PuppeteerCrawlingContext}.
  *
  * > Serves as a shortcut for using `Router.create<PuppeteerCrawlingContext>()`.
  *

package/internals/utils/puppeteer_utils.d.ts CHANGED Viewed

@@ -119,7 +119,7 @@ export declare function injectJQuery(page: Page, options?: {
     surviveNavigations?: boolean;
 }): Promise<unknown>;
 /**
- * Returns Cheerio handle for `page.content()`, allowing to work with the data same way as with {@apilink CheerioCrawler}.
+ * Returns Cheerio handle for `page.content()`, allowing to work with the data same way as with {@link CheerioCrawler}.
  *
  * **Example usage:**
  * ```javascript
@@ -211,7 +211,7 @@ export declare function cacheResponses(page: Page, cache: Dictionary<Partial<Res
  * }
  * ```
  * Where `page` is a Puppeteer [`Page`](https://pptr.dev/api/puppeteer.page)
- * and `request` is a {@apilink Request}.
+ * and `request` is a {@link Request}.
  *
  * The function is compiled by using the `scriptString` parameter as the function's body,
  * so any limitations to function bodies apply. Return value of the compiled function
@@ -368,7 +368,7 @@ export interface PuppeteerContextUtils {
      */
     waitForSelector(selector: string, timeoutMs?: number): Promise<void>;
     /**
-     * Returns Cheerio handle for `page.content()`, allowing to work with the data same way as with {@apilink CheerioCrawler}.
+     * Returns Cheerio handle for `page.content()`, allowing to work with the data same way as with {@link CheerioCrawler}.
      * When provided with the `selector` argument, it waits for it to be available first.
      *
      * **Example usage:**
@@ -385,12 +385,12 @@ export interface PuppeteerContextUtils {
      * clicks all those elements using a mouse move and a left mouse button click and intercepts
      * all the navigation requests that are subsequently produced by the page. The intercepted
      * requests, including their methods, headers and payloads are then enqueued to a provided
-     * {@apilink RequestQueue}. This is useful to crawl JavaScript heavy pages where links are not available
+     * {@link RequestQueue}. This is useful to crawl JavaScript heavy pages where links are not available
      * in `href` elements, but rather navigations are triggered in click handlers.
-     * If you're looking to find URLs in `href` attributes of the page, see {@apilink enqueueLinks}.
+     * If you're looking to find URLs in `href` attributes of the page, see {@link enqueueLinks}.
      *
-     * Optionally, the function allows you to filter the target links' URLs using an array of {@apilink PseudoUrl} objects
-     * and override settings of the enqueued {@apilink Request} objects.
+     * Optionally, the function allows you to filter the target links' URLs using an array of {@link PseudoUrl} objects
+     * and override settings of the enqueued {@link Request} objects.
      *
      * **IMPORTANT**: To be able to do this, this function uses various mutations on the page,
      * such as changing the Z-index of elements being clicked and their visibility. Therefore,
@@ -419,7 +419,7 @@ export interface PuppeteerContextUtils {
      * });
      * ```
      *
-     * @returns Promise that resolves to {@apilink BatchAddRequestsResult} object.
+     * @returns Promise that resolves to {@link BatchAddRequestsResult} object.
      */
     enqueueLinksByClickingElements(options: Omit<EnqueueLinksByClickingElementsOptions, 'page' | 'requestQueue'>): Promise<BatchAddRequestsResult>;
     /**
@@ -490,7 +490,7 @@ export interface PuppeteerContextUtils {
      * }
      * ```
      * Where `page` is a Puppeteer [`Page`](https://pptr.dev/api/puppeteer.page)
-     * and `request` is a {@apilink Request}.
+     * and `request` is a {@link Request}.
      *
      * The function is compiled by using the `scriptString` parameter as the function's body,
      * so any limitations to function bodies apply. Return value of the compiled function

package/internals/utils/puppeteer_utils.js CHANGED Viewed

@@ -116,7 +116,7 @@ async function injectJQuery(page, options) {
     return injectFile(page, jqueryPath, { surviveNavigations: options?.surviveNavigations ?? true });
 }
 /**
- * Returns Cheerio handle for `page.content()`, allowing to work with the data same way as with {@apilink CheerioCrawler}.
+ * Returns Cheerio handle for `page.content()`, allowing to work with the data same way as with {@link CheerioCrawler}.
  *
  * **Example usage:**
  * ```javascript
@@ -309,7 +309,7 @@ async function cacheResponses(page, cache, responseUrlRules) {
  * }
  * ```
  * Where `page` is a Puppeteer [`Page`](https://pptr.dev/api/puppeteer.page)
- * and `request` is a {@apilink Request}.
+ * and `request` is a {@link Request}.
  *
  * The function is compiled by using the `scriptString` parameter as the function's body,
  * so any limitations to function bodies apply. Return value of the compiled function

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "name": "@crawlee/puppeteer",
-    "version": "3.13.3-beta.9",
+    "version": "3.13.3",
     "description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
     "engines": {
         "node": ">=16.0.0"
@@ -55,10 +55,10 @@
     "dependencies": {
         "@apify/datastructures": "^2.0.0",
         "@apify/log": "^2.4.0",
-        "@crawlee/browser": "3.13.3-beta.9",
-        "@crawlee/browser-pool": "3.13.3-beta.9",
-        "@crawlee/types": "3.13.3-beta.9",
-        "@crawlee/utils": "3.13.3-beta.9",
+        "@crawlee/browser": "3.13.3",
+        "@crawlee/browser-pool": "3.13.3",
+        "@crawlee/types": "3.13.3",
+        "@crawlee/utils": "3.13.3",
         "cheerio": "1.0.0-rc.12",
         "devtools-protocol": "*",
         "idcac-playwright": "^0.1.2",
@@ -81,5 +81,5 @@
             }
         }
     },
-    "gitHead": "b15fa18267747df8d3e9ef4dde29e139187ab684"
+    "gitHead": "279cadbd3cd6342f36cc4d841e07b999e472420d"
 }