@crawlee/browser 3.13.3-beta.8 → 3.13.3
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
@@ -18,15 +18,15 @@ export interface BrowserCrawlerOptions<Context extends BrowserCrawlingContext =
 /**
  * Function that is called to process each request.
  *
- * The function receives the {@
+ * The function receives the {@link BrowserCrawlingContext}
  * (actual context will be enhanced with the crawler specific properties) as an argument, where:
- * - {@
+ * - {@link BrowserCrawlingContext.request|`request`} is an instance of the {@link Request} object
  * with details about the URL to open, HTTP method etc;
- * - {@
+ * - {@link BrowserCrawlingContext.page|`page`} is an instance of the
  * Puppeteer [Page](https://pptr.dev/api/puppeteer.page) or
  * Playwright [Page](https://playwright.dev/docs/api/class-page);
- * - {@
- * - {@
+ * - {@link BrowserCrawlingContext.browserController|`browserController`} is an instance of the {@link BrowserController};
+ * - {@link BrowserCrawlingContext.response|`response`} is an instance of the
  * Puppeteer [Response](https://pptr.dev/api/puppeteer.httpresponse) or
  * Playwright [Response](https://playwright.dev/docs/api/class-response),
  * which is the main resource response as returned by the respective `page.goto()` function.
@@ -34,27 +34,27 @@ export interface BrowserCrawlerOptions<Context extends BrowserCrawlingContext =
  * The function must return a promise, which is then awaited by the crawler.
  *
  * If the function throws an exception, the crawler will try to re-crawl the
- * request later, up to the {@
+ * request later, up to the {@link BrowserCrawlerOptions.maxRequestRetries|`maxRequestRetries`} times.
  * If all the retries fail, the crawler calls the function
- * provided to the {@
+ * provided to the {@link BrowserCrawlerOptions.failedRequestHandler|`failedRequestHandler`} parameter.
  * To make this work, we should **always**
  * let our function throw exceptions rather than catch them.
  * The exceptions are logged to the request using the
- * {@
+ * {@link Request.pushErrorMessage|`Request.pushErrorMessage()`} function.
  */
 requestHandler?: BrowserRequestHandler<LoadedContext<Context>>;
 /**
  * Function that is called to process each request.
  *
- * The function receives the {@
+ * The function receives the {@link BrowserCrawlingContext}
  * (actual context will be enhanced with the crawler specific properties) as an argument, where:
- * - {@
+ * - {@link BrowserCrawlingContext.request|`request`} is an instance of the {@link Request} object
  * with details about the URL to open, HTTP method etc;
- * - {@
+ * - {@link BrowserCrawlingContext.page|`page`} is an instance of the
  * Puppeteer [Page](https://pptr.dev/api/puppeteer.page) or
  * Playwright [Page](https://playwright.dev/docs/api/class-page);
- * - {@
- * - {@
+ * - {@link BrowserCrawlingContext.browserController|`browserController`} is an instance of the {@link BrowserController};
+ * - {@link BrowserCrawlingContext.response|`response`} is an instance of the
  * Puppeteer [Response](https://pptr.dev/api/puppeteer.httpresponse) or
  * Playwright [Response](https://playwright.dev/docs/api/class-response),
  * which is the main resource response as returned by the respective `page.goto()` function.
@@ -62,13 +62,13 @@ export interface BrowserCrawlerOptions<Context extends BrowserCrawlingContext =
  * The function must return a promise, which is then awaited by the crawler.
  *
  * If the function throws an exception, the crawler will try to re-crawl the
- * request later, up to the {@
+ * request later, up to the {@link BrowserCrawlerOptions.maxRequestRetries|`maxRequestRetries`} times.
  * If all the retries fail, the crawler calls the function
- * provided to the {@
+ * provided to the {@link BrowserCrawlerOptions.failedRequestHandler|`failedRequestHandler`} parameter.
  * To make this work, we should **always**
  * let our function throw exceptions rather than catch them.
  * The exceptions are logged to the request using the
- * {@
+ * {@link Request.pushErrorMessage|`Request.pushErrorMessage()`} function.
  *
  * @deprecated `handlePageFunction` has been renamed to `requestHandler` and will be removed in a future version.
  * @ignore
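The JSDoc restored above spells out the `requestHandler` contract: the handler receives the crawling context (`request`, `page`, `response`, `browserController`), its returned promise is awaited, and thrown errors trigger retries that are recorded via `Request.pushErrorMessage()`. A minimal sketch of such a handler, using the `PlaywrightCrawler` subclass from the `crawlee` metapackage since `BrowserCrawler` itself is abstract (the URL and pushed fields are illustrative):

```ts
import { Dataset, PlaywrightCrawler } from 'crawlee';

const crawler = new PlaywrightCrawler({
    maxRequestRetries: 3,
    // The returned promise is awaited by the crawler; a thrown error schedules
    // a retry and is recorded on the request via Request.pushErrorMessage().
    async requestHandler({ request, page, response, browserController, log }) {
        // `response` is the main navigation response returned by page.goto().
        log.info(`Processing ${request.url} (HTTP ${response?.status()})`);
        // `page` is a Playwright Page here (a Puppeteer Page in PuppeteerCrawler),
        // and `browserController` manages the browser instance that owns it.
        const title = await page.title();
        await Dataset.pushData({ url: request.url, title });
    },
});

await crawler.run(['https://crawlee.dev']);
```

Any `BrowserCrawler` subclass (`PuppeteerCrawler`, `PlaywrightCrawler`) accepts the same options.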
@@ -76,11 +76,11 @@ export interface BrowserCrawlerOptions<Context extends BrowserCrawlingContext =
 handlePageFunction?: BrowserRequestHandler<LoadedContext<Context>>;
 /**
  * User-provided function that allows modifying the request object before it gets retried by the crawler.
- * It's executed before each retry for the requests that failed less than {@
+ * It's executed before each retry for the requests that failed less than {@link BrowserCrawlerOptions.maxRequestRetries|`maxRequestRetries`} times.
  *
- * The function receives the {@
+ * The function receives the {@link BrowserCrawlingContext}
  * (actual context will be enhanced with the crawler specific properties) as the first argument,
- * where the {@
+ * where the {@link BrowserCrawlingContext.request|`request`} corresponds to the request to be retried.
  * Second argument is the `Error` instance that
  * represents the last error thrown during processing of the request.
  */
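This block documents the hook that can mutate a request before its next retry; in current Crawlee that hook is exposed as the `errorHandler` option (the property declaration itself falls outside the hunk, so treat the name as an assumption). A sketch:

```ts
import { PlaywrightCrawler } from 'crawlee';

const crawler = new PlaywrightCrawler({
    maxRequestRetries: 3,
    // Runs before each retry of a request that has failed fewer than
    // `maxRequestRetries` times; the request object may be modified here.
    async errorHandler({ request, session }, error) {
        request.userData.lastError = error.message;
        // Illustrative: retire the session so the retry uses a fresh one.
        session?.retire();
    },
    async requestHandler({ page }) {
        await page.waitForSelector('#content', { timeout: 5_000 });
    },
});
```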
@@ -88,9 +88,9 @@ export interface BrowserCrawlerOptions<Context extends BrowserCrawlingContext =
 /**
  * A function to handle requests that failed more than `option.maxRequestRetries` times.
  *
- * The function receives the {@
+ * The function receives the {@link BrowserCrawlingContext}
  * (actual context will be enhanced with the crawler specific properties) as the first argument,
- * where the {@
+ * where the {@link BrowserCrawlingContext.request|`request`} corresponds to the failed request.
  * Second argument is the `Error` instance that
  * represents the last error thrown during processing of the request.
  */
@@ -98,9 +98,9 @@ export interface BrowserCrawlerOptions<Context extends BrowserCrawlingContext =
 /**
  * A function to handle requests that failed more than `option.maxRequestRetries` times.
  *
- * The function receives the {@
+ * The function receives the {@link BrowserCrawlingContext}
  * (actual context will be enhanced with the crawler specific properties) as the first argument,
- * where the {@
+ * where the {@link BrowserCrawlingContext.request|`request`} corresponds to the failed request.
  * Second argument is the `Error` instance that
  * represents the last error thrown during processing of the request.
  *
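For requests that exhaust `maxRequestRetries`, the handler described above receives the failed request plus the last thrown error. A sketch of a `failedRequestHandler` that records the failure (the pushed field names are illustrative):

```ts
import { Dataset, PlaywrightCrawler } from 'crawlee';

const crawler = new PlaywrightCrawler({
    maxRequestRetries: 2,
    async requestHandler({ page }) {
        await page.waitForSelector('.product', { timeout: 10_000 });
    },
    // Called once a request has exhausted all retries; `error` is the last
    // error thrown while processing it.
    async failedRequestHandler({ request }, error) {
        await Dataset.pushData({
            url: request.url,
            retryCount: request.retryCount,
            errorMessages: request.errorMessages,
            lastError: error.message,
        });
    },
});
```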
@@ -109,7 +109,7 @@ export interface BrowserCrawlerOptions<Context extends BrowserCrawlingContext =
  */
 handleFailedRequestFunction?: BrowserErrorHandler<Context>;
 /**
- * Custom options passed to the underlying {@
+ * Custom options passed to the underlying {@link BrowserPool} constructor.
  * We can tweak those to fine-tune browser management.
  */
 browserPoolOptions?: Partial<BrowserPoolOptions> & Partial<BrowserPoolHooks<__BrowserControllerReturn, __LaunchContextReturn>>;
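`browserPoolOptions` is forwarded to the `BrowserPool` constructor from `@crawlee/browser-pool`. A hedged sketch of typical tuning; the option names below are taken from `BrowserPoolOptions` as I understand them and should be verified against the installed version:

```ts
import { PlaywrightCrawler } from 'crawlee';

const crawler = new PlaywrightCrawler({
    browserPoolOptions: {
        // Rotate browsers after this many pages to keep memory in check.
        retireBrowserAfterPageCount: 100,
        // Cap the number of pages a single browser may have open at once.
        maxOpenPagesPerBrowser: 20,
        // Fingerprint generation/injection (enabled by default in recent versions).
        useFingerprints: true,
    },
    async requestHandler({ page, log }) {
        log.info(await page.title());
    },
});
```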
@@ -137,7 +137,7 @@ export interface BrowserCrawlerOptions<Context extends BrowserCrawlingContext =
  * ```
  *
  * Modyfing `pageOptions` is supported only in Playwright incognito.
- * See {@
+ * See {@link PrePageCreateHook}
  */
 preNavigationHooks?: BrowserHook<Context>[];
 /**
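`preNavigationHooks` run before each `page.goto()`; a hook receives the crawling context and the `gotoOptions` object it may mutate (per the restored `{@link PrePageCreateHook}` reference, changing `pageOptions` instead belongs to the browser pool's page-creation hooks). A minimal sketch with illustrative tweaks:

```ts
import { PlaywrightCrawler } from 'crawlee';

const crawler = new PlaywrightCrawler({
    preNavigationHooks: [
        async ({ page }, gotoOptions) => {
            // Tweak the upcoming page.goto() call for this request.
            if (gotoOptions) {
                gotoOptions.timeout = 30_000;
                gotoOptions.waitUntil = 'domcontentloaded';
            }
            // Work that needs the page before navigation also belongs here.
            await page.setViewportSize({ width: 1280, height: 720 });
        },
    ],
    async requestHandler({ page, log }) {
        log.info(`Loaded ${await page.title()}`);
    },
});
```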
@@ -169,7 +169,7 @@ export interface BrowserCrawlerOptions<Context extends BrowserCrawlingContext =
 persistCookiesPerSession?: boolean;
 /**
  * Whether to run browser in headless mode. Defaults to `true`.
- * Can be also set via {@
+ * Can be also set via {@link Configuration}.
  */
 headless?: boolean | 'new' | 'old';
 /**
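`headless` defaults to `true` and, per the restored link, can also be driven by the global `Configuration` (or the `CRAWLEE_HEADLESS` environment variable). A sketch of both routes; treat the exact configuration key as an assumption to verify:

```ts
import { Configuration, PlaywrightCrawler } from 'crawlee';

// Per-crawler: run a visible (headful) browser for this instance only.
const crawler = new PlaywrightCrawler({
    headless: false,
    async requestHandler({ page, log }) {
        log.info(await page.title());
    },
});

// Global alternative: the shared Configuration instance (also settable
// through the CRAWLEE_HEADLESS environment variable).
Configuration.getGlobalConfig().set('headless', false);
```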
@@ -192,45 +192,45 @@ export interface BrowserCrawlerOptions<Context extends BrowserCrawlingContext =
  *
  * Since `BrowserCrawler` uses headless (or even headful) browsers to download web pages and extract data,
  * it is useful for crawling of websites that require to execute JavaScript.
- * If the target website doesn't need JavaScript, we should consider using the {@
+ * If the target website doesn't need JavaScript, we should consider using the {@link CheerioCrawler},
  * which downloads the pages using raw HTTP requests and is about 10x faster.
  *
- * The source URLs are represented by the {@
- * provided by the {@
+ * The source URLs are represented by the {@link Request} objects that are fed from the {@link RequestList} or {@link RequestQueue} instances
+ * provided by the {@link BrowserCrawlerOptions.requestList|`requestList`} or {@link BrowserCrawlerOptions.requestQueue|`requestQueue`}
  * constructor options, respectively. If neither `requestList` nor `requestQueue` options are provided,
- * the crawler will open the default request queue either when the {@
- * or if `requests` parameter (representing the initial requests) of the {@
+ * the crawler will open the default request queue either when the {@link BrowserCrawler.addRequests|`crawler.addRequests()`} function is called,
+ * or if `requests` parameter (representing the initial requests) of the {@link BrowserCrawler.run|`crawler.run()`} function is provided.
  *
- * If both {@
- * the instance first processes URLs from the {@
- * to the {@
+ * If both {@link BrowserCrawlerOptions.requestList|`requestList`} and {@link BrowserCrawlerOptions.requestQueue|`requestQueue`} options are used,
+ * the instance first processes URLs from the {@link RequestList} and automatically enqueues all of them
+ * to the {@link RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
  *
- * The crawler finishes when there are no more {@
+ * The crawler finishes when there are no more {@link Request} objects to crawl.
  *
- * `BrowserCrawler` opens a new browser page (i.e. tab or window) for each {@
- * and then calls the function provided by user as the {@
+ * `BrowserCrawler` opens a new browser page (i.e. tab or window) for each {@link Request} object to crawl
+ * and then calls the function provided by user as the {@link BrowserCrawlerOptions.requestHandler|`requestHandler`} option.
  *
  * New pages are only opened when there is enough free CPU and memory available,
- * using the functionality provided by the {@
- * All {@
+ * using the functionality provided by the {@link AutoscaledPool} class.
+ * All {@link AutoscaledPool} configuration options can be passed to the {@link BrowserCrawlerOptions.autoscaledPoolOptions|`autoscaledPoolOptions`}
  * parameter of the `BrowserCrawler` constructor.
- * For user convenience, the {@
- * {@
- * underlying {@
+ * For user convenience, the {@link AutoscaledPoolOptions.minConcurrency|`minConcurrency`} and
+ * {@link AutoscaledPoolOptions.maxConcurrency|`maxConcurrency`} options of the
+ * underlying {@link AutoscaledPool} constructor are available directly in the `BrowserCrawler` constructor.
  *
- * > *NOTE:* the pool of browser instances is internally managed by the {@
+ * > *NOTE:* the pool of browser instances is internally managed by the {@link BrowserPool} class.
  *
  * @category Crawlers
  */
 export declare abstract class BrowserCrawler<InternalBrowserPoolOptions extends BrowserPoolOptions = BrowserPoolOptions, LaunchOptions extends Dictionary | undefined = Dictionary, Context extends BrowserCrawlingContext = BrowserCrawlingContext, GoToOptions extends Dictionary = Dictionary> extends BasicCrawler<Context> {
 readonly config: Configuration;
 /**
- * A reference to the underlying {@
+ * A reference to the underlying {@link ProxyConfiguration} class that manages the crawler's proxies.
  * Only available if used by the crawler.
  */
 proxyConfiguration?: ProxyConfiguration;
 /**
- * A reference to the underlying {@
+ * A reference to the underlying {@link BrowserPool} class that manages the crawler's browsers.
  */
 browserPool: BrowserPool<InternalBrowserPoolOptions>;
 launchContext: BrowserLaunchContext<LaunchOptions, unknown>;
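The restored class-level JSDoc explains where requests come from (`requestList`/`requestQueue`, `crawler.addRequests()`, or the `requests` argument of `crawler.run()`) and that concurrency is scaled by the `AutoscaledPool`, with `minConcurrency`/`maxConcurrency` exposed directly on the constructor. A small end-to-end sketch using `PlaywrightCrawler` (an assumption, since `BrowserCrawler` is abstract; URLs are illustrative):

```ts
import { PlaywrightCrawler } from 'crawlee';

const crawler = new PlaywrightCrawler({
    // Convenience pass-through to the underlying AutoscaledPool.
    minConcurrency: 1,
    maxConcurrency: 10,
    async requestHandler({ request, page, enqueueLinks, log }) {
        log.info(`Visited ${request.url}: ${await page.title()}`);
        // Newly discovered links land in the (implicitly opened) default RequestQueue.
        await enqueueLinks();
    },
});

// Requests can be added explicitly (this opens the default request queue)...
await crawler.addRequests(['https://crawlee.dev/docs']);
// ...and/or passed as the initial batch to run(); the crawler finishes
// once there are no more requests to process.
await crawler.run(['https://crawlee.dev']);
```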
@@ -18,33 +18,33 @@ const timeout_1 = require("@apify/timeout");
  *
  * Since `BrowserCrawler` uses headless (or even headful) browsers to download web pages and extract data,
  * it is useful for crawling of websites that require to execute JavaScript.
- * If the target website doesn't need JavaScript, we should consider using the {@
+ * If the target website doesn't need JavaScript, we should consider using the {@link CheerioCrawler},
  * which downloads the pages using raw HTTP requests and is about 10x faster.
  *
- * The source URLs are represented by the {@
- * provided by the {@
+ * The source URLs are represented by the {@link Request} objects that are fed from the {@link RequestList} or {@link RequestQueue} instances
+ * provided by the {@link BrowserCrawlerOptions.requestList|`requestList`} or {@link BrowserCrawlerOptions.requestQueue|`requestQueue`}
  * constructor options, respectively. If neither `requestList` nor `requestQueue` options are provided,
- * the crawler will open the default request queue either when the {@
- * or if `requests` parameter (representing the initial requests) of the {@
+ * the crawler will open the default request queue either when the {@link BrowserCrawler.addRequests|`crawler.addRequests()`} function is called,
+ * or if `requests` parameter (representing the initial requests) of the {@link BrowserCrawler.run|`crawler.run()`} function is provided.
  *
- * If both {@
- * the instance first processes URLs from the {@
- * to the {@
+ * If both {@link BrowserCrawlerOptions.requestList|`requestList`} and {@link BrowserCrawlerOptions.requestQueue|`requestQueue`} options are used,
+ * the instance first processes URLs from the {@link RequestList} and automatically enqueues all of them
+ * to the {@link RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
  *
- * The crawler finishes when there are no more {@
+ * The crawler finishes when there are no more {@link Request} objects to crawl.
  *
- * `BrowserCrawler` opens a new browser page (i.e. tab or window) for each {@
- * and then calls the function provided by user as the {@
+ * `BrowserCrawler` opens a new browser page (i.e. tab or window) for each {@link Request} object to crawl
+ * and then calls the function provided by user as the {@link BrowserCrawlerOptions.requestHandler|`requestHandler`} option.
  *
  * New pages are only opened when there is enough free CPU and memory available,
- * using the functionality provided by the {@
- * All {@
+ * using the functionality provided by the {@link AutoscaledPool} class.
+ * All {@link AutoscaledPool} configuration options can be passed to the {@link BrowserCrawlerOptions.autoscaledPoolOptions|`autoscaledPoolOptions`}
  * parameter of the `BrowserCrawler` constructor.
- * For user convenience, the {@
- * {@
- * underlying {@
+ * For user convenience, the {@link AutoscaledPoolOptions.minConcurrency|`minConcurrency`} and
+ * {@link AutoscaledPoolOptions.maxConcurrency|`maxConcurrency`} options of the
+ * underlying {@link AutoscaledPool} constructor are available directly in the `BrowserCrawler` constructor.
  *
- * > *NOTE:* the pool of browser instances is internally managed by the {@
+ * > *NOTE:* the pool of browser instances is internally managed by the {@link BrowserPool} class.
  *
  * @category Crawlers
  */
@@ -70,7 +70,7 @@ class BrowserCrawler extends basic_1.BasicCrawler {
 value: config
 });
 /**
- * A reference to the underlying {@
+ * A reference to the underlying {@link ProxyConfiguration} class that manages the crawler's proxies.
  * Only available if used by the crawler.
  */
 Object.defineProperty(this, "proxyConfiguration", {
@@ -80,7 +80,7 @@ class BrowserCrawler extends basic_1.BasicCrawler {
 value: void 0
 });
 /**
- * A reference to the underlying {@
+ * A reference to the underlying {@link BrowserPool} class that manages the crawler's browsers.
  */
 Object.defineProperty(this, "browserPool", {
 enumerable: true,
@@ -11,7 +11,7 @@ export interface BrowserLaunchContext<TOptions, Launcher> extends BrowserPluginO
  */
 proxyUrl?: string;
 /**
- * If `true` and the `executablePath` option of {@
+ * If `true` and the `executablePath` option of {@link BrowserLaunchContext.launchOptions|`launchOptions`} is not set,
  * the launcher will launch full Google Chrome browser available on the machine
  * rather than the bundled Chromium. The path to Chrome executable
  * is taken from the `CRAWLEE_CHROME_EXECUTABLE_PATH` environment variable if provided,
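This last code hunk documents `BrowserLaunchContext.useChrome`: when `launchOptions.executablePath` is not set, the launcher prefers a locally installed Google Chrome, resolving its path from `CRAWLEE_CHROME_EXECUTABLE_PATH` when provided. A sketch of passing it through a crawler's `launchContext` (the extra launch argument is illustrative):

```ts
import { PlaywrightCrawler } from 'crawlee';

const crawler = new PlaywrightCrawler({
    launchContext: {
        // Prefer a locally installed Google Chrome over the bundled Chromium.
        // The executable path comes from CRAWLEE_CHROME_EXECUTABLE_PATH when set,
        // otherwise from the usual per-platform install locations.
        useChrome: true,
        launchOptions: {
            // Setting executablePath here explicitly would take precedence.
            args: ['--disable-gpu'],
        },
    },
    async requestHandler({ page, log }) {
        log.info(await page.title());
    },
});
```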
package/package.json  (CHANGED)

@@ -1,6 +1,6 @@
 {
 "name": "@crawlee/browser",
-"version": "3.13.3-beta.8",
+"version": "3.13.3",
 "description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
 "engines": {
 "node": ">=16.0.0"
@@ -54,10 +54,10 @@
 },
 "dependencies": {
 "@apify/timeout": "^0.3.0",
-"@crawlee/basic": "3.13.3
-"@crawlee/browser-pool": "3.13.3
-"@crawlee/types": "3.13.3
-"@crawlee/utils": "3.13.3
+"@crawlee/basic": "3.13.3",
+"@crawlee/browser-pool": "3.13.3",
+"@crawlee/types": "3.13.3",
+"@crawlee/utils": "3.13.3",
 "ow": "^0.28.1",
 "tslib": "^2.4.0",
 "type-fest": "^4.0.0"
@@ -81,5 +81,5 @@
 }
 }
 },
-"gitHead": "
+"gitHead": "279cadbd3cd6342f36cc4d841e07b999e472420d"
 }