@crawlee/http 3.13.3-beta.9 → 3.13.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

@@ -29,15 +29,15 @@ JSONData extends Dictionary = any> = RequestHandler<FileDownloadCrawlingContext<
  *
  * Since `FileDownload` uses raw HTTP requests to download the files, it is very fast and bandwidth-efficient.
  * However, it doesn't parse the content - if you need to e.g. extract data from the downloaded files,
- * you might need to use {@
+ * you might need to use {@link CheerioCrawler}, {@link PuppeteerCrawler} or {@link PlaywrightCrawler} instead.
  *
- * `FileCrawler` downloads each URL using a plain HTTP request and then invokes the user-provided {@
+ * `FileCrawler` downloads each URL using a plain HTTP request and then invokes the user-provided {@link FileDownloadOptions.requestHandler} where the user can specify what to do with the downloaded data.
  *
- * The source URLs are represented using {@
+ * The source URLs are represented using {@link Request} objects that are fed from {@link RequestList} or {@link RequestQueue} instances provided by the {@link FileDownloadOptions.requestList} or {@link FileDownloadOptions.requestQueue} constructor options, respectively.
  *
- * If both {@
+ * If both {@link FileDownloadOptions.requestList} and {@link FileDownloadOptions.requestQueue} are used, the instance first processes URLs from the {@link RequestList} and automatically enqueues all of them to {@link RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
  *
- * The crawler finishes when there are no more {@
+ * The crawler finishes when there are no more {@link Request} objects to crawl.
  *
  * We can use the `preNavigationHooks` to adjust `gotOptions`:
  *
@@ -49,7 +49,7 @@ JSONData extends Dictionary = any> = RequestHandler<FileDownloadCrawlingContext<
  * ]
  * ```
  *
- * New requests are only dispatched when there is enough free CPU and memory available, using the functionality provided by the {@
+ * New requests are only dispatched when there is enough free CPU and memory available, using the functionality provided by the {@link AutoscaledPool} class. All {@link AutoscaledPool} configuration options can be passed to the `autoscaledPoolOptions` parameter of the `FileCrawler` constructor. For user convenience, the `minConcurrency` and `maxConcurrency` {@link AutoscaledPool} options are available directly in the `FileCrawler` constructor.
  *
  * ## Example usage
  *
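The comments in the two hunks above describe the `FileDownload` constructor options, the `preNavigationHooks` that receive `gotOptions`, and the AutoscaledPool shortcuts. A minimal sketch of that usage, assuming imports from the published `@crawlee/http` package and placeholder URLs (illustrative only, not part of the diff):

```ts
import { FileDownload } from '@crawlee/http';

const crawler = new FileDownload({
    // minConcurrency and maxConcurrency are AutoscaledPool options exposed on the constructor.
    minConcurrency: 1,
    maxConcurrency: 5,
    // Each hook receives the crawling context and the `gotOptions` used for the HTTP request.
    preNavigationHooks: [
        (_crawlingContext, gotOptions) => {
            gotOptions.timeout = { request: 30_000 };
        },
    ],
    // The requestHandler decides what to do with the downloaded data.
    async requestHandler({ request, body }) {
        console.log(`Downloaded ${request.url} (${body.length} bytes)`);
    },
});

await crawler.run(['https://example.com/file.pdf']);
```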
@@ -74,9 +74,9 @@ export declare class FileDownload extends HttpCrawler<FileDownloadCrawlingContex
     private streamRequestHandler;
 }
 /**
- * Creates new {@
- * This instance can then serve as a `requestHandler` of your {@
- * Defaults to the {@
+ * Creates new {@link Router} instance that works based on request labels.
+ * This instance can then serve as a `requestHandler` of your {@link FileDownload}.
+ * Defaults to the {@link FileDownloadCrawlingContext}.
  *
  * > Serves as a shortcut for using `Router.create<FileDownloadCrawlingContext>()`.
  *
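The hunk above documents a factory that is a shortcut for `Router.create<FileDownloadCrawlingContext>()`. A sketch of how such a router is typically wired in as the `requestHandler` (import paths, labels and URLs are assumptions for illustration):

```ts
import { FileDownload, Router } from '@crawlee/http';
import type { FileDownloadCrawlingContext } from '@crawlee/http';

// The documented shortcut is equivalent to creating the router explicitly:
const router = Router.create<FileDownloadCrawlingContext>();

// Handlers are selected by the label of the incoming request.
router.addHandler('IMAGE', async ({ request, body }) => {
    console.log(`Image ${request.url}: ${body.length} bytes`);
});

// Fallback for requests without a matching label.
router.addDefaultHandler(async ({ request }) => {
    console.log(`Unlabelled request: ${request.url}`);
});

// The router itself serves as the crawler's requestHandler.
const crawler = new FileDownload({ requestHandler: router });
await crawler.run([{ url: 'https://example.com/logo.png', label: 'IMAGE' }]);
```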
@@ -10,15 +10,15 @@ const index_1 = require("../index");
  *
  * Since `FileDownload` uses raw HTTP requests to download the files, it is very fast and bandwidth-efficient.
  * However, it doesn't parse the content - if you need to e.g. extract data from the downloaded files,
- * you might need to use {@
+ * you might need to use {@link CheerioCrawler}, {@link PuppeteerCrawler} or {@link PlaywrightCrawler} instead.
  *
- * `FileCrawler` downloads each URL using a plain HTTP request and then invokes the user-provided {@
+ * `FileCrawler` downloads each URL using a plain HTTP request and then invokes the user-provided {@link FileDownloadOptions.requestHandler} where the user can specify what to do with the downloaded data.
  *
- * The source URLs are represented using {@
+ * The source URLs are represented using {@link Request} objects that are fed from {@link RequestList} or {@link RequestQueue} instances provided by the {@link FileDownloadOptions.requestList} or {@link FileDownloadOptions.requestQueue} constructor options, respectively.
  *
- * If both {@
+ * If both {@link FileDownloadOptions.requestList} and {@link FileDownloadOptions.requestQueue} are used, the instance first processes URLs from the {@link RequestList} and automatically enqueues all of them to {@link RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
  *
- * The crawler finishes when there are no more {@
+ * The crawler finishes when there are no more {@link Request} objects to crawl.
  *
  * We can use the `preNavigationHooks` to adjust `gotOptions`:
  *
@@ -30,7 +30,7 @@ const index_1 = require("../index");
  * ]
  * ```
  *
- * New requests are only dispatched when there is enough free CPU and memory available, using the functionality provided by the {@
+ * New requests are only dispatched when there is enough free CPU and memory available, using the functionality provided by the {@link AutoscaledPool} class. All {@link AutoscaledPool} configuration options can be passed to the `autoscaledPoolOptions` parameter of the `FileCrawler` constructor. For user convenience, the `minConcurrency` and `maxConcurrency` {@link AutoscaledPool} options are available directly in the `FileCrawler` constructor.
  *
  * ## Example usage
  *
@@ -130,9 +130,9 @@ class FileDownload extends index_1.HttpCrawler {
 }
 exports.FileDownload = FileDownload;
 /**
- * Creates new {@
- * This instance can then serve as a `requestHandler` of your {@
- * Defaults to the {@
+ * Creates new {@link Router} instance that works based on request labels.
+ * This instance can then serve as a `requestHandler` of your {@link FileDownload}.
+ * Defaults to the {@link FileDownloadCrawlingContext}.
  *
  * > Serves as a shortcut for using `Router.create<FileDownloadCrawlingContext>()`.
  *
@@ -21,7 +21,7 @@ export type HttpErrorHandler<UserData extends Dictionary = any, // with default
 JSONData extends JsonValue = any> = ErrorHandler<HttpCrawlingContext<UserData, JSONData>>;
 export interface HttpCrawlerOptions<Context extends InternalHttpCrawlingContext = InternalHttpCrawlingContext> extends BasicCrawlerOptions<Context> {
     /**
-     * An alias for {@
+     * An alias for {@link HttpCrawlerOptions.requestHandler}
      * Soon to be removed, use `requestHandler` instead.
      * @deprecated
     */
@@ -54,7 +54,7 @@ export interface HttpCrawlerOptions<Context extends InternalHttpCrawlingContext
     * ```
     *
     * Modyfing `pageOptions` is supported only in Playwright incognito.
-     * See {@
+     * See {@link PrePageCreateHook}
     */
    preNavigationHooks?: InternalHttpHook<Context>[];
    /**
@@ -80,7 +80,7 @@ export interface HttpCrawlerOptions<Context extends InternalHttpCrawlingContext
     * Sadly, there are some websites which use invalid headers. Those are encoded using the UTF-8 encoding.
     * If those sites actually use a different encoding, the response will be corrupted. You can use
     * `suggestResponseEncoding` to fall back to a certain encoding, if you know that your target website uses it.
-     * To force a certain encoding, disregarding the response headers, use {@
+     * To force a certain encoding, disregarding the response headers, use {@link HttpCrawlerOptions.forceResponseEncoding}
     * ```
     * // Will fall back to windows-1250 encoding if none found
     * suggestResponseEncoding: 'windows-1250'
@@ -90,7 +90,7 @@ export interface HttpCrawlerOptions<Context extends InternalHttpCrawlingContext
    /**
     * By default this crawler will extract correct encoding from the HTTP response headers. Use `forceResponseEncoding`
     * to force a certain encoding, disregarding the response headers.
-     * To only provide a default for missing encodings, use {@
+     * To only provide a default for missing encodings, use {@link HttpCrawlerOptions.suggestResponseEncoding}
     * ```
     * // Will force windows-1250 encoding even if headers say otherwise
     * forceResponseEncoding: 'windows-1250'
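The two options above differ only in precedence: `suggestResponseEncoding` fills in a missing encoding, while `forceResponseEncoding` overrides whatever the headers say. A short illustrative sketch (the values are examples, not defaults):

```ts
import { HttpCrawler } from '@crawlee/http';

const crawler = new HttpCrawler({
    // Used only when the Content-Type header does not declare a usable encoding.
    suggestResponseEncoding: 'windows-1250',
    // To override the headers unconditionally, use forceResponseEncoding instead:
    // forceResponseEncoding: 'windows-1250',
    async requestHandler({ request, body }) {
        console.log(request.url, body.slice(0, 80));
    },
});
```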
@@ -160,7 +160,7 @@ Crawler = HttpCrawler<any>> extends CrawlingContext<Crawler, UserData> {
     */
    waitForSelector(selector: string, timeoutMs?: number): Promise<void>;
    /**
-     * Returns Cheerio handle for `page.content()`, allowing to work with the data same way as with {@
+     * Returns Cheerio handle for `page.content()`, allowing to work with the data same way as with {@link CheerioCrawler}.
     * When provided with the `selector` argument, it will throw if it's not available.
     *
     * **Example usage:**
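The context helpers documented above are intended to be used inside the `requestHandler`. A sketch of that usage, assuming an HTML response (the selector and URL are placeholders):

```ts
import { HttpCrawler } from '@crawlee/http';

const crawler = new HttpCrawler({
    async requestHandler({ request, waitForSelector, parseWithCheerio }) {
        // Throws if the selector cannot be found in the downloaded content.
        await waitForSelector('h1');
        // Cheerio handle over the response body, as with CheerioCrawler.
        const $ = await parseWithCheerio();
        console.log(request.url, $('h1').first().text());
    },
});

await crawler.run(['https://example.com']);
```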
@@ -183,20 +183,20 @@ JSONData extends JsonValue = any> = RequestHandler<HttpCrawlingContext<UserData,
  * or from a dynamic queue of URLs enabling recursive crawling of websites.
  *
  * It is very fast and efficient on data bandwidth. However, if the target website requires JavaScript
- * to display the content, you might need to use {@
+ * to display the content, you might need to use {@link PuppeteerCrawler} or {@link PlaywrightCrawler} instead,
  * because it loads the pages using full-featured headless Chrome browser.
  *
  * This crawler downloads each URL using a plain HTTP request and doesn't do any HTML parsing.
  *
- * The source URLs are represented using {@
- * {@
- * or {@
+ * The source URLs are represented using {@link Request} objects that are fed from
+ * {@link RequestList} or {@link RequestQueue} instances provided by the {@link HttpCrawlerOptions.requestList}
+ * or {@link HttpCrawlerOptions.requestQueue} constructor options, respectively.
  *
- * If both {@
- * the instance first processes URLs from the {@
- * to {@
+ * If both {@link HttpCrawlerOptions.requestList} and {@link HttpCrawlerOptions.requestQueue} are used,
+ * the instance first processes URLs from the {@link RequestList} and automatically enqueues all of them
+ * to {@link RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
  *
- * The crawler finishes when there are no more {@
+ * The crawler finishes when there are no more {@link Request} objects to crawl.
  *
  * We can use the `preNavigationHooks` to adjust `gotOptions`:
  *
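The description above covers feeding `Request` objects from a `RequestList` or `RequestQueue`. A sketch of the `RequestQueue` variant, with placeholder URLs and assumed import paths:

```ts
import { HttpCrawler, RequestQueue } from '@crawlee/http';

const requestQueue = await RequestQueue.open();
await requestQueue.addRequest({ url: 'https://example.com/start' });

const crawler = new HttpCrawler({
    requestQueue,
    async requestHandler({ request, body, crawler }) {
        console.log(`${request.url} returned ${body.length} bytes`);
        // Newly enqueued requests are deduplicated by their unique key,
        // so a single URL is not crawled multiple times.
        await crawler.addRequests([{ url: 'https://example.com/next' }]);
    },
});

await crawler.run();
```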
@@ -211,15 +211,15 @@ JSONData extends JsonValue = any> = RequestHandler<HttpCrawlingContext<UserData,
  * By default, this crawler only processes web pages with the `text/html`
  * and `application/xhtml+xml` MIME content types (as reported by the `Content-Type` HTTP header),
  * and skips pages with other content types. If you want the crawler to process other content types,
- * use the {@
+ * use the {@link HttpCrawlerOptions.additionalMimeTypes} constructor option.
  * Beware that the parsing behavior differs for HTML, XML, JSON and other types of content.
- * For details, see {@
+ * For details, see {@link HttpCrawlerOptions.requestHandler}.
  *
  * New requests are only dispatched when there is enough free CPU and memory available,
- * using the functionality provided by the {@
- * All {@
+ * using the functionality provided by the {@link AutoscaledPool} class.
+ * All {@link AutoscaledPool} configuration options can be passed to the `autoscaledPoolOptions`
  * parameter of the constructor. For user convenience, the `minConcurrency` and `maxConcurrency`
- * {@
+ * {@link AutoscaledPool} options are available directly in the constructor.
  *
  * **Example usage:**
  *
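A sketch combining the options mentioned in the hunk above: extra MIME types plus the AutoscaledPool shortcuts (the values are illustrative, not defaults):

```ts
import { HttpCrawler } from '@crawlee/http';

const crawler = new HttpCrawler({
    // Handle JSON responses in addition to text/html and application/xhtml+xml.
    additionalMimeTypes: ['application/json'],
    // Convenience shortcuts for the underlying AutoscaledPool...
    minConcurrency: 2,
    maxConcurrency: 20,
    // ...and the full options object for any other AutoscaledPool setting.
    autoscaledPoolOptions: { desiredConcurrency: 5 },
    async requestHandler({ request, contentType, json, body }) {
        console.log(request.url, contentType.type, json ?? body.length);
    },
});
```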
@@ -247,7 +247,7 @@ JSONData extends JsonValue = any> = RequestHandler<HttpCrawlingContext<UserData,
 export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any, any, HttpCrawler<Context>>> extends BasicCrawler<Context> {
     readonly config: Configuration;
     /**
-     * A reference to the underlying {@
+     * A reference to the underlying {@link ProxyConfiguration} class that manages the crawler's proxies.
     * Only available if used by the crawler.
     */
    proxyConfiguration?: ProxyConfiguration;
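The `proxyConfiguration` property above is only set when a configuration is passed in. A sketch of that wiring (the proxy URL is a placeholder and the import paths are assumed):

```ts
import { HttpCrawler, ProxyConfiguration } from '@crawlee/http';

const proxyConfiguration = new ProxyConfiguration({
    proxyUrls: ['http://user:pass@proxy.example.com:8000'],
});

const crawler = new HttpCrawler({
    proxyConfiguration,
    async requestHandler({ request, proxyInfo }) {
        console.log(`${request.url} fetched via ${proxyInfo?.url}`);
    },
});

// The crawler exposes the configuration it was given; otherwise the property stays undefined.
console.log(crawler.proxyConfiguration === proxyConfiguration); // true
```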
@@ -496,9 +496,9 @@ interface RequestFunctionOptions {
     gotOptions: OptionsInit;
 }
 /**
- * Creates new {@
- * This instance can then serve as a `requestHandler` of your {@
- * Defaults to the {@
+ * Creates new {@link Router} instance that works based on request labels.
+ * This instance can then serve as a `requestHandler` of your {@link HttpCrawler}.
+ * Defaults to the {@link HttpCrawlingContext}.
  *
  * > Serves as a shortcut for using `Router.create<HttpCrawlingContext>()`.
  *
@@ -36,20 +36,20 @@ const HTTP_OPTIMIZED_AUTOSCALED_POOL_OPTIONS = {
  * or from a dynamic queue of URLs enabling recursive crawling of websites.
  *
  * It is very fast and efficient on data bandwidth. However, if the target website requires JavaScript
- * to display the content, you might need to use {@
+ * to display the content, you might need to use {@link PuppeteerCrawler} or {@link PlaywrightCrawler} instead,
  * because it loads the pages using full-featured headless Chrome browser.
  *
  * This crawler downloads each URL using a plain HTTP request and doesn't do any HTML parsing.
  *
- * The source URLs are represented using {@
- * {@
- * or {@
+ * The source URLs are represented using {@link Request} objects that are fed from
+ * {@link RequestList} or {@link RequestQueue} instances provided by the {@link HttpCrawlerOptions.requestList}
+ * or {@link HttpCrawlerOptions.requestQueue} constructor options, respectively.
  *
- * If both {@
- * the instance first processes URLs from the {@
- * to {@
+ * If both {@link HttpCrawlerOptions.requestList} and {@link HttpCrawlerOptions.requestQueue} are used,
+ * the instance first processes URLs from the {@link RequestList} and automatically enqueues all of them
+ * to {@link RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
  *
- * The crawler finishes when there are no more {@
+ * The crawler finishes when there are no more {@link Request} objects to crawl.
  *
  * We can use the `preNavigationHooks` to adjust `gotOptions`:
  *
@@ -64,15 +64,15 @@ const HTTP_OPTIMIZED_AUTOSCALED_POOL_OPTIONS = {
  * By default, this crawler only processes web pages with the `text/html`
  * and `application/xhtml+xml` MIME content types (as reported by the `Content-Type` HTTP header),
  * and skips pages with other content types. If you want the crawler to process other content types,
- * use the {@
+ * use the {@link HttpCrawlerOptions.additionalMimeTypes} constructor option.
  * Beware that the parsing behavior differs for HTML, XML, JSON and other types of content.
- * For details, see {@
+ * For details, see {@link HttpCrawlerOptions.requestHandler}.
  *
  * New requests are only dispatched when there is enough free CPU and memory available,
- * using the functionality provided by the {@
- * All {@
+ * using the functionality provided by the {@link AutoscaledPool} class.
+ * All {@link AutoscaledPool} configuration options can be passed to the `autoscaledPoolOptions`
  * parameter of the constructor. For user convenience, the `minConcurrency` and `maxConcurrency`
- * {@
+ * {@link AutoscaledPool} options are available directly in the constructor.
  *
  * **Example usage:**
  *
@@ -123,7 +123,7 @@ class HttpCrawler extends basic_1.BasicCrawler {
            value: config
        });
        /**
-         * A reference to the underlying {@
+         * A reference to the underlying {@link ProxyConfiguration} class that manages the crawler's proxies.
         * Only available if used by the crawler.
         */
        Object.defineProperty(this, "proxyConfiguration", {
@@ -699,9 +699,9 @@ function parseContentTypeFromResponse(response) {
     };
 }
 /**
- * Creates new {@
- * This instance can then serve as a `requestHandler` of your {@
- * Defaults to the {@
+ * Creates new {@link Router} instance that works based on request labels.
+ * This instance can then serve as a `requestHandler` of your {@link HttpCrawler}.
+ * Defaults to the {@link HttpCrawlingContext}.
  *
  * > Serves as a shortcut for using `Router.create<HttpCrawlingContext>()`.
  *

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
     "name": "@crawlee/http",
-    "version": "3.13.3-beta.9",
+    "version": "3.13.3",
     "description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
     "engines": {
         "node": ">=16.0.0"
@@ -55,9 +55,9 @@
     "dependencies": {
         "@apify/timeout": "^0.3.0",
         "@apify/utilities": "^2.7.10",
-        "@crawlee/basic": "3.13.3
-        "@crawlee/types": "3.13.3
-        "@crawlee/utils": "3.13.3
+        "@crawlee/basic": "3.13.3",
+        "@crawlee/types": "3.13.3",
+        "@crawlee/utils": "3.13.3",
         "@types/content-type": "^1.1.5",
         "cheerio": "1.0.0-rc.12",
         "content-type": "^1.0.4",
@@ -75,5 +75,5 @@
             }
         }
     },
-    "gitHead": "
+    "gitHead": "279cadbd3cd6342f36cc4d841e07b999e472420d"
 }