apify 2.3.1-beta.4 → 3.0.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211)
  1. package/README.md +6 -5
  2. package/package.json +69 -128
  3. package/build/actor.d.ts +0 -113
  4. package/build/actor.d.ts.map +0 -1
  5. package/build/actor.js +0 -582
  6. package/build/actor.js.map +0 -1
  7. package/build/apify.d.ts +0 -752
  8. package/build/apify.d.ts.map +0 -1
  9. package/build/apify.js +0 -877
  10. package/build/apify.js.map +0 -1
  11. package/build/autoscaling/autoscaled_pool.d.ts +0 -384
  12. package/build/autoscaling/autoscaled_pool.d.ts.map +0 -1
  13. package/build/autoscaling/autoscaled_pool.js +0 -557
  14. package/build/autoscaling/autoscaled_pool.js.map +0 -1
  15. package/build/autoscaling/snapshotter.d.ts +0 -278
  16. package/build/autoscaling/snapshotter.d.ts.map +0 -1
  17. package/build/autoscaling/snapshotter.js +0 -447
  18. package/build/autoscaling/snapshotter.js.map +0 -1
  19. package/build/autoscaling/system_status.d.ts +0 -224
  20. package/build/autoscaling/system_status.d.ts.map +0 -1
  21. package/build/autoscaling/system_status.js +0 -228
  22. package/build/autoscaling/system_status.js.map +0 -1
  23. package/build/browser_launchers/browser_launcher.d.ts +0 -154
  24. package/build/browser_launchers/browser_launcher.d.ts.map +0 -1
  25. package/build/browser_launchers/browser_launcher.js +0 -160
  26. package/build/browser_launchers/browser_launcher.js.map +0 -1
  27. package/build/browser_launchers/browser_plugin.d.ts +0 -23
  28. package/build/browser_launchers/browser_plugin.d.ts.map +0 -1
  29. package/build/browser_launchers/browser_plugin.js +0 -25
  30. package/build/browser_launchers/browser_plugin.js.map +0 -1
  31. package/build/browser_launchers/playwright_launcher.d.ts +0 -131
  32. package/build/browser_launchers/playwright_launcher.d.ts.map +0 -1
  33. package/build/browser_launchers/playwright_launcher.js +0 -150
  34. package/build/browser_launchers/playwright_launcher.js.map +0 -1
  35. package/build/browser_launchers/puppeteer_launcher.d.ts +0 -153
  36. package/build/browser_launchers/puppeteer_launcher.d.ts.map +0 -1
  37. package/build/browser_launchers/puppeteer_launcher.js +0 -197
  38. package/build/browser_launchers/puppeteer_launcher.js.map +0 -1
  39. package/build/cache_container.d.ts +0 -31
  40. package/build/cache_container.d.ts.map +0 -1
  41. package/build/cache_container.js +0 -48
  42. package/build/cache_container.js.map +0 -1
  43. package/build/configuration.d.ts +0 -226
  44. package/build/configuration.d.ts.map +0 -1
  45. package/build/configuration.js +0 -325
  46. package/build/configuration.js.map +0 -1
  47. package/build/constants.d.ts +0 -37
  48. package/build/constants.d.ts.map +0 -1
  49. package/build/constants.js +0 -41
  50. package/build/constants.js.map +0 -1
  51. package/build/crawlers/basic_crawler.d.ts +0 -443
  52. package/build/crawlers/basic_crawler.d.ts.map +0 -1
  53. package/build/crawlers/basic_crawler.js +0 -664
  54. package/build/crawlers/basic_crawler.js.map +0 -1
  55. package/build/crawlers/browser_crawler.d.ts +0 -512
  56. package/build/crawlers/browser_crawler.d.ts.map +0 -1
  57. package/build/crawlers/browser_crawler.js +0 -540
  58. package/build/crawlers/browser_crawler.js.map +0 -1
  59. package/build/crawlers/cheerio_crawler.d.ts +0 -931
  60. package/build/crawlers/cheerio_crawler.d.ts.map +0 -1
  61. package/build/crawlers/cheerio_crawler.js +0 -913
  62. package/build/crawlers/cheerio_crawler.js.map +0 -1
  63. package/build/crawlers/crawler_extension.d.ts +0 -10
  64. package/build/crawlers/crawler_extension.d.ts.map +0 -1
  65. package/build/crawlers/crawler_extension.js +0 -19
  66. package/build/crawlers/crawler_extension.js.map +0 -1
  67. package/build/crawlers/crawler_utils.d.ts +0 -34
  68. package/build/crawlers/crawler_utils.d.ts.map +0 -1
  69. package/build/crawlers/crawler_utils.js +0 -87
  70. package/build/crawlers/crawler_utils.js.map +0 -1
  71. package/build/crawlers/playwright_crawler.d.ts +0 -448
  72. package/build/crawlers/playwright_crawler.d.ts.map +0 -1
  73. package/build/crawlers/playwright_crawler.js +0 -299
  74. package/build/crawlers/playwright_crawler.js.map +0 -1
  75. package/build/crawlers/puppeteer_crawler.d.ts +0 -425
  76. package/build/crawlers/puppeteer_crawler.d.ts.map +0 -1
  77. package/build/crawlers/puppeteer_crawler.js +0 -299
  78. package/build/crawlers/puppeteer_crawler.js.map +0 -1
  79. package/build/crawlers/statistics.d.ts +0 -185
  80. package/build/crawlers/statistics.d.ts.map +0 -1
  81. package/build/crawlers/statistics.js +0 -331
  82. package/build/crawlers/statistics.js.map +0 -1
  83. package/build/enqueue_links/click_elements.d.ts +0 -179
  84. package/build/enqueue_links/click_elements.d.ts.map +0 -1
  85. package/build/enqueue_links/click_elements.js +0 -434
  86. package/build/enqueue_links/click_elements.js.map +0 -1
  87. package/build/enqueue_links/enqueue_links.d.ts +0 -117
  88. package/build/enqueue_links/enqueue_links.d.ts.map +0 -1
  89. package/build/enqueue_links/enqueue_links.js +0 -163
  90. package/build/enqueue_links/enqueue_links.js.map +0 -1
  91. package/build/enqueue_links/shared.d.ts +0 -42
  92. package/build/enqueue_links/shared.d.ts.map +0 -1
  93. package/build/enqueue_links/shared.js +0 -121
  94. package/build/enqueue_links/shared.js.map +0 -1
  95. package/build/errors.d.ts +0 -29
  96. package/build/errors.d.ts.map +0 -1
  97. package/build/errors.js +0 -38
  98. package/build/errors.js.map +0 -1
  99. package/build/events.d.ts +0 -11
  100. package/build/events.d.ts.map +0 -1
  101. package/build/events.js +0 -147
  102. package/build/events.js.map +0 -1
  103. package/build/index.d.ts +0 -4
  104. package/build/index.d.ts.map +0 -1
  105. package/build/index.js +0 -7
  106. package/build/index.js.map +0 -1
  107. package/build/main.d.ts +0 -179
  108. package/build/main.d.ts.map +0 -1
  109. package/build/main.js +0 -81
  110. package/build/main.js.map +0 -1
  111. package/build/playwright_utils.d.ts +0 -9
  112. package/build/playwright_utils.d.ts.map +0 -1
  113. package/build/playwright_utils.js +0 -90
  114. package/build/playwright_utils.js.map +0 -1
  115. package/build/proxy_configuration.d.ts +0 -411
  116. package/build/proxy_configuration.d.ts.map +0 -1
  117. package/build/proxy_configuration.js +0 -517
  118. package/build/proxy_configuration.js.map +0 -1
  119. package/build/pseudo_url.d.ts +0 -86
  120. package/build/pseudo_url.d.ts.map +0 -1
  121. package/build/pseudo_url.js +0 -153
  122. package/build/pseudo_url.js.map +0 -1
  123. package/build/puppeteer_request_interception.d.ts +0 -8
  124. package/build/puppeteer_request_interception.d.ts.map +0 -1
  125. package/build/puppeteer_request_interception.js +0 -235
  126. package/build/puppeteer_request_interception.js.map +0 -1
  127. package/build/puppeteer_utils.d.ts +0 -250
  128. package/build/puppeteer_utils.d.ts.map +0 -1
  129. package/build/puppeteer_utils.js +0 -551
  130. package/build/puppeteer_utils.js.map +0 -1
  131. package/build/request.d.ts +0 -180
  132. package/build/request.d.ts.map +0 -1
  133. package/build/request.js +0 -261
  134. package/build/request.js.map +0 -1
  135. package/build/request_list.d.ts +0 -581
  136. package/build/request_list.d.ts.map +0 -1
  137. package/build/request_list.js +0 -826
  138. package/build/request_list.js.map +0 -1
  139. package/build/serialization.d.ts +0 -5
  140. package/build/serialization.d.ts.map +0 -1
  141. package/build/serialization.js +0 -139
  142. package/build/serialization.js.map +0 -1
  143. package/build/session_pool/errors.d.ts +0 -11
  144. package/build/session_pool/errors.d.ts.map +0 -1
  145. package/build/session_pool/errors.js +0 -18
  146. package/build/session_pool/errors.js.map +0 -1
  147. package/build/session_pool/events.d.ts +0 -5
  148. package/build/session_pool/events.d.ts.map +0 -1
  149. package/build/session_pool/events.js +0 -6
  150. package/build/session_pool/events.js.map +0 -1
  151. package/build/session_pool/session.d.ts +0 -286
  152. package/build/session_pool/session.d.ts.map +0 -1
  153. package/build/session_pool/session.js +0 -355
  154. package/build/session_pool/session.js.map +0 -1
  155. package/build/session_pool/session_pool.d.ts +0 -280
  156. package/build/session_pool/session_pool.d.ts.map +0 -1
  157. package/build/session_pool/session_pool.js +0 -393
  158. package/build/session_pool/session_pool.js.map +0 -1
  159. package/build/session_pool/session_utils.d.ts +0 -4
  160. package/build/session_pool/session_utils.d.ts.map +0 -1
  161. package/build/session_pool/session_utils.js +0 -24
  162. package/build/session_pool/session_utils.js.map +0 -1
  163. package/build/stealth/hiding_tricks.d.ts +0 -22
  164. package/build/stealth/hiding_tricks.d.ts.map +0 -1
  165. package/build/stealth/hiding_tricks.js +0 -308
  166. package/build/stealth/hiding_tricks.js.map +0 -1
  167. package/build/stealth/stealth.d.ts +0 -56
  168. package/build/stealth/stealth.d.ts.map +0 -1
  169. package/build/stealth/stealth.js +0 -125
  170. package/build/stealth/stealth.js.map +0 -1
  171. package/build/storages/dataset.d.ts +0 -288
  172. package/build/storages/dataset.d.ts.map +0 -1
  173. package/build/storages/dataset.js +0 -480
  174. package/build/storages/dataset.js.map +0 -1
  175. package/build/storages/key_value_store.d.ts +0 -243
  176. package/build/storages/key_value_store.d.ts.map +0 -1
  177. package/build/storages/key_value_store.js +0 -462
  178. package/build/storages/key_value_store.js.map +0 -1
  179. package/build/storages/request_queue.d.ts +0 -318
  180. package/build/storages/request_queue.d.ts.map +0 -1
  181. package/build/storages/request_queue.js +0 -636
  182. package/build/storages/request_queue.js.map +0 -1
  183. package/build/storages/storage_manager.d.ts +0 -87
  184. package/build/storages/storage_manager.d.ts.map +0 -1
  185. package/build/storages/storage_manager.js +0 -150
  186. package/build/storages/storage_manager.js.map +0 -1
  187. package/build/tsconfig.tsbuildinfo +0 -1
  188. package/build/typedefs.d.ts +0 -146
  189. package/build/typedefs.d.ts.map +0 -1
  190. package/build/typedefs.js +0 -88
  191. package/build/typedefs.js.map +0 -1
  192. package/build/utils.d.ts +0 -175
  193. package/build/utils.d.ts.map +0 -1
  194. package/build/utils.js +0 -731
  195. package/build/utils.js.map +0 -1
  196. package/build/utils_log.d.ts +0 -41
  197. package/build/utils_log.d.ts.map +0 -1
  198. package/build/utils_log.js +0 -192
  199. package/build/utils_log.js.map +0 -1
  200. package/build/utils_request.d.ts +0 -77
  201. package/build/utils_request.d.ts.map +0 -1
  202. package/build/utils_request.js +0 -385
  203. package/build/utils_request.js.map +0 -1
  204. package/build/utils_social.d.ts +0 -210
  205. package/build/utils_social.d.ts.map +0 -1
  206. package/build/utils_social.js +0 -787
  207. package/build/utils_social.js.map +0 -1
  208. package/build/validators.d.ts +0 -23
  209. package/build/validators.d.ts.map +0 -1
  210. package/build/validators.js +0 -29
  211. package/build/validators.js.map +0 -1
package/build/constants.d.ts
@@ -1,37 +0,0 @@
- /**
-  * The default user agent used by `Apify.launchPuppeteer`.
-  * Last updated on 2020-05-22.
-  */
- export const DEFAULT_USER_AGENT: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.67 Safari/537.36";
- export namespace EXIT_CODES {
-     const SUCCESS: number;
-     const ERROR_USER_FUNCTION_THREW: number;
-     const ERROR_UNKNOWN: number;
- }
- /**
-  * These events are just internal for the Apify package, so we don't need them in the apify-shared package.
-  *
-  * @type {{CPU_INFO: string, SYSTEM_INFO: string, MIGRATING: string, PERSIST_STATE: string, ABORTING: string}}
-  */
- export const ACTOR_EVENT_NAMES_EX: {
-     CPU_INFO: string;
-     SYSTEM_INFO: string;
-     MIGRATING: string;
-     PERSIST_STATE: string;
-     ABORTING: string;
- };
- /**
-  * Base URL of Apify's API endpoints.
-  * @type {string}
-  */
- export const APIFY_API_BASE_URL: string;
- /**
-  * Additional number of seconds used in CheerioCrawler and BrowserCrawler to set a reasonable
-  * handleRequestTimeoutSecs for BasicCrawler that would not impair functionality (not timeout before crawlers).
-  *
-  * @type {number}
-  */
- export const BASIC_CRAWLER_TIMEOUT_BUFFER_SECS: number;
- export const COUNTRY_CODE_REGEX: RegExp;
- export const STATUS_CODES_BLOCKED: number[];
- //# sourceMappingURL=constants.d.ts.map
package/build/constants.d.ts.map
@@ -1 +0,0 @@
- {"version":3,"file":"constants.d.ts","sourceRoot":"","sources":["../src/constants.js"],"names":[],"mappings":"AAEA;;;GAGG;AAEH,4JAA6J;;;;;;AAa7J;;;;GAIG;AACH,mCAFU;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,CAAC;IAAC,aAAa,EAAE,MAAM,CAAA;CAAC,CAEjB;AAE5F;;;GAGG;AACH,iCAFU,MAAM,CAE6C;AAE7D;;;;;GAKG;AACH,gDAFU,MAAM,CAEoC;AAEpD,wCAA+C;AAE/C,4CAAoD"}
package/build/constants.js
@@ -1,41 +0,0 @@
- "use strict";
- Object.defineProperty(exports, "__esModule", { value: true });
- exports.STATUS_CODES_BLOCKED = exports.COUNTRY_CODE_REGEX = exports.BASIC_CRAWLER_TIMEOUT_BUFFER_SECS = exports.APIFY_API_BASE_URL = exports.ACTOR_EVENT_NAMES_EX = exports.EXIT_CODES = exports.DEFAULT_USER_AGENT = void 0;
- const consts_1 = require("@apify/consts");
- /**
-  * The default user agent used by `Apify.launchPuppeteer`.
-  * Last updated on 2020-05-22.
-  */
- // eslint-disable-next-line max-len
- exports.DEFAULT_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.67 Safari/537.36';
- /**
-  * Exit codes for the actor process.
-  * The error codes must be in the range 1-128, to avoid collision with signal exits
-  * and to ensure Docker will handle them correctly!
-  */
- exports.EXIT_CODES = {
-     SUCCESS: 0,
-     ERROR_USER_FUNCTION_THREW: 91,
-     ERROR_UNKNOWN: 92,
- };
- /**
-  * These events are just internal for the Apify package, so we don't need them in the apify-shared package.
-  *
-  * @type {{CPU_INFO: string, SYSTEM_INFO: string, MIGRATING: string, PERSIST_STATE: string, ABORTING: string}}
-  */
- exports.ACTOR_EVENT_NAMES_EX = { ...consts_1.ACTOR_EVENT_NAMES, PERSIST_STATE: 'persistState' };
- /**
-  * Base URL of Apify's API endpoints.
-  * @type {string}
-  */
- exports.APIFY_API_BASE_URL = 'https://api.apify.com/v2';
- /**
-  * Additional number of seconds used in CheerioCrawler and BrowserCrawler to set a reasonable
-  * handleRequestTimeoutSecs for BasicCrawler that would not impair functionality (not timeout before crawlers).
-  *
-  * @type {number}
-  */
- exports.BASIC_CRAWLER_TIMEOUT_BUFFER_SECS = 10;
- exports.COUNTRY_CODE_REGEX = /^[A-Z]{2}$/;
- exports.STATUS_CODES_BLOCKED = [401, 403, 429];
- //# sourceMappingURL=constants.js.map
package/build/constants.js.map
@@ -1 +0,0 @@
- {"version":3,"file":"constants.js","sourceRoot":"","sources":["../src/constants.js"],"names":[],"mappings":";;;AAAA,0CAAkD;AAElD;;;GAGG;AACH,mCAAmC;AACtB,QAAA,kBAAkB,GAAG,0HAA0H,CAAC;AAE7J;;;;GAIG;AACU,QAAA,UAAU,GAAG;IACtB,OAAO,EAAE,CAAC;IACV,yBAAyB,EAAE,EAAE;IAC7B,aAAa,EAAE,EAAE;CACpB,CAAC;AAEF;;;;GAIG;AACU,QAAA,oBAAoB,GAAG,EAAE,GAAG,0BAAiB,EAAE,aAAa,EAAE,cAAc,EAAE,CAAC;AAE5F;;;GAGG;AACU,QAAA,kBAAkB,GAAG,0BAA0B,CAAC;AAE7D;;;;;GAKG;AACU,QAAA,iCAAiC,GAAG,EAAE,CAAC;AAEvC,QAAA,kBAAkB,GAAG,YAAY,CAAC;AAElC,QAAA,oBAAoB,GAAG,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC"}
package/build/crawlers/basic_crawler.d.ts
@@ -1,443 +0,0 @@
- /**
-  * @typedef BasicCrawlerOptions
-  * @property {HandleRequest} handleRequestFunction
-  * User-provided function that performs the logic of the crawler. It is called for each URL to crawl.
-  *
-  * The function receives the following object as an argument:
-  * ```
-  * {
-  *     request: Request,
-  *     session: Session,
-  *     crawler: BasicCrawler,
-  * }
-  * ```
-  * where the {@link Request} instance represents the URL to crawl.
-  *
-  * The function must return a promise, which is then awaited by the crawler.
-  *
-  * If the function throws an exception, the crawler will try to re-crawl the
-  * request later, up to `option.maxRequestRetries` times.
-  * If all the retries fail, the crawler calls the function
-  * provided to the `handleFailedRequestFunction` parameter.
-  * To make this work, you should **always**
-  * let your function throw exceptions rather than catch them.
-  * The exceptions are logged to the request using the
-  * {@link Request#pushErrorMessage} function.
-  * @property {RequestList} [requestList]
-  * Static list of URLs to be processed.
-  * Either `requestList` or `requestQueue` option must be provided (or both).
-  * @property {RequestQueue} [requestQueue]
-  * Dynamic queue of URLs to be processed. This is useful for recursive crawling of websites.
-  * Either `requestList` or `requestQueue` option must be provided (or both).
-  * @property {number} [handleRequestTimeoutSecs=60]
-  * Timeout in which the function passed as `handleRequestFunction` needs to finish, in seconds.
-  * @property {HandleFailedRequest} [handleFailedRequestFunction]
-  * A function to handle requests that failed more than `option.maxRequestRetries` times.
-  *
-  * The function receives the following object as an argument:
-  * ```
-  * {
-  *     request: Request,
-  *     error: Error,
-  *     session: Session,
-  *     crawler: BasicCrawler,
-  * }
-  * ```
-  * where the {@link Request} instance corresponds to the failed request, and the `Error` instance
-  * represents the last error thrown during processing of the request.
-  *
-  * See
-  * [source code](https://github.com/apify/apify-js/blob/master/src/crawlers/basic_crawler.js#L11)
-  * for the default implementation of this function.
-  * @property {number} [maxRequestRetries=3]
-  * Indicates how many times the request is retried if {@link BasicCrawlerOptions.handleRequestFunction} fails.
-  * @property {number} [maxRequestsPerCrawl]
-  * Maximum number of pages that the crawler will open. The crawl will stop when this limit is reached.
-  * Always set this value in order to prevent infinite loops in misconfigured crawlers.
-  * Note that in cases of parallel crawling, the actual number of pages visited might be slightly higher than this value.
-  * @property {AutoscaledPoolOptions} [autoscaledPoolOptions]
-  * Custom options passed to the underlying {@link AutoscaledPool} constructor.
-  * Note that the `runTaskFunction` and `isTaskReadyFunction` options
-  * are provided by `BasicCrawler` and cannot be overridden.
-  * However, you can provide a custom implementation of `isFinishedFunction`.
-  * @property {number} [minConcurrency=1]
-  * Sets the minimum concurrency (parallelism) for the crawl. Shortcut to the corresponding {@link AutoscaledPool} option.
-  *
-  * *WARNING:* If you set this value too high with respect to the available system memory and CPU, your crawler will run extremely slowly or crash.
-  * If you're not sure, just keep the default value and the concurrency will scale up automatically.
-  * @property {number} [maxConcurrency=1000]
-  * Sets the maximum concurrency (parallelism) for the crawl. Shortcut to the corresponding {@link AutoscaledPool} option.
-  * @property {boolean} [useSessionPool=true]
-  * Basic crawler will initialize the {@link SessionPool} with the corresponding `sessionPoolOptions`.
-  * The session instance will then be available in the `handleRequestFunction`.
-  * @property {SessionPoolOptions} [sessionPoolOptions] The configuration options for {@link SessionPool} to use.
-  */
- /**
-  * Provides a simple framework for parallel crawling of web pages.
-  * The URLs to crawl are fed either from a static list of URLs
-  * or from a dynamic queue of URLs enabling recursive crawling of websites.
-  *
-  * `BasicCrawler` is a low-level tool that requires the user to implement the page
-  * download and data extraction functionality themselves.
-  * If you want a crawler that already facilitates this functionality,
-  * please consider using {@link CheerioCrawler}, {@link PuppeteerCrawler} or {@link PlaywrightCrawler}.
-  *
-  * `BasicCrawler` invokes the user-provided {@link BasicCrawlerOptions.handleRequestFunction}
-  * for each {@link Request} object, which represents a single URL to crawl.
-  * The {@link Request} objects are fed from the {@link RequestList} or the {@link RequestQueue}
-  * instances provided by the {@link BasicCrawlerOptions.requestList} or {@link BasicCrawlerOptions.requestQueue}
-  * constructor options, respectively.
-  *
-  * If both {@link BasicCrawlerOptions.requestList} and {@link BasicCrawlerOptions.requestQueue} options are used,
-  * the instance first processes URLs from the {@link RequestList} and automatically enqueues all of them
-  * to {@link RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
-  *
-  * The crawler finishes if there are no more {@link Request} objects to crawl.
-  *
-  * New requests are only dispatched when there is enough free CPU and memory available,
-  * using the functionality provided by the {@link AutoscaledPool} class.
-  * All {@link AutoscaledPool} configuration options can be passed to the `autoscaledPoolOptions`
-  * parameter of the `BasicCrawler` constructor. For user convenience, the `minConcurrency` and `maxConcurrency`
-  * {@link AutoscaledPool} options are available directly in the `BasicCrawler` constructor.
-  *
-  * **Example usage:**
-  *
-  * ```javascript
-  * // Prepare a list of URLs to crawl
-  * const requestList = new Apify.RequestList({
-  *     sources: [
-  *         { url: 'http://www.example.com/page-1' },
-  *         { url: 'http://www.example.com/page-2' },
-  *     ],
-  * });
-  * await requestList.initialize();
-  *
-  * // Crawl the URLs
-  * const crawler = new Apify.BasicCrawler({
-  *     requestList,
-  *     handleRequestFunction: async ({ request }) => {
-  *         // 'request' contains an instance of the Request class
-  *         // Here we simply fetch the HTML of the page and store it to a dataset
-  *         const { body } = await Apify.utils.requestAsBrowser(request);
-  *         await Apify.pushData({
-  *             url: request.url,
-  *             html: body,
-  *         })
-  *     },
-  * });
-  *
-  * await crawler.run();
-  * ```
-  * @property {Statistics} stats
-  * Contains statistics about the current run.
-  * @property {RequestList} [requestList]
-  * A reference to the underlying {@link RequestList} class that manages the crawler's {@link Request}s.
-  * Only available if used by the crawler.
-  * @property {RequestQueue} [requestQueue]
-  * A reference to the underlying {@link RequestQueue} class that manages the crawler's {@link Request}s.
-  * Only available if used by the crawler.
-  * @property {SessionPool} [sessionPool]
-  * A reference to the underlying {@link SessionPool} class that manages the crawler's {@link Session}s.
-  * Only available if used by the crawler.
-  * @property {AutoscaledPool} autoscaledPool
-  * A reference to the underlying {@link AutoscaledPool} class that manages the concurrency of the crawler.
-  * Note that this property is only initialized after calling the {@link BasicCrawler#run} function.
-  * You can use it to change the concurrency settings on the fly,
-  * to pause the crawler by calling {@link AutoscaledPool#pause}
-  * or to abort it by calling {@link AutoscaledPool#abort}.
-  */
- export class BasicCrawler {
-     /**
-      * @internal
-      * @type any
-      */
-     static optionsShape: any;
-     /**
-      * @param {BasicCrawlerOptions} options
-      * All `BasicCrawler` parameters are passed via an options object.
-      */
-     constructor(options: BasicCrawlerOptions);
-     /** @type {Log} */
-     log: Log;
-     requestList: RequestList | undefined;
-     requestQueue: RequestQueue | undefined;
-     userProvidedHandler: HandleRequest;
-     failedContextHandler: HandleFailedRequest | undefined;
-     handleRequestTimeoutMillis: number;
-     internalTimeoutMillis: any;
-     handleFailedRequestFunction: HandleFailedRequest | undefined;
-     maxRequestRetries: number;
-     handledRequestsCount: number;
-     stats: Statistics;
-     /** @type {SessionPoolOptions} */
-     sessionPoolOptions: SessionPoolOptions;
-     useSessionPool: boolean;
-     crawlingContexts: Map<any, any>;
-     autoscaledPoolOptions: any;
-     isRunningPromise: Promise<void> | null;
-     /**
-      * Runs the crawler. Returns a promise that gets resolved once all the requests are processed.
-      *
-      * @return {Promise<void>}
-      */
-     run(): Promise<void>;
-     /**
-      * @return {Promise<void>}
-      * @ignore
-      * @protected
-      * @internal
-      */
-     protected _init(): Promise<void>;
-     autoscaledPool: AutoscaledPool | undefined;
-     sessionPool: import("../session_pool/session_pool").SessionPool | undefined;
-     /**
-      * @param {CrawlingContext} crawlingContext
-      * @return {Promise<void>}
-      * @ignore
-      * @protected
-      * @internal
-      */
-     protected _handleRequestFunction(crawlingContext: CrawlingContext): Promise<void>;
-     /**
-      * @ignore
-      * @protected
-      * @internal
-      */
-     protected _pauseOnMigration(): Promise<void>;
-     /**
-      * Fetches a request from either RequestList or RequestQueue. If the request comes from a RequestList
-      * and a RequestQueue is present, it enqueues the request to the queue first.
-      *
-      * @ignore
-      * @protected
-      * @internal
-      */
-     protected _fetchNextRequest(): Promise<Request | null>;
-     /**
-      * Wrapper around handleRequestFunction that fetches requests from RequestList/RequestQueue
-      * and then retries them in case of an error, etc.
-      *
-      * @ignore
-      * @protected
-      * @internal
-      */
-     protected _runTaskFunction(): Promise<void>;
-     /**
-      * Runs an async callback with the given timeout and retry.
-      * @ignore
-      */
-     _timeoutAndRetry(handler: any, timeout: any, error: any, maxRetries?: number, retried?: number): any;
-     /**
-      * Returns true if either RequestList or RequestQueue has a request ready for processing.
-      *
-      * @ignore
-      * @protected
-      * @internal
-      */
-     protected _isTaskReadyFunction(): Promise<boolean>;
-     /**
-      * Returns true if both RequestList and RequestQueue have all requests finished.
-      *
-      * @ignore
-      * @protected
-      * @internal
-      */
-     protected _defaultIsFinishedFunction(): Promise<boolean>;
-     /**
-      * Handles errors thrown by the user-provided handleRequestFunction().
-      * @param {Error} error
-      * @param {object} crawlingContext
-      * @param {Request} crawlingContext.request
-      * @param {(RequestList|RequestQueue)} source
-      * @return {Promise<void>}
-      * @ignore
-      * @protected
-      * @internal
-      */
-     protected _requestFunctionErrorHandler(error: Error, crawlingContext: {
-         request: Request;
-     }, source: (RequestList | RequestQueue)): Promise<void>;
-     /**
-      * @param {object} crawlingContext
-      * @param {Error} crawlingContext.error
-      * @param {Request} crawlingContext.request
-      * @return {Promise<void>}
-      * @ignore
-      * @protected
-      * @internal
-      */
-     protected _handleFailedRequestFunction(crawlingContext: {
-         error: Error;
-         request: Request;
-     }): Promise<void>;
-     /**
-      * Updates handledRequestsCount from possibly stored counts,
-      * usually after worker migration. Since one of the stores
-      * needs to have priority when both are present,
-      * it is the request queue, because generally, the request
-      * list will first be dumped into the queue and then left
-      * empty.
-      *
-      * @return {Promise<void>}
-      * @ignore
-      * @protected
-      * @internal
-      */
-     protected _loadHandledRequestCount(): Promise<void>;
-     /**
-      * @param {Array<any>} hooks
-      * @param {*} args
-      * @ignore
-      * @protected
-      * @internal
-      */
-     protected _executeHooks(hooks: Array<any>, ...args: any): Promise<void>;
-     /**
-      * Function for cleaning up after all requests are processed.
-      * @ignore
-      */
-     teardown(): Promise<void>;
- }
- export type CrawlingContext = {
-     id: string;
-     request: Request;
-     session: Session;
-     proxyInfo: ProxyInfo;
-     response: any;
- };
- export type BasicCrawlerOptions = {
-     /**
-      * User-provided function that performs the logic of the crawler. It is called for each URL to crawl.
-      *
-      * The function receives the following object as an argument:
-      * ```
-      * {
-      *     request: Request,
-      *     session: Session,
-      *     crawler: BasicCrawler,
-      * }
-      * ```
-      * where the {@link Request} instance represents the URL to crawl.
-      *
-      * The function must return a promise, which is then awaited by the crawler.
-      *
-      * If the function throws an exception, the crawler will try to re-crawl the
-      * request later, up to `option.maxRequestRetries` times.
-      * If all the retries fail, the crawler calls the function
-      * provided to the `handleFailedRequestFunction` parameter.
-      * To make this work, you should **always**
-      * let your function throw exceptions rather than catch them.
-      * The exceptions are logged to the request using the
-      * {@link Request#pushErrorMessage} function.
-      */
-     handleRequestFunction: HandleRequest;
-     /**
-      * Static list of URLs to be processed.
-      * Either `requestList` or `requestQueue` option must be provided (or both).
-      */
-     requestList?: RequestList | undefined;
-     /**
-      * Dynamic queue of URLs to be processed. This is useful for recursive crawling of websites.
-      * Either `requestList` or `requestQueue` option must be provided (or both).
-      */
-     requestQueue?: RequestQueue | undefined;
-     /**
-      * Timeout in which the function passed as `handleRequestFunction` needs to finish, in seconds.
-      */
-     handleRequestTimeoutSecs?: number | undefined;
-     /**
-      * A function to handle requests that failed more than `option.maxRequestRetries` times.
-      *
-      * The function receives the following object as an argument:
-      * ```
-      * {
-      *     request: Request,
-      *     error: Error,
-      *     session: Session,
-      *     crawler: BasicCrawler,
-      * }
-      * ```
-      * where the {@link Request} instance corresponds to the failed request, and the `Error` instance
-      * represents the last error thrown during processing of the request.
-      *
-      * See
-      * [source code](https://github.com/apify/apify-js/blob/master/src/crawlers/basic_crawler.js#L11)
-      * for the default implementation of this function.
-      */
-     handleFailedRequestFunction?: HandleFailedRequest | undefined;
-     /**
-      * Indicates how many times the request is retried if {@link BasicCrawlerOptions.handleRequestFunction} fails.
-      */
-     maxRequestRetries?: number | undefined;
-     /**
-      * Maximum number of pages that the crawler will open. The crawl will stop when this limit is reached.
-      * Always set this value in order to prevent infinite loops in misconfigured crawlers.
-      * Note that in cases of parallel crawling, the actual number of pages visited might be slightly higher than this value.
-      */
-     maxRequestsPerCrawl?: number | undefined;
-     /**
-      * Custom options passed to the underlying {@link AutoscaledPool} constructor.
-      * Note that the `runTaskFunction` and `isTaskReadyFunction` options
-      * are provided by `BasicCrawler` and cannot be overridden.
-      * However, you can provide a custom implementation of `isFinishedFunction`.
-      */
-     autoscaledPoolOptions?: AutoscaledPoolOptions | undefined;
-     /**
-      * Sets the minimum concurrency (parallelism) for the crawl. Shortcut to the corresponding {@link AutoscaledPool} option.
-      *
-      * *WARNING:* If you set this value too high with respect to the available system memory and CPU, your crawler will run extremely slowly or crash.
-      * If you're not sure, just keep the default value and the concurrency will scale up automatically.
-      */
-     minConcurrency?: number | undefined;
-     /**
-      * Sets the maximum concurrency (parallelism) for the crawl. Shortcut to the corresponding {@link AutoscaledPool} option.
-      */
-     maxConcurrency?: number | undefined;
-     /**
-      * Basic crawler will initialize the {@link SessionPool} with the corresponding `sessionPoolOptions`.
-      * The session instance will then be available in the `handleRequestFunction`.
-      */
-     useSessionPool?: boolean | undefined;
-     /**
-      * The configuration options for {@link SessionPool} to use.
-      */
-     sessionPoolOptions?: SessionPoolOptions | undefined;
- };
- export type HandleRequest = (inputs: HandleRequestInputs) => Promise<void>;
- export type HandleRequestInputs = {
-     /**
-      * The original {@link Request} object.
-      * A reference to the underlying {@link AutoscaledPool} class that manages the concurrency of the crawler.
-      * Note that this property is only initialized after calling the {@link BasicCrawler#run} function.
-      * You can use it to change the concurrency settings on the fly,
-      * to pause the crawler by calling {@link AutoscaledPool#pause}
-      * or to abort it by calling {@link AutoscaledPool#abort}.
-      */
-     request: Request;
-     session?: Session | undefined;
-     crawler?: BasicCrawler | undefined;
- };
- export type HandleFailedRequest = (inputs: HandleFailedRequestInput) => Promise<void>;
- export type HandleFailedRequestInput = {
-     /**
-      * The Error thrown by `handleRequestFunction`.
-      */
-     error: Error;
-     /**
-      * The original {@link Request} object.
-      */
-     request: Request;
-     session: Session;
-     proxyInfo: ProxyInfo;
- };
- import { Log } from "../utils_log";
- import { RequestList } from "../request_list";
- import { RequestQueue } from "../storages/request_queue";
- import Statistics from "./statistics";
- import { SessionPoolOptions } from "../session_pool/session_pool";
- import AutoscaledPool from "../autoscaling/autoscaled_pool";
- import Request from "../request";
- import { Session } from "../session_pool/session";
- import { ProxyInfo } from "../proxy_configuration";
- import { AutoscaledPoolOptions } from "../autoscaling/autoscaled_pool";
- //# sourceMappingURL=basic_crawler.d.ts.map
package/build/crawlers/basic_crawler.d.ts.map
@@ -1 +0,0 @@
- {"version":3,"file":"basic_crawler.d.ts","sourceRoot":"","sources":["../../src/crawlers/basic_crawler.js"],"names":[],"mappings":"AA+CA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAyEG;AAEH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAyEG;AACH;IACI;;;OAGG;IACH,qBAFS,GAAG,CAuBV;IAEF;;;OAGG;IACH,qBAHW,mBAAmB,EAsH7B;IAtFG,kBAAkB;IAClB,KADW,GAAG,CACA;IACd,qCAA8B;IAC9B,uCAAgC;IAChC,mCAAgD;IAChD,sDAAuD;IACvD,mCAAiE;IAGjE,2BAA+H;IAK/H,6DAA8D;IAC9D,0BAA0C;IAC1C,6BAA6B;IAC7B,kBAA6F;IAC7F,iCAAiC;IACjC,oBADW,kBAAkB,CAI5B;IACD,wBAAoC;IACpC,gCAAiC;IAuDjC,2BAA2G;IAE3G,uCAA4B;IAOhC;;;;OAIG;IACH,OAFY,QAAQ,IAAI,CAAC,CAuBxB;IAED;;;;;OAKG;IACH,mBALY,QAAQ,IAAI,CAAC,CAkBxB;IATG,2CAAoE;IAGhE,4EAAiE;IAQzE;;;;;;OAMG;IACH,kDANW,eAAe,GACd,QAAQ,IAAI,CAAC,CAOxB;IAED;;;;OAIG;IACH,6CAmCC;IAED;;;;;;;OAOG;IACH,uDAiBC;IAED;;;;;;;OAOG;IACH,4CAiFC;IAED;;;OAGG;IACH,qGAWC;IAED;;;;;;OAMG;IACH,mDAOC;IAED;;;;;;OAMG;IACH,yDAUC;IAED;;;;;;;;;;OAUG;IACH,8CATW,KAAK;QAEoB,OAAO,EAAhC,OAAO;eACP,CAAC,WAAW,GAAC,YAAY,CAAC,GACzB,QAAQ,IAAI,CAAC,CA4BxB;IAED;;;;;;;;OAQG;IACH;QAPkC,KAAK,EAA5B,KAAK;QACoB,OAAO,EAAhC,OAAO;QACN,QAAQ,IAAI,CAAC,CAgBxB;IAED;;;;;;;;;;;;OAYG;IACH,sCALY,QAAQ,IAAI,CAAC,CAWxB;IAED;;;;;;OAMG;IACH,+BANW,MAAM,GAAG,CAAC,+BAYpB;IAED;;;OAGG;IACH,0BAIC;CACJ;;QArrBa,MAAM;aACN,OAAO;aACP,OAAO;eACP,SAAS;;;;;;;;;;;;;;;;;;;;;;;;;;;;2BAmBT,aAAa;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;qCAmqBhB,mBAAmB,KACjB,QAAQ,IAAI,CAAC;;;;;;;;;;aAIZ,OAAO;;;;2CAYV,wBAAwB,KACtB,QAAQ,IAAI,CAAC;;;;;WAKZ,KAAK;;;;aACL,OAAO;aACP,OAAO;eACP,SAAS"}