npm - @crawlee/core - Versions diffs - 3.5.5-beta.8 → 3.5.5 - Mend

@crawlee/core 3.5.5-beta.8 → 3.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

package/enqueue_links/enqueue_links.d.ts +2 -2
package/enqueue_links/enqueue_links.d.ts.map +1 -1
package/index.mjs +2 -0
package/package.json +5 -5
package/request.d.ts +2 -0
package/request.d.ts.map +1 -1
package/request.js +5 -6
package/request.js.map +1 -1
package/storages/index.d.ts +2 -0
package/storages/index.d.ts.map +1 -1
package/storages/index.js +2 -0
package/storages/index.js.map +1 -1
package/storages/request_provider.d.ts +262 -0
package/storages/request_provider.d.ts.map +1 -0
package/storages/request_provider.js +602 -0
package/storages/request_provider.js.map +1 -0
package/storages/request_queue.d.ts +17 -299
package/storages/request_queue.d.ts.map +1 -1
package/storages/request_queue.js +62 -645
package/storages/request_queue.js.map +1 -1
package/storages/request_queue_v2.d.ts +41 -0
package/storages/request_queue_v2.d.ts.map +1 -0
package/storages/request_queue_v2.js +250 -0
package/storages/request_queue_v2.js.map +1 -0
package/storages/storage_manager.d.ts.map +1 -1
package/storages/storage_manager.js.map +1 -1
package/storages/utils.d.ts +34 -0
package/storages/utils.d.ts.map +1 -1
package/storages/utils.js +45 -1
package/storages/utils.js.map +1 -1
package/tsconfig.build.tsbuildinfo +1 -1

package/storages/request_queue.js CHANGED Viewed

@@ -1,68 +1,18 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.RequestQueue = exports.getRequestId = exports.STORAGE_CONSISTENCY_DELAY_MILLIS = exports.MAX_QUERIES_FOR_CONSISTENCY = exports.API_PROCESSED_REQUESTS_DELAY_MILLIS = exports.QUERY_HEAD_BUFFER = exports.QUERY_HEAD_MIN_LENGTH = void 0;
-const tslib_1 = require("tslib");
-const node_crypto_1 = tslib_1.__importDefault(require("node:crypto"));
+exports.RequestQueue = void 0;
 const promises_1 = require("node:timers/promises");
 const consts_1 = require("@apify/consts");
-const datastructures_1 = require("@apify/datastructures");
-const utilities_1 = require("@apify/utilities");
-const utils_1 = require("@crawlee/utils");
-const ow_1 = tslib_1.__importDefault(require("ow"));
-const storage_manager_1 = require("./storage_manager");
-const utils_2 = require("./utils");
+const request_provider_1 = require("./request_provider");
+const utils_1 = require("./utils");
 const configuration_1 = require("../configuration");
-const log_1 = require("../log");
-const request_1 = require("../request");
 const MAX_CACHED_REQUESTS = 1000000;
-/**
- * When requesting queue head we always fetch requestsInProgressCount * QUERY_HEAD_BUFFER number of requests.
- * @internal
- */
-exports.QUERY_HEAD_MIN_LENGTH = 100;
-/** @internal */
-exports.QUERY_HEAD_BUFFER = 3;
-/**
- * If queue was modified (request added/updated/deleted) before more than API_PROCESSED_REQUESTS_DELAY_MILLIS
- * then we assume the get head operation to be consistent.
- * @internal
- */
-exports.API_PROCESSED_REQUESTS_DELAY_MILLIS = 10000;
-/**
- * How many times we try to get queue head with queueModifiedAt older than API_PROCESSED_REQUESTS_DELAY_MILLIS.
- * @internal
- */
-exports.MAX_QUERIES_FOR_CONSISTENCY = 6;
 /**
  * This number must be large enough so that processing of all these requests cannot be done in
  * a time lower than expected maximum latency of DynamoDB, but low enough not to waste too much memory.
  * @internal
  */
 const RECENTLY_HANDLED_CACHE_SIZE = 1000;
-/**
- * Indicates how long it usually takes for the underlying storage to propagate all writes
- * to be available to subsequent reads.
- * @internal
- */
-exports.STORAGE_CONSISTENCY_DELAY_MILLIS = 3000;
-/**
- * Helper function that creates ID from uniqueKey for local emulation of request queue.
- * It's also used for local cache of remote request queue.
- *
- * This function may not exactly match how requestId is created server side.
- * So we never pass requestId created by this to server and use it only for local cache.
- *
- * @internal
- */
-function getRequestId(uniqueKey) {
-    const str = node_crypto_1.default
-        .createHash('sha256')
-        .update(uniqueKey)
-        .digest('base64')
-        .replace(/[+/=]/g, '');
-    return str.substr(0, 15);
-}
-exports.getRequestId = getRequestId;
 /**
  * Represents a queue of URLs to crawl, which is used for deep crawling of websites
  * where you start with several URLs and then recursively
@@ -109,361 +59,29 @@ exports.getRequestId = getRequestId;
  * ```
  * @category Sources
  */
-class RequestQueue {
+class RequestQueue extends request_provider_1.RequestProvider {
     /**
      * @internal
      */
     constructor(options, config = configuration_1.Configuration.getGlobalConfig()) {
-        Object.defineProperty(this, "config", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: config
-        });
-        Object.defineProperty(this, "log", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: log_1.log.child({ prefix: 'RequestQueue' })
-        });
-        Object.defineProperty(this, "id", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "name", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "timeoutSecs", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: 30
-        });
-        Object.defineProperty(this, "clientKey", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: (0, utilities_1.cryptoRandomObjectId)()
-        });
-        Object.defineProperty(this, "client", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "proxyConfiguration", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        /**
-         * Contains a cached list of request IDs from the head of the queue,
-         * as obtained in the last query. Both key and value is the request ID.
-         * Need to apply a type here to the generated TS types don't try to use types-apify
-         */
-        Object.defineProperty(this, "queueHeadDict", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: new datastructures_1.ListDictionary()
-        });
+        super({
+            ...options,
+            logPrefix: 'RequestQueue',
+            recentlyHandledRequestsMaxSize: RECENTLY_HANDLED_CACHE_SIZE,
+            requestCacheMaxSize: MAX_CACHED_REQUESTS,
+        }, config);
         Object.defineProperty(this, "queryQueueHeadPromise", {
             enumerable: true,
             configurable: true,
             writable: true,
             value: null
         });
-        // A set of all request IDs that are currently being handled,
-        // i.e. which were returned by fetchNextRequest() but not markRequestHandled()
-        Object.defineProperty(this, "inProgress", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: new Set()
-        });
-        // To track whether the queue gets stuck, and we need to reset it
-        // `lastActivity` tracks the time when we either added, processed or reclaimed a request,
-        // or when we add new request to in-progress cache
         Object.defineProperty(this, "lastActivity", {
             enumerable: true,
             configurable: true,
             writable: true,
             value: new Date()
         });
-        Object.defineProperty(this, "internalTimeoutMillis", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: 5 * 60e3
-        }); // defaults to 5 minutes, will be overridden by BasicCrawler
-        // Contains a list of recently handled requests. It is used to avoid inconsistencies
-        // caused by delays in the underlying DynamoDB storage.
-        // Keys are request IDs, values are true.
-        Object.defineProperty(this, "recentlyHandled", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: new datastructures_1.LruCache({ maxLength: RECENTLY_HANDLED_CACHE_SIZE })
-        });
-        // We can trust these numbers only in a case that queue is used by a single client.
-        // This information is returned by getHead() under the hadMultipleClients property.
-        Object.defineProperty(this, "assumedTotalCount", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: 0
-        });
-        Object.defineProperty(this, "assumedHandledCount", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: 0
-        });
-        // Caching requests to avoid redundant addRequest() calls.
-        // Key is computed using getRequestId() and value is { id, isHandled }.
-        Object.defineProperty(this, "requestsCache", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: new datastructures_1.LruCache({ maxLength: MAX_CACHED_REQUESTS })
-        });
-        this.id = options.id;
-        this.name = options.name;
-        this.client = options.client.requestQueue(this.id, {
-            clientKey: this.clientKey,
-            timeoutSecs: this.timeoutSecs,
-        });
-        this.proxyConfiguration = options.proxyConfiguration;
-    }
-    /**
-     * @ignore
-     */
-    inProgressCount() {
-        return this.inProgress.size;
-    }
-    /**
-     * Adds a request to the queue.
-     *
-     * If a request with the same `uniqueKey` property is already present in the queue,
-     * it will not be updated. You can find out whether this happened from the resulting
-     * {@apilink QueueOperationInfo} object.
-     *
-     * To add multiple requests to the queue by extracting links from a webpage,
-     * see the {@apilink enqueueLinks} helper function.
-     *
-     * @param requestLike {@apilink Request} object or vanilla object with request data.
-     * Note that the function sets the `uniqueKey` and `id` fields to the passed Request.
-     * @param [options] Request queue operation options.
-     */
-    async addRequest(requestLike, options = {}) {
-        (0, ow_1.default)(requestLike, ow_1.default.object);
-        (0, ow_1.default)(options, ow_1.default.object.exactShape({
-            forefront: ow_1.default.optional.boolean,
-        }));
-        this.lastActivity = new Date();
-        const { forefront = false } = options;
-        if ('requestsFromUrl' in requestLike) {
-            const requests = await this._fetchRequestsFromUrl(requestLike);
-            const processedRequests = await this._addFetchedRequests(requestLike, requests, options);
-            return processedRequests[0];
-        }
-        (0, ow_1.default)(requestLike, ow_1.default.object.partialShape({
-            url: ow_1.default.string,
-            id: ow_1.default.undefined,
-        }));
-        const request = requestLike instanceof request_1.Request
-            ? requestLike
-            : new request_1.Request(requestLike);
-        const cacheKey = getRequestId(request.uniqueKey);
-        const cachedInfo = this.requestsCache.get(cacheKey);
-        if (cachedInfo) {
-            request.id = cachedInfo.id;
-            return {
-                wasAlreadyPresent: true,
-                // We may assume that if request is in local cache then also the information if the
-                // request was already handled is there because just one client should be using one queue.
-                wasAlreadyHandled: cachedInfo.isHandled,
-                requestId: cachedInfo.id,
-                uniqueKey: cachedInfo.uniqueKey,
-            };
-        }
-        const queueOperationInfo = await this.client.addRequest(request, { forefront });
-        queueOperationInfo.uniqueKey = request.uniqueKey;
-        const { requestId, wasAlreadyPresent } = queueOperationInfo;
-        this._cacheRequest(cacheKey, queueOperationInfo);
-        if (!wasAlreadyPresent && !this.inProgress.has(requestId) && !this.recentlyHandled.get(requestId)) {
-            this.assumedTotalCount++;
-            // Performance optimization: add request straight to head if possible
-            this._maybeAddRequestToQueueHead(requestId, forefront);
-        }
-        return queueOperationInfo;
-    }
-    /**
-     * Adds requests to the queue in batches of 25.
-     *
-     * If a request that is passed in is already present due to its `uniqueKey` property being the same,
-     * it will not be updated. You can find out whether this happened by finding the request in the resulting
-     * {@apilink BatchAddRequestsResult} object.
-     *
-     * @param requestsLike {@apilink Request} objects or vanilla objects with request data.
-     * Note that the function sets the `uniqueKey` and `id` fields to the passed requests if missing.
-     * @param [options] Request queue operation options.
-     */
-    async addRequests(requestsLike, options = {}) {
-        (0, ow_1.default)(requestsLike, ow_1.default.array);
-        (0, ow_1.default)(options, ow_1.default.object.exactShape({
-            forefront: ow_1.default.optional.boolean,
-        }));
-        const { forefront = false } = options;
-        const uniqueKeyToCacheKey = new Map();
-        const getCachedRequestId = (uniqueKey) => {
-            const cached = uniqueKeyToCacheKey.get(uniqueKey);
-            if (cached)
-                return cached;
-            const newCacheKey = getRequestId(uniqueKey);
-            uniqueKeyToCacheKey.set(uniqueKey, newCacheKey);
-            return newCacheKey;
-        };
-        const results = {
-            processedRequests: [],
-            unprocessedRequests: [],
-        };
-        for (const requestLike of requestsLike) {
-            if ('requestsFromUrl' in requestLike) {
-                const requests = await this._fetchRequestsFromUrl(requestLike);
-                await this._addFetchedRequests(requestLike, requests, options);
-            }
-        }
-        const requests = requestsLike
-            .filter((requestLike) => !('requestsFromUrl' in requestLike))
-            .map((requestLike) => {
-            return requestLike instanceof request_1.Request ? requestLike : new request_1.Request(requestLike);
-        });
-        const requestsToAdd = new Map();
-        for (const request of requests) {
-            const cacheKey = getCachedRequestId(request.uniqueKey);
-            const cachedInfo = this.requestsCache.get(cacheKey);
-            if (cachedInfo) {
-                request.id = cachedInfo.id;
-                results.processedRequests.push({
-                    wasAlreadyPresent: true,
-                    // We may assume that if request is in local cache then also the information if the
-                    // request was already handled is there because just one client should be using one queue.
-                    wasAlreadyHandled: cachedInfo.isHandled,
-                    requestId: cachedInfo.id,
-                    uniqueKey: cachedInfo.uniqueKey,
-                });
-            }
-            else if (!requestsToAdd.has(request.uniqueKey)) {
-                requestsToAdd.set(request.uniqueKey, request);
-            }
-        }
-        // Early exit if all provided requests were already added
-        if (!requestsToAdd.size) {
-            return results;
-        }
-        const apiResults = await this.client.batchAddRequests([...requestsToAdd.values()], { forefront });
-        // Report unprocessed requests
-        results.unprocessedRequests = apiResults.unprocessedRequests;
-        // Add all new requests to the queue head
-        for (const newRequest of apiResults.processedRequests) {
-            // Add the new request to the processed list
-            results.processedRequests.push(newRequest);
-            const cacheKey = getCachedRequestId(newRequest.uniqueKey);
-            const { requestId, wasAlreadyPresent } = newRequest;
-            this._cacheRequest(cacheKey, newRequest);
-            if (!wasAlreadyPresent && !this.inProgress.has(requestId) && !this.recentlyHandled.get(requestId)) {
-                this.assumedTotalCount++;
-                // Performance optimization: add request straight to head if possible
-                this._maybeAddRequestToQueueHead(requestId, forefront);
-            }
-        }
-        return results;
-    }
-    /**
-     * Adds requests to the queue in batches. By default, it will resolve after the initial batch is added, and continue
-     * adding the rest in background. You can configure the batch size via `batchSize` option and the sleep time in between
-     * the batches via `waitBetweenBatchesMillis`. If you want to wait for all batches to be added to the queue, you can use
-     * the `waitForAllRequestsToBeAdded` promise you get in the response object.
-     *
-     * @param requests The requests to add
-     * @param options Options for the request queue
-     */
-    async addRequestsBatched(requests, options = {}) {
-        (0, ow_1.default)(requests, ow_1.default.array.ofType(ow_1.default.any(ow_1.default.string, ow_1.default.object.partialShape({ url: ow_1.default.string, id: ow_1.default.undefined }), ow_1.default.object.partialShape({ requestsFromUrl: ow_1.default.string, regex: ow_1.default.optional.regExp }))));
-        (0, ow_1.default)(options, ow_1.default.object.exactShape({
-            forefront: ow_1.default.optional.boolean,
-            waitForAllRequestsToBeAdded: ow_1.default.optional.boolean,
-            batchSize: ow_1.default.optional.number,
-            waitBetweenBatchesMillis: ow_1.default.optional.number,
-        }));
-        const { batchSize = 1000, waitBetweenBatchesMillis = 1000, } = options;
-        const builtRequests = [];
-        for (const opts of requests) {
-            if (opts && typeof opts === 'object' && 'requestsFromUrl' in opts) {
-                await this.addRequest(opts, { forefront: options.forefront });
-            }
-            else {
-                builtRequests.push(new request_1.Request(typeof opts === 'string' ? { url: opts } : opts));
-            }
-        }
-        const attemptToAddToQueueAndAddAnyUnprocessed = async (providedRequests) => {
-            const resultsToReturn = [];
-            const apiResult = await this.addRequests(providedRequests, { forefront: options.forefront });
-            resultsToReturn.push(...apiResult.processedRequests);
-            if (apiResult.unprocessedRequests.length) {
-                await (0, promises_1.setTimeout)(waitBetweenBatchesMillis);
-                resultsToReturn.push(...await attemptToAddToQueueAndAddAnyUnprocessed(providedRequests.filter((r) => !apiResult.processedRequests.some((pr) => pr.uniqueKey === r.uniqueKey))));
-            }
-            return resultsToReturn;
-        };
-        const initialChunk = builtRequests.splice(0, batchSize);
-        // Add initial batch of `batchSize` to process them right away
-        const addedRequests = await attemptToAddToQueueAndAddAnyUnprocessed(initialChunk);
-        // If we have no more requests to add, return early
-        if (!builtRequests.length) {
-            return {
-                addedRequests,
-                waitForAllRequestsToBeAdded: Promise.resolve([]),
-            };
-        }
-        // eslint-disable-next-line no-async-promise-executor
-        const promise = new Promise(async (resolve) => {
-            const chunks = (0, utils_1.chunk)(builtRequests, batchSize);
-            const finalAddedRequests = [];
-            for (const requestChunk of chunks) {
-                finalAddedRequests.push(...await attemptToAddToQueueAndAddAnyUnprocessed(requestChunk));
-                await (0, promises_1.setTimeout)(waitBetweenBatchesMillis);
-            }
-            resolve(finalAddedRequests);
-        });
-        // If the user wants to wait for all the requests to be added, we wait for the promise to resolve for them
-        if (options.waitForAllRequestsToBeAdded) {
-            addedRequests.push(...await promise);
-        }
-        return {
-            addedRequests,
-            waitForAllRequestsToBeAdded: promise,
-        };
-    }
-    /**
-     * Gets the request from the queue specified by ID.
-     *
-     * @param id ID of the request.
-     * @returns Returns the request object, or `null` if it was not found.
-     */
-    async getRequest(id) {
-        (0, ow_1.default)(id, ow_1.default.string);
-        const requestOptions = await this.client.getRequest(id);
-        if (!requestOptions)
-            return null;
-        return new request_1.Request(requestOptions);
     }
     /**
      * Returns a next request in the queue to be processed, or `null` if there are no more pending requests.
@@ -483,17 +101,17 @@ class RequestQueue {
      *   Returns the request object or `null` if there are no more pending requests.
      */
     async fetchNextRequest() {
-        await this._ensureHeadIsNonEmpty();
-        const nextRequestId = this.queueHeadDict.removeFirst();
+        await this.ensureHeadIsNonEmpty();
+        const nextRequestId = this.queueHeadIds.removeFirst();
         // We are likely done at this point.
         if (!nextRequestId)
             return null;
         // This should never happen, but...
-        if (this.inProgress.has(nextRequestId) || this.recentlyHandled.get(nextRequestId)) {
+        if (this.inProgress.has(nextRequestId) || this.recentlyHandledRequestsCache.get(nextRequestId)) {
             this.log.warning('Queue head returned a request that is already in progress?!', {
                 nextRequestId,
                 inProgress: this.inProgress.has(nextRequestId),
-                recentlyHandled: !!this.recentlyHandled.get(nextRequestId),
+                recentlyHandled: !!this.recentlyHandledRequestsCache.get(nextRequestId),
             });
             return null;
         }
@@ -518,7 +136,7 @@ class RequestQueue {
             this.log.debug('Cannot find a request from the beginning of queue, will be retried later', { nextRequestId });
             setTimeout(() => {
                 this.inProgress.delete(nextRequestId);
-            }, exports.STORAGE_CONSISTENCY_DELAY_MILLIS);
+            }, utils_1.STORAGE_CONSISTENCY_DELAY_MILLIS);
             return null;
         }
         // 2) Queue head index is behind the main table and the underlying request was already handled
@@ -527,125 +145,14 @@ class RequestQueue {
         //    will not put the request again to queueHeadDict.
         if (request.handledAt) {
             this.log.debug('Request fetched from the beginning of queue was already handled', { nextRequestId });
-            this.recentlyHandled.add(nextRequestId, true);
+            this.recentlyHandledRequestsCache.add(nextRequestId, true);
             return null;
         }
         return request;
     }
-    /**
-     * Marks a request that was previously returned by the
-     * {@apilink RequestQueue.fetchNextRequest}
-     * function as handled after successful processing.
-     * Handled requests will never again be returned by the `fetchNextRequest` function.
-     */
-    async markRequestHandled(request) {
-        this.lastActivity = new Date();
-        (0, ow_1.default)(request, ow_1.default.object.partialShape({
-            id: ow_1.default.string,
-            uniqueKey: ow_1.default.string,
-            handledAt: ow_1.default.optional.string,
-        }));
-        if (!this.inProgress.has(request.id)) {
-            this.log.debug(`Cannot mark request ${request.id} as handled, because it is not in progress!`, { requestId: request.id });
-            return null;
-        }
-        const handledAt = request.handledAt ?? new Date().toISOString();
-        const queueOperationInfo = await this.client.updateRequest({ ...request, handledAt });
-        request.handledAt = handledAt;
-        queueOperationInfo.uniqueKey = request.uniqueKey;
-        this.inProgress.delete(request.id);
-        this.recentlyHandled.add(request.id, true);
-        if (!queueOperationInfo.wasAlreadyHandled) {
-            this.assumedHandledCount++;
-        }
-        this._cacheRequest(getRequestId(request.uniqueKey), queueOperationInfo);
-        return queueOperationInfo;
-    }
-    /**
-     * Reclaims a failed request back to the queue, so that it can be returned for processing later again
-     * by another call to {@apilink RequestQueue.fetchNextRequest}.
-     * The request record in the queue is updated using the provided `request` parameter.
-     * For example, this lets you store the number of retries or error messages for the request.
-     */
-    async reclaimRequest(request, options = {}) {
-        this.lastActivity = new Date();
-        (0, ow_1.default)(request, ow_1.default.object.partialShape({
-            id: ow_1.default.string,
-            uniqueKey: ow_1.default.string,
-        }));
-        (0, ow_1.default)(options, ow_1.default.object.exactShape({
-            forefront: ow_1.default.optional.boolean,
-        }));
-        const { forefront = false } = options;
-        if (!this.inProgress.has(request.id)) {
-            this.log.debug(`Cannot reclaim request ${request.id}, because it is not in progress!`, { requestId: request.id });
-            return null;
-        }
-        // TODO: If request hasn't been changed since the last getRequest(),
-        //   we don't need to call updateRequest() and thus improve performance.
-        const queueOperationInfo = await this.client.updateRequest(request, { forefront });
-        queueOperationInfo.uniqueKey = request.uniqueKey;
-        this._cacheRequest(getRequestId(request.uniqueKey), queueOperationInfo);
-        // Wait a little to increase a chance that the next call to fetchNextRequest() will return the request with updated data.
-        // This is to compensate for the limitation of DynamoDB, where writes might not be immediately visible to subsequent reads.
-        setTimeout(() => {
-            if (!this.inProgress.has(request.id)) {
-                this.log.debug('The request is no longer marked as in progress in the queue?!', { requestId: request.id });
-                return;
-            }
-            this.inProgress.delete(request.id);
-            // Performance optimization: add request straight to head if possible
-            this._maybeAddRequestToQueueHead(request.id, forefront);
-        }, exports.STORAGE_CONSISTENCY_DELAY_MILLIS);
-        return queueOperationInfo;
-    }
-    /**
-     * Resolves to `true` if the next call to {@apilink RequestQueue.fetchNextRequest}
-     * would return `null`, otherwise it resolves to `false`.
-     * Note that even if the queue is empty, there might be some pending requests currently being processed.
-     * If you need to ensure that there is no activity in the queue, use {@apilink RequestQueue.isFinished}.
-     */
-    async isEmpty() {
+    async ensureHeadIsNonEmpty() {
+        // Alias for backwards compatibility
         await this._ensureHeadIsNonEmpty();
-        return this.queueHeadDict.length() === 0;
-    }
-    /**
-     * Resolves to `true` if all requests were already handled and there are no more left.
-     * Due to the nature of distributed storage used by the queue,
-     * the function might occasionally return a false negative,
-     * but it will never return a false positive.
-     */
-    async isFinished() {
-        if ((Date.now() - +this.lastActivity) > this.internalTimeoutMillis) {
-            const message = `The request queue seems to be stuck for ${this.internalTimeoutMillis / 1e3}s, resetting internal state.`;
-            this.log.warning(message, { inProgress: [...this.inProgress] });
-            this._reset();
-        }
-        if (this.queueHeadDict.length() > 0 || this.inProgressCount() > 0)
-            return false;
-        const isHeadConsistent = await this._ensureHeadIsNonEmpty(true);
-        return isHeadConsistent && this.queueHeadDict.length() === 0 && this.inProgressCount() === 0;
-    }
-    _reset() {
-        this.queueHeadDict.clear();
-        this.queryQueueHeadPromise = null;
-        this.inProgress.clear();
-        this.recentlyHandled.clear();
-        this.assumedTotalCount = 0;
-        this.assumedHandledCount = 0;
-        this.requestsCache.clear();
-        this.lastActivity = new Date();
-    }
-    /**
-     * Caches information about request to beware of unneeded addRequest() calls.
-     */
-    _cacheRequest(cacheKey, queueOperationInfo) {
-        this.requestsCache.add(cacheKey, {
-            id: queueOperationInfo.requestId,
-            isHandled: queueOperationInfo.wasAlreadyHandled,
-            uniqueKey: queueOperationInfo.uniqueKey,
-            wasAlreadyHandled: queueOperationInfo.wasAlreadyHandled,
-        });
     }
     /**
      * We always request more items than is in progress to ensure that something falls into head.
@@ -658,10 +165,15 @@ class RequestQueue {
      * @param [iteration] Used when this function is called recursively to limit the recursion.
      * @returns Indicates if queue head is consistent (true) or inconsistent (false).
      */
-    async _ensureHeadIsNonEmpty(ensureConsistency = false, limit = Math.max(this.inProgressCount() * exports.QUERY_HEAD_BUFFER, exports.QUERY_HEAD_MIN_LENGTH), iteration = 0) {
+    async _ensureHeadIsNonEmpty(ensureConsistency = false, limit = Math.max(this.inProgressCount() * utils_1.QUERY_HEAD_BUFFER, utils_1.QUERY_HEAD_MIN_LENGTH), iteration = 0) {
+        // If we are paused for migration, resolve immediately.
+        if (this.queuePausedForMigration) {
+            return true;
+        }
         // If is nonempty resolve immediately.
-        if (this.queueHeadDict.length() > 0)
+        if (this.queueHeadIds.length() > 0) {
             return true;
+        }
         if (!this.queryQueueHeadPromise) {
             const queryStartedAt = new Date();
             this.queryQueueHeadPromise = this.client
@@ -669,10 +181,10 @@ class RequestQueue {
                 .then(({ items, queueModifiedAt, hadMultipleClients }) => {
                 items.forEach(({ id: requestId, uniqueKey }) => {
                     // Queue head index might be behind the main table, so ensure we don't recycle requests
-                    if (!requestId || !uniqueKey || this.inProgress.has(requestId) || this.recentlyHandled.get(requestId))
+                    if (!requestId || !uniqueKey || this.inProgress.has(requestId) || this.recentlyHandledRequestsCache.get(requestId))
                         return;
-                    this.queueHeadDict.add(requestId, requestId, false);
-                    this._cacheRequest(getRequestId(uniqueKey), {
+                    this.queueHeadIds.add(requestId, requestId, false);
+                    this._cacheRequest((0, utils_1.getRequestId)(uniqueKey), {
                         requestId,
                         wasAlreadyHandled: false,
                         wasAlreadyPresent: true,
@@ -701,13 +213,13 @@ class RequestQueue {
         if (prevLimit >= consts_1.REQUEST_QUEUE_HEAD_MAX_LIMIT) {
             this.log.warning(`Reached the maximum number of requests in progress: ${consts_1.REQUEST_QUEUE_HEAD_MAX_LIMIT}.`);
         }
-        const shouldRepeatWithHigherLimit = this.queueHeadDict.length() === 0
+        const shouldRepeatWithHigherLimit = this.queueHeadIds.length() === 0
             && wasLimitReached
             && prevLimit < consts_1.REQUEST_QUEUE_HEAD_MAX_LIMIT;
         // If ensureConsistency=true then we must ensure that either:
         // - queueModifiedAt is older than queryStartedAt by at least API_PROCESSED_REQUESTS_DELAY_MILLIS
         // - hadMultipleClients=false and this.assumedTotalCount<=this.assumedHandledCount
-        const isDatabaseConsistent = +queryStartedAt - +queueModifiedAt >= exports.API_PROCESSED_REQUESTS_DELAY_MILLIS;
+        const isDatabaseConsistent = +queryStartedAt - +queueModifiedAt >= utils_1.API_PROCESSED_REQUESTS_DELAY_MILLIS;
         const isLocallyConsistent = !hadMultipleClients && this.assumedTotalCount <= this.assumedHandledCount;
         // Consistent information from one source is enough to consider request queue finished.
         const shouldRepeatForConsistency = ensureConsistency && !isDatabaseConsistent && !isLocallyConsistent;
@@ -716,152 +228,57 @@ class RequestQueue {
             return true;
         // If we are querying for consistency then we limit the number of queries to MAX_QUERIES_FOR_CONSISTENCY.
         // If this is reached then we return false so that empty() and finished() returns possibly false negative.
-        if (!shouldRepeatWithHigherLimit && iteration > exports.MAX_QUERIES_FOR_CONSISTENCY)
+        if (!shouldRepeatWithHigherLimit && iteration > utils_1.MAX_QUERIES_FOR_CONSISTENCY)
             return false;
         const nextLimit = shouldRepeatWithHigherLimit
             ? Math.round(prevLimit * 1.5)
             : prevLimit;
         // If we are repeating for consistency then wait required time.
         if (shouldRepeatForConsistency) {
-            const delayMillis = exports.API_PROCESSED_REQUESTS_DELAY_MILLIS - (Date.now() - +queueModifiedAt);
+            const delayMillis = utils_1.API_PROCESSED_REQUESTS_DELAY_MILLIS - (Date.now() - +queueModifiedAt);
             this.log.info(`Waiting for ${delayMillis}ms before considering the queue as finished to ensure that the data is consistent.`);
             await (0, promises_1.setTimeout)(delayMillis);
         }
         return this._ensureHeadIsNonEmpty(ensureConsistency, nextLimit, iteration + 1);
     }
-    /**
-     * Adds a request straight to the queueHeadDict, to improve performance.
-     */
-    _maybeAddRequestToQueueHead(requestId, forefront) {
-        if (forefront) {
-            this.queueHeadDict.add(requestId, requestId, true);
-        }
-        else if (this.assumedTotalCount < exports.QUERY_HEAD_MIN_LENGTH) {
-            this.queueHeadDict.add(requestId, requestId, false);
+    // RequestQueue v1 behavior overrides below
+    async isFinished() {
+        if ((Date.now() - +this.lastActivity) > this.internalTimeoutMillis) {
+            const message = `The request queue seems to be stuck for ${this.internalTimeoutMillis / 1e3}s, resetting internal state.`;
+            this.log.warning(message, { inProgress: [...this.inProgress] });
+            this._reset();
         }
+        if (this.queueHeadIds.length() > 0 || this.inProgressCount() > 0)
+            return false;
+        const isHeadConsistent = await this._ensureHeadIsNonEmpty(true);
+        return isHeadConsistent && this.queueHeadIds.length() === 0 && this.inProgressCount() === 0;
     }
-    /**
-     * Removes the queue either from the Apify Cloud storage or from the local database,
-     * depending on the mode of operation.
-     */
-    async drop() {
-        await this.client.delete();
-        const manager = storage_manager_1.StorageManager.getManager(RequestQueue, this.config);
-        manager.closeStorage(this);
+    async addRequest(...args) {
+        this.lastActivity = new Date();
+        return super.addRequest(...args);
     }
-    /**
-     * Returns the number of handled requests.
-     *
-     * This function is just a convenient shortcut for:
-     *
-     * ```javascript
-     * const { handledRequestCount } = await queue.getInfo();
-     * ```
-     */
-    async handledCount() {
-        // NOTE: We keep this function for compatibility with RequestList.handledCount()
-        const { handledRequestCount } = await this.getInfo() ?? {};
-        return handledRequestCount ?? 0;
+    async addRequests(...args) {
+        this.lastActivity = new Date();
+        return super.addRequests(...args);
     }
-    /**
-     * Returns an object containing general information about the request queue.
-     *
-     * The function returns the same object as the Apify API Client's
-     * [getQueue](https://docs.apify.com/api/apify-client-js/latest#ApifyClient-requestQueues)
-     * function, which in turn calls the
-     * [Get request queue](https://apify.com/docs/api/v2#/reference/request-queues/queue/get-request-queue)
-     * API endpoint.
-     *
-     * **Example:**
-     * ```
-     * {
-     *   id: "WkzbQMuFYuamGv3YF",
-     *   name: "my-queue",
-     *   userId: "wRsJZtadYvn4mBZmm",
-     *   createdAt: new Date("2015-12-12T07:34:14.202Z"),
-     *   modifiedAt: new Date("2015-12-13T08:36:13.202Z"),
-     *   accessedAt: new Date("2015-12-14T08:36:13.202Z"),
-     *   totalRequestCount: 25,
-     *   handledRequestCount: 5,
-     *   pendingRequestCount: 20,
-     * }
-     * ```
-     */
-    async getInfo() {
-        return this.client.get();
+    async addRequestsBatched(...args) {
+        this.lastActivity = new Date();
+        return super.addRequestsBatched(...args);
     }
-    /**
-     * Fetches URLs from requestsFromUrl and returns them in format of list of requests
-     */
-    async _fetchRequestsFromUrl(source) {
-        const { requestsFromUrl, regex, ...sharedOpts } = source;
-        // Download remote resource and parse URLs.
-        let urlsArr;
-        try {
-            urlsArr = await this._downloadListOfUrls({ url: requestsFromUrl, urlRegExp: regex, proxyUrl: await this.proxyConfiguration?.newUrl() });
-        }
-        catch (err) {
-            throw new Error(`Cannot fetch a request list from ${requestsFromUrl}: ${err}`);
-        }
-        // Skip if resource contained no URLs.
-        if (!urlsArr.length) {
-            this.log.warning('list fetched, but it is empty.', { requestsFromUrl, regex });
-            return [];
-        }
-        return urlsArr.map((url) => ({ url, ...sharedOpts }));
+    async markRequestHandled(...args) {
+        this.lastActivity = new Date();
+        return super.markRequestHandled(...args);
     }
-    /**
-     * Adds all fetched requests from a URL from a remote resource.
-     */
-    async _addFetchedRequests(source, fetchedRequests, options) {
-        const { requestsFromUrl, regex } = source;
-        const { addedRequests } = await this.addRequestsBatched(fetchedRequests, options);
-        this.log.info('Fetched and loaded Requests from a remote resource.', {
-            requestsFromUrl,
-            regex,
-            fetchedCount: fetchedRequests.length,
-            importedCount: addedRequests.length,
-            duplicateCount: fetchedRequests.length - addedRequests.length,
-            sample: JSON.stringify(fetchedRequests.slice(0, 5)),
-        });
-        return addedRequests;
+    async reclaimRequest(...args) {
+        this.lastActivity = new Date();
+        return super.reclaimRequest(...args);
     }
-    /**
-     * @internal wraps public utility for mocking purposes
-     */
-    async _downloadListOfUrls(options) {
-        return (0, utils_1.downloadListOfUrls)(options);
+    _reset() {
+        super._reset();
+        this.lastActivity = new Date();
     }
-    /**
-     * Opens a request queue and returns a promise resolving to an instance
-     * of the {@apilink RequestQueue} class.
-     *
-     * {@apilink RequestQueue} represents a queue of URLs to crawl, which is stored either on local filesystem or in the cloud.
-     * The queue is used for deep crawling of websites, where you start with several URLs and then
-     * recursively follow links to other pages. The data structure supports both breadth-first
-     * and depth-first crawling orders.
-     *
-     * For more details and code examples, see the {@apilink RequestQueue} class.
-     *
-     * @param [queueIdOrName]
-     *   ID or name of the request queue to be opened. If `null` or `undefined`,
-     *   the function returns the default request queue associated with the crawler run.
-     * @param [options] Open Request Queue options.
-     */
-    static async open(queueIdOrName, options = {}) {
-        (0, ow_1.default)(queueIdOrName, ow_1.default.optional.any(ow_1.default.string, ow_1.default.null));
-        (0, ow_1.default)(options, ow_1.default.object.exactShape({
-            config: ow_1.default.optional.object.instanceOf(configuration_1.Configuration),
-            storageClient: ow_1.default.optional.object,
-            proxyConfiguration: ow_1.default.optional.object,
-        }));
-        options.config ?? (options.config = configuration_1.Configuration.getGlobalConfig());
-        options.storageClient ?? (options.storageClient = options.config.getStorageClient());
-        await (0, utils_2.purgeDefaultStorages)({ onlyPurgeOnce: true, client: options.storageClient, config: options.config });
-        const manager = storage_manager_1.StorageManager.getManager(this, options.config);
-        const queue = await manager.openStorage(queueIdOrName, options.storageClient);
-        queue.proxyConfiguration = options.proxyConfiguration;
-        return queue;
+    static open(...args) {
+        return super.open(...args);
     }
 }
 exports.RequestQueue = RequestQueue;