@crawlee/core 4.0.0-beta.63 → 4.0.0-beta.65
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/crawlers/crawler_commons.d.ts +2 -2
- package/crawlers/crawler_commons.d.ts.map +1 -1
- package/enqueue_links/enqueue_links.d.ts +7 -6
- package/enqueue_links/enqueue_links.d.ts.map +1 -1
- package/enqueue_links/enqueue_links.js +4 -4
- package/enqueue_links/enqueue_links.js.map +1 -1
- package/package.json +5 -5
- package/storages/index.d.ts +3 -2
- package/storages/index.d.ts.map +1 -1
- package/storages/index.js +1 -2
- package/storages/index.js.map +1 -1
- package/storages/request_list.d.ts +22 -71
- package/storages/request_list.d.ts.map +1 -1
- package/storages/request_list.js +33 -28
- package/storages/request_list.js.map +1 -1
- package/storages/request_loader.d.ts +97 -0
- package/storages/request_loader.d.ts.map +1 -0
- package/storages/request_loader.js +2 -0
- package/storages/request_loader.js.map +1 -0
- package/storages/request_manager.d.ts +25 -0
- package/storages/request_manager.d.ts.map +1 -0
- package/storages/request_manager.js +2 -0
- package/storages/request_manager.js.map +1 -0
- package/storages/request_manager_tandem.d.ts +44 -16
- package/storages/request_manager_tandem.d.ts.map +1 -1
- package/storages/request_manager_tandem.js +95 -40
- package/storages/request_manager_tandem.js.map +1 -1
- package/storages/request_provider.d.ts +6 -65
- package/storages/request_provider.d.ts.map +1 -1
- package/storages/request_provider.js +10 -15
- package/storages/request_provider.js.map +1 -1
- package/storages/request_queue_v2.d.ts +2 -1
- package/storages/request_queue_v2.d.ts.map +1 -1
- package/storages/{sitemap_request_list.d.ts → sitemap_request_loader.d.ts} +23 -18
- package/storages/sitemap_request_loader.d.ts.map +1 -0
- package/storages/{sitemap_request_list.js → sitemap_request_loader.js} +40 -39
- package/storages/sitemap_request_loader.js.map +1 -0
- package/storages/request_list_adapter.d.ts +0 -58
- package/storages/request_list_adapter.d.ts.map +0 -1
- package/storages/request_list_adapter.js +0 -81
- package/storages/request_list_adapter.js.map +0 -1
- package/storages/sitemap_request_list.d.ts.map +0 -1
- package/storages/sitemap_request_list.js.map +0 -1
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import type { Dictionary } from '@crawlee/types';
|
|
2
|
+
import type { Request } from '../request.js';
|
|
3
|
+
import type { IRequestManager } from './request_manager.js';
|
|
4
|
+
import type { RequestQueueOperationInfo } from './request_provider.js';
|
|
5
|
+
/**
|
|
6
|
+
* An abstract interface defining a read-only stream of requests to crawl.
|
|
7
|
+
*
|
|
8
|
+
* Request loaders are used to manage and provide access to a storage of crawling requests.
|
|
9
|
+
*
|
|
10
|
+
* Key responsibilities:
|
|
11
|
+
* - Fetching the next request to be processed.
|
|
12
|
+
* - Marking requests as handled once they are no longer in progress.
|
|
13
|
+
* - Managing state information such as the total and handled request counts.
|
|
14
|
+
*
|
|
15
|
+
* ## Request lifecycle contract
|
|
16
|
+
*
|
|
17
|
+
* Every request returned by {@link IRequestLoader.fetchNextRequest} is considered **in progress**
|
|
18
|
+
* until it is passed to {@link IRequestLoader.markRequestHandled}. Once you fetch a request, you are
|
|
19
|
+
* obligated to eventually mark it as handled — there is no way to hand a request back to a loader
|
|
20
|
+
* (only an {@link IRequestManager} can reclaim requests for a retry). "Handled" therefore means
|
|
21
|
+
* "finished with this request", whether processing succeeded or was abandoned after exhausting retries.
|
|
22
|
+
*
|
|
23
|
+
* Honoring this contract matters for three reasons:
|
|
24
|
+
* - **Restarts and migrations:** loaders that persist their state (see {@link IRequestLoader.persistState})
|
|
25
|
+
* treat in-progress requests as interrupted and re-serve them after a restart. A request that is fetched
|
|
26
|
+
* but never marked handled will be crawled again.
|
|
27
|
+
* - **Termination detection:** {@link IRequestLoader.isFinished} only resolves to `true` once nothing is
|
|
28
|
+
* in progress. Leaving a request unmarked keeps the crawler running indefinitely.
|
|
29
|
+
* - **Bookkeeping:** the handled and pending counts are derived from the set of in-progress requests, so
|
|
30
|
+
* skipping {@link IRequestLoader.markRequestHandled} corrupts {@link IRequestLoader.getHandledCount}
|
|
31
|
+
* and {@link IRequestLoader.getPendingCount}.
|
|
32
|
+
*
|
|
33
|
+
* Concrete implementations such as {@link RequestList} or {@link SitemapRequestLoader} build on this interface.
|
|
34
|
+
* The {@link IRequestManager} interface extends it with the capability to enqueue and reclaim requests.
|
|
35
|
+
*/
|
|
36
|
+
export interface IRequestLoader {
|
|
37
|
+
/**
|
|
38
|
+
* Returns an approximation of the total number of requests in the loader (i.e. pending + handled).
|
|
39
|
+
*/
|
|
40
|
+
getTotalCount(): Promise<number>;
|
|
41
|
+
/**
|
|
42
|
+
* Returns an approximation of the number of pending requests in the loader.
|
|
43
|
+
*/
|
|
44
|
+
getPendingCount(): Promise<number>;
|
|
45
|
+
/**
|
|
46
|
+
* Returns the number of requests in the loader that have been handled.
|
|
47
|
+
*/
|
|
48
|
+
getHandledCount(): Promise<number>;
|
|
49
|
+
/**
|
|
50
|
+
* Returns `true` if all requests were already handled and there are no more left.
|
|
51
|
+
*/
|
|
52
|
+
isFinished(): Promise<boolean>;
|
|
53
|
+
/**
|
|
54
|
+
* Resolves to `true` if the next call to {@link IRequestLoader.fetchNextRequest} function
|
|
55
|
+
* would return `null`, otherwise it resolves to `false`.
|
|
56
|
+
* Note that even if the loader is empty, there might be some pending requests currently being processed.
|
|
57
|
+
*/
|
|
58
|
+
isEmpty(): Promise<boolean>;
|
|
59
|
+
/**
|
|
60
|
+
* Gets the next {@link Request} to process, or `null` if there are no more pending requests.
|
|
61
|
+
*
|
|
62
|
+
* The returned request is marked as **in progress** and remains so until it is passed to
|
|
63
|
+
* {@link IRequestLoader.markRequestHandled}. The caller is responsible for eventually marking
|
|
64
|
+
* every fetched request as handled; otherwise the loader never considers itself finished and the
|
|
65
|
+
* request may be re-served after a restart. See the request lifecycle contract on {@link IRequestLoader}.
|
|
66
|
+
*/
|
|
67
|
+
fetchNextRequest<T extends Dictionary = Dictionary>(): Promise<Request<T> | null>;
|
|
68
|
+
/**
|
|
69
|
+
* Can be used to iterate over the loader instance in a `for await .. of` loop.
|
|
70
|
+
* Provides an alternative for the repeated use of `fetchNextRequest`.
|
|
71
|
+
*/
|
|
72
|
+
[Symbol.asyncIterator](): AsyncGenerator<Request>;
|
|
73
|
+
/**
|
|
74
|
+
* Marks a request previously returned by {@link IRequestLoader.fetchNextRequest} as handled,
|
|
75
|
+
* removing it from the set of in-progress requests.
|
|
76
|
+
*
|
|
77
|
+
* Call this once you are done with the request — whether processing succeeded or was abandoned after
|
|
78
|
+
* exhausting retries. Because a loader cannot take a request back, marking it handled is the only way to
|
|
79
|
+
* signal completion; failing to do so prevents {@link IRequestLoader.isFinished} from ever resolving to
|
|
80
|
+
* `true` and skews the handled and pending counts. See the request lifecycle contract on {@link IRequestLoader}.
|
|
81
|
+
*/
|
|
82
|
+
markRequestHandled(request: Request): Promise<RequestQueueOperationInfo | void | null>;
|
|
83
|
+
/**
|
|
84
|
+
* Persists the current state of the loader into the default {@link KeyValueStore}.
|
|
85
|
+
*
|
|
86
|
+
* Not all loaders support persistence; implementations that do not should leave this `undefined`.
|
|
87
|
+
*/
|
|
88
|
+
persistState?(): Promise<void>;
|
|
89
|
+
/**
|
|
90
|
+
* Combines the loader with a request manager to support adding and reclaiming requests.
|
|
91
|
+
*
|
|
92
|
+
* @param requestManager Request manager to combine the loader with. If not provided, the default
|
|
93
|
+
* {@link RequestQueue} is used.
|
|
94
|
+
*/
|
|
95
|
+
toTandem?(requestManager?: IRequestManager): Promise<IRequestManager>;
|
|
96
|
+
}
|
|
97
|
+
//# sourceMappingURL=request_loader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"request_loader.d.ts","sourceRoot":"","sources":["../../src/storages/request_loader.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAEjD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAC7C,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAC5D,OAAO,KAAK,EAAE,yBAAyB,EAAE,MAAM,uBAAuB,CAAC;AAEvE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AACH,MAAM,WAAW,cAAc;IAC3B;;OAEG;IACH,aAAa,IAAI,OAAO,CAAC,MAAM,CAAC,CAAC;IAEjC;;OAEG;IACH,eAAe,IAAI,OAAO,CAAC,MAAM,CAAC,CAAC;IAEnC;;OAEG;IACH,eAAe,IAAI,OAAO,CAAC,MAAM,CAAC,CAAC;IAEnC;;OAEG;IACH,UAAU,IAAI,OAAO,CAAC,OAAO,CAAC,CAAC;IAE/B;;;;OAIG;IACH,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC,CAAC;IAE5B;;;;;;;OAOG;IACH,gBAAgB,CAAC,CAAC,SAAS,UAAU,GAAG,UAAU,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC;IAElF;;;OAGG;IACH,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,cAAc,CAAC,OAAO,CAAC,CAAC;IAElD;;;;;;;;OAQG;IACH,kBAAkB,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,yBAAyB,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC;IAEvF;;;;OAIG;IACH,YAAY,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IAE/B;;;;;OAKG;IACH,QAAQ,CAAC,CAAC,cAAc,CAAC,EAAE,eAAe,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;CACzE"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"request_loader.js","sourceRoot":"","sources":["../../src/storages/request_loader.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { Request, Source } from '../request.js';
|
|
2
|
+
import type { IRequestLoader } from './request_loader.js';
|
|
3
|
+
import type { AddRequestsBatchedOptions, AddRequestsBatchedResult, RequestQueueOperationInfo, RequestQueueOperationOptions } from './request_provider.js';
|
|
4
|
+
export type RequestsLike = AsyncIterable<Source | string> | Iterable<Source | string> | (Source | string)[];
|
|
5
|
+
/**
|
|
6
|
+
* Extends the read-only {@link IRequestLoader} interface with the capability to enqueue new requests
|
|
7
|
+
* and reclaim failed ones.
|
|
8
|
+
*/
|
|
9
|
+
export interface IRequestManager extends IRequestLoader {
|
|
10
|
+
/**
|
|
11
|
+
* Reclaims a request whose processing failed, returning it to the manager so that it can be retried.
|
|
12
|
+
* The request will be returned by some subsequent `fetchNextRequest()` call.
|
|
13
|
+
*/
|
|
14
|
+
reclaimRequest(request: Request, options?: RequestQueueOperationOptions): Promise<RequestQueueOperationInfo | null>;
|
|
15
|
+
addRequest(requestLike: Source, options?: RequestQueueOperationOptions): Promise<RequestQueueOperationInfo>;
|
|
16
|
+
addRequestsBatched(requests: RequestsLike, options?: AddRequestsBatchedOptions): Promise<AddRequestsBatchedResult>;
|
|
17
|
+
/**
|
|
18
|
+
* Remove all requests from the queue but keep the queue itself, resetting it
|
|
19
|
+
* so it can be reused (e.g. across multiple `crawler.run()` calls).
|
|
20
|
+
*
|
|
21
|
+
* Implementations that do not support purging may leave this `undefined`.
|
|
22
|
+
*/
|
|
23
|
+
purge?(): Promise<void>;
|
|
24
|
+
}
|
|
25
|
+
//# sourceMappingURL=request_manager.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"request_manager.d.ts","sourceRoot":"","sources":["../../src/storages/request_manager.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAC1D,OAAO,KAAK,EACR,yBAAyB,EACzB,wBAAwB,EACxB,yBAAyB,EACzB,4BAA4B,EAC/B,MAAM,uBAAuB,CAAC;AAE/B,MAAM,MAAM,YAAY,GAAG,aAAa,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,QAAQ,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,MAAM,GAAG,MAAM,CAAC,EAAE,CAAC;AAE5G;;;GAGG;AACH,MAAM,WAAW,eAAgB,SAAQ,cAAc;IACnD;;;OAGG;IACH,cAAc,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,EAAE,4BAA4B,GAAG,OAAO,CAAC,yBAAyB,GAAG,IAAI,CAAC,CAAC;IAEpH,UAAU,CAAC,WAAW,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,4BAA4B,GAAG,OAAO,CAAC,yBAAyB,CAAC,CAAC;IAE5G,kBAAkB,CAAC,QAAQ,EAAE,YAAY,EAAE,OAAO,CAAC,EAAE,yBAAyB,GAAG,OAAO,CAAC,wBAAwB,CAAC,CAAC;IAEnH;;;;;OAKG;IACH,KAAK,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC3B"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"request_manager.js","sourceRoot":"","sources":["../../src/storages/request_manager.ts"],"names":[],"mappings":""}
|
|
@@ -1,26 +1,43 @@
|
|
|
1
1
|
import type { Dictionary } from '@crawlee/types';
|
|
2
2
|
import type { Request, Source } from '../request.js';
|
|
3
|
-
import type {
|
|
4
|
-
import type {
|
|
3
|
+
import type { IRequestLoader } from './request_loader.js';
|
|
4
|
+
import type { IRequestManager, RequestsLike } from './request_manager.js';
|
|
5
|
+
import type { AddRequestsBatchedOptions, AddRequestsBatchedResult, RequestQueueOperationInfo, RequestQueueOperationOptions } from './request_provider.js';
|
|
5
6
|
/**
|
|
6
|
-
* A request manager that combines a RequestList
|
|
7
|
-
*
|
|
8
|
-
* transfers them in batches to the
|
|
7
|
+
* A request manager that combines a {@link IRequestLoader} (such as a `RequestList`) with a writable
|
|
8
|
+
* {@link IRequestManager} (such as a `RequestQueue`).
|
|
9
|
+
* It first reads requests from the loader and then, when needed, transfers them to the manager one request at a time.
|
|
9
10
|
*/
|
|
10
11
|
export declare class RequestManagerTandem implements IRequestManager {
|
|
11
12
|
private log;
|
|
12
|
-
private
|
|
13
|
-
private
|
|
14
|
-
|
|
13
|
+
private requestLoader;
|
|
14
|
+
private requestManagerPromise?;
|
|
15
|
+
private resolvedRequestManager?;
|
|
16
|
+
private requestManagerFactory;
|
|
15
17
|
/**
|
|
16
|
-
*
|
|
17
|
-
*
|
|
18
|
+
* @param requestLoader The read-only loader to read requests from first.
|
|
19
|
+
* @param requestManager The writable manager to transfer requests into and enqueue new ones. May be passed as a
|
|
20
|
+
* factory function so that the tandem can be constructed synchronously and the manager opened lazily on first use
|
|
21
|
+
* (e.g. a lazily-opened default {@link RequestQueue}).
|
|
22
|
+
*/
|
|
23
|
+
constructor(requestLoader: IRequestLoader, requestManager: IRequestManager | (() => IRequestManager | Promise<IRequestManager>));
|
|
24
|
+
/**
|
|
25
|
+
* Resolves the writable request manager, opening it lazily (via the factory) on first use and memoizing the result.
|
|
26
|
+
* @private
|
|
27
|
+
*/
|
|
28
|
+
private getRequestManager;
|
|
29
|
+
/**
|
|
30
|
+
* Transfers a single request from the read-only loader to the writable manager.
|
|
31
|
+
* If the transfer fails, the request is dropped (and logged) rather than reclaimed.
|
|
32
|
+
*
|
|
33
|
+
* @returns `true` if a request was successfully transferred (or there was nothing to transfer), and `false` if a
|
|
34
|
+
* transfer was attempted but failed - in which case the caller should not fetch from the manager this round.
|
|
18
35
|
* @private
|
|
19
36
|
*/
|
|
20
|
-
private
|
|
37
|
+
private transferNextRequestToQueue;
|
|
21
38
|
/**
|
|
22
|
-
* Fetches the next request from the
|
|
23
|
-
* is not finished, it will transfer a
|
|
39
|
+
* Fetches the next request from the request manager. If the manager is empty and the loader
|
|
40
|
+
* is not finished, it will transfer a request from the loader to the manager first.
|
|
24
41
|
* @inheritdoc
|
|
25
42
|
*/
|
|
26
43
|
fetchNextRequest<T extends Dictionary = Dictionary>(): Promise<Request<T> | null>;
|
|
@@ -35,15 +52,15 @@ export declare class RequestManagerTandem implements IRequestManager {
|
|
|
35
52
|
/**
|
|
36
53
|
* @inheritdoc
|
|
37
54
|
*/
|
|
38
|
-
|
|
55
|
+
getHandledCount(): Promise<number>;
|
|
39
56
|
/**
|
|
40
57
|
* @inheritdoc
|
|
41
58
|
*/
|
|
42
|
-
getTotalCount(): number
|
|
59
|
+
getTotalCount(): Promise<number>;
|
|
43
60
|
/**
|
|
44
61
|
* @inheritdoc
|
|
45
62
|
*/
|
|
46
|
-
getPendingCount(): number
|
|
63
|
+
getPendingCount(): Promise<number>;
|
|
47
64
|
/**
|
|
48
65
|
* @inheritdoc
|
|
49
66
|
*/
|
|
@@ -64,5 +81,16 @@ export declare class RequestManagerTandem implements IRequestManager {
|
|
|
64
81
|
* @inheritdoc
|
|
65
82
|
*/
|
|
66
83
|
addRequestsBatched(requests: RequestsLike, options?: AddRequestsBatchedOptions): Promise<AddRequestsBatchedResult>;
|
|
84
|
+
/**
|
|
85
|
+
* Persists the state of the underlying read-only loader, if it supports persistence.
|
|
86
|
+
* @inheritdoc
|
|
87
|
+
*/
|
|
88
|
+
persistState(): Promise<void>;
|
|
89
|
+
/**
|
|
90
|
+
* Purges the writable request manager so the tandem can be reused (e.g. across repeated `crawler.run()` calls).
|
|
91
|
+
* The read-only loader is immutable and cannot be purged, so only the manager side is reset.
|
|
92
|
+
* @inheritdoc
|
|
93
|
+
*/
|
|
94
|
+
purge(): Promise<void>;
|
|
67
95
|
}
|
|
68
96
|
//# sourceMappingURL=request_manager_tandem.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"request_manager_tandem.d.ts","sourceRoot":"","sources":["../../src/storages/request_manager_tandem.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAGjD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAErD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,
|
|
1
|
+
{"version":3,"file":"request_manager_tandem.d.ts","sourceRoot":"","sources":["../../src/storages/request_manager_tandem.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAGjD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAErD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAC1D,OAAO,KAAK,EAAE,eAAe,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAC1E,OAAO,KAAK,EACR,yBAAyB,EACzB,wBAAwB,EACxB,yBAAyB,EACzB,4BAA4B,EAC/B,MAAM,uBAAuB,CAAC;AAE/B;;;;GAIG;AACH,qBAAa,oBAAqB,YAAW,eAAe;IACxD,OAAO,CAAC,GAAG,CAAgB;IAC3B,OAAO,CAAC,aAAa,CAAiB;IACtC,OAAO,CAAC,qBAAqB,CAAC,CAA2B;IACzD,OAAO,CAAC,sBAAsB,CAAC,CAAkB;IAEjD,OAAO,CAAC,qBAAqB,CAAmD;IAEhF;;;;;OAKG;gBAEC,aAAa,EAAE,cAAc,EAC7B,cAAc,EAAE,eAAe,GAAG,CAAC,MAAM,eAAe,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;IAOxF;;;OAGG;YACW,iBAAiB;IAQ/B;;;;;;;OAOG;YACW,0BAA0B;IAyBxC;;;;OAIG;IACG,gBAAgB,CAAC,CAAC,SAAS,UAAU,GAAG,UAAU,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC;IAmBvF;;OAEG;IACG,UAAU,IAAI,OAAO,CAAC,OAAO,CAAC;IAMpC;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC;IAMjC;;OAEG;IACG,eAAe,IAAI,OAAO,CAAC,MAAM,CAAC;IAKxC;;OAEG;IACG,aAAa,IAAI,OAAO,CAAC,MAAM,CAAC;IAStC;;OAEG;IACG,eAAe,IAAI,OAAO,CAAC,MAAM,CAAC;IASxC;;OAEG;IACI,CAAC,MAAM,CAAC,aAAa,CAAC;IAQ7B;;OAEG;IACG,kBAAkB,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,yBAAyB,GAAG,IAAI,GAAG,IAAI,CAAC;IAI5F;;OAEG;IACG,cAAc,CAChB,OAAO,EAAE,OAAO,EAChB,OAAO,CAAC,EAAE,4BAA4B,GACvC,OAAO,CAAC,yBAAyB,GAAG,IAAI,CAAC;IAI5C;;OAEG;IACG,UAAU,CAAC,WAAW,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,4BAA4B,GAAG,OAAO,CAAC,yBAAyB,CAAC;IAIjH;;OAEG;IACG,kBAAkB,CACpB,QAAQ,EAAE,YAAY,EACtB,OAAO,CAAC,EAAE,yBAAyB,GACpC,OAAO,CAAC,wBAAwB,CAAC;IAIpC;;;OAGG;IACG,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC;IAInC;;;;OAIG;IACG,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B"}
|
|
@@ -1,89 +1,129 @@
|
|
|
1
1
|
import { serviceLocator } from '../service_locator.js';
|
|
2
2
|
/**
|
|
3
|
-
* A request manager that combines a RequestList
|
|
4
|
-
*
|
|
5
|
-
* transfers them in batches to the
|
|
3
|
+
* A request manager that combines a {@link IRequestLoader} (such as a `RequestList`) with a writable
|
|
4
|
+
* {@link IRequestManager} (such as a `RequestQueue`).
|
|
5
|
+
* It first reads requests from the loader and then, when needed, transfers them to the manager one request at a time.
|
|
6
6
|
*/
|
|
7
7
|
export class RequestManagerTandem {
|
|
8
8
|
log;
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
9
|
+
requestLoader;
|
|
10
|
+
requestManagerPromise;
|
|
11
|
+
resolvedRequestManager;
|
|
12
|
+
requestManagerFactory;
|
|
13
|
+
/**
|
|
14
|
+
* @param requestLoader The read-only loader to read requests from first.
|
|
15
|
+
* @param requestManager The writable manager to transfer requests into and enqueue new ones. May be passed as a
|
|
16
|
+
* factory function so that the tandem can be constructed synchronously and the manager opened lazily on first use
|
|
17
|
+
* (e.g. a lazily-opened default {@link RequestQueue}).
|
|
18
|
+
*/
|
|
19
|
+
constructor(requestLoader, requestManager) {
|
|
12
20
|
this.log = serviceLocator.getLogger().child({ prefix: 'RequestManagerTandem' });
|
|
13
|
-
this.
|
|
14
|
-
this.
|
|
21
|
+
this.requestLoader = requestLoader;
|
|
22
|
+
this.requestManagerFactory = typeof requestManager === 'function' ? requestManager : () => requestManager;
|
|
15
23
|
}
|
|
16
24
|
/**
|
|
17
|
-
*
|
|
18
|
-
* Handles both successful transfers and failures appropriately.
|
|
25
|
+
* Resolves the writable request manager, opening it lazily (via the factory) on first use and memoizing the result.
|
|
19
26
|
* @private
|
|
20
27
|
*/
|
|
21
|
-
async
|
|
22
|
-
|
|
28
|
+
async getRequestManager() {
|
|
29
|
+
if (this.resolvedRequestManager === undefined) {
|
|
30
|
+
this.requestManagerPromise ??= Promise.resolve(this.requestManagerFactory());
|
|
31
|
+
this.resolvedRequestManager = await this.requestManagerPromise;
|
|
32
|
+
}
|
|
33
|
+
return this.resolvedRequestManager;
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Transfers a single request from the read-only loader to the writable manager.
|
|
37
|
+
* If the transfer fails, the request is dropped (and logged) rather than reclaimed.
|
|
38
|
+
*
|
|
39
|
+
* @returns `true` if a request was successfully transferred (or there was nothing to transfer), and `false` if a
|
|
40
|
+
* transfer was attempted but failed - in which case the caller should not fetch from the manager this round.
|
|
41
|
+
* @private
|
|
42
|
+
*/
|
|
43
|
+
async transferNextRequestToQueue() {
|
|
44
|
+
const request = await this.requestLoader.fetchNextRequest();
|
|
23
45
|
if (request === null) {
|
|
24
|
-
return;
|
|
46
|
+
return true;
|
|
25
47
|
}
|
|
48
|
+
const requestManager = await this.getRequestManager();
|
|
26
49
|
try {
|
|
27
|
-
await
|
|
50
|
+
await requestManager.addRequest(request, { forefront: true });
|
|
51
|
+
return true;
|
|
28
52
|
}
|
|
29
53
|
catch (error) {
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
54
|
+
this.log.exception(error, 'Adding request from the RequestLoader to the RequestManager failed, the request has been dropped.', { url: request.url, uniqueKey: request.uniqueKey });
|
|
55
|
+
return false;
|
|
56
|
+
}
|
|
57
|
+
finally {
|
|
58
|
+
// Mark it as handled so that the request doesn't get stuck in the `inProgress` state in the loader.
|
|
59
|
+
await this.requestLoader.markRequestHandled(request);
|
|
35
60
|
}
|
|
36
|
-
await this.requestList.markRequestHandled(request);
|
|
37
61
|
}
|
|
38
62
|
/**
|
|
39
|
-
* Fetches the next request from the
|
|
40
|
-
* is not finished, it will transfer a
|
|
63
|
+
* Fetches the next request from the request manager. If the manager is empty and the loader
|
|
64
|
+
* is not finished, it will transfer a request from the loader to the manager first.
|
|
41
65
|
* @inheritdoc
|
|
42
66
|
*/
|
|
43
67
|
async fetchNextRequest() {
|
|
44
68
|
// First, try to transfer a request from the request loader
|
|
45
69
|
const [listEmpty, listFinished] = await Promise.all([
|
|
46
|
-
this.
|
|
47
|
-
this.
|
|
70
|
+
this.requestLoader.isEmpty(),
|
|
71
|
+
this.requestLoader.isFinished(),
|
|
48
72
|
]);
|
|
49
73
|
if (!listEmpty && !listFinished) {
|
|
50
|
-
|
|
74
|
+
// If the transfer failed, the request was dropped; don't fetch from the manager this round (matching
|
|
75
|
+
// crawlee-python behaviour). The next `fetchNextRequest()` call will pick up where we left off.
|
|
76
|
+
if (!(await this.transferNextRequestToQueue())) {
|
|
77
|
+
return null;
|
|
78
|
+
}
|
|
51
79
|
}
|
|
52
|
-
// Try to fetch from
|
|
53
|
-
return this.
|
|
80
|
+
// Try to fetch from manager after the transfer
|
|
81
|
+
return (await this.getRequestManager()).fetchNextRequest();
|
|
54
82
|
}
|
|
55
83
|
/**
|
|
56
84
|
* @inheritdoc
|
|
57
85
|
*/
|
|
58
86
|
async isFinished() {
|
|
59
|
-
const
|
|
87
|
+
const requestManager = await this.getRequestManager();
|
|
88
|
+
const storagesFinished = await Promise.all([this.requestLoader.isFinished(), requestManager.isFinished()]);
|
|
60
89
|
return storagesFinished.every(Boolean);
|
|
61
90
|
}
|
|
62
91
|
/**
|
|
63
92
|
* @inheritdoc
|
|
64
93
|
*/
|
|
65
94
|
async isEmpty() {
|
|
66
|
-
const
|
|
95
|
+
const requestManager = await this.getRequestManager();
|
|
96
|
+
const storagesEmpty = await Promise.all([this.requestLoader.isEmpty(), requestManager.isEmpty()]);
|
|
67
97
|
return storagesEmpty.every(Boolean);
|
|
68
98
|
}
|
|
69
99
|
/**
|
|
70
100
|
* @inheritdoc
|
|
71
101
|
*/
|
|
72
|
-
async
|
|
73
|
-
// Since one of the stores needs to have priority when both are present, we query the request
|
|
74
|
-
return await this.
|
|
102
|
+
async getHandledCount() {
|
|
103
|
+
// Only the request manager is queried here: every request is moved from the loader into the manager before it is processed (the loader marks it handled on transfer), so the manager's handled count is the one that reflects actual processing.
|
|
104
|
+
return (await this.getRequestManager()).getHandledCount();
|
|
75
105
|
}
|
|
76
106
|
/**
|
|
77
107
|
* @inheritdoc
|
|
78
108
|
*/
|
|
79
|
-
getTotalCount() {
|
|
80
|
-
|
|
109
|
+
async getTotalCount() {
|
|
110
|
+
const requestManager = await this.getRequestManager();
|
|
111
|
+
const [managerTotal, loaderTotal] = await Promise.all([
|
|
112
|
+
requestManager.getTotalCount(),
|
|
113
|
+
this.requestLoader.getTotalCount(),
|
|
114
|
+
]);
|
|
115
|
+
return managerTotal + loaderTotal;
|
|
81
116
|
}
|
|
82
117
|
/**
|
|
83
118
|
* @inheritdoc
|
|
84
119
|
*/
|
|
85
|
-
getPendingCount() {
|
|
86
|
-
|
|
120
|
+
async getPendingCount() {
|
|
121
|
+
const requestManager = await this.getRequestManager();
|
|
122
|
+
const [managerPending, loaderPending] = await Promise.all([
|
|
123
|
+
requestManager.getPendingCount(),
|
|
124
|
+
this.requestLoader.getPendingCount(),
|
|
125
|
+
]);
|
|
126
|
+
return managerPending + loaderPending;
|
|
87
127
|
}
|
|
88
128
|
/**
|
|
89
129
|
* @inheritdoc
|
|
@@ -100,25 +140,40 @@ export class RequestManagerTandem {
|
|
|
100
140
|
* @inheritdoc
|
|
101
141
|
*/
|
|
102
142
|
async markRequestHandled(request) {
|
|
103
|
-
return this.
|
|
143
|
+
return (await this.getRequestManager()).markRequestHandled(request);
|
|
104
144
|
}
|
|
105
145
|
/**
|
|
106
146
|
* @inheritdoc
|
|
107
147
|
*/
|
|
108
148
|
async reclaimRequest(request, options) {
|
|
109
|
-
return await this.
|
|
149
|
+
return (await this.getRequestManager()).reclaimRequest(request, options);
|
|
110
150
|
}
|
|
111
151
|
/**
|
|
112
152
|
* @inheritdoc
|
|
113
153
|
*/
|
|
114
154
|
async addRequest(requestLike, options) {
|
|
115
|
-
return await this.
|
|
155
|
+
return (await this.getRequestManager()).addRequest(requestLike, options);
|
|
116
156
|
}
|
|
117
157
|
/**
|
|
118
158
|
* @inheritdoc
|
|
119
159
|
*/
|
|
120
160
|
async addRequestsBatched(requests, options) {
|
|
121
|
-
return await this.
|
|
161
|
+
return (await this.getRequestManager()).addRequestsBatched(requests, options);
|
|
162
|
+
}
|
|
163
|
+
/**
|
|
164
|
+
* Persists the state of the underlying read-only loader, if it supports persistence.
|
|
165
|
+
* @inheritdoc
|
|
166
|
+
*/
|
|
167
|
+
async persistState() {
|
|
168
|
+
await this.requestLoader.persistState?.();
|
|
169
|
+
}
|
|
170
|
+
/**
|
|
171
|
+
* Purges the writable request manager so the tandem can be reused (e.g. across repeated `crawler.run()` calls).
|
|
172
|
+
* The read-only loader is immutable and cannot be purged, so only the manager side is reset.
|
|
173
|
+
* @inheritdoc
|
|
174
|
+
*/
|
|
175
|
+
async purge() {
|
|
176
|
+
await (await this.getRequestManager()).purge?.();
|
|
122
177
|
}
|
|
123
178
|
}
|
|
124
179
|
//# sourceMappingURL=request_manager_tandem.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"request_manager_tandem.js","sourceRoot":"","sources":["../../src/storages/request_manager_tandem.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;
|
|
1
|
+
{"version":3,"file":"request_manager_tandem.js","sourceRoot":"","sources":["../../src/storages/request_manager_tandem.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAUvD;;;;GAIG;AACH,MAAM,OAAO,oBAAoB;IACrB,GAAG,CAAgB;IACnB,aAAa,CAAiB;IAC9B,qBAAqB,CAA4B;IACjD,sBAAsB,CAAmB;IAEzC,qBAAqB,CAAmD;IAEhF;;;;;OAKG;IACH,YACI,aAA6B,EAC7B,cAAoF;QAEpF,IAAI,CAAC,GAAG,GAAG,cAAc,CAAC,SAAS,EAAE,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,sBAAsB,EAAE,CAAC,CAAC;QAChF,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;QACnC,IAAI,CAAC,qBAAqB,GAAG,OAAO,cAAc,KAAK,UAAU,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,cAAc,CAAC;IAC9G,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,iBAAiB;QAC3B,IAAI,IAAI,CAAC,sBAAsB,KAAK,SAAS,EAAE,CAAC;YAC5C,IAAI,CAAC,qBAAqB,KAAK,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,qBAAqB,EAAE,CAAC,CAAC;YAC7E,IAAI,CAAC,sBAAsB,GAAG,MAAM,IAAI,CAAC,qBAAqB,CAAC;QACnE,CAAC;QACD,OAAO,IAAI,CAAC,sBAAsB,CAAC;IACvC,CAAC;IAED;;;;;;;OAOG;IACK,KAAK,CAAC,0BAA0B;QACpC,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,gBAAgB,EAAE,CAAC;QAE5D,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;YACnB,OAAO,IAAI,CAAC;QAChB,CAAC;QAED,MAAM,cAAc,GAAG,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAEtD,IAAI,CAAC;YACD,MAAM,cAAc,CAAC,UAAU,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YAC9D,OAAO,IAAI,CAAC;QAChB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,IAAI,CAAC,GAAG,CAAC,SAAS,CACd,KAAc,EACd,mGAAmG,EACnG,EAAE,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE,SAAS,EAAE,OAAO,CAAC,SAAS,EAAE,CACrD,CAAC;YACF,OAAO,KAAK,CAAC;QACjB,CAAC;gBAAS,CAAC;YACP,oGAAoG;YACpG,MAAM,IAAI,CAAC,aAAa,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC;QACzD,CAAC;IACL,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,gBAAgB;QAClB,wDAAwD;QACxD,MAAM,CAAC,SAAS,EAAE,YAAY,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAChD,IAAI,CAAC,aAAa,CAAC,OAAO,EAAE;YAC5B,IAAI,CAAC,aAAa,CAAC,UAAU,EAAE;SAClC,CAAC,CAAC;QAEH,IAAI,CAAC,SAAS,IAAI,CAAC,YAAY,EAAE,CAAC;YAC9B,qGAAqG;YACrG,gGAAgG;YAChG,IAAI,CAAC,CAAC,MAAM,IAAI,CAAC,0BAA0B,EAAE,CAAC,EAAE,CAAC;gBAC7C,OAAO,IAAI,CAAC;YAChB,CAAC;QACL,CAAC;QAED,+CAA+C;QAC/C,OAAO,CAAC,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC,CAAC,gBAAgB,EAAK,CAAC;IAClE,CAAC;IAED;;OAEG
;IACH,KAAK,CAAC,UAAU;QACZ,MAAM,cAAc,GAAG,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QACtD,MAAM,gBAAgB,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,UAAU,EAAE,EAAE,cAAc,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;QAC3G,OAAO,gBAAgB,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAC3C,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACT,MAAM,cAAc,GAAG,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QACtD,MAAM,aAAa,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,OAAO,EAAE,EAAE,cAAc,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QAClG,OAAO,aAAa,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACxC,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,eAAe;QACjB,qLAAqL;QACrL,OAAO,CAAC,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC,CAAC,eAAe,EAAE,CAAC;IAC9D,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,aAAa;QACf,MAAM,cAAc,GAAG,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QACtD,MAAM,CAAC,YAAY,EAAE,WAAW,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAClD,cAAc,CAAC,aAAa,EAAE;YAC9B,IAAI,CAAC,aAAa,CAAC,aAAa,EAAE;SACrC,CAAC,CAAC;QACH,OAAO,YAAY,GAAG,WAAW,CAAC;IACtC,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,eAAe;QACjB,MAAM,cAAc,GAAG,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QACtD,MAAM,CAAC,cAAc,EAAE,aAAa,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YACtD,cAAc,CAAC,eAAe,EAAE;YAChC,IAAI,CAAC,aAAa,CAAC,eAAe,EAAE;SACvC,CAAC,CAAC;QACH,OAAO,cAAc,GAAG,aAAa,CAAC;IAC1C,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,aAAa,CAAC;QACzB,OAAO,IAAI,EAAE,CAAC;YACV,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,gBAAgB,EAAE,CAAC;YAC1C,IAAI,CAAC,GAAG;gBAAE,MAAM;YAChB,MAAM,GAAG,CAAC;QACd,CAAC;IACL,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,kBAAkB,CAAC,OAAgB;QACrC,OAAO,CAAC,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC;IACxE,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,cAAc,CAChB,OAAgB,EAChB,OAAsC;QAEtC,OAAO,CAAC,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC,CAAC,cAAc,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;IAC7E,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,UAAU,CAAC,WAAmB,EAAE,OAAsC;QACxE,OAAO,CAAC,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC,CAAC,UAAU,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;IAC7E,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,kBAAkB,CACpB,QAAsB,EACtB,OAAmC;QAEnC,OAAO,CAAC,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC,CAAC,kBAAkB,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAClF,CAAC;IAED;;;OAGG;IACH,K
AAK,CAAC,YAAY;QACd,MAAM,IAAI,CAAC,aAAa,CAAC,YAAY,EAAE,EAAE,CAAC;IAC9C,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,KAAK;QACP,MAAM,CAAC,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC;IACrD,CAAC;CACJ"}
|
|
@@ -7,66 +7,9 @@ import type { CrawleeLogger } from '../log.js';
|
|
|
7
7
|
import type { ProxyConfiguration } from '../proxy_configuration.js';
|
|
8
8
|
import type { InternalSource, RequestOptions, Source } from '../request.js';
|
|
9
9
|
import { Request } from '../request.js';
|
|
10
|
+
import type { IRequestManager, RequestsLike } from './request_manager.js';
|
|
10
11
|
import type { IStorage, StorageIdentifier } from './storage_instance_manager.js';
|
|
11
12
|
import type { StorageOpenOptions } from './utils.js';
|
|
12
|
-
export type RequestsLike = AsyncIterable<Source | string> | Iterable<Source | string> | (Source | string)[];
|
|
13
|
-
/**
|
|
14
|
-
* Represents a provider of requests/URLs to crawl.
|
|
15
|
-
*/
|
|
16
|
-
export interface IRequestManager {
|
|
17
|
-
/**
|
|
18
|
-
* Returns `true` if all requests were already handled and there are no more left.
|
|
19
|
-
*/
|
|
20
|
-
isFinished(): Promise<boolean>;
|
|
21
|
-
/**
|
|
22
|
-
* Resolves to `true` if the next call to {@link IRequestManager.fetchNextRequest} function
|
|
23
|
-
* would return `null`, otherwise it resolves to `false`.
|
|
24
|
-
* Note that even if the provider is empty, there might be some pending requests currently being processed.
|
|
25
|
-
*/
|
|
26
|
-
isEmpty(): Promise<boolean>;
|
|
27
|
-
/**
|
|
28
|
-
* Returns number of handled requests.
|
|
29
|
-
*/
|
|
30
|
-
handledCount(): Promise<number>;
|
|
31
|
-
/**
|
|
32
|
-
* Get the total number of requests known to the request manager.
|
|
33
|
-
*/
|
|
34
|
-
getTotalCount(): number;
|
|
35
|
-
/**
|
|
36
|
-
* Get an offline approximation of the number of pending requests.
|
|
37
|
-
*/
|
|
38
|
-
getPendingCount(): number;
|
|
39
|
-
/**
|
|
40
|
-
* Gets the next {@link Request} to process.
|
|
41
|
-
*
|
|
42
|
-
* The function's `Promise` resolves to `null` if there are no more
|
|
43
|
-
* requests to process.
|
|
44
|
-
*/
|
|
45
|
-
fetchNextRequest<T extends Dictionary = Dictionary>(): Promise<Request<T> | null>;
|
|
46
|
-
/**
|
|
47
|
-
* Can be used to iterate over the `RequestManager` instance in a `for await .. of` loop.
|
|
48
|
-
* Provides an alternative for the repeated use of `fetchNextRequest`.
|
|
49
|
-
*/
|
|
50
|
-
[Symbol.asyncIterator](): AsyncGenerator<Request>;
|
|
51
|
-
/**
|
|
52
|
-
* Marks request as handled after successful processing.
|
|
53
|
-
*/
|
|
54
|
-
markRequestHandled(request: Request): Promise<RequestQueueOperationInfo | void | null>;
|
|
55
|
-
/**
|
|
56
|
-
* Reclaims request to the provider if its processing failed.
|
|
57
|
-
* The request will become available in the next `fetchNextRequest()`.
|
|
58
|
-
*/
|
|
59
|
-
reclaimRequest(request: Request, options?: RequestQueueOperationOptions): Promise<RequestQueueOperationInfo | null>;
|
|
60
|
-
addRequest(requestLike: Source, options?: RequestQueueOperationOptions): Promise<RequestQueueOperationInfo>;
|
|
61
|
-
addRequestsBatched(requests: RequestsLike, options?: AddRequestsBatchedOptions): Promise<AddRequestsBatchedResult>;
|
|
62
|
-
/**
|
|
63
|
-
* Remove all requests from the queue but keep the queue itself, resetting it
|
|
64
|
-
* so it can be reused (e.g. across multiple `crawler.run()` calls).
|
|
65
|
-
*
|
|
66
|
-
* Implementations that do not support purging may leave this `undefined`.
|
|
67
|
-
*/
|
|
68
|
-
purge?(): Promise<void>;
|
|
69
|
-
}
|
|
70
13
|
export declare abstract class RequestProvider implements IStorage, IRequestManager {
|
|
71
14
|
protected readonly config: Configuration;
|
|
72
15
|
id: string;
|
|
@@ -80,8 +23,6 @@ export declare abstract class RequestProvider implements IStorage, IRequestManag
|
|
|
80
23
|
requestLockSecs: number;
|
|
81
24
|
assumedTotalCount: number;
|
|
82
25
|
assumedHandledCount: number;
|
|
83
|
-
private initialCount;
|
|
84
|
-
private initialHandledCount;
|
|
85
26
|
private isInitialized;
|
|
86
27
|
protected queueHeadIds: ListDictionary<string>;
|
|
87
28
|
protected requestCache: LruCache<RequestLruItem>;
|
|
@@ -94,17 +35,17 @@ export declare abstract class RequestProvider implements IStorage, IRequestManag
|
|
|
94
35
|
protected readonly events: EventManager;
|
|
95
36
|
constructor(options: InternalRequestProviderOptions, config?: Configuration);
|
|
96
37
|
/**
|
|
97
|
-
* Returns
|
|
38
|
+
* Returns the total number of requests in the queue (i.e. pending + handled).
|
|
98
39
|
*
|
|
99
40
|
* Survives restarts and actor migrations.
|
|
100
41
|
*/
|
|
101
|
-
getTotalCount(): number
|
|
42
|
+
getTotalCount(): Promise<number>;
|
|
102
43
|
/**
|
|
103
|
-
* Returns
|
|
44
|
+
* Returns the total number of pending requests in the queue.
|
|
104
45
|
*
|
|
105
46
|
* Survives restarts and Actor migrations.
|
|
106
47
|
*/
|
|
107
|
-
getPendingCount(): number
|
|
48
|
+
getPendingCount(): Promise<number>;
|
|
108
49
|
/**
|
|
109
50
|
* Adds a request to the queue.
|
|
110
51
|
*
|
|
@@ -232,7 +173,7 @@ export declare abstract class RequestProvider implements IStorage, IRequestManag
|
|
|
232
173
|
* ```
|
|
233
174
|
* @inheritdoc
|
|
234
175
|
*/
|
|
235
|
-
|
|
176
|
+
getHandledCount(): Promise<number>;
|
|
236
177
|
/**
|
|
237
178
|
* Returns an object containing general information about the request queue.
|
|
238
179
|
*
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"request_provider.d.ts","sourceRoot":"","sources":["../../src/storages/request_provider.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EACR,cAAc,EACd,sBAAsB,EACtB,UAAU,EACV,gBAAgB,EAChB,kBAAkB,EAClB,kBAAkB,EAClB,gBAAgB,EACnB,MAAM,gBAAgB,CAAC;AAWxB,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AAE9C,OAAO,EAAE,cAAc,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AAGjE,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACpD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,4BAA4B,CAAC;AAE/D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC;AAC/C,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AACpE,OAAO,KAAK,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAC5E,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAGxC,OAAO,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"request_provider.d.ts","sourceRoot":"","sources":["../../src/storages/request_provider.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EACR,cAAc,EACd,sBAAsB,EACtB,UAAU,EACV,gBAAgB,EAChB,kBAAkB,EAClB,kBAAkB,EAClB,gBAAgB,EACnB,MAAM,gBAAgB,CAAC;AAWxB,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AAE9C,OAAO,EAAE,cAAc,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AAGjE,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACpD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,4BAA4B,CAAC;AAE/D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC;AAC/C,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AACpE,OAAO,KAAK,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAC5E,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAGxC,OAAO,KAAK,EAAE,eAAe,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAC1E,OAAO,KAAK,EAAE,QAAQ,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC;AACjF,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAIrD,8BAAsB,eAAgB,YAAW,QAAQ,EAAE,eAAe;IAsClE,SAAS,CAAC,QAAQ,CAAC,MAAM,EAAE,aAAa;IArC5C,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,WAAW,SAAM;IACjB,SAAS,SAA0B;IACnC,MAAM,EAAE,kBAAkB,CAAC;IAC3B,SAAS,CAAC,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;IAElD,GAAG,EAAE,aAAa,CAAC;IACnB,qBAAqB,SAAc;IACnC,eAAe,SAAU;IAIzB,iBAAiB,SAAK;IACtB,mBAAmB,SAAK;IAExB,OAAO,CAAC,aAAa,CAAS;IAE9B,SAAS,CAAC,YAAY,yBAAgC;IACtD,SAAS,CAAC,YAAY,EAAE,QAAQ,CAAC,cAAc,CAAC,CAAC;IAEjD,SAAS,CAAC,4BAA4B,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC;IAE1D,SAAS,CAAC,uBAAuB,UAAS;IAE1C,SAAS,CAAC,YAAY,OAAc;IAEpC,SAAS,CAAC,oCAAoC,SAAK;IAEnD,SAAS,CAAC,2BAA2B,SAAK;IAE1C,SAAS,CAAC,UAAU,CAAC,EAAE,cAAc,CAAC;IAEtC,SAAS,CAAC,QAAQ,CAAC,MAAM,EAAE,YAAY,CAAC;gBAGpC,OAAO,EAAE,8BAA8B,EACpB,MAAM,GAAE,aAA+C;IAoB9E;;;;OAIG;IACG,aAAa;IAKnB;;;;OAIG;IACG,eAAe;IAKrB;;;;;;;;;;;;;OAaG;IACG,UAAU,CACZ,WAAW,EAAE,MAAM,EACnB,OAAO,GAAE,4BAAiC,GAC3C,OAAO,CAAC,yBAAyB,CAAC;IAmErC;;;;;;;;;;;;;OAaG;IACG,WAAW,CACb,YAAY,EAAE,YAAY,EAC1B,OAAO,GAAE,4BAAiC,GAC3C,OAAO,CAAC,sBAAsB,CAAC;IA4GlC;;;;;;;;OAQG;IACG,kBAAkB,CACpB,QAAQ,EAAE,YAAY,CAAC,YAAY,CAAC,EACpC,OAAO,GAAE,yBAA8B,GACxC,OAAO,CAAC,wBAAwB,CAAC;IAmIpC;;;;;OAKG;IACG,UAAU,CAAC,CAAC,SAAS,UAAU,GAAG,UAAU,EAAE,EAAE,E
AAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC;IAW3F;;;;;;;;;;;;;;;;OAgBG;IACH,QAAQ,CAAC,gBAAgB,CAAC,CAAC,SAAS,UAAU,GAAG,UAAU,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC;IAE1F;;;;;OAKG;IACG,kBAAkB,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,yBAAyB,GAAG,IAAI,CAAC;IAwCrF;;;;;OAKG;IACG,cAAc,CAChB,OAAO,EAAE,OAAO,EAChB,OAAO,GAAE,4BAAiC,GAC3C,OAAO,CAAC,yBAAyB,GAAG,IAAI,CAAC;IAmC5C,SAAS,CAAC,QAAQ,CAAC,oBAAoB,IAAI,OAAO,CAAC,IAAI,CAAC;IAExD;;;;;OAKG;IACG,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC;IAKjC;;;;;OAKG;IACH,QAAQ,CAAC,UAAU,IAAI,OAAO,CAAC,OAAO,CAAC;IAEvC,SAAS,CAAC,MAAM;IAShB;;OAEG;IACH,SAAS,CAAC,aAAa,CAAC,QAAQ,EAAE,MAAM,EAAE,kBAAkB,EAAE,yBAAyB,GAAG,IAAI;IAc9F;;OAEG;IACH,SAAS,CAAC,2BAA2B,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,OAAO,GAAG,IAAI;IAQlF;;;OAGG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAO3B;;;OAGG;IACG,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAgB5B;;OAEG;IACI,CAAC,MAAM,CAAC,aAAa,CAAC;IAQ7B;;;;;;;;;OASG;IACG,eAAe,IAAI,OAAO,CAAC,MAAM,CAAC;IAMxC;;;;;;;;;;;;;;;;;;OAkBG;IACG,OAAO,IAAI,OAAO,CAAC,gBAAgB,CAAC;IAM1C;;OAEG;cACa,qBAAqB,CAAC,MAAM,EAAE,cAAc,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC;IAwBxF;;OAEG;cACa,mBAAmB,CAC/B,MAAM,EAAE,cAAc,EACtB,eAAe,EAAE,cAAc,EAAE,EACjC,OAAO,EAAE,4BAA4B;IAiBzC;;OAEG;YACW,mBAAmB;IAWjC;;;;;;;;;;;;;;;;OAgBG;WACU,IAAI,CACb,UAAU,CAAC,EAAE,MAAM,GAAG,iBAAiB,GAAG,IAAI,EAC9C,OAAO,GAAE,kBAAuB,GACjC,OAAO,CAAC,eAAe,CAAC;CA+C9B;AAYD,UAAU,cAAc;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,OAAO,CAAC;IACnB,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,OAAO,GAAG,IAAI,CAAC;IACzB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,SAAS,EAAE,OAAO,CAAC;CACtB;AAED,MAAM,WAAW,sBAAsB;IACnC,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,kBAAkB,CAAC;IAE3B;;;;OAIG;IACH,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;CAC3C;AAED;;GAEG;AACH,MAAM,WAAW,mBAAoB,SAAQ,sBAAsB;CAAG;AAEtE;;GAEG;AACH,MAAM,WAAW,8BAA+B,SAAQ,sBAAsB;IAC1E,SAAS,EAAE,MAAM,CAAC;IAClB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,8BAA8B,EAAE,MAAM,CAAC;CAC1C;AAED,MAAM,WAAW,4BAA4B;IACzC;;;;;;;;;;;;OAYG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB;;;;OAIG;IACH,KAAK,CAAC,EAAE,OAAO,CAAC;CACnB;AAED
;;GAEG;AACH,MAAM,WAAW,yBAA0B,SAAQ,kBAAkB;IACjE,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,OAAO,CAAC;CACtB;AAED,MAAM,WAAW,yBAA0B,SAAQ,4BAA4B;IAC3E;;;OAGG;IACH,2BAA2B,CAAC,EAAE,OAAO,CAAC;IAEtC;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB;;OAEG;IACH,wBAAwB,CAAC,EAAE,MAAM,CAAC;CACrC;AAED,MAAM,WAAW,wBAAwB;IACrC,aAAa,EAAE,gBAAgB,EAAE,CAAC;IAClC;;;;;;;;;;;;;;;OAeG;IACH,2BAA2B,EAAE,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAAC;CAC5D"}
|