@crawlee/core 4.0.0-beta.64 → 4.0.0-beta.66
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/crawlers/crawler_commons.d.ts +3 -3
- package/crawlers/crawler_commons.d.ts.map +1 -1
- package/enqueue_links/enqueue_links.d.ts +7 -6
- package/enqueue_links/enqueue_links.d.ts.map +1 -1
- package/enqueue_links/enqueue_links.js +4 -4
- package/enqueue_links/enqueue_links.js.map +1 -1
- package/package.json +5 -5
- package/storages/index.d.ts +4 -6
- package/storages/index.d.ts.map +1 -1
- package/storages/index.js +2 -6
- package/storages/index.js.map +1 -1
- package/storages/request_list.d.ts +23 -72
- package/storages/request_list.d.ts.map +1 -1
- package/storages/request_list.js +34 -29
- package/storages/request_list.js.map +1 -1
- package/storages/request_loader.d.ts +97 -0
- package/storages/request_loader.d.ts.map +1 -0
- package/storages/request_loader.js +2 -0
- package/storages/request_loader.js.map +1 -0
- package/storages/request_manager.d.ts +34 -0
- package/storages/request_manager.d.ts.map +1 -0
- package/storages/request_manager.js +2 -0
- package/storages/request_manager.js.map +1 -0
- package/storages/request_manager_tandem.d.ts +56 -17
- package/storages/request_manager_tandem.d.ts.map +1 -1
- package/storages/request_manager_tandem.js +114 -41
- package/storages/request_manager_tandem.js.map +1 -1
- package/storages/request_queue.d.ts +276 -44
- package/storages/request_queue.d.ts.map +1 -1
- package/storages/request_queue.js +576 -212
- package/storages/request_queue.js.map +1 -1
- package/storages/{sitemap_request_list.d.ts → sitemap_request_loader.d.ts} +24 -19
- package/storages/sitemap_request_loader.d.ts.map +1 -0
- package/storages/{sitemap_request_list.js → sitemap_request_loader.js} +41 -40
- package/storages/sitemap_request_loader.js.map +1 -0
- package/validators.d.ts +4 -0
- package/validators.d.ts.map +1 -1
- package/validators.js +4 -0
- package/validators.js.map +1 -1
- package/storages/request_list_adapter.d.ts +0 -58
- package/storages/request_list_adapter.d.ts.map +0 -1
- package/storages/request_list_adapter.js +0 -81
- package/storages/request_list_adapter.js.map +0 -1
- package/storages/request_provider.d.ts +0 -384
- package/storages/request_provider.d.ts.map +0 -1
- package/storages/request_provider.js +0 -624
- package/storages/request_provider.js.map +0 -1
- package/storages/request_queue_v2.d.ts +0 -87
- package/storages/request_queue_v2.d.ts.map +0 -1
- package/storages/request_queue_v2.js +0 -437
- package/storages/request_queue_v2.js.map +0 -1
- package/storages/sitemap_request_list.d.ts.map +0 -1
- package/storages/sitemap_request_list.js.map +0 -1
|
@@ -1,384 +0,0 @@
|
|
|
1
|
-
import type { BaseHttpClient, BatchAddRequestsResult, Dictionary, ProcessedRequest, QueueOperationInfo, RequestQueueClient, RequestQueueInfo } from '@crawlee/types';
|
|
2
|
-
import type { ReadonlyDeep } from 'type-fest';
|
|
3
|
-
import { ListDictionary, LruCache } from '@apify/datastructures';
|
|
4
|
-
import { Configuration } from '../configuration.js';
|
|
5
|
-
import type { EventManager } from '../events/event_manager.js';
|
|
6
|
-
import type { CrawleeLogger } from '../log.js';
|
|
7
|
-
import type { ProxyConfiguration } from '../proxy_configuration.js';
|
|
8
|
-
import type { InternalSource, RequestOptions, Source } from '../request.js';
|
|
9
|
-
import { Request } from '../request.js';
|
|
10
|
-
import type { IStorage, StorageIdentifier } from './storage_instance_manager.js';
|
|
11
|
-
import type { StorageOpenOptions } from './utils.js';
|
|
12
|
-
export type RequestsLike = AsyncIterable<Source | string> | Iterable<Source | string> | (Source | string)[];
|
|
13
|
-
/**
|
|
14
|
-
* Represents a provider of requests/URLs to crawl.
|
|
15
|
-
*/
|
|
16
|
-
export interface IRequestManager {
|
|
17
|
-
/**
|
|
18
|
-
* Returns `true` if all requests were already handled and there are no more left.
|
|
19
|
-
*/
|
|
20
|
-
isFinished(): Promise<boolean>;
|
|
21
|
-
/**
|
|
22
|
-
* Resolves to `true` if the next call to {@link IRequestManager.fetchNextRequest} function
|
|
23
|
-
* would return `null`, otherwise it resolves to `false`.
|
|
24
|
-
* Note that even if the provider is empty, there might be some pending requests currently being processed.
|
|
25
|
-
*/
|
|
26
|
-
isEmpty(): Promise<boolean>;
|
|
27
|
-
/**
|
|
28
|
-
* Returns number of handled requests.
|
|
29
|
-
*/
|
|
30
|
-
handledCount(): Promise<number>;
|
|
31
|
-
/**
|
|
32
|
-
* Get the total number of requests known to the request manager.
|
|
33
|
-
*/
|
|
34
|
-
getTotalCount(): number;
|
|
35
|
-
/**
|
|
36
|
-
* Get an offline approximation of the number of pending requests.
|
|
37
|
-
*/
|
|
38
|
-
getPendingCount(): number;
|
|
39
|
-
/**
|
|
40
|
-
* Gets the next {@link Request} to process.
|
|
41
|
-
*
|
|
42
|
-
* The function's `Promise` resolves to `null` if there are no more
|
|
43
|
-
* requests to process.
|
|
44
|
-
*/
|
|
45
|
-
fetchNextRequest<T extends Dictionary = Dictionary>(): Promise<Request<T> | null>;
|
|
46
|
-
/**
|
|
47
|
-
* Can be used to iterate over the `RequestManager` instance in a `for await .. of` loop.
|
|
48
|
-
* Provides an alternative for the repeated use of `fetchNextRequest`.
|
|
49
|
-
*/
|
|
50
|
-
[Symbol.asyncIterator](): AsyncGenerator<Request>;
|
|
51
|
-
/**
|
|
52
|
-
* Marks request as handled after successful processing.
|
|
53
|
-
*/
|
|
54
|
-
markRequestHandled(request: Request): Promise<RequestQueueOperationInfo | void | null>;
|
|
55
|
-
/**
|
|
56
|
-
* Reclaims request to the provider if its processing failed.
|
|
57
|
-
* The request will become available in the next `fetchNextRequest()`.
|
|
58
|
-
*/
|
|
59
|
-
reclaimRequest(request: Request, options?: RequestQueueOperationOptions): Promise<RequestQueueOperationInfo | null>;
|
|
60
|
-
addRequest(requestLike: Source, options?: RequestQueueOperationOptions): Promise<RequestQueueOperationInfo>;
|
|
61
|
-
addRequestsBatched(requests: RequestsLike, options?: AddRequestsBatchedOptions): Promise<AddRequestsBatchedResult>;
|
|
62
|
-
/**
|
|
63
|
-
* Remove all requests from the queue but keep the queue itself, resetting it
|
|
64
|
-
* so it can be reused (e.g. across multiple `crawler.run()` calls).
|
|
65
|
-
*
|
|
66
|
-
* Implementations that do not support purging may leave this `undefined`.
|
|
67
|
-
*/
|
|
68
|
-
purge?(): Promise<void>;
|
|
69
|
-
}
|
|
70
|
-
export declare abstract class RequestProvider implements IStorage, IRequestManager {
|
|
71
|
-
protected readonly config: Configuration;
|
|
72
|
-
id: string;
|
|
73
|
-
name?: string;
|
|
74
|
-
timeoutSecs: number;
|
|
75
|
-
clientKey: string;
|
|
76
|
-
client: RequestQueueClient;
|
|
77
|
-
protected proxyConfiguration?: ProxyConfiguration;
|
|
78
|
-
log: CrawleeLogger;
|
|
79
|
-
internalTimeoutMillis: number;
|
|
80
|
-
requestLockSecs: number;
|
|
81
|
-
assumedTotalCount: number;
|
|
82
|
-
assumedHandledCount: number;
|
|
83
|
-
private initialCount;
|
|
84
|
-
private initialHandledCount;
|
|
85
|
-
private isInitialized;
|
|
86
|
-
protected queueHeadIds: ListDictionary<string>;
|
|
87
|
-
protected requestCache: LruCache<RequestLruItem>;
|
|
88
|
-
protected recentlyHandledRequestsCache: LruCache<boolean>;
|
|
89
|
-
protected queuePausedForMigration: boolean;
|
|
90
|
-
protected lastActivity: Date;
|
|
91
|
-
protected isFinishedCalledWhileHeadWasNotEmpty: number;
|
|
92
|
-
protected inProgressRequestBatchCount: number;
|
|
93
|
-
protected httpClient?: BaseHttpClient;
|
|
94
|
-
protected readonly events: EventManager;
|
|
95
|
-
constructor(options: InternalRequestProviderOptions, config?: Configuration);
|
|
96
|
-
/**
|
|
97
|
-
* Returns an offline approximation of the total number of requests in the queue (i.e. pending + handled).
|
|
98
|
-
*
|
|
99
|
-
* Survives restarts and actor migrations.
|
|
100
|
-
*/
|
|
101
|
-
getTotalCount(): number;
|
|
102
|
-
/**
|
|
103
|
-
* Returns an offline approximation of the total number of pending requests in the queue.
|
|
104
|
-
*
|
|
105
|
-
* Survives restarts and Actor migrations.
|
|
106
|
-
*/
|
|
107
|
-
getPendingCount(): number;
|
|
108
|
-
/**
|
|
109
|
-
* Adds a request to the queue.
|
|
110
|
-
*
|
|
111
|
-
* If a request with the same `uniqueKey` property is already present in the queue,
|
|
112
|
-
* it will not be updated. You can find out whether this happened from the resulting
|
|
113
|
-
* {@link QueueOperationInfo} object.
|
|
114
|
-
*
|
|
115
|
-
* To add multiple requests to the queue by extracting links from a webpage,
|
|
116
|
-
* see the {@link enqueueLinks} helper function.
|
|
117
|
-
*
|
|
118
|
-
* @param requestLike {@link Request} object or vanilla object with request data.
|
|
119
|
-
* Note that the function sets the `uniqueKey` and `id` fields to the passed Request.
|
|
120
|
-
* @param [options] Request queue operation options.
|
|
121
|
-
*/
|
|
122
|
-
addRequest(requestLike: Source, options?: RequestQueueOperationOptions): Promise<RequestQueueOperationInfo>;
|
|
123
|
-
/**
|
|
124
|
-
* Adds requests to the queue in batches of 25. This method will wait till all the requests are added
|
|
125
|
-
* to the queue before resolving. You should prefer using `queue.addRequestsBatched()` or `crawler.addRequests()`
|
|
126
|
-
* if you don't want to block the processing, as those methods will only wait for the initial 1000 requests,
|
|
127
|
-
* start processing right after that happens, and continue adding more in the background.
|
|
128
|
-
*
|
|
129
|
-
* If a request passed in is already present due to its `uniqueKey` property being the same,
|
|
130
|
-
* it will not be updated. You can find out whether this happened by finding the request in the resulting
|
|
131
|
-
* {@link BatchAddRequestsResult} object.
|
|
132
|
-
*
|
|
133
|
-
* @param requestsLike {@link Request} objects or vanilla objects with request data.
|
|
134
|
-
* Note that the function sets the `uniqueKey` and `id` fields to the passed requests if missing.
|
|
135
|
-
* @param [options] Request queue operation options.
|
|
136
|
-
*/
|
|
137
|
-
addRequests(requestsLike: RequestsLike, options?: RequestQueueOperationOptions): Promise<BatchAddRequestsResult>;
|
|
138
|
-
/**
|
|
139
|
-
* Adds requests to the queue in batches. By default, it will resolve after the initial batch is added, and continue
|
|
140
|
-
* adding the rest in the background. You can configure the batch size via `batchSize` option and the sleep time in between
|
|
141
|
-
* the batches via `waitBetweenBatchesMillis`. If you want to wait for all batches to be added to the queue, you can use
|
|
142
|
-
* the `waitForAllRequestsToBeAdded` promise you get in the response object.
|
|
143
|
-
*
|
|
144
|
-
* @param requests The requests to add
|
|
145
|
-
* @param options Options for the request queue
|
|
146
|
-
*/
|
|
147
|
-
addRequestsBatched(requests: ReadonlyDeep<RequestsLike>, options?: AddRequestsBatchedOptions): Promise<AddRequestsBatchedResult>;
|
|
148
|
-
/**
|
|
149
|
-
* Gets the request from the queue specified by ID.
|
|
150
|
-
*
|
|
151
|
-
* @param id ID of the request.
|
|
152
|
-
* @returns Returns the request object, or `null` if it was not found.
|
|
153
|
-
*/
|
|
154
|
-
getRequest<T extends Dictionary = Dictionary>(id: string): Promise<Request<T> | null>;
|
|
155
|
-
/**
|
|
156
|
-
* Returns a next request in the queue to be processed, or `null` if there are no more pending requests.
|
|
157
|
-
*
|
|
158
|
-
* Once you successfully finish processing of the request, you need to call
|
|
159
|
-
* {@link RequestQueue.markRequestHandled}
|
|
160
|
-
* to mark the request as handled in the queue. If there was some error in processing the request,
|
|
161
|
-
* call {@link RequestQueue.reclaimRequest} instead,
|
|
162
|
-
* so that the queue will give the request to some other consumer in another call to the `fetchNextRequest` function.
|
|
163
|
-
*
|
|
164
|
-
* Note that the `null` return value doesn't mean the queue processing finished,
|
|
165
|
-
* it means there are currently no pending requests.
|
|
166
|
-
* To check whether all requests in queue were finished,
|
|
167
|
-
* use {@link RequestQueue.isFinished} instead.
|
|
168
|
-
*
|
|
169
|
-
* @returns
|
|
170
|
-
* Returns the request object or `null` if there are no more pending requests.
|
|
171
|
-
*/
|
|
172
|
-
abstract fetchNextRequest<T extends Dictionary = Dictionary>(): Promise<Request<T> | null>;
|
|
173
|
-
/**
|
|
174
|
-
* Marks a request that was previously returned by the
|
|
175
|
-
* {@link RequestQueue.fetchNextRequest}
|
|
176
|
-
* function as handled after successful processing.
|
|
177
|
-
* Handled requests will never again be returned by the `fetchNextRequest` function.
|
|
178
|
-
*/
|
|
179
|
-
markRequestHandled(request: Request): Promise<RequestQueueOperationInfo | null>;
|
|
180
|
-
/**
|
|
181
|
-
* Reclaims a failed request back to the queue, so that it can be returned for processing later again
|
|
182
|
-
* by another call to {@link RequestQueue.fetchNextRequest}.
|
|
183
|
-
* The request record in the queue is updated using the provided `request` parameter.
|
|
184
|
-
* For example, this lets you store the number of retries or error messages for the request.
|
|
185
|
-
*/
|
|
186
|
-
reclaimRequest(request: Request, options?: RequestQueueOperationOptions): Promise<RequestQueueOperationInfo | null>;
|
|
187
|
-
protected abstract ensureHeadIsNonEmpty(): Promise<void>;
|
|
188
|
-
/**
|
|
189
|
-
* Resolves to `true` if the next call to {@link RequestQueue.fetchNextRequest}
|
|
190
|
-
* would return `null`, otherwise it resolves to `false`.
|
|
191
|
-
* Note that even if the queue is empty, there might be some pending requests currently being processed.
|
|
192
|
-
* If you need to ensure that there is no activity in the queue, use {@link RequestQueue.isFinished}.
|
|
193
|
-
*/
|
|
194
|
-
isEmpty(): Promise<boolean>;
|
|
195
|
-
/**
|
|
196
|
-
* Resolves to `true` if all requests were already handled and there are no more left.
|
|
197
|
-
* Due to the nature of distributed storage used by the queue,
|
|
198
|
-
* the function may occasionally return a false negative,
|
|
199
|
-
* but it shall never return a false positive.
|
|
200
|
-
*/
|
|
201
|
-
abstract isFinished(): Promise<boolean>;
|
|
202
|
-
protected _reset(): void;
|
|
203
|
-
/**
|
|
204
|
-
* Caches information about request to beware of unneeded addRequest() calls.
|
|
205
|
-
*/
|
|
206
|
-
protected _cacheRequest(cacheKey: string, queueOperationInfo: RequestQueueOperationInfo): void;
|
|
207
|
-
/**
|
|
208
|
-
* Adds a request straight to the queueHeadDict, to improve performance.
|
|
209
|
-
*/
|
|
210
|
-
protected _maybeAddRequestToQueueHead(requestId: string, forefront: boolean): void;
|
|
211
|
-
/**
|
|
212
|
-
* Removes the queue either from the Apify Cloud storage or from the local database,
|
|
213
|
-
* depending on the mode of operation.
|
|
214
|
-
*/
|
|
215
|
-
drop(): Promise<void>;
|
|
216
|
-
/**
|
|
217
|
-
* Remove all requests from the queue but keep the queue itself, resetting it
|
|
218
|
-
* so it can be reused (e.g. across multiple `crawler.run()` calls).
|
|
219
|
-
*/
|
|
220
|
-
purge(): Promise<void>;
|
|
221
|
-
/**
|
|
222
|
-
* @inheritdoc
|
|
223
|
-
*/
|
|
224
|
-
[Symbol.asyncIterator](): AsyncGenerator<Request<Dictionary>, void, unknown>;
|
|
225
|
-
/**
|
|
226
|
-
* Returns the number of handled requests.
|
|
227
|
-
*
|
|
228
|
-
* This function is just a convenient shortcut for:
|
|
229
|
-
*
|
|
230
|
-
* ```javascript
|
|
231
|
-
* const { handledRequestCount } = await queue.getInfo();
|
|
232
|
-
* ```
|
|
233
|
-
* @inheritdoc
|
|
234
|
-
*/
|
|
235
|
-
handledCount(): Promise<number>;
|
|
236
|
-
/**
|
|
237
|
-
* Returns an object containing general information about the request queue.
|
|
238
|
-
*
|
|
239
|
-
* **Example:**
|
|
240
|
-
* ```
|
|
241
|
-
* {
|
|
242
|
-
* id: "WkzbQMuFYuamGv3YF",
|
|
243
|
-
* name: "my-queue",
|
|
244
|
-
* createdAt: new Date("2015-12-12T07:34:14.202Z"),
|
|
245
|
-
* modifiedAt: new Date("2015-12-13T08:36:13.202Z"),
|
|
246
|
-
* accessedAt: new Date("2015-12-14T08:36:13.202Z"),
|
|
247
|
-
* totalRequestCount: 25,
|
|
248
|
-
* handledRequestCount: 5,
|
|
249
|
-
* pendingRequestCount: 20,
|
|
250
|
-
* }
|
|
251
|
-
* ```
|
|
252
|
-
*
|
|
253
|
-
* @throws If the underlying storage no longer exists (e.g. it was deleted externally).
|
|
254
|
-
*/
|
|
255
|
-
getInfo(): Promise<RequestQueueInfo>;
|
|
256
|
-
/**
|
|
257
|
-
* Fetches URLs from requestsFromUrl and returns them in format of list of requests
|
|
258
|
-
*/
|
|
259
|
-
protected _fetchRequestsFromUrl(source: InternalSource): Promise<RequestOptions[]>;
|
|
260
|
-
/**
|
|
261
|
-
* Adds all fetched requests from a URL from a remote resource.
|
|
262
|
-
*/
|
|
263
|
-
protected _addFetchedRequests(source: InternalSource, fetchedRequests: RequestOptions[], options: RequestQueueOperationOptions): Promise<ProcessedRequest[]>;
|
|
264
|
-
/**
|
|
265
|
-
* @internal wraps public utility for mocking purposes
|
|
266
|
-
*/
|
|
267
|
-
private _downloadListOfUrls;
|
|
268
|
-
/**
|
|
269
|
-
* Opens a request queue and returns a promise resolving to an instance
|
|
270
|
-
* of the {@link RequestQueue} class.
|
|
271
|
-
*
|
|
272
|
-
* {@link RequestQueue} represents a queue of URLs to crawl, which is stored either on local filesystem or in the cloud.
|
|
273
|
-
* The queue is used for deep crawling of websites, where you start with several URLs and then
|
|
274
|
-
* recursively follow links to other pages. The data structure supports both breadth-first
|
|
275
|
-
* and depth-first crawling orders.
|
|
276
|
-
*
|
|
277
|
-
* For more details and code examples, see the {@link RequestQueue} class.
|
|
278
|
-
*
|
|
279
|
-
* @param [identifier]
|
|
280
|
-
* ID or name of the request queue to be opened. If a string is provided, it will first be
|
|
281
|
-
* looked up as an ID; if no such storage exists, it will be treated as a name.
|
|
282
|
-
* If `null` or `undefined`, the function returns the default request queue associated with the crawler run.
|
|
283
|
-
* @param [options] Open Request Queue options.
|
|
284
|
-
*/
|
|
285
|
-
static open(identifier?: string | StorageIdentifier | null, options?: StorageOpenOptions): Promise<RequestProvider>;
|
|
286
|
-
}
|
|
287
|
-
interface RequestLruItem {
|
|
288
|
-
uniqueKey: string;
|
|
289
|
-
isHandled: boolean;
|
|
290
|
-
id: string;
|
|
291
|
-
hydrated: Request | null;
|
|
292
|
-
lockExpiresAt: number | null;
|
|
293
|
-
forefront: boolean;
|
|
294
|
-
}
|
|
295
|
-
export interface RequestProviderOptions {
|
|
296
|
-
id: string;
|
|
297
|
-
name?: string;
|
|
298
|
-
client: RequestQueueClient;
|
|
299
|
-
/**
|
|
300
|
-
* Used to pass the proxy configuration for the `requestsFromUrl` objects.
|
|
301
|
-
* Takes advantage of the internal address rotation and authentication process.
|
|
302
|
-
* If undefined, the `requestsFromUrl` requests will be made without proxy.
|
|
303
|
-
*/
|
|
304
|
-
proxyConfiguration?: ProxyConfiguration;
|
|
305
|
-
}
|
|
306
|
-
/**
|
|
307
|
-
* @deprecated Use {@link RequestProviderOptions} instead.
|
|
308
|
-
*/
|
|
309
|
-
export interface RequestQueueOptions extends RequestProviderOptions {
|
|
310
|
-
}
|
|
311
|
-
/**
|
|
312
|
-
* @internal
|
|
313
|
-
*/
|
|
314
|
-
export interface InternalRequestProviderOptions extends RequestProviderOptions {
|
|
315
|
-
logPrefix: string;
|
|
316
|
-
requestCacheMaxSize: number;
|
|
317
|
-
recentlyHandledRequestsMaxSize: number;
|
|
318
|
-
}
|
|
319
|
-
export interface RequestQueueOperationOptions {
|
|
320
|
-
/**
|
|
321
|
-
* If set to `true`:
|
|
322
|
-
* - while adding the request to the queue: the request will be added to the foremost position in the queue.
|
|
323
|
-
* - while reclaiming the request: the request will be placed to the beginning of the queue, so that it's returned
|
|
324
|
-
* in the next call to {@link RequestQueue.fetchNextRequest}.
|
|
325
|
-
* By default, it's put to the end of the queue.
|
|
326
|
-
*
|
|
327
|
-
* In case the request is already present in the queue, this option has no effect.
|
|
328
|
-
*
|
|
329
|
-
* If more requests are added with this option at once, their order in the following `fetchNextRequest` call
|
|
330
|
-
* is arbitrary.
|
|
331
|
-
* @default false
|
|
332
|
-
*/
|
|
333
|
-
forefront?: boolean;
|
|
334
|
-
/**
|
|
335
|
-
* Should the requests be added to the local LRU cache?
|
|
336
|
-
* @default false
|
|
337
|
-
* @internal
|
|
338
|
-
*/
|
|
339
|
-
cache?: boolean;
|
|
340
|
-
}
|
|
341
|
-
/**
|
|
342
|
-
* @internal
|
|
343
|
-
*/
|
|
344
|
-
export interface RequestQueueOperationInfo extends QueueOperationInfo {
|
|
345
|
-
uniqueKey: string;
|
|
346
|
-
forefront: boolean;
|
|
347
|
-
}
|
|
348
|
-
export interface AddRequestsBatchedOptions extends RequestQueueOperationOptions {
|
|
349
|
-
/**
|
|
350
|
-
* Whether to wait for all the provided requests to be added, instead of waiting just for the initial batch of up to `batchSize`.
|
|
351
|
-
* @default false
|
|
352
|
-
*/
|
|
353
|
-
waitForAllRequestsToBeAdded?: boolean;
|
|
354
|
-
/**
|
|
355
|
-
* @default 1000
|
|
356
|
-
*/
|
|
357
|
-
batchSize?: number;
|
|
358
|
-
/**
|
|
359
|
-
* @default 1000
|
|
360
|
-
*/
|
|
361
|
-
waitBetweenBatchesMillis?: number;
|
|
362
|
-
}
|
|
363
|
-
export interface AddRequestsBatchedResult {
|
|
364
|
-
addedRequests: ProcessedRequest[];
|
|
365
|
-
/**
|
|
366
|
-
* A promise which will resolve with the rest of the requests that were added to the queue.
|
|
367
|
-
*
|
|
368
|
-
* Alternatively, we can set {@link AddRequestsBatchedOptions.waitForAllRequestsToBeAdded|`waitForAllRequestsToBeAdded`} to `true`
|
|
369
|
-
* in the {@link BasicCrawler.addRequests|`crawler.addRequests()`} options.
|
|
370
|
-
*
|
|
371
|
-
* **Example:**
|
|
372
|
-
*
|
|
373
|
-
* ```ts
|
|
374
|
-
* // Assuming `requests` is a list of requests.
|
|
375
|
-
* const result = await crawler.addRequests(requests);
|
|
376
|
-
*
|
|
377
|
-
* // If we want to wait for the rest of the requests to be added to the queue:
|
|
378
|
-
* await result.waitForAllRequestsToBeAdded;
|
|
379
|
-
* ```
|
|
380
|
-
*/
|
|
381
|
-
waitForAllRequestsToBeAdded: Promise<ProcessedRequest[]>;
|
|
382
|
-
}
|
|
383
|
-
export {};
|
|
384
|
-
//# sourceMappingURL=request_provider.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"request_provider.d.ts","sourceRoot":"","sources":["../../src/storages/request_provider.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EACR,cAAc,EACd,sBAAsB,EACtB,UAAU,EACV,gBAAgB,EAChB,kBAAkB,EAClB,kBAAkB,EAClB,gBAAgB,EACnB,MAAM,gBAAgB,CAAC;AAWxB,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AAE9C,OAAO,EAAE,cAAc,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AAGjE,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACpD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,4BAA4B,CAAC;AAE/D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC;AAC/C,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AACpE,OAAO,KAAK,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAC5E,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAGxC,OAAO,KAAK,EAAE,QAAQ,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC;AACjF,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAIrD,MAAM,MAAM,YAAY,GAAG,aAAa,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,QAAQ,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,MAAM,GAAG,MAAM,CAAC,EAAE,CAAC;AAE5G;;GAEG;AACH,MAAM,WAAW,eAAe;IAC5B;;OAEG;IACH,UAAU,IAAI,OAAO,CAAC,OAAO,CAAC,CAAC;IAE/B;;;;OAIG;IACH,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC,CAAC;IAE5B;;OAEG;IACH,YAAY,IAAI,OAAO,CAAC,MAAM,CAAC,CAAC;IAEhC;;OAEG;IACH,aAAa,IAAI,MAAM,CAAC;IAExB;;OAEG;IACH,eAAe,IAAI,MAAM,CAAC;IAE1B;;;;;OAKG;IACH,gBAAgB,CAAC,CAAC,SAAS,UAAU,GAAG,UAAU,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC;IAElF;;;OAGG;IACH,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,cAAc,CAAC,OAAO,CAAC,CAAC;IAElD;;OAEG;IACH,kBAAkB,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,yBAAyB,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC;IAEvF;;;OAGG;IACH,cAAc,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,EAAE,4BAA4B,GAAG,OAAO,CAAC,yBAAyB,GAAG,IAAI,CAAC,CAAC;IAEpH,UAAU,CAAC,WAAW,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,4BAA4B,GAAG,OAAO,CAAC,yBAAyB,CAAC,CAAC;IAE5G,kBAAkB,CAAC,QAAQ,EAAE,YAAY,EAAE,OAAO,CAAC,EAAE,yBAAyB,GAAG,OAAO,CAAC,wBAAwB,CAAC,CAAC;IAEnH;;;;;OAKG;IACH,KAAK,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC3B;AAED,8BAAsB,eAAgB,YAAW,QAAQ,EAAE,eAAe;IAwClE,SAAS,CAAC,QAAQ,CAAC,MAAM,EAAE,aAAa;IAvC5C,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,WAAW,SAAM;IACjB,SAAS,SAA0B;IACnC,MAAM,EAAE,kBAAkB,CAAC;IAC3B,SAAS,CAAC,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;IAElD,GAAG,EAAE,aAAa,CAAC;IACnB,qBAAqB,SAAc;IACnC,eAAe,SAAU;IAIzB,iBAAiB,SAAK;IACtB,mBAAmB,SAAK;IAExB,OAAO,CAAC,YAAY,CAAK;IACzB,OAAO,CAAC,mBAAmB,CAAK;IAChC,OAAO,CAAC,aAAa,CAAS;IAE9B,SAAS,CAAC,YAAY,yBAAgC;IACtD,SAAS,CAAC,YAAY,EAAE,QAAQ,CAAC,cAAc,CAAC,CAAC;IAEjD,SAAS,CAAC,4BAA4B,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC;IAE1D,SAAS,CAAC,uBAAuB,UAAS;IAE1C,SAAS,CAAC,YAAY,OAAc;IAEpC,SAAS,CAAC,oCAAoC,SAAK;IAEnD,SAAS,CAAC,2BAA2B,SAAK;IAE1C,SAAS,CAAC,UAAU,CAAC,EAAE,cAAc,CAAC;IAEtC,SAAS,CAAC,QAAQ,CAAC,MAAM,EAAE,YAAY,CAAC;gBAGpC,OAAO,EAAE,8BAA8B,EACpB,MAAM,GAAE,aAA+C;IAoB9E;;;;OAIG;IACH,aAAa;IAIb;;;;OAIG;IACH,eAAe;IAIf;;;;;;;;;;;;;OAaG;IACG,UAAU,CACZ,WAAW,EAAE,MAAM,EACnB,OAAO,GAAE,4BAAiC,GAC3C,OAAO,CAAC,yBAAyB,CAAC;IAmErC;;;;;;;;;;;;;OAaG;IACG,WAAW,CACb,YAAY,EAAE,YAAY,EAC1B,OAAO,GAAE,4BAAiC,GAC3C,OAAO,CAAC,sBAAsB,CAAC;IA4GlC;;;;;;;;OAQG;IACG,kBAAkB,CACpB,QAAQ,EAAE,YAAY,CAAC,YAAY,CAAC,EACpC,OAAO,GAAE,yBAA8B,GACxC,OAAO,CAAC,wBAAwB,CAAC;IAmIpC;;;;;OAKG;IACG,UAAU,CAAC,CAAC,SAAS,UAAU,GAAG,UAAU,EAAE,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC;IAW3F;;;;;;;;;;;;;;;;OAgBG;IACH,QAAQ,CAAC,gBAAgB,CAAC,CAAC,SAAS,UAAU,GAAG,UAAU,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC;IAE1F;;;;;OAKG;IACG,kBAAkB,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,yBAAyB,GAAG,IAAI,CAAC;IAwCrF;;;;;OAKG;IACG,cAAc,CAChB,OAAO,EAAE,OAAO,EAChB,OAAO,GAAE,4BAAiC,GAC3C,OAAO,CAAC,yBAAyB,GAAG,IAAI,CAAC;IAmC5C,SAAS,CAAC,QAAQ,CAAC,oBAAoB,IAAI,OAAO,CAAC,IAAI,CAAC;IAExD;;;;;OAKG;IACG,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC;IAKjC;;;;;OAKG;IACH,QAAQ,CAAC,UAAU,IAAI,OAAO,CAAC,OAAO,CAAC;IAEvC,SAAS,CAAC,MAAM;IAShB;;OAEG;IACH,SAAS,CAAC,aAAa,CAAC,QAAQ,EAAE,MAAM,EAAE,kBAAkB,EAAE,yBAAyB,GAAG,IAAI;IAc9F;;OAEG;IACH,SAAS,CAAC,2BAA2B,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,OAAO,GAAG,IAAI;IAQlF;;;OAGG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAO3B;;;OAGG;IACG,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAkB5B;;OAEG;IACI,CAAC,MAAM,CAAC,aAAa,CAAC;IAQ7B;;;;;;;;;OASG;IACG,YAAY,IAAI,OAAO,CAAC,MAAM,CAAC;IAMrC;;;;;;;;;;;;;;;;;;OAkBG;IACG,OAAO,IAAI,OAAO,CAAC,gBAAgB,CAAC;IAM1C;;OAEG;cACa,qBAAqB,CAAC,MAAM,EAAE,cAAc,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC;IAwBxF;;OAEG;cACa,mBAAmB,CAC/B,MAAM,EAAE,cAAc,EACtB,eAAe,EAAE,cAAc,EAAE,EACjC,OAAO,EAAE,4BAA4B;IAiBzC;;OAEG;YACW,mBAAmB;IAWjC;;;;;;;;;;;;;;;;OAgBG;WACU,IAAI,CACb,UAAU,CAAC,EAAE,MAAM,GAAG,iBAAiB,GAAG,IAAI,EAC9C,OAAO,GAAE,kBAAuB,GACjC,OAAO,CAAC,eAAe,CAAC;CAmD9B;AAYD,UAAU,cAAc;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,OAAO,CAAC;IACnB,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,OAAO,GAAG,IAAI,CAAC;IACzB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,SAAS,EAAE,OAAO,CAAC;CACtB;AAED,MAAM,WAAW,sBAAsB;IACnC,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,kBAAkB,CAAC;IAE3B;;;;OAIG;IACH,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;CAC3C;AAED;;GAEG;AACH,MAAM,WAAW,mBAAoB,SAAQ,sBAAsB;CAAG;AAEtE;;GAEG;AACH,MAAM,WAAW,8BAA+B,SAAQ,sBAAsB;IAC1E,SAAS,EAAE,MAAM,CAAC;IAClB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,8BAA8B,EAAE,MAAM,CAAC;CAC1C;AAED,MAAM,WAAW,4BAA4B;IACzC;;;;;;;;;;;;OAYG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB;;;;OAIG;IACH,KAAK,CAAC,EAAE,OAAO,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,yBAA0B,SAAQ,kBAAkB;IACjE,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,OAAO,CAAC;CACtB;AAED,MAAM,WAAW,yBAA0B,SAAQ,4BAA4B;IAC3E;;;OAGG;IACH,2BAA2B,CAAC,EAAE,OAAO,CAAC;IAEtC;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB;;OAEG;IACH,wBAAwB,CAAC,EAAE,MAAM,CAAC;CACrC;AAED,MAAM,WAAW,wBAAwB;IACrC,aAAa,EAAE,gBAAgB,EAAE,CAAC;IAClC;;;;;;;;;;;;;;;OAeG;IACH,2BAA2B,EAAE,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAAC;CAC5D"}
|