@crawlee/core 4.0.0-beta.6 → 4.0.0-beta.61

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. package/README.md +9 -5
  2. package/autoscaling/autoscaled_pool.d.ts +3 -5
  3. package/autoscaling/autoscaled_pool.d.ts.map +1 -1
  4. package/autoscaling/autoscaled_pool.js +3 -9
  5. package/autoscaling/autoscaled_pool.js.map +1 -1
  6. package/autoscaling/snapshotter.d.ts +3 -13
  7. package/autoscaling/snapshotter.d.ts.map +1 -1
  8. package/autoscaling/snapshotter.js +18 -29
  9. package/autoscaling/snapshotter.js.map +1 -1
  10. package/autoscaling/system_status.d.ts +0 -3
  11. package/autoscaling/system_status.d.ts.map +1 -1
  12. package/autoscaling/system_status.js +2 -3
  13. package/autoscaling/system_status.js.map +1 -1
  14. package/configuration.d.ts +85 -227
  15. package/configuration.d.ts.map +1 -1
  16. package/configuration.js +159 -223
  17. package/configuration.js.map +1 -1
  18. package/cookie_utils.d.ts +4 -2
  19. package/cookie_utils.d.ts.map +1 -1
  20. package/cookie_utils.js +18 -12
  21. package/cookie_utils.js.map +1 -1
  22. package/crawlers/context_pipeline.d.ts +71 -0
  23. package/crawlers/context_pipeline.d.ts.map +1 -0
  24. package/crawlers/context_pipeline.js +123 -0
  25. package/crawlers/context_pipeline.js.map +1 -0
  26. package/crawlers/crawler_commons.d.ts +19 -28
  27. package/crawlers/crawler_commons.d.ts.map +1 -1
  28. package/crawlers/crawler_commons.js +12 -20
  29. package/crawlers/crawler_commons.js.map +1 -1
  30. package/crawlers/crawler_utils.d.ts +2 -2
  31. package/crawlers/crawler_utils.d.ts.map +1 -1
  32. package/crawlers/crawler_utils.js +1 -1
  33. package/crawlers/crawler_utils.js.map +1 -1
  34. package/crawlers/error_snapshotter.d.ts +3 -2
  35. package/crawlers/error_snapshotter.d.ts.map +1 -1
  36. package/crawlers/error_snapshotter.js +2 -2
  37. package/crawlers/error_snapshotter.js.map +1 -1
  38. package/crawlers/error_tracker.d.ts +2 -1
  39. package/crawlers/error_tracker.d.ts.map +1 -1
  40. package/crawlers/error_tracker.js.map +1 -1
  41. package/crawlers/index.d.ts +1 -1
  42. package/crawlers/index.d.ts.map +1 -1
  43. package/crawlers/index.js +1 -1
  44. package/crawlers/index.js.map +1 -1
  45. package/crawlers/internals/types.d.ts +8 -0
  46. package/crawlers/internals/types.d.ts.map +1 -0
  47. package/crawlers/internals/types.js +2 -0
  48. package/crawlers/internals/types.js.map +1 -0
  49. package/crawlers/statistics.d.ts +15 -15
  50. package/crawlers/statistics.d.ts.map +1 -1
  51. package/crawlers/statistics.js +21 -24
  52. package/crawlers/statistics.js.map +1 -1
  53. package/enqueue_links/enqueue_links.d.ts +32 -18
  54. package/enqueue_links/enqueue_links.d.ts.map +1 -1
  55. package/enqueue_links/enqueue_links.js +45 -24
  56. package/enqueue_links/enqueue_links.js.map +1 -1
  57. package/enqueue_links/shared.d.ts +25 -8
  58. package/enqueue_links/shared.d.ts.map +1 -1
  59. package/enqueue_links/shared.js +69 -37
  60. package/enqueue_links/shared.js.map +1 -1
  61. package/errors.d.ts +33 -3
  62. package/errors.d.ts.map +1 -1
  63. package/errors.js +48 -4
  64. package/errors.js.map +1 -1
  65. package/events/event_manager.d.ts +8 -5
  66. package/events/event_manager.d.ts.map +1 -1
  67. package/events/event_manager.js +7 -9
  68. package/events/event_manager.js.map +1 -1
  69. package/events/local_event_manager.d.ts +14 -4
  70. package/events/local_event_manager.d.ts.map +1 -1
  71. package/events/local_event_manager.js +33 -39
  72. package/events/local_event_manager.js.map +1 -1
  73. package/index.d.ts +3 -2
  74. package/index.d.ts.map +1 -1
  75. package/index.js +2 -1
  76. package/index.js.map +1 -1
  77. package/log.d.ts +82 -2
  78. package/log.d.ts.map +1 -1
  79. package/log.js +102 -0
  80. package/log.js.map +1 -1
  81. package/package.json +9 -10
  82. package/proxy_configuration.d.ts +14 -148
  83. package/proxy_configuration.d.ts.map +1 -1
  84. package/proxy_configuration.js +19 -167
  85. package/proxy_configuration.js.map +1 -1
  86. package/recoverable_state.d.ts +121 -0
  87. package/recoverable_state.d.ts.map +1 -0
  88. package/recoverable_state.js +142 -0
  89. package/recoverable_state.js.map +1 -0
  90. package/request.d.ts +74 -10
  91. package/request.d.ts.map +1 -1
  92. package/request.js +85 -23
  93. package/request.js.map +1 -1
  94. package/router.d.ts.map +1 -1
  95. package/router.js.map +1 -1
  96. package/serialization.js +1 -1
  97. package/serialization.js.map +1 -1
  98. package/service_locator.d.ts +157 -0
  99. package/service_locator.d.ts.map +1 -0
  100. package/service_locator.js +234 -0
  101. package/service_locator.js.map +1 -0
  102. package/session_pool/index.d.ts +0 -1
  103. package/session_pool/index.d.ts.map +1 -1
  104. package/session_pool/index.js +0 -1
  105. package/session_pool/index.js.map +1 -1
  106. package/session_pool/session.d.ts +26 -72
  107. package/session_pool/session.d.ts.map +1 -1
  108. package/session_pool/session.js +36 -98
  109. package/session_pool/session.js.map +1 -1
  110. package/session_pool/session_pool.d.ts +65 -71
  111. package/session_pool/session_pool.d.ts.map +1 -1
  112. package/session_pool/session_pool.js +101 -100
  113. package/session_pool/session_pool.js.map +1 -1
  114. package/storages/dataset.d.ts +90 -46
  115. package/storages/dataset.d.ts.map +1 -1
  116. package/storages/dataset.js +149 -121
  117. package/storages/dataset.js.map +1 -1
  118. package/storages/index.d.ts +3 -1
  119. package/storages/index.d.ts.map +1 -1
  120. package/storages/index.js +3 -1
  121. package/storages/index.js.map +1 -1
  122. package/storages/key_value_store.d.ts +104 -22
  123. package/storages/key_value_store.d.ts.map +1 -1
  124. package/storages/key_value_store.js +166 -51
  125. package/storages/key_value_store.js.map +1 -1
  126. package/storages/request_list.d.ts +9 -9
  127. package/storages/request_list.d.ts.map +1 -1
  128. package/storages/request_list.js +13 -8
  129. package/storages/request_list.js.map +1 -1
  130. package/storages/request_list_adapter.d.ts +58 -0
  131. package/storages/request_list_adapter.d.ts.map +1 -0
  132. package/storages/request_list_adapter.js +81 -0
  133. package/storages/request_list_adapter.js.map +1 -0
  134. package/storages/request_manager_tandem.d.ts +68 -0
  135. package/storages/request_manager_tandem.d.ts.map +1 -0
  136. package/storages/request_manager_tandem.js +124 -0
  137. package/storages/request_manager_tandem.js.map +1 -0
  138. package/storages/request_provider.d.ts +87 -22
  139. package/storages/request_provider.d.ts.map +1 -1
  140. package/storages/request_provider.js +127 -77
  141. package/storages/request_provider.js.map +1 -1
  142. package/storages/request_queue.d.ts +1 -3
  143. package/storages/request_queue.d.ts.map +1 -1
  144. package/storages/request_queue.js +2 -4
  145. package/storages/request_queue.js.map +1 -1
  146. package/storages/request_queue_v2.d.ts +3 -3
  147. package/storages/request_queue_v2.d.ts.map +1 -1
  148. package/storages/request_queue_v2.js +4 -5
  149. package/storages/request_queue_v2.js.map +1 -1
  150. package/storages/sitemap_request_list.d.ts +5 -5
  151. package/storages/sitemap_request_list.d.ts.map +1 -1
  152. package/storages/sitemap_request_list.js +10 -7
  153. package/storages/sitemap_request_list.js.map +1 -1
  154. package/storages/storage_instance_manager.d.ts +91 -0
  155. package/storages/storage_instance_manager.d.ts.map +1 -0
  156. package/storages/storage_instance_manager.js +236 -0
  157. package/storages/storage_instance_manager.js.map +1 -0
  158. package/storages/utils.d.ts +47 -1
  159. package/storages/utils.d.ts.map +1 -1
  160. package/storages/utils.js +57 -5
  161. package/storages/utils.js.map +1 -1
  162. package/typedefs.d.ts +1 -1
  163. package/typedefs.d.ts.map +1 -1
  164. package/validators.d.ts +4 -0
  165. package/validators.d.ts.map +1 -1
  166. package/validators.js +4 -0
  167. package/validators.js.map +1 -1
  168. package/crawlers/crawler_extension.d.ts +0 -12
  169. package/crawlers/crawler_extension.d.ts.map +0 -1
  170. package/crawlers/crawler_extension.js +0 -14
  171. package/crawlers/crawler_extension.js.map +0 -1
  172. package/http_clients/base-http-client.d.ts +0 -134
  173. package/http_clients/base-http-client.d.ts.map +0 -1
  174. package/http_clients/base-http-client.js +0 -33
  175. package/http_clients/base-http-client.js.map +0 -1
  176. package/http_clients/form-data-like.d.ts +0 -67
  177. package/http_clients/form-data-like.d.ts.map +0 -1
  178. package/http_clients/form-data-like.js +0 -5
  179. package/http_clients/form-data-like.js.map +0 -1
  180. package/http_clients/got-scraping-http-client.d.ts +0 -15
  181. package/http_clients/got-scraping-http-client.d.ts.map +0 -1
  182. package/http_clients/got-scraping-http-client.js +0 -69
  183. package/http_clients/got-scraping-http-client.js.map +0 -1
  184. package/http_clients/index.d.ts +0 -3
  185. package/http_clients/index.d.ts.map +0 -1
  186. package/http_clients/index.js +0 -3
  187. package/http_clients/index.js.map +0 -1
  188. package/session_pool/events.d.ts +0 -3
  189. package/session_pool/events.d.ts.map +0 -1
  190. package/session_pool/events.js +0 -3
  191. package/session_pool/events.js.map +0 -1
  192. package/storages/storage_manager.d.ts +0 -58
  193. package/storages/storage_manager.d.ts.map +0 -1
  194. package/storages/storage_manager.js +0 -105
  195. package/storages/storage_manager.js.map +0 -1
  196. package/tsconfig.build.tsbuildinfo +0 -1
@@ -1,25 +1,81 @@
1
- import type { BatchAddRequestsResult, Dictionary, ProcessedRequest, QueueOperationInfo, RequestQueueClient, RequestQueueInfo, StorageClient } from '@crawlee/types';
1
+ import type { BaseHttpClient, BatchAddRequestsResult, Dictionary, ProcessedRequest, QueueOperationInfo, RequestQueueClient, RequestQueueInfo } from '@crawlee/types';
2
+ import type { ReadonlyDeep } from 'type-fest';
2
3
  import { ListDictionary, LruCache } from '@apify/datastructures';
3
- import type { Log } from '@apify/log';
4
4
  import { Configuration } from '../configuration.js';
5
+ import type { EventManager } from '../events/event_manager.js';
6
+ import type { CrawleeLogger } from '../log.js';
5
7
  import type { ProxyConfiguration } from '../proxy_configuration.js';
6
8
  import type { InternalSource, RequestOptions, Source } from '../request.js';
7
9
  import { Request } from '../request.js';
8
- import type { IStorage, StorageManagerOptions } from './storage_manager.js';
9
- export declare abstract class RequestProvider implements IStorage {
10
- readonly config: Configuration;
10
+ import type { IStorage, StorageIdentifier } from './storage_instance_manager.js';
11
+ import type { StorageOpenOptions } from './utils.js';
12
+ export type RequestsLike = AsyncIterable<Source | string> | Iterable<Source | string> | (Source | string)[];
13
+ /**
14
+ * Represents a provider of requests/URLs to crawl.
15
+ */
16
+ export interface IRequestManager {
17
+ /**
18
+ * Returns `true` if all requests were already handled and there are no more left.
19
+ */
20
+ isFinished(): Promise<boolean>;
21
+ /**
22
+ * Resolves to `true` if the next call to {@link IRequestManager.fetchNextRequest} function
23
+ * would return `null`, otherwise it resolves to `false`.
24
+ * Note that even if the provider is empty, there might be some pending requests currently being processed.
25
+ */
26
+ isEmpty(): Promise<boolean>;
27
+ /**
28
+ * Returns number of handled requests.
29
+ */
30
+ handledCount(): Promise<number>;
31
+ /**
32
+ * Get the total number of requests known to the request manager.
33
+ */
34
+ getTotalCount(): number;
35
+ /**
36
+ * Get an offline approximation of the number of pending requests.
37
+ */
38
+ getPendingCount(): number;
39
+ /**
40
+ * Gets the next {@link Request} to process.
41
+ *
42
+ * The function's `Promise` resolves to `null` if there are no more
43
+ * requests to process.
44
+ */
45
+ fetchNextRequest<T extends Dictionary = Dictionary>(): Promise<Request<T> | null>;
46
+ /**
47
+ * Can be used to iterate over the `RequestManager` instance in a `for await .. of` loop.
48
+ * Provides an alternative for the repeated use of `fetchNextRequest`.
49
+ */
50
+ [Symbol.asyncIterator](): AsyncGenerator<Request>;
51
+ /**
52
+ * Marks request as handled after successful processing.
53
+ */
54
+ markRequestHandled(request: Request): Promise<RequestQueueOperationInfo | void | null>;
55
+ /**
56
+ * Reclaims request to the provider if its processing failed.
57
+ * The request will become available in the next `fetchNextRequest()`.
58
+ */
59
+ reclaimRequest(request: Request, options?: RequestQueueOperationOptions): Promise<RequestQueueOperationInfo | null>;
60
+ addRequest(requestLike: Source, options?: RequestQueueOperationOptions): Promise<RequestQueueOperationInfo>;
61
+ addRequestsBatched(requests: RequestsLike, options?: AddRequestsBatchedOptions): Promise<AddRequestsBatchedResult>;
62
+ }
63
+ export declare abstract class RequestProvider implements IStorage, IRequestManager {
64
+ protected readonly config: Configuration;
11
65
  id: string;
12
66
  name?: string;
13
67
  timeoutSecs: number;
14
68
  clientKey: string;
15
69
  client: RequestQueueClient;
16
70
  protected proxyConfiguration?: ProxyConfiguration;
17
- log: Log;
71
+ log: CrawleeLogger;
18
72
  internalTimeoutMillis: number;
19
73
  requestLockSecs: number;
20
74
  assumedTotalCount: number;
21
75
  assumedHandledCount: number;
22
76
  private initialCount;
77
+ private initialHandledCount;
78
+ private isInitialized;
23
79
  protected queueHeadIds: ListDictionary<string>;
24
80
  protected requestCache: LruCache<RequestLruItem>;
25
81
  protected recentlyHandledRequestsCache: LruCache<boolean>;
@@ -27,6 +83,8 @@ export declare abstract class RequestProvider implements IStorage {
27
83
  protected lastActivity: Date;
28
84
  protected isFinishedCalledWhileHeadWasNotEmpty: number;
29
85
  protected inProgressRequestBatchCount: number;
86
+ protected httpClient?: BaseHttpClient;
87
+ protected readonly events: EventManager;
30
88
  constructor(options: InternalRequestProviderOptions, config?: Configuration);
31
89
  /**
32
90
  * Returns an offline approximation of the total number of requests in the queue (i.e. pending + handled).
@@ -34,6 +92,12 @@ export declare abstract class RequestProvider implements IStorage {
34
92
  * Survives restarts and actor migrations.
35
93
  */
36
94
  getTotalCount(): number;
95
+ /**
96
+ * Returns an offline approximation of the total number of pending requests in the queue.
97
+ *
98
+ * Survives restarts and Actor migrations.
99
+ */
100
+ getPendingCount(): number;
37
101
  /**
38
102
  * Adds a request to the queue.
39
103
  *
@@ -63,7 +127,7 @@ export declare abstract class RequestProvider implements IStorage {
63
127
  * Note that the function sets the `uniqueKey` and `id` fields to the passed requests if missing.
64
128
  * @param [options] Request queue operation options.
65
129
  */
66
- addRequests(requestsLike: Source[], options?: RequestQueueOperationOptions): Promise<BatchAddRequestsResult>;
130
+ addRequests(requestsLike: RequestsLike, options?: RequestQueueOperationOptions): Promise<BatchAddRequestsResult>;
67
131
  /**
68
132
  * Adds requests to the queue in batches. By default, it will resolve after the initial batch is added, and continue
69
133
  * adding the rest in the background. You can configure the batch size via `batchSize` option and the sleep time in between
@@ -73,7 +137,7 @@ export declare abstract class RequestProvider implements IStorage {
73
137
  * @param requests The requests to add
74
138
  * @param options Options for the request queue
75
139
  */
76
- addRequestsBatched(requests: (string | Source)[], options?: AddRequestsBatchedOptions): Promise<AddRequestsBatchedResult>;
140
+ addRequestsBatched(requests: ReadonlyDeep<RequestsLike>, options?: AddRequestsBatchedOptions): Promise<AddRequestsBatchedResult>;
77
141
  /**
78
142
  * Gets the request from the queue specified by ID.
79
143
  *
@@ -98,7 +162,7 @@ export declare abstract class RequestProvider implements IStorage {
98
162
  * @returns
99
163
  * Returns the request object or `null` if there are no more pending requests.
100
164
  */
101
- abstract fetchNextRequest<T extends Dictionary = Dictionary>(options?: RequestOptions): Promise<Request<T> | null>;
165
+ abstract fetchNextRequest<T extends Dictionary = Dictionary>(): Promise<Request<T> | null>;
102
166
  /**
103
167
  * Marks a request that was previously returned by the
104
168
  * {@link RequestQueue.fetchNextRequest}
@@ -142,6 +206,10 @@ export declare abstract class RequestProvider implements IStorage {
142
206
  * depending on the mode of operation.
143
207
  */
144
208
  drop(): Promise<void>;
209
+ /**
210
+ * @inheritdoc
211
+ */
212
+ [Symbol.asyncIterator](): AsyncGenerator<Request<Dictionary>, void, unknown>;
145
213
  /**
146
214
  * Returns the number of handled requests.
147
215
  *
@@ -150,23 +218,17 @@ export declare abstract class RequestProvider implements IStorage {
150
218
  * ```javascript
151
219
  * const { handledRequestCount } = await queue.getInfo();
152
220
  * ```
221
+ * @inheritdoc
153
222
  */
154
223
  handledCount(): Promise<number>;
155
224
  /**
156
225
  * Returns an object containing general information about the request queue.
157
226
  *
158
- * The function returns the same object as the Apify API Client's
159
- * [getQueue](https://docs.apify.com/api/apify-client-js/latest#ApifyClient-requestQueues)
160
- * function, which in turn calls the
161
- * [Get request queue](https://apify.com/docs/api/v2#/reference/request-queues/queue/get-request-queue)
162
- * API endpoint.
163
- *
164
227
  * **Example:**
165
228
  * ```
166
229
  * {
167
230
  * id: "WkzbQMuFYuamGv3YF",
168
231
  * name: "my-queue",
169
- * userId: "wRsJZtadYvn4mBZmm",
170
232
  * createdAt: new Date("2015-12-12T07:34:14.202Z"),
171
233
  * modifiedAt: new Date("2015-12-13T08:36:13.202Z"),
172
234
  * accessedAt: new Date("2015-12-14T08:36:13.202Z"),
@@ -175,8 +237,10 @@ export declare abstract class RequestProvider implements IStorage {
175
237
  * pendingRequestCount: 20,
176
238
  * }
177
239
  * ```
240
+ *
241
+ * @throws If the underlying storage no longer exists (e.g. it was deleted externally).
178
242
  */
179
- getInfo(): Promise<RequestQueueInfo | undefined>;
243
+ getInfo(): Promise<RequestQueueInfo>;
180
244
  /**
181
245
  * Fetches URLs from requestsFromUrl and returns them in format of list of requests
182
246
  */
@@ -200,12 +264,13 @@ export declare abstract class RequestProvider implements IStorage {
200
264
  *
201
265
  * For more details and code examples, see the {@link RequestQueue} class.
202
266
  *
203
- * @param [queueIdOrName]
204
- * ID or name of the request queue to be opened. If `null` or `undefined`,
205
- * the function returns the default request queue associated with the crawler run.
267
+ * @param [identifier]
268
+ * ID or name of the request queue to be opened. If a string is provided, it will first be
269
+ * looked up as an ID; if no such storage exists, it will be treated as a name.
270
+ * If `null` or `undefined`, the function returns the default request queue associated with the crawler run.
206
271
  * @param [options] Open Request Queue options.
207
272
  */
208
- static open(queueIdOrName?: string | null, options?: StorageManagerOptions): Promise<RequestProvider>;
273
+ static open(identifier?: string | StorageIdentifier | null, options?: StorageOpenOptions): Promise<RequestProvider>;
209
274
  }
210
275
  interface RequestLruItem {
211
276
  uniqueKey: string;
@@ -218,7 +283,7 @@ interface RequestLruItem {
218
283
  export interface RequestProviderOptions {
219
284
  id: string;
220
285
  name?: string;
221
- client: StorageClient;
286
+ client: RequestQueueClient;
222
287
  /**
223
288
  * Used to pass the proxy configuration for the `requestsFromUrl` objects.
224
289
  * Takes advantage of the internal address rotation and authentication process.
@@ -1 +1 @@
1
- {"version":3,"file":"request_provider.d.ts","sourceRoot":"","sources":["../../src/storages/request_provider.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EACR,sBAAsB,EACtB,UAAU,EACV,gBAAgB,EAChB,kBAAkB,EAClB,kBAAkB,EAClB,gBAAgB,EAChB,aAAa,EAChB,MAAM,gBAAgB,CAAC;AAIxB,OAAO,EAAE,cAAc,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AACjE,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,YAAY,CAAC;AAGtC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AACpE,OAAO,KAAK,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAC5E,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAGxC,OAAO,KAAK,EAAE,QAAQ,EAAE,qBAAqB,EAAE,MAAM,sBAAsB,CAAC;AAI5E,8BAAsB,eAAgB,YAAW,QAAQ;IAkCjD,QAAQ,CAAC,MAAM;IAjCnB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,WAAW,SAAM;IACjB,SAAS,SAA0B;IACnC,MAAM,EAAE,kBAAkB,CAAC;IAC3B,SAAS,CAAC,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;IAElD,GAAG,EAAE,GAAG,CAAC;IACT,qBAAqB,SAAc;IACnC,eAAe,SAAU;IAIzB,iBAAiB,SAAK;IACtB,mBAAmB,SAAK;IAExB,OAAO,CAAC,YAAY,CAAK;IAEzB,SAAS,CAAC,YAAY,yBAAgC;IACtD,SAAS,CAAC,YAAY,EAAE,QAAQ,CAAC,cAAc,CAAC,CAAC;IAEjD,SAAS,CAAC,4BAA4B,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC;IAE1D,SAAS,CAAC,uBAAuB,UAAS;IAE1C,SAAS,CAAC,YAAY,OAAc;IAEpC,SAAS,CAAC,oCAAoC,SAAK;IAEnD,SAAS,CAAC,2BAA2B,SAAK;gBAGtC,OAAO,EAAE,8BAA8B,EAC9B,MAAM,gBAAkC;IAsBrD;;;;OAIG;IACH,aAAa;IAIb;;;;;;;;;;;;;OAaG;IACG,UAAU,CACZ,WAAW,EAAE,MAAM,EACnB,OAAO,GAAE,4BAAiC,GAC3C,OAAO,CAAC,yBAAyB,CAAC;IAmErC;;;;;;;;;;;;;OAaG;IACG,WAAW,CACb,YAAY,EAAE,MAAM,EAAE,EACtB,OAAO,GAAE,4BAAiC,GAC3C,OAAO,CAAC,sBAAsB,CAAC;IAqGlC;;;;;;;;OAQG;IACG,kBAAkB,CACpB,QAAQ,EAAE,CAAC,MAAM,GAAG,MAAM,CAAC,EAAE,EAC7B,OAAO,GAAE,yBAA8B,GACxC,OAAO,CAAC,wBAAwB,CAAC;IAyHpC;;;;;OAKG;IACG,UAAU,CAAC,CAAC,SAAS,UAAU,GAAG,UAAU,EAAE,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC;IAW3F;;;;;;;;;;;;;;;;OAgBG;IACH,QAAQ,CAAC,gBAAgB,CAAC,CAAC,SAAS,UAAU,GAAG,UAAU,EAAE,OAAO,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC;IAElH;;;;;OAKG;IACG,kBAAkB,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,yBAAyB,GAAG,IAAI,CAAC;IAwCrF;;;;;OAKG;IACG,cAAc,CAChB,OAAO,EAAE,OAAO,EAChB,OAAO,GAAE,4BAAiC,GAC3C,OAAO,CAAC,yBAAyB,GAAG,IAAI,CAAC;IAmC5C,SAAS,CAAC,QAAQ,CAAC,oBAAoB,IAAI,OAAO,CAAC,IAAI,CAAC;IAExD;;;;;OAKG;IACG,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC;IAKjC;;;;;OAKG;IACH,QAAQ,CAAC,UAAU,IAAI,OAAO,CAAC,OAAO,CAAC;IAEvC,SAAS,CAAC,MAAM;IAShB;;OAEG;IACH,SAAS,CAAC,aAAa,CAAC,QAAQ,EAAE,MAAM,EAAE,kBAAkB,EAAE,yBAAyB,GAAG,IAAI;IAc9F;;OAEG;IACH,SAAS,CAAC,2BAA2B,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,OAAO,GAAG,IAAI;IAQlF;;;OAGG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAQ3B;;;;;;;;OAQG;IACG,YAAY,IAAI,OAAO,CAAC,MAAM,CAAC;IAMrC;;;;;;;;;;;;;;;;;;;;;;;OAuBG;IACG,OAAO,IAAI,OAAO,CAAC,gBAAgB,GAAG,SAAS,CAAC;IAMtD;;OAEG;cACa,qBAAqB,CAAC,MAAM,EAAE,cAAc,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC;IAwBxF;;OAEG;cACa,mBAAmB,CAC/B,MAAM,EAAE,cAAc,EACtB,eAAe,EAAE,cAAc,EAAE,EACjC,OAAO,EAAE,4BAA4B;IAiBzC;;OAEG;YACW,mBAAmB;IAMjC;;;;;;;;;;;;;;;OAeG;WACU,IAAI,CAAC,aAAa,CAAC,EAAE,MAAM,GAAG,IAAI,EAAE,OAAO,GAAE,qBAA0B,GAAG,OAAO,CAAC,eAAe,CAAC;CA2BlH;AAYD,UAAU,cAAc;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,OAAO,CAAC;IACnB,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,OAAO,GAAG,IAAI,CAAC;IACzB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,SAAS,EAAE,OAAO,CAAC;CACtB;AAED,MAAM,WAAW,sBAAsB;IACnC,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,aAAa,CAAC;IAEtB;;;;OAIG;IACH,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;CAC3C;AAED;;GAEG;AACH,MAAM,WAAW,mBAAoB,SAAQ,sBAAsB;CAAG;AAEtE;;GAEG;AACH,MAAM,WAAW,8BAA+B,SAAQ,sBAAsB;IAC1E,SAAS,EAAE,MAAM,CAAC;IAClB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,8BAA8B,EAAE,MAAM,CAAC;CAC1C;AAED,MAAM,WAAW,4BAA4B;IACzC;;;;;;;;;;;;OAYG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB;;;;OAIG;IACH,KAAK,CAAC,EAAE,OAAO,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,yBAA0B,SAAQ,kBAAkB;IACjE,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,OAAO,CAAC;CACtB;AAED,MAAM,WAAW,yBAA0B,SAAQ,4BAA4B;IAC3E;;;OAGG;IACH,2BAA2B,CAAC,EAAE,OAAO,CAAC;IAEtC;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB;;OAEG;IACH,wBAAwB,CAAC,EAAE,MAAM,CAAC;CACrC;AAED,MAAM,WAAW,wBAAwB;IACrC,aAAa,EAAE,gBAAgB,EAAE,CAAC;IAClC;;;;;;;;;;;;;;;OAeG;IACH,2BAA2B,EAAE,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAAC;CAC5D"}
1
+ {"version":3,"file":"request_provider.d.ts","sourceRoot":"","sources":["../../src/storages/request_provider.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EACR,cAAc,EACd,sBAAsB,EACtB,UAAU,EACV,gBAAgB,EAChB,kBAAkB,EAClB,kBAAkB,EAClB,gBAAgB,EACnB,MAAM,gBAAgB,CAAC;AAWxB,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AAE9C,OAAO,EAAE,cAAc,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AAGjE,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACpD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,4BAA4B,CAAC;AAE/D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC;AAC/C,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AACpE,OAAO,KAAK,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAC5E,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAGxC,OAAO,KAAK,EAAE,QAAQ,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC;AACjF,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAIrD,MAAM,MAAM,YAAY,GAAG,aAAa,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,QAAQ,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,MAAM,GAAG,MAAM,CAAC,EAAE,CAAC;AAE5G;;GAEG;AACH,MAAM,WAAW,eAAe;IAC5B;;OAEG;IACH,UAAU,IAAI,OAAO,CAAC,OAAO,CAAC,CAAC;IAE/B;;;;OAIG;IACH,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC,CAAC;IAE5B;;OAEG;IACH,YAAY,IAAI,OAAO,CAAC,MAAM,CAAC,CAAC;IAEhC;;OAEG;IACH,aAAa,IAAI,MAAM,CAAC;IAExB;;OAEG;IACH,eAAe,IAAI,MAAM,CAAC;IAE1B;;;;;OAKG;IACH,gBAAgB,CAAC,CAAC,SAAS,UAAU,GAAG,UAAU,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC;IAElF;;;OAGG;IACH,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,cAAc,CAAC,OAAO,CAAC,CAAC;IAElD;;OAEG;IACH,kBAAkB,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,yBAAyB,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC;IAEvF;;;OAGG;IACH,cAAc,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,EAAE,4BAA4B,GAAG,OAAO,CAAC,yBAAyB,GAAG,IAAI,CAAC,CAAC;IAEpH,UAAU,CAAC,WAAW,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,4BAA4B,GAAG,OAAO,CAAC,yBAAyB,CAAC,CAAC;IAE5G,kBAAkB,CAAC,QAAQ,EAAE,YAAY,EAAE,OAAO,CAAC,EAAE,yBAAyB,GAAG,OAAO,CAAC,wBAAwB,CAAC,CAAC;CACtH;AAED,8BAAsB,eAAgB,YAAW,QAAQ,EAAE,eAAe;IAwClE,SAAS,CAAC,QAAQ,CAAC,MAAM,EAAE,aAAa;IAvC5C,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,WAAW,SAAM;IACjB,SAAS,SAA0B;IACnC,MAAM,EAAE,kBAAkB,CAAC;IAC3B,SAAS,CAAC,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;IAElD,GAAG,EAAE,aAAa,CAAC;IACnB,qBAAqB,SAAc;IACnC,eAAe,SAAU;IAIzB,iBAAiB,SAAK;IACtB,mBAAmB,SAAK;IAExB,OAAO,CAAC,YAAY,CAAK;IACzB,OAAO,CAAC,mBAAmB,CAAK;IAChC,OAAO,CAAC,aAAa,CAAS;IAE9B,SAAS,CAAC,YAAY,yBAAgC;IACtD,SAAS,CAAC,YAAY,EAAE,QAAQ,CAAC,cAAc,CAAC,CAAC;IAEjD,SAAS,CAAC,4BAA4B,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC;IAE1D,SAAS,CAAC,uBAAuB,UAAS;IAE1C,SAAS,CAAC,YAAY,OAAc;IAEpC,SAAS,CAAC,oCAAoC,SAAK;IAEnD,SAAS,CAAC,2BAA2B,SAAK;IAE1C,SAAS,CAAC,UAAU,CAAC,EAAE,cAAc,CAAC;IAEtC,SAAS,CAAC,QAAQ,CAAC,MAAM,EAAE,YAAY,CAAC;gBAGpC,OAAO,EAAE,8BAA8B,EACpB,MAAM,GAAE,aAA+C;IAoB9E;;;;OAIG;IACH,aAAa;IAIb;;;;OAIG;IACH,eAAe;IAIf;;;;;;;;;;;;;OAaG;IACG,UAAU,CACZ,WAAW,EAAE,MAAM,EACnB,OAAO,GAAE,4BAAiC,GAC3C,OAAO,CAAC,yBAAyB,CAAC;IAmErC;;;;;;;;;;;;;OAaG;IACG,WAAW,CACb,YAAY,EAAE,YAAY,EAC1B,OAAO,GAAE,4BAAiC,GAC3C,OAAO,CAAC,sBAAsB,CAAC;IA4GlC;;;;;;;;OAQG;IACG,kBAAkB,CACpB,QAAQ,EAAE,YAAY,CAAC,YAAY,CAAC,EACpC,OAAO,GAAE,yBAA8B,GACxC,OAAO,CAAC,wBAAwB,CAAC;IAmIpC;;;;;OAKG;IACG,UAAU,CAAC,CAAC,SAAS,UAAU,GAAG,UAAU,EAAE,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC;IAW3F;;;;;;;;;;;;;;;;OAgBG;IACH,QAAQ,CAAC,gBAAgB,CAAC,CAAC,SAAS,UAAU,GAAG,UAAU,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC;IAE1F;;;;;OAKG;IACG,kBAAkB,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,yBAAyB,GAAG,IAAI,CAAC;IAwCrF;;;;;OAKG;IACG,cAAc,CAChB,OAAO,EAAE,OAAO,EAChB,OAAO,GAAE,4BAAiC,GAC3C,OAAO,CAAC,yBAAyB,GAAG,IAAI,CAAC;IAmC5C,SAAS,CAAC,QAAQ,CAAC,oBAAoB,IAAI,OAAO,CAAC,IAAI,CAAC;IAExD;;;;;OAKG;IACG,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC;IAKjC;;;;;OAKG;IACH,QAAQ,CAAC,UAAU,IAAI,OAAO,CAAC,OAAO,CAAC;IAEvC,SAAS,CAAC,MAAM;IAShB;;OAEG;IACH,SAAS,CAAC,aAAa,CAAC,QAAQ,EAAE,MAAM,EAAE,kBAAkB,EAAE,yBAAyB,GAAG,IAAI;IAc9F;;OAEG;IACH,SAAS,CAAC,2BAA2B,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,OAAO,GAAG,IAAI;IAQlF;;;OAGG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAO3B;;OAEG;IACI,CAAC,MAAM,CAAC,aAAa,CAAC;IAQ7B;;;;;;;;;OASG;IACG,YAAY,IAAI,OAAO,CAAC,MAAM,CAAC;IAMrC;;;;;;;;;;;;;;;;;;OAkBG;IACG,OAAO,IAAI,OAAO,CAAC,gBAAgB,CAAC;IAM1C;;OAEG;cACa,qBAAqB,CAAC,MAAM,EAAE,cAAc,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC;IAwBxF;;OAEG;cACa,mBAAmB,CAC/B,MAAM,EAAE,cAAc,EACtB,eAAe,EAAE,cAAc,EAAE,EACjC,OAAO,EAAE,4BAA4B;IAiBzC;;OAEG;YACW,mBAAmB;IAWjC;;;;;;;;;;;;;;;;OAgBG;WACU,IAAI,CACb,UAAU,CAAC,EAAE,MAAM,GAAG,iBAAiB,GAAG,IAAI,EAC9C,OAAO,GAAE,kBAAuB,GACjC,OAAO,CAAC,eAAe,CAAC;CAmD9B;AAYD,UAAU,cAAc;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,OAAO,CAAC;IACnB,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,OAAO,GAAG,IAAI,CAAC;IACzB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,SAAS,EAAE,OAAO,CAAC;CACtB;AAED,MAAM,WAAW,sBAAsB;IACnC,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,kBAAkB,CAAC;IAE3B;;;;OAIG;IACH,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;CAC3C;AAED;;GAEG;AACH,MAAM,WAAW,mBAAoB,SAAQ,sBAAsB;CAAG;AAEtE;;GAEG;AACH,MAAM,WAAW,8BAA+B,SAAQ,sBAAsB;IAC1E,SAAS,EAAE,MAAM,CAAC;IAClB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,8BAA8B,EAAE,MAAM,CAAC;CAC1C;AAED,MAAM,WAAW,4BAA4B;IACzC;;;;;;;;;;;;OAYG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB;;;;OAIG;IACH,KAAK,CAAC,EAAE,OAAO,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,yBAA0B,SAAQ,kBAAkB;IACjE,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,OAAO,CAAC;CACtB;AAED,MAAM,WAAW,yBAA0B,SAAQ,4BAA4B;IAC3E;;;OAGG;IACH,2BAA2B,CAAC,EAAE,OAAO,CAAC;IAEtC;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB;;OAEG;IACH,wBAAwB,CAAC,EAAE,MAAM,CAAC;CACrC;AAED,MAAM,WAAW,wBAAwB;IACrC,aAAa,EAAE,gBAAgB,EAAE,CAAC;IAClC;;;;;;;;;;;;;;;OAeG;IACH,2BAA2B,EAAE,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAAC;CAC5D"}
@@ -1,13 +1,13 @@
1
1
  import { inspect } from 'node:util';
2
- import { chunk, downloadListOfUrls, sleep } from '@crawlee/utils';
2
+ import { chunkedAsyncIterable, downloadListOfUrls, getObjectType, isAsyncIterable, isIterable, peekableAsyncIterable, sleep, } from '@crawlee/utils';
3
3
  import ow from 'ow';
4
4
  import { ListDictionary, LruCache } from '@apify/datastructures';
5
5
  import { cryptoRandomObjectId } from '@apify/utilities';
6
6
  import { Configuration } from '../configuration.js';
7
- import { log } from '../log.js';
8
7
  import { Request } from '../request.js';
8
+ import { serviceLocator } from '../service_locator.js';
9
9
  import { checkStorageAccess } from './access_checking.js';
10
- import { StorageManager } from './storage_manager.js';
10
+ import { resolveStorageIdentifier } from './storage_instance_manager.js';
11
11
  import { getRequestId, purgeDefaultStorages, QUERY_HEAD_MIN_LENGTH } from './utils.js';
12
12
  export class RequestProvider {
13
13
  config;
@@ -25,6 +25,8 @@ export class RequestProvider {
25
25
  assumedTotalCount = 0;
26
26
  assumedHandledCount = 0;
27
27
  initialCount = 0;
28
+ initialHandledCount = 0; // We track this separately from `assumedHandledCount` which is used non-trivially by RequestQueueV1
29
+ isInitialized = false;
28
30
  queueHeadIds = new ListDictionary();
29
31
  requestCache;
30
32
  recentlyHandledRequestsCache;
@@ -32,20 +34,21 @@ export class RequestProvider {
32
34
  lastActivity = new Date();
33
35
  isFinishedCalledWhileHeadWasNotEmpty = 0;
34
36
  inProgressRequestBatchCount = 0;
37
+ httpClient;
38
+ events;
35
39
  constructor(options, config = Configuration.getGlobalConfig()) {
36
40
  this.config = config;
37
41
  this.id = options.id;
38
42
  this.name = options.name;
39
- this.client = options.client.requestQueue(this.id, {
40
- clientKey: this.clientKey,
41
- timeoutSecs: this.timeoutSecs,
42
- });
43
+ this.events = serviceLocator.getEventManager();
44
+ this.client = options.client;
43
45
  this.proxyConfiguration = options.proxyConfiguration;
44
46
  this.requestCache = new LruCache({ maxLength: options.requestCacheMaxSize });
45
47
  this.recentlyHandledRequestsCache = new LruCache({ maxLength: options.recentlyHandledRequestsMaxSize });
46
- this.log = log.child({ prefix: `${options.logPrefix}(${this.id}, ${this.name ?? 'no-name'})` });
47
- const eventManager = config.getEventManager();
48
- eventManager.on("migrating" /* EventType.MIGRATING */, async () => {
48
+ this.log = serviceLocator
49
+ .getLogger()
50
+ .child({ prefix: `${options.logPrefix}(${this.id}, ${this.name ?? 'no-name'})` });
51
+ this.events.on("migrating" /* EventType.MIGRATING */, async () => {
49
52
  this.queuePausedForMigration = true;
50
53
  });
51
54
  }
@@ -57,6 +60,14 @@ export class RequestProvider {
57
60
  getTotalCount() {
58
61
  return this.assumedTotalCount + this.initialCount;
59
62
  }
63
+ /**
64
+ * Returns an offline approximation of the total number of pending requests in the queue.
65
+ *
66
+ * Survives restarts and Actor migrations.
67
+ */
68
+ getPendingCount() {
69
+ return this.getTotalCount() - this.initialHandledCount - this.assumedHandledCount;
70
+ }
60
71
  /**
61
72
  * Adds a request to the queue.
62
73
  *
@@ -134,7 +145,9 @@ export class RequestProvider {
134
145
  async addRequests(requestsLike, options = {}) {
135
146
  checkStorageAccess();
136
147
  this.lastActivity = new Date();
137
- ow(requestsLike, ow.array);
148
+ ow(requestsLike, ow.object
149
+ .is((value) => isIterable(value) || isAsyncIterable(value))
150
+ .message((value) => `Expected an iterable or async iterable, got ${getObjectType(value)}`));
138
151
  ow(options, ow.object.exactShape({
139
152
  forefront: ow.optional.boolean,
140
153
  cache: ow.optional.boolean,
@@ -153,17 +166,19 @@ export class RequestProvider {
153
166
  processedRequests: [],
154
167
  unprocessedRequests: [],
155
168
  };
156
- for (const requestLike of requestsLike) {
157
- if ('requestsFromUrl' in requestLike) {
158
- const requests = await this._fetchRequestsFromUrl(requestLike);
159
- await this._addFetchedRequests(requestLike, requests, options);
169
+ const requests = [];
170
+ for await (const requestLike of requestsLike) {
171
+ if (typeof requestLike === 'string') {
172
+ requests.push(new Request({ url: requestLike }));
173
+ }
174
+ else if ('requestsFromUrl' in requestLike) {
175
+ const fetchedRequests = await this._fetchRequestsFromUrl(requestLike);
176
+ await this._addFetchedRequests(requestLike, fetchedRequests, options);
177
+ }
178
+ else {
179
+ requests.push(requestLike instanceof Request ? requestLike : new Request(requestLike));
160
180
  }
161
181
  }
162
- const requests = requestsLike
163
- .filter((requestLike) => !('requestsFromUrl' in requestLike))
164
- .map((requestLike) => {
165
- return requestLike instanceof Request ? requestLike : new Request(requestLike);
166
- });
167
182
  const requestsToAdd = new Map();
168
183
  for (const request of requests) {
169
184
  const cacheKey = getCachedRequestId(request.uniqueKey);
@@ -219,43 +234,44 @@ export class RequestProvider {
219
234
  async addRequestsBatched(requests, options = {}) {
220
235
  checkStorageAccess();
221
236
  this.lastActivity = new Date();
237
+ ow(requests, ow.object
238
+ .is((value) => isIterable(value) || isAsyncIterable(value))
239
+ .message((value) => `Expected an iterable or async iterable, got ${getObjectType(value)}`));
222
240
  ow(options, ow.object.exactShape({
223
241
  forefront: ow.optional.boolean,
224
242
  waitForAllRequestsToBeAdded: ow.optional.boolean,
225
243
  batchSize: ow.optional.number,
226
244
  waitBetweenBatchesMillis: ow.optional.number,
227
245
  }));
228
- // The `requests` array can be huge, and `ow` is very slow for anything more complex.
229
- // This explicit iteration takes a few milliseconds, while the ow check can take tens of seconds.
230
- // ow(requests, ow.array.ofType(ow.any(
231
- // ow.string,
232
- // ow.object.partialShape({ url: ow.string, id: ow.undefined }),
233
- // ow.object.partialShape({ requestsFromUrl: ow.string, regex: ow.optional.regExp }),
234
- // )));
235
- for (const request of requests) {
236
- if (typeof request === 'string') {
237
- continue;
238
- }
239
- if (typeof request === 'object' && request !== null) {
240
- if (typeof request.url === 'string' && typeof request.id === 'undefined') {
241
- continue;
246
+ const addRequest = this.addRequest.bind(this);
247
+ async function* generateRequests() {
248
+ for await (const opts of requests) {
249
+ // Validate the input
250
+ if (typeof opts === 'object' && opts !== null) {
251
+ if (opts.url !== undefined && typeof opts.url !== 'string') {
252
+ throw new Error(`Request options are not valid, the 'url' property is not a string. Input: ${inspect(opts)}`);
253
+ }
254
+ if (opts.id !== undefined) {
255
+ throw new Error(`Request options are not valid, the 'id' property must not be present. Input: ${inspect(opts)}`);
256
+ }
257
+ if (opts.requestsFromUrl !== undefined &&
258
+ typeof opts.requestsFromUrl !== 'string') {
259
+ throw new Error(`Request options are not valid, the 'requestsFromUrl' property is not a string. Input: ${inspect(opts)}`);
260
+ }
242
261
  }
243
- if (typeof request.requestsFromUrl === 'string') {
244
- continue;
262
+ if (opts && typeof opts === 'object' && 'requestsFromUrl' in opts) {
263
+ // Handle URL lists right away
264
+ await addRequest(opts, { forefront: options.forefront });
265
+ }
266
+ else {
267
+ // Yield valid requests
268
+ yield typeof opts === 'string' ? { url: opts } : opts;
245
269
  }
246
270
  }
247
- throw new Error(`Request options are not valid, provide either a URL or an object with 'url' property (but without 'id' property), or an object with 'requestsFromUrl' property. Input: ${inspect(request)}`);
248
271
  }
249
272
  const { batchSize = 1000, waitBetweenBatchesMillis = 1000 } = options;
250
- const sources = [];
251
- for (const opts of requests) {
252
- if (opts && typeof opts === 'object' && 'requestsFromUrl' in opts) {
253
- await this.addRequest(opts, { forefront: options.forefront });
254
- }
255
- else {
256
- sources.push(typeof opts === 'string' ? { url: opts } : opts);
257
- }
258
- }
273
+ const chunks = peekableAsyncIterable(chunkedAsyncIterable(generateRequests(), batchSize));
274
+ const chunksIterator = chunks[Symbol.asyncIterator]();
259
275
  const attemptToAddToQueueAndAddAnyUnprocessed = async (providedRequests, cache = true) => {
260
276
  const resultsToReturn = [];
261
277
  const apiResult = await this.addRequests(providedRequests, { forefront: options.forefront, cache });
@@ -266,11 +282,15 @@ export class RequestProvider {
266
282
  }
267
283
  return resultsToReturn;
268
284
  };
269
- const initialChunk = sources.splice(0, batchSize);
270
285
  // Add initial batch of `batchSize` to process them right away
286
+ const initialChunk = await chunksIterator.peek();
287
+ if (initialChunk === undefined) {
288
+ return { addedRequests: [], waitForAllRequestsToBeAdded: Promise.resolve([]) };
289
+ }
271
290
  const addedRequests = await attemptToAddToQueueAndAddAnyUnprocessed(initialChunk);
272
- // If we have no more requests to add, return early
273
- if (!sources.length) {
291
+ await chunksIterator.next();
292
+ // If we have no more requests to add, return immediately
293
+ if ((await chunksIterator.peek()) === undefined) {
274
294
  return {
275
295
  addedRequests,
276
296
  waitForAllRequestsToBeAdded: Promise.resolve([]),
@@ -278,9 +298,8 @@ export class RequestProvider {
278
298
  }
279
299
  // eslint-disable-next-line no-async-promise-executor
280
300
  const promise = new Promise(async (resolve) => {
281
- const chunks = chunk(sources, batchSize);
282
301
  const finalAddedRequests = [];
283
- for (const requestChunk of chunks) {
302
+ for await (const requestChunk of chunks) {
284
303
  finalAddedRequests.push(...(await attemptToAddToQueueAndAddAnyUnprocessed(requestChunk, false)));
285
304
  await sleep(waitBetweenBatchesMillis);
286
305
  }
@@ -425,9 +444,19 @@ export class RequestProvider {
425
444
  */
426
445
  async drop() {
427
446
  checkStorageAccess();
428
- await this.client.delete();
429
- const manager = StorageManager.getManager(this.constructor, this.config);
430
- manager.closeStorage(this);
447
+ await this.client.drop();
448
+ serviceLocator.getStorageInstanceManager().removeFromCache(this);
449
+ }
450
+ /**
451
+ * @inheritdoc
452
+ */
453
+ async *[Symbol.asyncIterator]() {
454
+ while (true) {
455
+ const req = await this.fetchNextRequest();
456
+ if (!req)
457
+ break;
458
+ yield req;
459
+ }
431
460
  }
432
461
  /**
433
462
  * Returns the number of handled requests.
@@ -437,27 +466,21 @@ export class RequestProvider {
437
466
  * ```javascript
438
467
  * const { handledRequestCount } = await queue.getInfo();
439
468
  * ```
469
+ * @inheritdoc
440
470
  */
441
471
  async handledCount() {
442
472
  // NOTE: We keep this function for compatibility with RequestList.handledCount()
443
- const { handledRequestCount } = (await this.getInfo()) ?? {};
444
- return handledRequestCount ?? 0;
473
+ const { handledRequestCount } = await this.getInfo();
474
+ return handledRequestCount;
445
475
  }
446
476
  /**
447
477
  * Returns an object containing general information about the request queue.
448
478
  *
449
- * The function returns the same object as the Apify API Client's
450
- * [getQueue](https://docs.apify.com/api/apify-client-js/latest#ApifyClient-requestQueues)
451
- * function, which in turn calls the
452
- * [Get request queue](https://apify.com/docs/api/v2#/reference/request-queues/queue/get-request-queue)
453
- * API endpoint.
454
- *
455
479
  * **Example:**
456
480
  * ```
457
481
  * {
458
482
  * id: "WkzbQMuFYuamGv3YF",
459
483
  * name: "my-queue",
460
- * userId: "wRsJZtadYvn4mBZmm",
461
484
  * createdAt: new Date("2015-12-12T07:34:14.202Z"),
462
485
  * modifiedAt: new Date("2015-12-13T08:36:13.202Z"),
463
486
  * accessedAt: new Date("2015-12-14T08:36:13.202Z"),
@@ -466,10 +489,12 @@ export class RequestProvider {
466
489
  * pendingRequestCount: 20,
467
490
  * }
468
491
  * ```
492
+ *
493
+ * @throws If the underlying storage no longer exists (e.g. it was deleted externally).
469
494
  */
470
495
  async getInfo() {
471
496
  checkStorageAccess();
472
- return this.client.get();
497
+ return this.client.getMetadata();
473
498
  }
474
499
  /**
475
500
  * Fetches URLs from requestsFromUrl and returns them in format of list of requests
@@ -515,7 +540,10 @@ export class RequestProvider {
515
540
  * @internal wraps public utility for mocking purposes
516
541
  */
517
542
  async _downloadListOfUrls(options) {
518
- return downloadListOfUrls(options);
543
+ return downloadListOfUrls({
544
+ ...options,
545
+ httpClient: this.httpClient,
546
+ });
519
547
  }
520
548
  /**
521
549
  * Opens a request queue and returns a promise resolving to an instance
@@ -528,27 +556,49 @@ export class RequestProvider {
528
556
  *
529
557
  * For more details and code examples, see the {@link RequestQueue} class.
530
558
  *
531
- * @param [queueIdOrName]
532
- * ID or name of the request queue to be opened. If `null` or `undefined`,
533
- * the function returns the default request queue associated with the crawler run.
559
+ * @param [identifier]
560
+ * ID or name of the request queue to be opened. If a string is provided, it will first be
561
+ * looked up as an ID; if no such storage exists, it will be treated as a name.
562
+ * If `null` or `undefined`, the function returns the default request queue associated with the crawler run.
534
563
  * @param [options] Open Request Queue options.
535
564
  */
536
- static async open(queueIdOrName, options = {}) {
565
+ static async open(identifier, options = {}) {
537
566
  checkStorageAccess();
538
- ow(queueIdOrName, ow.optional.any(ow.string, ow.null));
539
567
  ow(options, ow.object.exactShape({
540
568
  config: ow.optional.object.instanceOf(Configuration),
541
569
  storageClient: ow.optional.object,
542
570
  proxyConfiguration: ow.optional.object,
571
+ httpClient: ow.optional.object,
543
572
  }));
544
- options.config ??= Configuration.getGlobalConfig();
545
- options.storageClient ??= options.config.getStorageClient();
546
- await purgeDefaultStorages({ onlyPurgeOnce: true, client: options.storageClient, config: options.config });
547
- const manager = StorageManager.getManager(this, options.config);
548
- const queue = await manager.openStorage(queueIdOrName, options.storageClient);
573
+ const client = options.storageClient ?? serviceLocator.getStorageClient();
574
+ const config = options.config ?? serviceLocator.getConfiguration();
575
+ await purgeDefaultStorages({ onlyPurgeOnce: true, client, config });
576
+ const resolved = await resolveStorageIdentifier(identifier, client, 'RequestQueue');
577
+ const queue = await serviceLocator
578
+ .getStorageInstanceManager()
579
+ .openStorage(this, {
580
+ ...resolved,
581
+ clientOpener: () => client.createRequestQueueClient(resolved),
582
+ clientCacheKey: client.getStorageClientCacheKey?.() ?? client.constructor.name,
583
+ });
549
584
  queue.proxyConfiguration = options.proxyConfiguration;
550
- // eslint-disable-next-line dot-notation
551
- queue['initialCount'] = (await queue.client.get())?.totalRequestCount ?? 0;
585
+ queue.httpClient = options.httpClient;
586
+ if (!queue.isInitialized) {
587
+ // Re-create the request queue client with clientKey and timeoutSecs so that
588
+ // request locking works correctly for API-backed implementations.
589
+ // TODO: clientKey/timeoutSecs are Apify-platform concerns and should eventually be pushed
590
+ // down into the Apify SDK's client implementation, aligning with crawlee-python's approach
591
+ // where locking is handled internally by the client (see crawlee-python PR #1194).
592
+ queue.client = await client.createRequestQueueClient({
593
+ id: queue.id,
594
+ clientKey: queue.clientKey,
595
+ timeoutSecs: queue.timeoutSecs,
596
+ });
597
+ const queueInfo = await queue.client.getMetadata();
598
+ queue.initialCount = queueInfo.totalRequestCount;
599
+ queue.initialHandledCount = queueInfo.handledRequestCount;
600
+ queue.isInitialized = true;
601
+ }
552
602
  return queue;
553
603
  }
554
604
  }