@crawlee/core 4.0.0-beta.6 → 4.0.0-beta.61
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -5
- package/autoscaling/autoscaled_pool.d.ts +3 -5
- package/autoscaling/autoscaled_pool.d.ts.map +1 -1
- package/autoscaling/autoscaled_pool.js +3 -9
- package/autoscaling/autoscaled_pool.js.map +1 -1
- package/autoscaling/snapshotter.d.ts +3 -13
- package/autoscaling/snapshotter.d.ts.map +1 -1
- package/autoscaling/snapshotter.js +18 -29
- package/autoscaling/snapshotter.js.map +1 -1
- package/autoscaling/system_status.d.ts +0 -3
- package/autoscaling/system_status.d.ts.map +1 -1
- package/autoscaling/system_status.js +2 -3
- package/autoscaling/system_status.js.map +1 -1
- package/configuration.d.ts +85 -227
- package/configuration.d.ts.map +1 -1
- package/configuration.js +159 -223
- package/configuration.js.map +1 -1
- package/cookie_utils.d.ts +4 -2
- package/cookie_utils.d.ts.map +1 -1
- package/cookie_utils.js +18 -12
- package/cookie_utils.js.map +1 -1
- package/crawlers/context_pipeline.d.ts +71 -0
- package/crawlers/context_pipeline.d.ts.map +1 -0
- package/crawlers/context_pipeline.js +123 -0
- package/crawlers/context_pipeline.js.map +1 -0
- package/crawlers/crawler_commons.d.ts +19 -28
- package/crawlers/crawler_commons.d.ts.map +1 -1
- package/crawlers/crawler_commons.js +12 -20
- package/crawlers/crawler_commons.js.map +1 -1
- package/crawlers/crawler_utils.d.ts +2 -2
- package/crawlers/crawler_utils.d.ts.map +1 -1
- package/crawlers/crawler_utils.js +1 -1
- package/crawlers/crawler_utils.js.map +1 -1
- package/crawlers/error_snapshotter.d.ts +3 -2
- package/crawlers/error_snapshotter.d.ts.map +1 -1
- package/crawlers/error_snapshotter.js +2 -2
- package/crawlers/error_snapshotter.js.map +1 -1
- package/crawlers/error_tracker.d.ts +2 -1
- package/crawlers/error_tracker.d.ts.map +1 -1
- package/crawlers/error_tracker.js.map +1 -1
- package/crawlers/index.d.ts +1 -1
- package/crawlers/index.d.ts.map +1 -1
- package/crawlers/index.js +1 -1
- package/crawlers/index.js.map +1 -1
- package/crawlers/internals/types.d.ts +8 -0
- package/crawlers/internals/types.d.ts.map +1 -0
- package/crawlers/internals/types.js +2 -0
- package/crawlers/internals/types.js.map +1 -0
- package/crawlers/statistics.d.ts +15 -15
- package/crawlers/statistics.d.ts.map +1 -1
- package/crawlers/statistics.js +21 -24
- package/crawlers/statistics.js.map +1 -1
- package/enqueue_links/enqueue_links.d.ts +32 -18
- package/enqueue_links/enqueue_links.d.ts.map +1 -1
- package/enqueue_links/enqueue_links.js +45 -24
- package/enqueue_links/enqueue_links.js.map +1 -1
- package/enqueue_links/shared.d.ts +25 -8
- package/enqueue_links/shared.d.ts.map +1 -1
- package/enqueue_links/shared.js +69 -37
- package/enqueue_links/shared.js.map +1 -1
- package/errors.d.ts +33 -3
- package/errors.d.ts.map +1 -1
- package/errors.js +48 -4
- package/errors.js.map +1 -1
- package/events/event_manager.d.ts +8 -5
- package/events/event_manager.d.ts.map +1 -1
- package/events/event_manager.js +7 -9
- package/events/event_manager.js.map +1 -1
- package/events/local_event_manager.d.ts +14 -4
- package/events/local_event_manager.d.ts.map +1 -1
- package/events/local_event_manager.js +33 -39
- package/events/local_event_manager.js.map +1 -1
- package/index.d.ts +3 -2
- package/index.d.ts.map +1 -1
- package/index.js +2 -1
- package/index.js.map +1 -1
- package/log.d.ts +82 -2
- package/log.d.ts.map +1 -1
- package/log.js +102 -0
- package/log.js.map +1 -1
- package/package.json +9 -10
- package/proxy_configuration.d.ts +14 -148
- package/proxy_configuration.d.ts.map +1 -1
- package/proxy_configuration.js +19 -167
- package/proxy_configuration.js.map +1 -1
- package/recoverable_state.d.ts +121 -0
- package/recoverable_state.d.ts.map +1 -0
- package/recoverable_state.js +142 -0
- package/recoverable_state.js.map +1 -0
- package/request.d.ts +74 -10
- package/request.d.ts.map +1 -1
- package/request.js +85 -23
- package/request.js.map +1 -1
- package/router.d.ts.map +1 -1
- package/router.js.map +1 -1
- package/serialization.js +1 -1
- package/serialization.js.map +1 -1
- package/service_locator.d.ts +157 -0
- package/service_locator.d.ts.map +1 -0
- package/service_locator.js +234 -0
- package/service_locator.js.map +1 -0
- package/session_pool/index.d.ts +0 -1
- package/session_pool/index.d.ts.map +1 -1
- package/session_pool/index.js +0 -1
- package/session_pool/index.js.map +1 -1
- package/session_pool/session.d.ts +26 -72
- package/session_pool/session.d.ts.map +1 -1
- package/session_pool/session.js +36 -98
- package/session_pool/session.js.map +1 -1
- package/session_pool/session_pool.d.ts +65 -71
- package/session_pool/session_pool.d.ts.map +1 -1
- package/session_pool/session_pool.js +101 -100
- package/session_pool/session_pool.js.map +1 -1
- package/storages/dataset.d.ts +90 -46
- package/storages/dataset.d.ts.map +1 -1
- package/storages/dataset.js +149 -121
- package/storages/dataset.js.map +1 -1
- package/storages/index.d.ts +3 -1
- package/storages/index.d.ts.map +1 -1
- package/storages/index.js +3 -1
- package/storages/index.js.map +1 -1
- package/storages/key_value_store.d.ts +104 -22
- package/storages/key_value_store.d.ts.map +1 -1
- package/storages/key_value_store.js +166 -51
- package/storages/key_value_store.js.map +1 -1
- package/storages/request_list.d.ts +9 -9
- package/storages/request_list.d.ts.map +1 -1
- package/storages/request_list.js +13 -8
- package/storages/request_list.js.map +1 -1
- package/storages/request_list_adapter.d.ts +58 -0
- package/storages/request_list_adapter.d.ts.map +1 -0
- package/storages/request_list_adapter.js +81 -0
- package/storages/request_list_adapter.js.map +1 -0
- package/storages/request_manager_tandem.d.ts +68 -0
- package/storages/request_manager_tandem.d.ts.map +1 -0
- package/storages/request_manager_tandem.js +124 -0
- package/storages/request_manager_tandem.js.map +1 -0
- package/storages/request_provider.d.ts +87 -22
- package/storages/request_provider.d.ts.map +1 -1
- package/storages/request_provider.js +127 -77
- package/storages/request_provider.js.map +1 -1
- package/storages/request_queue.d.ts +1 -3
- package/storages/request_queue.d.ts.map +1 -1
- package/storages/request_queue.js +2 -4
- package/storages/request_queue.js.map +1 -1
- package/storages/request_queue_v2.d.ts +3 -3
- package/storages/request_queue_v2.d.ts.map +1 -1
- package/storages/request_queue_v2.js +4 -5
- package/storages/request_queue_v2.js.map +1 -1
- package/storages/sitemap_request_list.d.ts +5 -5
- package/storages/sitemap_request_list.d.ts.map +1 -1
- package/storages/sitemap_request_list.js +10 -7
- package/storages/sitemap_request_list.js.map +1 -1
- package/storages/storage_instance_manager.d.ts +91 -0
- package/storages/storage_instance_manager.d.ts.map +1 -0
- package/storages/storage_instance_manager.js +236 -0
- package/storages/storage_instance_manager.js.map +1 -0
- package/storages/utils.d.ts +47 -1
- package/storages/utils.d.ts.map +1 -1
- package/storages/utils.js +57 -5
- package/storages/utils.js.map +1 -1
- package/typedefs.d.ts +1 -1
- package/typedefs.d.ts.map +1 -1
- package/validators.d.ts +4 -0
- package/validators.d.ts.map +1 -1
- package/validators.js +4 -0
- package/validators.js.map +1 -1
- package/crawlers/crawler_extension.d.ts +0 -12
- package/crawlers/crawler_extension.d.ts.map +0 -1
- package/crawlers/crawler_extension.js +0 -14
- package/crawlers/crawler_extension.js.map +0 -1
- package/http_clients/base-http-client.d.ts +0 -134
- package/http_clients/base-http-client.d.ts.map +0 -1
- package/http_clients/base-http-client.js +0 -33
- package/http_clients/base-http-client.js.map +0 -1
- package/http_clients/form-data-like.d.ts +0 -67
- package/http_clients/form-data-like.d.ts.map +0 -1
- package/http_clients/form-data-like.js +0 -5
- package/http_clients/form-data-like.js.map +0 -1
- package/http_clients/got-scraping-http-client.d.ts +0 -15
- package/http_clients/got-scraping-http-client.d.ts.map +0 -1
- package/http_clients/got-scraping-http-client.js +0 -69
- package/http_clients/got-scraping-http-client.js.map +0 -1
- package/http_clients/index.d.ts +0 -3
- package/http_clients/index.d.ts.map +0 -1
- package/http_clients/index.js +0 -3
- package/http_clients/index.js.map +0 -1
- package/session_pool/events.d.ts +0 -3
- package/session_pool/events.d.ts.map +0 -1
- package/session_pool/events.js +0 -3
- package/session_pool/events.js.map +0 -1
- package/storages/storage_manager.d.ts +0 -58
- package/storages/storage_manager.d.ts.map +0 -1
- package/storages/storage_manager.js +0 -105
- package/storages/storage_manager.js.map +0 -1
- package/tsconfig.build.tsbuildinfo +0 -1
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
import { AsyncQueue } from '@sapphire/async-queue';
|
|
2
|
+
/** Reserved alias for the default (unnamed) storage. */
|
|
3
|
+
const DEFAULT_STORAGE_ALIAS = '__default__';
|
|
4
|
+
/**
 * Three-tier cache for storage instances, modelled after crawlee-python's `_StorageCache`.
 *
 * Each tier is a nested map `storageClass → key → clientCacheKey → instance`:
 * - `byId` — keyed by the backend-assigned storage id
 * - `byName` — keyed by the persistent storage name
 * - `byAlias` — keyed by a run-scoped alias (e.g. `'__default__'` for unnamed storages)
 */
class StorageCache {
    byId = new Map();
    byName = new Map();
    byAlias = new Map();

    /**
     * Look up a cached instance. Tiers are consulted in the order id → name → alias;
     * identifiers that are `undefined` are skipped.
     *
     * @param cls Storage class constructor the instance was cached under.
     * @param lookup `id` / `name` / `alias` to search by, plus the `clientCacheKey` of the backend.
     * @returns The first cached instance found, or `undefined` on a miss.
     * @throws {Error} When an entry is found but is not an instance of `cls`.
     */
    get(cls, { id, name, alias, clientCacheKey }) {
        const lookups = [
            [this.byId, id],
            [this.byName, name],
            [this.byAlias, alias],
        ];
        for (const [tier, key] of lookups) {
            if (key === undefined) {
                continue;
            }
            const cached = tier.get(cls)?.get(key)?.get(clientCacheKey);
            if (!cached) {
                continue;
            }
            if (cached instanceof cls) {
                return cached;
            }
            throw new Error('Cached storage instance type mismatch.');
        }
        return undefined;
    }

    /** Write a single entry into a given tier, creating the nested maps on demand. */
    setInMap(tier, cls, key, instance, clientCacheKey) {
        let classMap = tier.get(cls);
        if (!classMap) {
            classMap = new Map();
            tier.set(cls, classMap);
        }
        let keyMap = classMap.get(key);
        if (!keyMap) {
            keyMap = new Map();
            classMap.set(key, keyMap);
        }
        keyMap.set(clientCacheKey, instance);
    }

    /**
     * Cache an instance under its actual id, its name (if it has one), and an optional alias.
     *
     * @param cls Storage class constructor to file the instance under.
     * @param instance Storage instance; its `id` and `name` properties are read.
     * @param clientCacheKey Key identifying the storage backend.
     * @param alias Optional alias; the alias tier is written only when this is not `undefined`.
     */
    set(cls, instance, clientCacheKey, alias) {
        // Always cache by id.
        this.setInMap(this.byId, cls, instance.id, instance, clientCacheKey);
        // Cache by name — only for named storages.
        if (instance.name) {
            this.setInMap(this.byName, cls, instance.name, instance, clientCacheKey);
        }
        // Cache by alias — only when the caller supplied one.
        if (alias !== undefined) {
            this.setInMap(this.byAlias, cls, alias, instance, clientCacheKey);
        }
    }

    /**
     * Remove every entry that points at `instance` from all three tiers.
     * Entries are looked up under `instance.constructor`. Nested maps that become
     * empty are pruned so dropped storages do not leave empty containers behind.
     */
    removeFromCache(instance) {
        const storageType = instance.constructor;
        for (const tier of [this.byId, this.byName, this.byAlias]) {
            const classMap = tier.get(storageType);
            if (!classMap) {
                continue;
            }
            // Deleting entries from a Map while iterating it is well-defined in JavaScript.
            for (const [key, keyMap] of classMap) {
                for (const [cacheKey, cached] of keyMap) {
                    if (cached === instance) {
                        keyMap.delete(cacheKey);
                    }
                }
                if (keyMap.size === 0) {
                    classMap.delete(key);
                }
            }
            if (classMap.size === 0) {
                tier.delete(storageType);
            }
        }
    }

    /**
     * Iterate all cached instances across all storage types, each yielded once.
     * Only the `byId` tier is walked, because `set()` always writes there.
     */
    *allValues() {
        const seen = new Set();
        for (const classMap of this.byId.values()) {
            for (const keyMap of classMap.values()) {
                for (const instance of keyMap.values()) {
                    if (seen.has(instance)) {
                        continue;
                    }
                    seen.add(instance);
                    yield instance;
                }
            }
        }
    }

    /** Drop every entry from all three tiers. */
    clear() {
        for (const tier of [this.byId, this.byName, this.byAlias]) {
            tier.clear();
        }
    }
}
|
|
93
|
+
/**
 * Single entry point for opening and caching storage instances (Dataset, KeyValueStore, RequestQueue).
 *
 * One manager serves every storage type. Instances are cached under
 * `(storageClass, id/name/alias, clientCacheKey)`, so a repeated open with the same
 * identifier and client returns the already cached instance.
 *
 * Identifier resolution is the caller's job: callers pass an explicit `id`, `name`, or `alias`
 * (at most one) together with a lazy `clientOpener` factory. When none of `id`, `name`, `alias`
 * is provided, the reserved default alias is used.
 *
 * @ignore
 */
export class StorageInstanceManager {
    cache = new StorageCache();
    openerLocks = new Map();

    /**
     * Open (or retrieve from cache) a storage instance.
     *
     * @param cls The storage class constructor (e.g. `Dataset`, `KeyValueStore`, `RequestQueue`).
     * @param id Storage ID (mutually exclusive with `name` and `alias`).
     * @param name Storage name (mutually exclusive with `id` and `alias`).
     * @param alias Run-scoped alias (mutually exclusive with `id` and `name`).
     *   Falls back to the reserved default alias when no identifier is provided.
     * @param clientOpener A **lazy** factory that creates the sub-client; invoked only on a cache miss.
     * @param clientCacheKey Opaque key identifying the storage backend, so that the same logical
     *   storage opened through different clients is cached separately.
     * @returns The cached or newly created storage instance.
     */
    async openStorage(cls, { id, name, alias, clientOpener, clientCacheKey }) {
        // No identifier at all means "the default storage" (mirrors crawlee-python).
        const effectiveAlias = id || name || alias ? alias : DEFAULT_STORAGE_ALIAS;

        // One lookup routine shared by the lock-free fast path and the re-check under the lock.
        const findCached = () => {
            if (effectiveAlias !== undefined) {
                return this.cache.get(cls, { alias: effectiveAlias, clientCacheKey });
            }
            if (id) {
                return this.cache.get(cls, { id, clientCacheKey });
            }
            if (name) {
                return this.cache.get(cls, { name, clientCacheKey });
            }
            return undefined;
        };

        // Fast path: serve from cache without taking the lock.
        const immediateHit = findCached();
        if (immediateHit) {
            return immediateHit;
        }

        const identifierKey = id ?? name ?? effectiveAlias ?? DEFAULT_STORAGE_ALIAS;
        const lockKey = `${cls.name}:${identifierKey}:${clientCacheKey}`;

        let queue = this.openerLocks.get(lockKey);
        if (queue === undefined) {
            queue = new AsyncQueue();
            this.openerLocks.set(lockKey, queue);
        }

        await queue.wait();
        try {
            // Another caller may have populated the cache while we were waiting for the lock.
            const hitAfterWait = findCached();
            if (hitAfterWait) {
                return hitAfterWait;
            }

            // Still a miss — create the sub-client and wrap it in a storage instance.
            const subClient = await clientOpener();
            const storageInfo = await subClient.getMetadata();
            const created = new cls({
                id: storageInfo.id,
                name: storageInfo.name,
                client: subClient,
            });

            // All cache tiers are written synchronously (no awaits in between).
            this.cache.set(cls, created, clientCacheKey, effectiveAlias);
            return created;
        } finally {
            queue.shift();
            // Drop the lock once nobody is queued on it, so the map doesn't grow unboundedly
            // (mirrors crawlee-python's WeakValueDictionary behaviour).
            if (queue.remaining === 0) {
                this.openerLocks.delete(lockKey);
            }
        }
    }

    /**
     * Remove a storage instance from the cache (called from `storage.drop()`).
     */
    removeFromCache(instance) {
        this.cache.removeFromCache(instance);
    }

    /**
     * Clear the entire cache. Before that, `clearCache()` is invoked on every cached
     * instance that exposes such a method (duck-typed, so KeyValueStore does not have to
     * be imported here and no circular dependency is created).
     * Called during service locator reset.
     */
    clearCache() {
        for (const instance of this.cache.allValues()) {
            const hasOwnClearer = 'clearCache' in instance && typeof instance.clearCache === 'function';
            if (!hasOwnClearer) {
                continue;
            }
            instance.clearCache();
        }
        this.cache.clear();
    }
}
|
|
207
|
+
/**
 * Split the user-facing `identifier` argument of `Dataset.open()` / `KeyValueStore.open()` /
 * `RequestQueue.open()` into the separate `id`, `name`, or `alias` field that
 * `StorageInstanceManager` and `StorageClient.create*Client` expect.
 *
 * - `null` / `undefined` / `{}` → `{ alias }` with the default storage alias
 * - `string` → `{ id }` when `client.storageExists` is available and resolves truthy for it,
 *   otherwise `{ name }`
 * - `{ id }` → `{ id }` (takes precedence over `name`)
 * - `{ name }` → `{ name }`
 *
 * @param identifier String, `{ id }` / `{ name }` object, or `null` / `undefined`.
 * @param client Storage client; its optional `storageExists(identifier, storageType)` is consulted for strings.
 * @param storageType Passed through to `client.storageExists`.
 * @returns An object carrying exactly one of `id`, `name`, or `alias`.
 */
export async function resolveStorageIdentifier(identifier, client, storageType) {
    const useDefault = () => ({ alias: DEFAULT_STORAGE_ALIAS });

    if (identifier === undefined || identifier === null) {
        return useDefault();
    }

    if (typeof identifier === 'string') {
        // ID-first: only fall back to treating the string as a name when the client
        // cannot confirm a storage with that ID.
        const matchesExistingId = client.storageExists && (await client.storageExists(identifier, storageType));
        return matchesExistingId ? { id: identifier } : { name: identifier };
    }

    // Object form: the first truthy field wins, `id` before `name`.
    for (const field of ['id', 'name']) {
        const value = identifier[field];
        if (value) {
            return { [field]: value };
        }
    }

    // Empty object — treated as default storage.
    return useDefault();
}
|
|
236
|
+
//# sourceMappingURL=storage_instance_manager.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"storage_instance_manager.js","sourceRoot":"","sources":["../../src/storages/storage_instance_manager.ts"],"names":[],"mappings":"AAOA,OAAO,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AAgBnD,wDAAwD;AACxD,MAAM,qBAAqB,GAAG,aAAa,CAAC;AAI5C;;;;;;;GAOG;AACH,MAAM,YAAY;IACL,IAAI,GAAc,IAAI,GAAG,EAAE,CAAC;IAC5B,MAAM,GAAc,IAAI,GAAG,EAAE,CAAC;IAC9B,OAAO,GAAc,IAAI,GAAG,EAAE,CAAC;IAExC,GAAG,CACC,GAAmB,EACnB,EACI,EAAE,EACF,IAAI,EACJ,KAAK,EACL,cAAc,GAKc;QAEhC,KAAK,MAAM,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI;YACtB,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;YACf,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC;YACnB,CAAC,IAAI,CAAC,OAAO,EAAE,KAAK,CAAC;SACa,EAAE,CAAC;YACrC,IAAI,GAAG,KAAK,SAAS;gBAAE,SAAS;YAChC,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,EAAE,GAAG,CAAC,cAAc,CAAC,CAAC;YAC5D,IAAI,MAAM,EAAE,CAAC;gBACT,IAAI,MAAM,YAAa,GAAuD,EAAE,CAAC;oBAC7E,OAAO,MAAW,CAAC;gBACvB,CAAC;gBACD,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAC;YAC9D,CAAC;QACL,CAAC;QAED,OAAO,SAAS,CAAC;IACrB,CAAC;IAED,8CAA8C;IACtC,QAAQ,CACZ,IAAe,EACf,GAAmB,EACnB,GAAW,EACX,QAAW,EACX,cAAwB;QAExB,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,GAAG,EAAE,CAAC,CAAC;QAC7C,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAE,CAAC;QAC9B,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,GAAG,EAAE,CAAC,CAAC;QACjD,MAAM,CAAC,GAAG,CAAC,GAAG,CAAE,CAAC,GAAG,CAAC,cAAc,EAAE,QAAQ,CAAC,CAAC;IACnD,CAAC;IAED;;OAEG;IACH,GAAG,CAAqB,GAAmB,EAAE,QAAW,EAAE,cAAwB,EAAE,KAAc;QAC9F,sBAAsB;QACtB,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,GAAG,EAAE,QAAQ,CAAC,EAAE,EAAE,QAAQ,EAAE,cAAc,CAAC,CAAC;QAErE,2CAA2C;QAC3C,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC;YAChB,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,EAAE,QAAQ,CAAC,IAAI,EAAE,QAAQ,EAAE,cAAc,CAAC,CAAC;QAC7E,CAAC;QAED,+DAA+D;QAC/D,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YACtB,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,EAAE,GAAG,EAAE,KAAK,EAAE,QAAQ,EAAE,cAAc,CAAC,CAAC;QACtE,CAAC;IACL,CAAC;IAED,eAAe,CAAC,QAAkB;QAC9B,MAAM,WAAW,GAAG,QAAQ,CAAC,WAAoC,CAAC;QAElE,KAAK,MAAM,IAAI,IAAI,CAAC,IAAI,CA
AC,IAAI,EAAE,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YACxD,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;YACvC,IAAI,CAAC,QAAQ;gBAAE,SAAS;YAExB,KAAK,MAAM,MAAM,IAAI,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC;gBACrC,KAAK,MAAM,CAAC,QAAQ,EAAE,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;oBACtC,IAAI,MAAM,KAAK,QAAQ,EAAE,CAAC;wBACtB,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;oBAC5B,CAAC;gBACL,CAAC;YACL,CAAC;QACL,CAAC;IACL,CAAC;IAED,6DAA6D;IAC7D,CAAC,SAAS;QACN,MAAM,IAAI,GAAG,IAAI,GAAG,EAAY,CAAC;QACjC,KAAK,MAAM,QAAQ,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC;YACxC,KAAK,MAAM,MAAM,IAAI,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC;gBACrC,KAAK,MAAM,QAAQ,IAAI,MAAM,CAAC,MAAM,EAAE,EAAE,CAAC;oBACrC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;wBACtB,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;wBACnB,MAAM,QAAQ,CAAC;oBACnB,CAAC;gBACL,CAAC;YACL,CAAC;QACL,CAAC;IACL,CAAC;IAED,KAAK;QACD,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;QAClB,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;QACpB,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;IACzB,CAAC;CACJ;AAED;;;;;;;;;;;GAWG;AACH,MAAM,OAAO,sBAAsB;IACd,KAAK,GAAG,IAAI,YAAY,EAAE,CAAC;IAC3B,WAAW,GAAG,IAAI,GAAG,EAAsB,CAAC;IAE7D;;;;;;;;;;;;OAYG;IACH,KAAK,CAAC,WAAW,CACb,GAA0B,EAC1B,EACI,EAAE,EACF,IAAI,EACJ,KAAK,EACL,YAAY,EACZ,cAAc,GAIjB;QAED,0FAA0F;QAC1F,IAAI,CAAC,EAAE,IAAI,CAAC,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACzB,KAAK,GAAG,qBAAqB,CAAC;QAClC,CAAC;QAED,mCAAmC;QACnC,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YACtB,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,KAAK,EAAE,cAAc,EAAE,CAAC,CAAC;YAC9D,IAAI,MAAM;gBAAE,OAAO,MAAM,CAAC;QAC9B,CAAC;aAAM,IAAI,EAAE,EAAE,CAAC;YACZ,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,EAAE,EAAE,cAAc,EAAE,CAAC,CAAC;YAC3D,IAAI,MAAM;gBAAE,OAAO,MAAM,CAAC;QAC9B,CAAC;aAAM,IAAI,IAAI,EAAE,CAAC;YACd,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,IAAI,EAAE,cAAc,EAAE,CAAC,CAAC;YAC7D,IAAI,MAAM;gBAAE,OAAO,MAAM,CAAC;QAC9B,CAAC;QAED,MAAM,aAAa,GAAG,EAAE,IAAI,IAAI,IAAI,KAAK,IAAI,qBAAqB,CAAC;QACnE,MAAM,OAAO,GAAG,GAAG,GAAG,CAAC,IAAI,IAAI,aAAa,IAAI,cAAc,EAAE,CAAC;QAEjE,IAAI,CAAC,IAAI,CAAC,WA
AW,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;YACjC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,OAAO,EAAE,IAAI,UAAU,EAAE,CAAC,CAAC;QACpD,CAAC;QACD,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,OAAO,CAAE,CAAC;QAE7C,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;QACnB,IAAI,CAAC;YACD,qFAAqF;YACrF,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;gBACtB,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,KAAK,EAAE,cAAc,EAAE,CAAC,CAAC;gBAC9D,IAAI,MAAM;oBAAE,OAAO,MAAM,CAAC;YAC9B,CAAC;iBAAM,IAAI,EAAE,EAAE,CAAC;gBACZ,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,EAAE,EAAE,cAAc,EAAE,CAAC,CAAC;gBAC3D,IAAI,MAAM;oBAAE,OAAO,MAAM,CAAC;YAC9B,CAAC;iBAAM,IAAI,IAAI,EAAE,CAAC;gBACd,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,IAAI,EAAE,cAAc,EAAE,CAAC,CAAC;gBAC7D,IAAI,MAAM;oBAAE,OAAO,MAAM,CAAC;YAC9B,CAAC;YAED,2DAA2D;YAC3D,MAAM,SAAS,GAAG,MAAM,YAAY,EAAE,CAAC;YACvC,MAAM,WAAW,GAAG,MAChB,SACH,CAAC,WAAW,EAAE,CAAC;YAEhB,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC;gBACrB,EAAE,EAAE,WAAW,CAAC,EAAE;gBAClB,IAAI,EAAE,WAAW,CAAC,IAAI;gBACtB,MAAM,EAAE,SAAS;aACpB,CAAa,CAAC;YAEf,iDAAiD;YACjD,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,QAAQ,EAAE,cAAc,EAAE,KAAK,CAAC,CAAC;YAErD,OAAO,QAAQ,CAAC;QACpB,CAAC;gBAAS,CAAC;YACP,KAAK,CAAC,KAAK,EAAE,CAAC;YAEd,0DAA0D;YAC1D,4DAA4D;YAC5D,IAAI,KAAK,CAAC,SAAS,KAAK,CAAC,EAAE,CAAC;gBACxB,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YACrC,CAAC;QACL,CAAC;IACL,CAAC;IAED;;OAEG;IACH,eAAe,CAAC,QAAkB;QAC9B,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,QAAQ,CAAC,CAAC;IACzC,CAAC;IAED;;;;OAIG;IACH,UAAU;QACN,KAAK,MAAM,QAAQ,IAAI,IAAI,CAAC,KAAK,CAAC,SAAS,EAAE,EAAE,CAAC;YAC5C,IAAI,YAAY,IAAI,QAAQ,IAAI,OAAQ,QAAgB,CAAC,UAAU,KAAK,UAAU,EAAE,CAAC;gBAChF,QAAgB,CAAC,UAAU,EAAE,CAAC;YACnC,CAAC;QACL,CAAC;QAED,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;IACvB,CAAC;CACJ;AAoBD;;;;;;;;;GASG;AACH,MAAM,CAAC,KAAK,UAAU,wBAAwB,CAC1C,UAAyD,EACzD,MAAqB,EACrB,WAAyD;IAEzD,IAAI,UAAU,KAAK,IAAI,IAAI,UAAU,KAAK,SAAS,EAAE,CAAC;QAClD,OAAO,EAAE,KAAK,EAAE,qBAAqB,EAAE,CAAC;IAC5C,CAAC;IAED,IAAI,OAAO,UAAU,KAAK,QAAQ,EAAE,CAAC;QACjC,IAAI,MAAM,CAAC,aAAa,IAAI,CAAC,MAAM,MAAM,CAAC,aAAa,CAAC,UA
AU,EAAE,WAAW,CAAC,CAAC,EAAE,CAAC;YAChF,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,CAAC;QAC9B,CAAC;QACD,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC;IAChC,CAAC;IAED,IAAI,UAAU,CAAC,EAAE,EAAE,CAAC;QAChB,OAAO,EAAE,EAAE,EAAE,UAAU,CAAC,EAAE,EAAE,CAAC;IACjC,CAAC;IAED,IAAI,UAAU,CAAC,IAAI,EAAE,CAAC;QAClB,OAAO,EAAE,IAAI,EAAE,UAAU,CAAC,IAAI,EAAE,CAAC;IACrC,CAAC;IAED,6CAA6C;IAC7C,OAAO,EAAE,KAAK,EAAE,qBAAqB,EAAE,CAAC;AAC5C,CAAC"}
|
package/storages/utils.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
import type { Dictionary, StorageClient } from '@crawlee/types';
|
|
1
|
+
import type { BaseHttpClient, Dictionary, StorageClient } from '@crawlee/types';
|
|
2
2
|
import { Configuration } from '../configuration.js';
|
|
3
|
+
import type { ProxyConfiguration } from '../proxy_configuration.js';
|
|
3
4
|
/**
|
|
4
5
|
* Options for purging default storage.
|
|
5
6
|
*/
|
|
@@ -89,5 +90,50 @@ export declare const API_PROCESSED_REQUESTS_DELAY_MILLIS = 10000;
|
|
|
89
90
|
* @internal
|
|
90
91
|
*/
|
|
91
92
|
export declare const MAX_QUERIES_FOR_CONSISTENCY = 6;
|
|
93
|
+
/** @internal */
|
|
94
|
+
export interface DualIterableOptions<TItem, TRawPage> {
|
|
95
|
+
/** Factory that returns an async generator yielding pages. */
|
|
96
|
+
createPages: () => AsyncGenerator<TRawPage>;
|
|
97
|
+
/** Extracts individual items from a page (for iteration). */
|
|
98
|
+
extractItems: (page: TRawPage) => TItem[];
|
|
99
|
+
}
|
|
100
|
+
/**
|
|
101
|
+
* Creates an object that is both an `AsyncIterable<TItem>` (for `for await...of`)
|
|
102
|
+
* and a `Promise<TItem[]>` (for `await`) from a single async page generator.
|
|
103
|
+
*
|
|
104
|
+
* - `await result` drains all pages from a fresh generator and returns every
|
|
105
|
+
* item as a flat array.
|
|
106
|
+
* - `for await (const item of result)` streams all items across all pages,
|
|
107
|
+
* yielding them one by one without buffering everything in memory.
|
|
108
|
+
*
|
|
109
|
+
* Each usage path creates its own generator instance, so `await` and
|
|
110
|
+
* `for await...of` never interfere with each other.
|
|
111
|
+
*
|
|
112
|
+
* @internal
|
|
113
|
+
*/
|
|
114
|
+
export declare function createDualIterable<TItem, TRawPage>(options: DualIterableOptions<TItem, TRawPage>): AsyncIterable<TItem> & Promise<TItem[]>;
|
|
115
|
+
/**
|
|
116
|
+
* Options for the static `open()` method on storage classes ({@link Dataset}, {@link KeyValueStore}, {@link RequestQueue}).
|
|
117
|
+
*/
|
|
118
|
+
export interface StorageOpenOptions {
|
|
119
|
+
/**
|
|
120
|
+
* SDK configuration instance, defaults to the static register.
|
|
121
|
+
*/
|
|
122
|
+
config?: Configuration;
|
|
123
|
+
/**
|
|
124
|
+
* Optional storage client that should be used to open storages.
|
|
125
|
+
*/
|
|
126
|
+
storageClient?: StorageClient;
|
|
127
|
+
/**
|
|
128
|
+
* Used to pass the proxy configuration for the `requestsFromUrl` objects.
|
|
129
|
+
* Takes advantage of the internal address rotation and authentication process.
|
|
130
|
+
* If undefined, the `requestsFromUrl` requests will be made without proxy.
|
|
131
|
+
*/
|
|
132
|
+
proxyConfiguration?: ProxyConfiguration;
|
|
133
|
+
/**
|
|
134
|
+
* HTTP client to be used to download the list of URLs in `RequestQueue`.
|
|
135
|
+
*/
|
|
136
|
+
httpClient?: BaseHttpClient;
|
|
137
|
+
}
|
|
92
138
|
export {};
|
|
93
139
|
//# sourceMappingURL=utils.d.ts.map
|
package/storages/utils.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../src/storages/utils.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../src/storages/utils.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,cAAc,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAEhF,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACpD,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AAIpE;;GAEG;AACH,UAAU,0BAA0B;IAChC;;OAEG;IACH,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,MAAM,CAAC,EAAE,aAAa,CAAC;IACvB,MAAM,CAAC,EAAE,aAAa,CAAC;CAC1B;AAED;;;;;;;;;;;;GAYG;AACH,wBAAsB,oBAAoB,CAAC,OAAO,CAAC,EAAE,0BAA0B,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;AAChG;;;;;;;;;;GAUG;AACH,wBAAsB,oBAAoB,CAAC,MAAM,CAAC,EAAE,aAAa,EAAE,MAAM,CAAC,EAAE,aAAa,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;AAwB1G,MAAM,WAAW,eAAe;IAC5B,MAAM,CAAC,EAAE,aAAa,CAAC;IACvB;;;OAGG;IACH,iBAAiB,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACrC;AAED;;;;;;;;GAQG;AACH,wBAAsB,QAAQ,CAAC,KAAK,SAAS,UAAU,GAAG,UAAU,EAChE,IAAI,CAAC,EAAE,MAAM,EACb,YAAY,GAAS,KAAK,EAC1B,OAAO,CAAC,EAAE,eAAe,kBAM5B;AAED;;;;;;;;GAQG;AACH,wBAAgB,YAAY,CAAC,SAAS,EAAE,MAAM,UAI7C;AAED;;;GAGG;AACH,eAAO,MAAM,qBAAqB,MAAM,CAAC;AAEzC;;;;GAIG;AACH,eAAO,MAAM,gCAAgC,OAAO,CAAC;AAErD,gBAAgB;AAChB,eAAO,MAAM,iBAAiB,IAAI,CAAC;AAEnC;;;;GAIG;AACH,eAAO,MAAM,mCAAmC,QAAS,CAAC;AAE1D;;;GAGG;AACH,eAAO,MAAM,2BAA2B,IAAI,CAAC;AAE7C,gBAAgB;AAChB,MAAM,WAAW,mBAAmB,CAAC,KAAK,EAAE,QAAQ;IAChD,8DAA8D;IAC9D,WAAW,EAAE,MAAM,cAAc,CAAC,QAAQ,CAAC,CAAC;IAC5C,6DAA6D;IAC7D,YAAY,EAAE,CAAC,IAAI,EAAE,QAAQ,KAAK,KAAK,EAAE,CAAC;CAC7C;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,QAAQ,EAC9C,OAAO,EAAE,mBAAmB,CAAC,KAAK,EAAE,QAAQ,CAAC,GAC9C,aAAa,CAAC,KAAK,CAAC,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC,CA6CzC;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IAC/B;;OAEG;IACH,MAAM,CAAC,EAAE,aAAa,CAAC;IAEvB;;OAEG;IACH,aAAa,CAAC,EAAE,aAAa,CAAC;IAE9B;;;;OAIG;IACH,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;IAExC;;OAEG;IACH,UAAU,CAAC,EAAE,cAAc,CAAC;CAC/B"}
|
package/storages/utils.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import crypto from 'node:crypto';
|
|
2
2
|
import { Configuration } from '../configuration.js';
|
|
3
|
+
import { serviceLocator } from '../service_locator.js';
|
|
3
4
|
import { KeyValueStore } from './key_value_store.js';
|
|
4
5
|
export async function purgeDefaultStorages(configOrOptions, client) {
|
|
5
6
|
const options = configOrOptions instanceof Configuration
|
|
@@ -8,11 +9,11 @@ export async function purgeDefaultStorages(configOrOptions, client) {
|
|
|
8
9
|
config: configOrOptions,
|
|
9
10
|
}
|
|
10
11
|
: (configOrOptions ?? {});
|
|
11
|
-
const { config =
|
|
12
|
-
({ client =
|
|
12
|
+
const { config = serviceLocator.getConfiguration(), onlyPurgeOnce = false } = options;
|
|
13
|
+
({ client = serviceLocator.getStorageClient() } = options);
|
|
13
14
|
const casted = client;
|
|
14
15
|
// if `onlyPurgeOnce` is false, purge every time this function is called; otherwise purge only once, and only when `config.purgeOnStart` is set
|
|
15
|
-
if (!onlyPurgeOnce || (config.
|
|
16
|
+
if (!onlyPurgeOnce || (config.purgeOnStart && !casted.__purged)) {
|
|
16
17
|
casted.__purged = true;
|
|
17
18
|
await casted.purge?.();
|
|
18
19
|
}
|
|
@@ -27,8 +28,8 @@ export async function purgeDefaultStorages(configOrOptions, client) {
|
|
|
27
28
|
* @param options An optional object parameter where a custom `keyValueStoreName` and `config` can be passed in.
|
|
28
29
|
*/
|
|
29
30
|
export async function useState(name, defaultValue = {}, options) {
|
|
30
|
-
const kvStore = await KeyValueStore.open(options?.keyValueStoreName, {
|
|
31
|
-
config: options?.config ||
|
|
31
|
+
const kvStore = await KeyValueStore.open(options?.keyValueStoreName ? { name: options.keyValueStoreName } : null, {
|
|
32
|
+
config: options?.config || serviceLocator.getConfiguration(),
|
|
32
33
|
});
|
|
33
34
|
return kvStore.getAutoSavedValue(name || 'CRAWLEE_GLOBAL_STATE', defaultValue);
|
|
34
35
|
}
|
|
@@ -69,4 +70,55 @@ export const API_PROCESSED_REQUESTS_DELAY_MILLIS = 10_000;
|
|
|
69
70
|
* @internal
|
|
70
71
|
*/
|
|
71
72
|
export const MAX_QUERIES_FOR_CONSISTENCY = 6;
|
|
73
|
+
/**
|
|
74
|
+
* Creates an object that is both an `AsyncIterable<TItem>` (for `for await...of`)
|
|
75
|
+
* and a `Promise<TItem[]>` (for `await`) from a single async page generator.
|
|
76
|
+
*
|
|
77
|
+
* - `await result` drains all pages from a fresh generator and returns every
|
|
78
|
+
* item as a flat array.
|
|
79
|
+
* - `for await (const item of result)` streams all items across all pages,
|
|
80
|
+
* yielding them one by one without buffering everything in memory.
|
|
81
|
+
*
|
|
82
|
+
* Each usage path creates its own generator instance, so `await` and
|
|
83
|
+
* `for await...of` never interfere with each other.
|
|
84
|
+
*
|
|
85
|
+
* @internal
|
|
86
|
+
*/
|
|
87
|
+
export function createDualIterable(options) {
|
|
88
|
+
const { createPages, extractItems } = options;
|
|
89
|
+
let cached = null;
|
|
90
|
+
function getOrCreate() {
|
|
91
|
+
if (!cached) {
|
|
92
|
+
cached = (async () => {
|
|
93
|
+
const items = [];
|
|
94
|
+
for await (const page of createPages()) {
|
|
95
|
+
items.push(...extractItems(page));
|
|
96
|
+
}
|
|
97
|
+
return items;
|
|
98
|
+
})();
|
|
99
|
+
}
|
|
100
|
+
return cached;
|
|
101
|
+
}
|
|
102
|
+
async function* iterateAll() {
|
|
103
|
+
for await (const page of createPages()) {
|
|
104
|
+
yield* extractItems(page);
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
const result = {
|
|
108
|
+
[Symbol.asyncIterator]() {
|
|
109
|
+
return iterateAll();
|
|
110
|
+
},
|
|
111
|
+
then(onfulfilled, onrejected) {
|
|
112
|
+
return getOrCreate().then(onfulfilled, onrejected);
|
|
113
|
+
},
|
|
114
|
+
catch(onrejected) {
|
|
115
|
+
return getOrCreate().catch(onrejected);
|
|
116
|
+
},
|
|
117
|
+
finally(onfinally) {
|
|
118
|
+
return getOrCreate().finally(onfinally);
|
|
119
|
+
},
|
|
120
|
+
[Symbol.toStringTag]: 'DualIterable',
|
|
121
|
+
};
|
|
122
|
+
return result;
|
|
123
|
+
}
|
|
72
124
|
//# sourceMappingURL=utils.js.map
|
package/storages/utils.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"utils.js","sourceRoot":"","sources":["../../src/storages/utils.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,aAAa,CAAC;AAIjC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;
|
|
1
|
+
{"version":3,"file":"utils.js","sourceRoot":"","sources":["../../src/storages/utils.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,aAAa,CAAC;AAIjC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAEpD,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAwCrD,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACtC,eAA4D,EAC5D,MAAsB;IAEtB,MAAM,OAAO,GACT,eAAe,YAAY,aAAa;QACpC,CAAC,CAAC;YACI,MAAM;YACN,MAAM,EAAE,eAAe;SAC1B;QACH,CAAC,CAAC,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC;IAClC,MAAM,EAAE,MAAM,GAAG,cAAc,CAAC,gBAAgB,EAAE,EAAE,aAAa,GAAG,KAAK,EAAE,GAAG,OAAO,CAAC;IACtF,CAAC,EAAE,MAAM,GAAG,cAAc,CAAC,gBAAgB,EAAE,EAAE,GAAG,OAAO,CAAC,CAAC;IAE3D,MAAM,MAAM,GAAG,MAAgD,CAAC;IAEhE,oGAAoG;IACpG,IAAI,CAAC,aAAa,IAAI,CAAC,MAAM,CAAC,YAAY,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC9D,MAAM,CAAC,QAAQ,GAAG,IAAI,CAAC;QACvB,MAAM,MAAM,CAAC,KAAK,EAAE,EAAE,CAAC;IAC3B,CAAC;AACL,CAAC;AAWD;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC1B,IAAa,EACb,eAAe,EAAW,EAC1B,OAAyB;IAEzB,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC,IAAI,CAAC,OAAO,EAAE,iBAAiB,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,iBAAiB,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE;QAC9G,MAAM,EAAE,OAAO,EAAE,MAAM,IAAI,cAAc,CAAC,gBAAgB,EAAE;KAC/D,CAAC,CAAC;IACH,OAAO,OAAO,CAAC,iBAAiB,CAAQ,IAAI,IAAI,sBAAsB,EAAE,YAAY,CAAC,CAAC;AAC1F,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,YAAY,CAAC,SAAiB;IAC1C,MAAM,GAAG,GAAG,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;IAEjG,OAAO,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AAC5B,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAEzC;;;;GAIG;AACH,MAAM,CAAC,MAAM,gCAAgC,GAAG,IAAI,CAAC;AAErD,gBAAgB;AAChB,MAAM,CAAC,MAAM,iBAAiB,GAAG,CAAC,CAAC;AAEnC;;;;GAIG;AACH,MAAM,CAAC,MAAM,mCAAmC,GAAG,MAAM,CAAC;AAE1D;;;GAGG;AACH,MAAM,CAAC,MAAM,2BAA2B,GAAG,CAAC,CAAC;AAU7C;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,kBAAkB,CAC9B,OAA6C;IAE7C,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,GAAG,OAAO,CAAC;IAC9C,IAAI,MAAM,GAA4B,IAAI,CAAC;IAE3C,SAAS,WAAW;QAChB,IAAI,CAAC,MAAM,EAAE,CAAC;YACV,MAAM,GAAG,CAAC,KAAK,IAAI,EAAE;gBACjB,MAAM,KA
AK,GAAY,EAAE,CAAC;gBAC1B,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,WAAW,EAAE,EAAE,CAAC;oBACrC,KAAK,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC;gBACtC,CAAC;gBACD,OAAO,KAAK,CAAC;YACjB,CAAC,CAAC,EAAE,CAAC;QACT,CAAC;QACD,OAAO,MAAM,CAAC;IAClB,CAAC;IAED,KAAK,SAAS,CAAC,CAAC,UAAU;QACtB,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,WAAW,EAAE,EAAE,CAAC;YACrC,KAAK,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;QAC9B,CAAC;IACL,CAAC;IAED,MAAM,MAAM,GAAG;QACX,CAAC,MAAM,CAAC,aAAa,CAAC;YAClB,OAAO,UAAU,EAAE,CAAC;QACxB,CAAC;QACD,IAAI,CACA,WAA2E,EAC3E,UAAuE;YAEvE,OAAO,WAAW,EAAE,CAAC,IAAI,CAAC,WAAW,EAAE,UAAU,CAAC,CAAC;QACvD,CAAC;QACD,KAAK,CACD,UAAqE;YAErE,OAAO,WAAW,EAAE,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;QAC3C,CAAC;QACD,OAAO,CAAC,SAA+B;YACnC,OAAO,WAAW,EAAE,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;QAC5C,CAAC;QACD,CAAC,MAAM,CAAC,WAAW,CAAC,EAAE,cAAc;KACI,CAAC;IAE7C,OAAO,MAAM,CAAC;AAClB,CAAC"}
|
package/typedefs.d.ts
CHANGED
|
@@ -6,5 +6,5 @@ export type Awaitable<T> = T | PromiseLike<T>;
|
|
|
6
6
|
export declare function entries<T extends {}>(obj: T): [keyof T, T[keyof T]][];
|
|
7
7
|
/** @ignore */
|
|
8
8
|
export declare function keys<T extends {}>(obj: T): (keyof T)[];
|
|
9
|
-
export
|
|
9
|
+
export type { AllowedHttpMethods } from '@crawlee/types';
|
|
10
10
|
//# sourceMappingURL=typedefs.d.ts.map
|
package/typedefs.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"typedefs.d.ts","sourceRoot":"","sources":["../src/typedefs.ts"],"names":[],"mappings":"AAAA,cAAc;AACd,MAAM,MAAM,WAAW,CAAC,CAAC,GAAG,OAAO,IAAI,KAAK,GAAG,IAAI,EAAE,GAAG,EAAE,KAAK,CAAC,CAAC;AAEjE,cAAc;AACd,MAAM,MAAM,SAAS,CAAC,CAAC,IAAI,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;AAE9C,cAAc;AACd,wBAAgB,OAAO,CAAC,CAAC,SAAS,EAAE,EAAE,GAAG,EAAE,CAAC,GACV,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CACxD;AAED,cAAc;AACd,wBAAgB,IAAI,CAAC,CAAC,SAAS,EAAE,EAAE,GAAG,EAAE,CAAC,GACV,CAAC,MAAM,CAAC,CAAC,EAAE,CACzC;AAED,
|
|
1
|
+
{"version":3,"file":"typedefs.d.ts","sourceRoot":"","sources":["../src/typedefs.ts"],"names":[],"mappings":"AAAA,cAAc;AACd,MAAM,MAAM,WAAW,CAAC,CAAC,GAAG,OAAO,IAAI,KAAK,GAAG,IAAI,EAAE,GAAG,EAAE,KAAK,CAAC,CAAC;AAEjE,cAAc;AACd,MAAM,MAAM,SAAS,CAAC,CAAC,IAAI,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;AAE9C,cAAc;AACd,wBAAgB,OAAO,CAAC,CAAC,SAAS,EAAE,EAAE,GAAG,EAAE,CAAC,GACV,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CACxD;AAED,cAAc;AACd,wBAAgB,IAAI,CAAC,CAAC,SAAS,EAAE,EAAE,GAAG,EAAE,CAAC,GACV,CAAC,MAAM,CAAC,CAAC,EAAE,CACzC;AAED,YAAY,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC"}
|
package/validators.d.ts
CHANGED
|
@@ -17,5 +17,9 @@ export declare const validators: {
|
|
|
17
17
|
validator: boolean;
|
|
18
18
|
message: (label: string) => string;
|
|
19
19
|
};
|
|
20
|
+
sessionPool: (value: Dictionary) => {
|
|
21
|
+
validator: boolean;
|
|
22
|
+
message: (label: string) => string;
|
|
23
|
+
};
|
|
20
24
|
};
|
|
21
25
|
//# sourceMappingURL=validators.d.ts.map
|
package/validators.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"validators.d.ts","sourceRoot":"","sources":["../src/validators.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAGjD,gBAAgB;AAChB,eAAO,MAAM,UAAU;yBAEE,UAAU;;yBAEV,MAAM;;gCAEC,UAAU;;yBAEjB,MAAM;;yBAEN,UAAU;;yBAEV,MAAM;;0BAEL,UAAU;;yBAEX,MAAM;;
|
|
1
|
+
{"version":3,"file":"validators.d.ts","sourceRoot":"","sources":["../src/validators.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAGjD,gBAAgB;AAChB,eAAO,MAAM,UAAU;yBAEE,UAAU;;yBAEV,MAAM;;gCAEC,UAAU;;yBAEjB,MAAM;;yBAEN,UAAU;;yBAEV,MAAM;;0BAEL,UAAU;;yBAEX,MAAM;;yBAEN,UAAU;;yBAEV,MAAM;;CAG9B,CAAC"}
|
package/validators.js
CHANGED
|
@@ -18,5 +18,9 @@ export const validators = {
|
|
|
18
18
|
validator: ow.isValid(value, ow.object.hasKeys('fetchNextRequest', 'addRequest')),
|
|
19
19
|
message: (label) => `Expected argument '${label}' to be a RequestQueue, got something else.`,
|
|
20
20
|
}),
|
|
21
|
+
sessionPool: (value) => ({
|
|
22
|
+
validator: ow.isValid(value, ow.object.hasKeys('getSession')),
|
|
23
|
+
message: (label) => `Expected argument '${label}' to implement the ISessionPool interface (missing 'getSession'), got something else.`,
|
|
24
|
+
}),
|
|
21
25
|
};
|
|
22
26
|
//# sourceMappingURL=validators.js.map
|
package/validators.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"validators.js","sourceRoot":"","sources":["../src/validators.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,IAAI,CAAC;AAEpB,gBAAgB;AAChB,MAAM,CAAC,MAAM,UAAU,GAAG;IACtB,6DAA6D;IAC7D,WAAW,EAAE,CAAC,KAAiB,EAAE,EAAE,CAAC,CAAC;QACjC,SAAS,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,UAAU,EAAE,GAAG,EAAE,IAAI,CAAC,CAAC;QAC9E,OAAO,EAAE,CAAC,KAAa,EAAE,EAAE,CAAC,sBAAsB,KAAK,+CAA+C;KACzG,CAAC;IACF,kBAAkB,EAAE,CAAC,KAAiB,EAAE,EAAE,CAAC,CAAC;QACxC,SAAS,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,QAAQ,EAAE,cAAc,CAAC,CAAC;QACzE,OAAO,EAAE,CAAC,KAAa,EAAE,EAAE,CAAC,sBAAsB,KAAK,mDAAmD;KAC7G,CAAC;IACF,WAAW,EAAE,CAAC,KAAiB,EAAE,EAAE,CAAC,CAAC;QACjC,SAAS,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,kBAAkB,EAAE,cAAc,CAAC,CAAC;QACnF,OAAO,EAAE,CAAC,KAAa,EAAE,EAAE,CAAC,sBAAsB,KAAK,4CAA4C;KACtG,CAAC;IACF,YAAY,EAAE,CAAC,KAAiB,EAAE,EAAE,CAAC,CAAC;QAClC,SAAS,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,kBAAkB,EAAE,YAAY,CAAC,CAAC;QACjF,OAAO,EAAE,CAAC,KAAa,EAAE,EAAE,CAAC,sBAAsB,KAAK,6CAA6C;KACvG,CAAC;CACL,CAAC"}
|
|
1
|
+
{"version":3,"file":"validators.js","sourceRoot":"","sources":["../src/validators.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,IAAI,CAAC;AAEpB,gBAAgB;AAChB,MAAM,CAAC,MAAM,UAAU,GAAG;IACtB,6DAA6D;IAC7D,WAAW,EAAE,CAAC,KAAiB,EAAE,EAAE,CAAC,CAAC;QACjC,SAAS,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,UAAU,EAAE,GAAG,EAAE,IAAI,CAAC,CAAC;QAC9E,OAAO,EAAE,CAAC,KAAa,EAAE,EAAE,CAAC,sBAAsB,KAAK,+CAA+C;KACzG,CAAC;IACF,kBAAkB,EAAE,CAAC,KAAiB,EAAE,EAAE,CAAC,CAAC;QACxC,SAAS,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,QAAQ,EAAE,cAAc,CAAC,CAAC;QACzE,OAAO,EAAE,CAAC,KAAa,EAAE,EAAE,CAAC,sBAAsB,KAAK,mDAAmD;KAC7G,CAAC;IACF,WAAW,EAAE,CAAC,KAAiB,EAAE,EAAE,CAAC,CAAC;QACjC,SAAS,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,kBAAkB,EAAE,cAAc,CAAC,CAAC;QACnF,OAAO,EAAE,CAAC,KAAa,EAAE,EAAE,CAAC,sBAAsB,KAAK,4CAA4C;KACtG,CAAC;IACF,YAAY,EAAE,CAAC,KAAiB,EAAE,EAAE,CAAC,CAAC;QAClC,SAAS,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,kBAAkB,EAAE,YAAY,CAAC,CAAC;QACjF,OAAO,EAAE,CAAC,KAAa,EAAE,EAAE,CAAC,sBAAsB,KAAK,6CAA6C;KACvG,CAAC;IACF,WAAW,EAAE,CAAC,KAAiB,EAAE,EAAE,CAAC,CAAC;QACjC,SAAS,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;QAC7D,OAAO,EAAE,CAAC,KAAa,EAAE,EAAE,CACvB,sBAAsB,KAAK,uFAAuF;KACzH,CAAC;CACL,CAAC"}
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
import { type Log } from '../log.js';
|
|
2
|
-
/**
|
|
3
|
-
* Abstract class with pre-defined method to connect to the Crawlers class by the "use" crawler method.
|
|
4
|
-
* @category Crawlers
|
|
5
|
-
* @ignore
|
|
6
|
-
*/
|
|
7
|
-
export declare abstract class CrawlerExtension {
|
|
8
|
-
name: string;
|
|
9
|
-
log: Log;
|
|
10
|
-
getCrawlerOptions(): Record<string, unknown>;
|
|
11
|
-
}
|
|
12
|
-
//# sourceMappingURL=crawler_extension.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"crawler_extension.d.ts","sourceRoot":"","sources":["../../src/crawlers/crawler_extension.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,GAAG,EAAqB,MAAM,WAAW,CAAC;AAExD;;;;GAIG;AACH,8BAAsB,gBAAgB;IAClC,IAAI,SAAyB;IAC7B,GAAG,EAAE,GAAG,CAA2C;IAEnD,iBAAiB,IAAI,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC;CAG/C"}
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
import { log as defaultLog } from '../log.js';
|
|
2
|
-
/**
|
|
3
|
-
* Abstract class with pre-defined method to connect to the Crawlers class by the "use" crawler method.
|
|
4
|
-
* @category Crawlers
|
|
5
|
-
* @ignore
|
|
6
|
-
*/
|
|
7
|
-
export class CrawlerExtension {
|
|
8
|
-
name = this.constructor.name;
|
|
9
|
-
log = defaultLog.child({ prefix: this.name });
|
|
10
|
-
getCrawlerOptions() {
|
|
11
|
-
throw new Error(`${this.name} has not implemented "getCrawlerOptions" method.`);
|
|
12
|
-
}
|
|
13
|
-
}
|
|
14
|
-
//# sourceMappingURL=crawler_extension.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"crawler_extension.js","sourceRoot":"","sources":["../../src/crawlers/crawler_extension.ts"],"names":[],"mappings":"AAAA,OAAO,EAAY,GAAG,IAAI,UAAU,EAAE,MAAM,WAAW,CAAC;AAExD;;;;GAIG;AACH,MAAM,OAAgB,gBAAgB;IAClC,IAAI,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC;IAC7B,GAAG,GAAQ,UAAU,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;IAEnD,iBAAiB;QACb,MAAM,IAAI,KAAK,CAAC,GAAG,IAAI,CAAC,IAAI,kDAAkD,CAAC,CAAC;IACpF,CAAC;CACJ"}
|
|
@@ -1,134 +0,0 @@
|
|
|
1
|
-
import type { Readable } from 'node:stream';
|
|
2
|
-
import { type SearchParams } from '@crawlee/utils';
|
|
3
|
-
import type { FormDataLike } from './form-data-like.js';
|
|
4
|
-
type Timeout = {
|
|
5
|
-
lookup: number;
|
|
6
|
-
connect: number;
|
|
7
|
-
secureConnect: number;
|
|
8
|
-
socket: number;
|
|
9
|
-
send: number;
|
|
10
|
-
response: number;
|
|
11
|
-
} | {
|
|
12
|
-
request: number;
|
|
13
|
-
};
|
|
14
|
-
type Method = 'GET' | 'POST' | 'PUT' | 'PATCH' | 'HEAD' | 'DELETE' | 'OPTIONS' | 'TRACE' | 'get' | 'post' | 'put' | 'patch' | 'head' | 'delete' | 'options' | 'trace';
|
|
15
|
-
/**
|
|
16
|
-
* Maps permitted values of the `responseType` option on {@link HttpRequest} to the types that they produce.
|
|
17
|
-
*/
|
|
18
|
-
export interface ResponseTypes {
|
|
19
|
-
'json': unknown;
|
|
20
|
-
'text': string;
|
|
21
|
-
'buffer': Buffer;
|
|
22
|
-
}
|
|
23
|
-
interface Progress {
|
|
24
|
-
percent: number;
|
|
25
|
-
transferred: number;
|
|
26
|
-
total?: number;
|
|
27
|
-
}
|
|
28
|
-
interface ToughCookieJar {
|
|
29
|
-
getCookieString: ((currentUrl: string, options: Record<string, unknown>, callback: (error: Error | null, cookies: string) => void) => string) & ((url: string, callback: (error: Error | null, cookieHeader: string) => void) => string);
|
|
30
|
-
setCookie: ((cookieOrString: unknown, currentUrl: string, options: Record<string, unknown>, callback: (error: Error | null, cookie: unknown) => void) => void) & ((rawCookie: string, url: string, callback: (error: Error | null, result: unknown) => void) => void);
|
|
31
|
-
}
|
|
32
|
-
interface PromiseCookieJar {
|
|
33
|
-
getCookieString: (url: string) => Promise<string>;
|
|
34
|
-
setCookie: (rawCookie: string, url: string) => Promise<unknown>;
|
|
35
|
-
}
|
|
36
|
-
type SimpleHeaders = Record<string, string | string[] | undefined>;
|
|
37
|
-
/**
|
|
38
|
-
* HTTP Request as accepted by {@link BaseHttpClient} methods.
|
|
39
|
-
*/
|
|
40
|
-
export interface HttpRequest<TResponseType extends keyof ResponseTypes = 'text'> {
|
|
41
|
-
[k: string]: unknown;
|
|
42
|
-
url: string | URL;
|
|
43
|
-
method?: Method;
|
|
44
|
-
headers?: SimpleHeaders;
|
|
45
|
-
body?: string | Buffer | Readable | Generator | AsyncGenerator | FormDataLike;
|
|
46
|
-
signal?: AbortSignal;
|
|
47
|
-
timeout?: Partial<Timeout>;
|
|
48
|
-
cookieJar?: ToughCookieJar | PromiseCookieJar;
|
|
49
|
-
followRedirect?: boolean | ((response: any) => boolean);
|
|
50
|
-
maxRedirects?: number;
|
|
51
|
-
encoding?: BufferEncoding;
|
|
52
|
-
responseType?: TResponseType;
|
|
53
|
-
throwHttpErrors?: boolean;
|
|
54
|
-
proxyUrl?: string;
|
|
55
|
-
headerGeneratorOptions?: Record<string, unknown>;
|
|
56
|
-
useHeaderGenerator?: boolean;
|
|
57
|
-
headerGenerator?: {
|
|
58
|
-
getHeaders: (options: Record<string, unknown>) => Record<string, string>;
|
|
59
|
-
};
|
|
60
|
-
insecureHTTPParser?: boolean;
|
|
61
|
-
sessionToken?: object;
|
|
62
|
-
}
|
|
63
|
-
/**
|
|
64
|
-
* Additional options for HTTP requests that need to be handled separately before passing to {@link BaseHttpClient}.
|
|
65
|
-
*/
|
|
66
|
-
export interface HttpRequestOptions<TResponseType extends keyof ResponseTypes = 'text'> extends HttpRequest<TResponseType> {
|
|
67
|
-
/** Search (query string) parameters to be appended to the request URL */
|
|
68
|
-
searchParams?: SearchParams;
|
|
69
|
-
/** A form to be sent in the HTTP request body (URL encoding will be used) */
|
|
70
|
-
form?: Record<string, string>;
|
|
71
|
-
/** Arbitrary object to be JSON-serialized and sent as the HTTP request body */
|
|
72
|
-
json?: unknown;
|
|
73
|
-
/** Basic HTTP Auth username */
|
|
74
|
-
username?: string;
|
|
75
|
-
/** Basic HTTP Auth password */
|
|
76
|
-
password?: string;
|
|
77
|
-
}
|
|
78
|
-
/**
|
|
79
|
-
* HTTP response data, without a body, as returned by {@link BaseHttpClient} methods.
|
|
80
|
-
*/
|
|
81
|
-
export interface BaseHttpResponseData {
|
|
82
|
-
redirectUrls: URL[];
|
|
83
|
-
url: string;
|
|
84
|
-
ip?: string;
|
|
85
|
-
statusCode: number;
|
|
86
|
-
statusMessage?: string;
|
|
87
|
-
headers: SimpleHeaders;
|
|
88
|
-
trailers: SimpleHeaders;
|
|
89
|
-
complete: boolean;
|
|
90
|
-
}
|
|
91
|
-
interface HttpResponseWithoutBody<TResponseType extends keyof ResponseTypes = keyof ResponseTypes> extends BaseHttpResponseData {
|
|
92
|
-
request: HttpRequest<TResponseType>;
|
|
93
|
-
}
|
|
94
|
-
/**
|
|
95
|
-
* HTTP response data as returned by the {@link BaseHttpClient.sendRequest} method.
|
|
96
|
-
*/
|
|
97
|
-
export interface HttpResponse<TResponseType extends keyof ResponseTypes = keyof ResponseTypes> extends HttpResponseWithoutBody<TResponseType> {
|
|
98
|
-
[k: string]: any;
|
|
99
|
-
body: ResponseTypes[TResponseType];
|
|
100
|
-
}
|
|
101
|
-
/**
|
|
102
|
-
* HTTP response data as returned by the {@link BaseHttpClient.stream} method.
|
|
103
|
-
*/
|
|
104
|
-
export interface StreamingHttpResponse extends HttpResponseWithoutBody {
|
|
105
|
-
stream: Readable;
|
|
106
|
-
readonly downloadProgress: Progress;
|
|
107
|
-
readonly uploadProgress: Progress;
|
|
108
|
-
}
|
|
109
|
-
/**
|
|
110
|
-
* Type of a function called when an HTTP redirect takes place. It is allowed to mutate the `updatedRequest` argument.
|
|
111
|
-
*/
|
|
112
|
-
export type RedirectHandler = (redirectResponse: BaseHttpResponseData, updatedRequest: {
|
|
113
|
-
url?: string | URL;
|
|
114
|
-
headers: SimpleHeaders;
|
|
115
|
-
}) => void;
|
|
116
|
-
/**
|
|
117
|
-
* Interface for user-defined HTTP clients to be used for plain HTTP crawling and for sending additional requests during a crawl.
|
|
118
|
-
*/
|
|
119
|
-
export interface BaseHttpClient {
|
|
120
|
-
/**
|
|
121
|
-
* Perform an HTTP Request and return the complete response.
|
|
122
|
-
*/
|
|
123
|
-
sendRequest<TResponseType extends keyof ResponseTypes = 'text'>(request: HttpRequest<TResponseType>): Promise<HttpResponse<TResponseType>>;
|
|
124
|
-
/**
|
|
125
|
-
* Perform an HTTP Request and return after the response headers are received. The body may be read from a stream contained in the response.
|
|
126
|
-
*/
|
|
127
|
-
stream(request: HttpRequest, onRedirect?: RedirectHandler): Promise<StreamingHttpResponse>;
|
|
128
|
-
}
|
|
129
|
-
/**
|
|
130
|
-
* Converts {@link HttpRequestOptions} to a {@link HttpRequest}.
|
|
131
|
-
*/
|
|
132
|
-
export declare function processHttpRequestOptions<TResponseType extends keyof ResponseTypes = 'text'>({ searchParams, form, json, username, password, ...request }: HttpRequestOptions<TResponseType>): HttpRequest<TResponseType>;
|
|
133
|
-
export {};
|
|
134
|
-
//# sourceMappingURL=base-http-client.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"base-http-client.d.ts","sourceRoot":"","sources":["../../src/http_clients/base-http-client.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAE5C,OAAO,EAAqB,KAAK,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAEtE,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AAExD,KAAK,OAAO,GACN;IACI,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,EAAE,MAAM,CAAC;IACtB,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;CACpB,GACD;IAAE,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC;AAE1B,KAAK,MAAM,GACL,KAAK,GACL,MAAM,GACN,KAAK,GACL,OAAO,GACP,MAAM,GACN,QAAQ,GACR,SAAS,GACT,OAAO,GACP,KAAK,GACL,MAAM,GACN,KAAK,GACL,OAAO,GACP,MAAM,GACN,QAAQ,GACR,SAAS,GACT,OAAO,CAAC;AAEd;;GAEG;AACH,MAAM,WAAW,aAAa;IAC1B,MAAM,EAAE,OAAO,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;CACpB;AAED,UAAU,QAAQ;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;CAClB;AAGD,UAAU,cAAc;IACpB,eAAe,EAAE,CAAC,CACd,UAAU,EAAE,MAAM,EAClB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAChC,QAAQ,EAAE,CAAC,KAAK,EAAE,KAAK,GAAG,IAAI,EAAE,OAAO,EAAE,MAAM,KAAK,IAAI,KACvD,MAAM,CAAC,GACR,CAAC,CAAC,GAAG,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC,KAAK,EAAE,KAAK,GAAG,IAAI,EAAE,YAAY,EAAE,MAAM,KAAK,IAAI,KAAK,MAAM,CAAC,CAAC;IAC7F,SAAS,EAAE,CAAC,CACR,cAAc,EAAE,OAAO,EACvB,UAAU,EAAE,MAAM,EAClB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAChC,QAAQ,EAAE,CAAC,KAAK,EAAE,KAAK,GAAG,IAAI,EAAE,MAAM,EAAE,OAAO,KAAK,IAAI,KACvD,IAAI,CAAC,GACN,CAAC,CAAC,SAAS,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC,KAAK,EAAE,KAAK,GAAG,IAAI,EAAE,MAAM,EAAE,OAAO,KAAK,IAAI,KAAK,IAAI,CAAC,CAAC;CAC5G;AAED,UAAU,gBAAgB;IACtB,eAAe,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;IAClD,SAAS,EAAE,CAAC,SAAS,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;CACnE;AAED,KAAK,aAAa,GAAG,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,SAAS,CAAC,CAAC;AAEnE;;GAEG;AACH,MAAM,WAAW,WAAW,CAAC,aAAa,SAAS,MAAM,aAAa,GAAG,MAAM;IAC3E,CAAC,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC;IAErB,GAAG,EAAE,MAAM,GAAG,GAAG,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;I
AChB,OAAO,CAAC,EAAE,aAAa,CAAC;IACxB,IAAI,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,QAAQ,GAAG,SAAS,GAAG,cAAc,GAAG,YAAY,CAAC;IAE9E,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,OAAO,CAAC,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;IAE3B,SAAS,CAAC,EAAE,cAAc,GAAG,gBAAgB,CAAC;IAC9C,cAAc,CAAC,EAAE,OAAO,GAAG,CAAC,CAAC,QAAQ,EAAE,GAAG,KAAK,OAAO,CAAC,CAAC;IACxD,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB,QAAQ,CAAC,EAAE,cAAc,CAAC;IAC1B,YAAY,CAAC,EAAE,aAAa,CAAC;IAC7B,eAAe,CAAC,EAAE,OAAO,CAAC;IAG1B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,sBAAsB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACjD,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,eAAe,CAAC,EAAE;QACd,UAAU,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,KAAK,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAC5E,CAAC;IACF,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,YAAY,CAAC,EAAE,MAAM,CAAC;CACzB;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB,CAAC,aAAa,SAAS,MAAM,aAAa,GAAG,MAAM,CAClF,SAAQ,WAAW,CAAC,aAAa,CAAC;IAClC,yEAAyE;IACzE,YAAY,CAAC,EAAE,YAAY,CAAC;IAE5B,6EAA6E;IAC7E,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC9B,gFAAgF;IAChF,IAAI,CAAC,EAAE,OAAO,CAAC;IAEf,+BAA+B;IAC/B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,+BAA+B;IAC/B,QAAQ,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACjC,YAAY,EAAE,GAAG,EAAE,CAAC;IACpB,GAAG,EAAE,MAAM,CAAC;IAEZ,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,CAAC,EAAE,MAAM,CAAC;IAEvB,OAAO,EAAE,aAAa,CAAC;IACvB,QAAQ,EAAE,aAAa,CAAC;IAExB,QAAQ,EAAE,OAAO,CAAC;CACrB;AAED,UAAU,uBAAuB,CAAC,aAAa,SAAS,MAAM,aAAa,GAAG,MAAM,aAAa,CAC7F,SAAQ,oBAAoB;IAC5B,OAAO,EAAE,WAAW,CAAC,aAAa,CAAC,CAAC;CACvC;AAED;;GAEG;AACH,MAAM,WAAW,YAAY,CAAC,aAAa,SAAS,MAAM,aAAa,GAAG,MAAM,aAAa,CACzF,SAAQ,uBAAuB,CAAC,aAAa,CAAC;IAC9C,CAAC,CAAC,EAAE,MAAM,GAAG,GAAG,CAAC;IAEjB,IAAI,EAAE,aAAa,CAAC,aAAa,CAAC,CAAC;CACtC;AAED;;GAEG;AACH,MAAM,WAAW,qBAAsB,SAAQ,uBAAuB;IAClE,MAAM,EAAE,QAAQ,CAAC;IACjB,QAAQ,CAAC,gBAAgB,EAAE,QAAQ,CAAC;IACpC,QAAQ,CAAC,cAAc,EAAE,QAAQ,CAAC;CACrC;AAED;;GAEG;AACH,MAAM,MAAM,eAAe,GAAG,CAC1B,gBAAgB,EAAE,oBAAoB,EACtC,cAAc,EAAE;IAAE,GAAG,CAAC,EAAE,MAAM,GAAG,GAAG,CAAC;IAAC,OAAO,EAAE,aAAa,CAAA;CAAE,KAC7D,IAAI,CAAC;AAEV;;GAE
G;AACH,MAAM,WAAW,cAAc;IAC3B;;OAEG;IACH,WAAW,CAAC,aAAa,SAAS,MAAM,aAAa,GAAG,MAAM,EAC1D,OAAO,EAAE,WAAW,CAAC,aAAa,CAAC,GACpC,OAAO,CAAC,YAAY,CAAC,aAAa,CAAC,CAAC,CAAC;IAExC;;OAEG;IACH,MAAM,CAAC,OAAO,EAAE,WAAW,EAAE,UAAU,CAAC,EAAE,eAAe,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAAC;CAC9F;AAED;;GAEG;AACH,wBAAgB,yBAAyB,CAAC,aAAa,SAAS,MAAM,aAAa,GAAG,MAAM,EAAE,EAC1F,YAAY,EACZ,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,QAAQ,EACR,GAAG,OAAO,EACb,EAAE,kBAAkB,CAAC,aAAa,CAAC,GAAG,WAAW,CAAC,aAAa,CAAC,CAoChE"}
|