@crawlee/core 4.0.0-beta.4 → 4.0.0-beta.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -5
- package/autoscaling/autoscaled_pool.d.ts +3 -5
- package/autoscaling/autoscaled_pool.d.ts.map +1 -1
- package/autoscaling/autoscaled_pool.js +3 -9
- package/autoscaling/autoscaled_pool.js.map +1 -1
- package/autoscaling/snapshotter.d.ts +3 -13
- package/autoscaling/snapshotter.d.ts.map +1 -1
- package/autoscaling/snapshotter.js +15 -29
- package/autoscaling/snapshotter.js.map +1 -1
- package/autoscaling/system_status.d.ts +0 -3
- package/autoscaling/system_status.d.ts.map +1 -1
- package/autoscaling/system_status.js +2 -3
- package/autoscaling/system_status.js.map +1 -1
- package/configuration.d.ts +5 -78
- package/configuration.d.ts.map +1 -1
- package/configuration.js +6 -102
- package/configuration.js.map +1 -1
- package/cookie_utils.d.ts +1 -1
- package/cookie_utils.d.ts.map +1 -1
- package/cookie_utils.js +8 -8
- package/cookie_utils.js.map +1 -1
- package/crawlers/context_pipeline.d.ts +71 -0
- package/crawlers/context_pipeline.d.ts.map +1 -0
- package/crawlers/context_pipeline.js +121 -0
- package/crawlers/context_pipeline.js.map +1 -0
- package/crawlers/crawler_commons.d.ts +15 -23
- package/crawlers/crawler_commons.d.ts.map +1 -1
- package/crawlers/crawler_commons.js +0 -8
- package/crawlers/crawler_commons.js.map +1 -1
- package/crawlers/error_snapshotter.d.ts +3 -2
- package/crawlers/error_snapshotter.d.ts.map +1 -1
- package/crawlers/error_snapshotter.js +2 -2
- package/crawlers/error_snapshotter.js.map +1 -1
- package/crawlers/error_tracker.d.ts +2 -1
- package/crawlers/error_tracker.d.ts.map +1 -1
- package/crawlers/error_tracker.js.map +1 -1
- package/crawlers/index.d.ts +1 -1
- package/crawlers/index.d.ts.map +1 -1
- package/crawlers/index.js +1 -1
- package/crawlers/index.js.map +1 -1
- package/crawlers/internals/types.d.ts +8 -0
- package/crawlers/internals/types.d.ts.map +1 -0
- package/crawlers/internals/types.js +2 -0
- package/crawlers/internals/types.js.map +1 -0
- package/crawlers/statistics.d.ts +15 -15
- package/crawlers/statistics.d.ts.map +1 -1
- package/crawlers/statistics.js +21 -18
- package/crawlers/statistics.js.map +1 -1
- package/enqueue_links/enqueue_links.d.ts +30 -18
- package/enqueue_links/enqueue_links.d.ts.map +1 -1
- package/enqueue_links/enqueue_links.js +41 -23
- package/enqueue_links/enqueue_links.js.map +1 -1
- package/enqueue_links/shared.d.ts +24 -7
- package/enqueue_links/shared.d.ts.map +1 -1
- package/enqueue_links/shared.js +66 -37
- package/enqueue_links/shared.js.map +1 -1
- package/errors.d.ts +18 -0
- package/errors.d.ts.map +1 -1
- package/errors.js +35 -0
- package/errors.js.map +1 -1
- package/events/event_manager.d.ts +8 -5
- package/events/event_manager.d.ts.map +1 -1
- package/events/event_manager.js +7 -9
- package/events/event_manager.js.map +1 -1
- package/events/local_event_manager.d.ts +14 -4
- package/events/local_event_manager.d.ts.map +1 -1
- package/events/local_event_manager.js +27 -39
- package/events/local_event_manager.js.map +1 -1
- package/index.d.ts +2 -1
- package/index.d.ts.map +1 -1
- package/index.js +2 -1
- package/index.js.map +1 -1
- package/log.d.ts +146 -2
- package/log.d.ts.map +1 -1
- package/log.js +102 -0
- package/log.js.map +1 -1
- package/package.json +6 -7
- package/proxy_configuration.d.ts +17 -94
- package/proxy_configuration.d.ts.map +1 -1
- package/proxy_configuration.js +18 -54
- package/proxy_configuration.js.map +1 -1
- package/recoverable_state.d.ts +121 -0
- package/recoverable_state.d.ts.map +1 -0
- package/recoverable_state.js +137 -0
- package/recoverable_state.js.map +1 -0
- package/request.d.ts +48 -6
- package/request.d.ts.map +1 -1
- package/request.js +62 -16
- package/request.js.map +1 -1
- package/service_locator.d.ts +130 -0
- package/service_locator.d.ts.map +1 -0
- package/service_locator.js +249 -0
- package/service_locator.js.map +1 -0
- package/session_pool/session.d.ts +9 -31
- package/session_pool/session.d.ts.map +1 -1
- package/session_pool/session.js +17 -21
- package/session_pool/session.js.map +1 -1
- package/session_pool/session_pool.d.ts +27 -54
- package/session_pool/session_pool.d.ts.map +1 -1
- package/session_pool/session_pool.js +54 -69
- package/session_pool/session_pool.js.map +1 -1
- package/storages/dataset.d.ts +53 -3
- package/storages/dataset.d.ts.map +1 -1
- package/storages/dataset.js +78 -6
- package/storages/dataset.js.map +1 -1
- package/storages/index.d.ts +2 -0
- package/storages/index.d.ts.map +1 -1
- package/storages/index.js +2 -0
- package/storages/index.js.map +1 -1
- package/storages/key_value_store.d.ts +71 -1
- package/storages/key_value_store.d.ts.map +1 -1
- package/storages/key_value_store.js +95 -12
- package/storages/key_value_store.js.map +1 -1
- package/storages/request_list.d.ts +9 -9
- package/storages/request_list.d.ts.map +1 -1
- package/storages/request_list.js +11 -8
- package/storages/request_list.js.map +1 -1
- package/storages/request_list_adapter.d.ts +58 -0
- package/storages/request_list_adapter.d.ts.map +1 -0
- package/storages/request_list_adapter.js +81 -0
- package/storages/request_list_adapter.js.map +1 -0
- package/storages/request_manager_tandem.d.ts +68 -0
- package/storages/request_manager_tandem.d.ts.map +1 -0
- package/storages/request_manager_tandem.js +124 -0
- package/storages/request_manager_tandem.js.map +1 -0
- package/storages/request_provider.d.ts +76 -9
- package/storages/request_provider.d.ts.map +1 -1
- package/storages/request_provider.js +92 -54
- package/storages/request_provider.js.map +1 -1
- package/storages/request_queue.d.ts +1 -3
- package/storages/request_queue.d.ts.map +1 -1
- package/storages/request_queue.js +2 -4
- package/storages/request_queue.js.map +1 -1
- package/storages/request_queue_v2.d.ts +3 -3
- package/storages/request_queue_v2.d.ts.map +1 -1
- package/storages/request_queue_v2.js +4 -5
- package/storages/request_queue_v2.js.map +1 -1
- package/storages/sitemap_request_list.d.ts +5 -5
- package/storages/sitemap_request_list.d.ts.map +1 -1
- package/storages/sitemap_request_list.js +8 -7
- package/storages/sitemap_request_list.js.map +1 -1
- package/storages/storage_manager.d.ts +10 -8
- package/storages/storage_manager.d.ts.map +1 -1
- package/storages/storage_manager.js +12 -22
- package/storages/storage_manager.js.map +1 -1
- package/storages/utils.d.ts.map +1 -1
- package/storages/utils.js +4 -3
- package/storages/utils.js.map +1 -1
- package/typedefs.d.ts +1 -1
- package/typedefs.d.ts.map +1 -1
- package/crawlers/crawler_extension.d.ts +0 -12
- package/crawlers/crawler_extension.d.ts.map +0 -1
- package/crawlers/crawler_extension.js +0 -14
- package/crawlers/crawler_extension.js.map +0 -1
- package/http_clients/base-http-client.d.ts +0 -134
- package/http_clients/base-http-client.d.ts.map +0 -1
- package/http_clients/base-http-client.js +0 -33
- package/http_clients/base-http-client.js.map +0 -1
- package/http_clients/form-data-like.d.ts +0 -67
- package/http_clients/form-data-like.d.ts.map +0 -1
- package/http_clients/form-data-like.js +0 -5
- package/http_clients/form-data-like.js.map +0 -1
- package/http_clients/got-scraping-http-client.d.ts +0 -15
- package/http_clients/got-scraping-http-client.d.ts.map +0 -1
- package/http_clients/got-scraping-http-client.js +0 -69
- package/http_clients/got-scraping-http-client.js.map +0 -1
- package/http_clients/index.d.ts +0 -3
- package/http_clients/index.d.ts.map +0 -1
- package/http_clients/index.js +0 -3
- package/http_clients/index.js.map +0 -1
- package/tsconfig.build.tsbuildinfo +0 -1
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
import { AsyncLocalStorage } from 'node:async_hooks';
|
|
2
|
+
import { MemoryStorage } from '@crawlee/memory-storage';
|
|
3
|
+
import log from '@apify/log';
|
|
4
|
+
import { Configuration } from './configuration.js';
|
|
5
|
+
import { ServiceConflictError } from './errors.js';
|
|
6
|
+
import { LocalEventManager } from './events/local_event_manager.js';
|
|
7
|
+
import { ApifyLogAdapter } from './log.js';
|
|
8
|
+
/**
|
|
9
|
+
* Service locator for managing the services used by Crawlee.
|
|
10
|
+
*
|
|
11
|
+
* All services are initialized to their default value lazily.
|
|
12
|
+
*
|
|
13
|
+
* There are two primary usage patterns:
|
|
14
|
+
*
|
|
15
|
+
* **1. Global service locator (for default services):**
|
|
16
|
+
* ```typescript
|
|
17
|
+
* import { serviceLocator, BasicCrawler } from 'crawlee';
|
|
18
|
+
*
|
|
19
|
+
* // Optionally configure global services before creating crawlers
|
|
20
|
+
* serviceLocator.setStorageClient(myCustomClient);
|
|
21
|
+
*
|
|
22
|
+
* // Crawler uses global services
|
|
23
|
+
* const crawler = new BasicCrawler({ ... });
|
|
24
|
+
* ```
|
|
25
|
+
*
|
|
26
|
+
* **2. Per-crawler services (recommended for isolation):**
|
|
27
|
+
* ```typescript
|
|
28
|
+
* import { BasicCrawler, Configuration, LocalEventManager } from 'crawlee';
|
|
29
|
+
* import { MemoryStorage } from '@crawlee/memory-storage';
|
|
30
|
+
*
|
|
31
|
+
* const crawler = new BasicCrawler({
|
|
32
|
+
* requestHandler: async ({ request }) => { ... },
|
|
33
|
+
* configuration: new Configuration({ ... }), // custom config
|
|
34
|
+
* storageClient: new MemoryStorage(), // custom storage
|
|
35
|
+
* eventManager: LocalEventManager.fromConfig(), // custom events
|
|
36
|
+
* });
|
|
37
|
+
* // Crawler has its own isolated ServiceLocator instance
|
|
38
|
+
* ```
|
|
39
|
+
*/
|
|
40
|
+
// Used as fallback in ServiceLocator methods that need to log before a logger is explicitly set,
|
|
41
|
+
// without implicitly locking the logger slot (which getLogger() would do).
|
|
42
|
+
const fallbackLog = new ApifyLogAdapter(log);
|
|
43
|
+
/**
 * Holds the configuration, event manager, storage client and logger used by Crawlee,
 * plus a registry of storage managers keyed by storage class.
 *
 * Every getter creates a default service on first access when none has been set; every
 * setter refuses to replace a service that is already present with a different instance.
 */
export class ServiceLocator {
    configuration;
    eventManager;
    storageClient;
    logger;
    /**
     * Storage managers for Dataset, KeyValueStore, and RequestQueue, keyed by storage constructor.
     * Manages caching and lifecycle of storage instances.
     */
    storageManagers = new Map();

    /**
     * Creates a new ServiceLocator instance. Any argument left out stays unset and is
     * created lazily by the matching getter.
     *
     * @param configuration Optional configuration instance to use
     * @param eventManager Optional event manager instance to use
     * @param storageClient Optional storage client instance to use
     * @param logger Optional logger instance to use
     */
    constructor(configuration, eventManager, storageClient, logger) {
        Object.assign(this, { configuration, eventManager, storageClient, logger });
    }

    /**
     * Returns the configuration, creating a default `Configuration` on first use.
     */
    getConfiguration() {
        if (this.configuration) {
            return this.configuration;
        }
        // `fallbackLog` is used instead of `getLogger()` so that logging here does not fill the logger slot.
        (this.logger ?? fallbackLog).debug('No configuration set, implicitly creating and using default Configuration.');
        this.configuration = new Configuration();
        return this.configuration;
    }

    /**
     * Stores the configuration.
     *
     * @throws {ServiceConflictError} when a different configuration is already present
     */
    setConfiguration(configuration) {
        const current = this.configuration;
        // Setting the very same instance again is a no-op.
        if (current === configuration) {
            return;
        }
        // A different configuration is already in place.
        if (current) {
            throw new ServiceConflictError('Configuration', configuration, current);
        }
        this.configuration = configuration;
    }

    /**
     * Returns the event manager, creating a `LocalEventManager` from the configuration on first use.
     * Creating it this way also creates the default configuration when none is set yet.
     */
    getEventManager() {
        if (this.eventManager) {
            return this.eventManager;
        }
        (this.logger ?? fallbackLog).debug('No event manager set, implicitly creating and using default LocalEventManager.');
        if (!this.configuration) {
            (this.logger ?? fallbackLog).warning('Implicit creation of event manager will implicitly set configuration as side effect. It is advised to explicitly first set the configuration instead.');
        }
        this.eventManager = LocalEventManager.fromConfig(this.getConfiguration());
        return this.eventManager;
    }

    /**
     * Stores the event manager.
     *
     * @throws {ServiceConflictError} when a different event manager is already present
     */
    setEventManager(eventManager) {
        const current = this.eventManager;
        // Setting the very same instance again is a no-op.
        if (current === eventManager) {
            return;
        }
        // A different event manager is already in place.
        if (current) {
            throw new ServiceConflictError('EventManager', eventManager, current);
        }
        this.eventManager = eventManager;
    }

    /**
     * Returns the storage client, creating a `MemoryStorage` on first use. The `persistStorage`
     * option is read from the configuration, which is itself created if it is not set yet.
     */
    getStorageClient() {
        if (this.storageClient) {
            return this.storageClient;
        }
        (this.logger ?? fallbackLog).debug('No storage client set, implicitly creating and using default MemoryStorage.');
        if (!this.configuration) {
            (this.logger ?? fallbackLog).warning('Implicit creation of storage client will implicitly set configuration as side effect. It is advised to explicitly first set the configuration instead.');
        }
        const persistStorage = this.getConfiguration().get('persistStorage');
        this.storageClient = new MemoryStorage({ persistStorage });
        return this.storageClient;
    }

    /**
     * Stores the storage client.
     *
     * @throws {ServiceConflictError} when a different storage client is already present
     */
    setStorageClient(storageClient) {
        const current = this.storageClient;
        // Setting the very same instance again is a no-op.
        if (current === storageClient) {
            return;
        }
        // A different storage client is already in place.
        if (current) {
            throw new ServiceConflictError('StorageClient', storageClient, current);
        }
        this.storageClient = storageClient;
    }

    /**
     * Returns the logger, creating an `ApifyLogAdapter` around the default log on first use.
     */
    getLogger() {
        this.logger ||= new ApifyLogAdapter(log);
        return this.logger;
    }

    /**
     * Stores the logger.
     *
     * @throws {ServiceConflictError} when a different logger is already present
     */
    setLogger(logger) {
        const current = this.logger;
        if (current === logger) {
            return;
        }
        if (current) {
            throw new ServiceConflictError('Logger', logger, current);
        }
        this.logger = logger;
    }

    /**
     * Returns a child of the current logger created with the given prefix.
     */
    getChildLog(prefix) {
        const parent = this.getLogger();
        return parent.child({ prefix });
    }

    /**
     * Returns the storage manager registered for the given storage constructor, if any.
     */
    getStorageManager(constructor) {
        return this.storageManagers.get(constructor);
    }

    /**
     * Registers a storage manager for the given storage constructor.
     *
     * @throws {ServiceConflictError} when a manager is already registered for that constructor
     */
    setStorageManager(constructor, storageManager) {
        const registry = this.storageManagers;
        if (registry.has(constructor)) {
            throw new ServiceConflictError(`StorageManager(${constructor.name})`, storageManager, registry.get(constructor));
        }
        registry.set(constructor, storageManager);
    }

    /**
     * Calls `clearCache()` on every cached item of the manager named `KeyValueStore`
     * and then empties the storage manager registry.
     */
    clearStorageManagerCache() {
        for (const manager of this.storageManagers.values()) {
            // KeyValueStore has a clearCache method on its instances
            // TODO this uses fragile string matching and `any` casts into private fields - remove as part of
            // https://github.com/apify/crawlee/issues/3075 (Storage instance management will be reworked significantly)
            if (manager.name !== 'KeyValueStore') {
                continue;
            }
            manager.cache?.forEach((item) => {
                item.clearCache?.();
            });
        }
        this.storageManagers.clear();
    }

    /**
     * Unsets all four services and clears the storage manager registry.
     */
    reset() {
        this.configuration = this.eventManager = this.storageClient = this.logger = undefined;
        this.clearStorageManagerCache();
    }
}
|
|
180
|
+
/**
|
|
181
|
+
* Used as the default service provider when crawlers don't specify custom services.
|
|
182
|
+
*/
|
|
183
|
+
const globalServiceLocator = new ServiceLocator();
|
|
184
|
+
const serviceLocatorStorage = new AsyncLocalStorage();
|
|
185
|
+
/**
 * Wraps the prototype methods of `target` so that any code they invoke will see the given
 * `serviceLocator` via `AsyncLocalStorage`, rather than the global one.
 *
 * Walks the prototype chain from the most-derived class towards the base and installs, on the
 * *instance* (not the prototype), a wrapper that calls
 * `serviceLocatorStorage.run(serviceLocator, originalMethod)`.
 *
 * Only the most-derived definition of each key is considered: once a key has been seen on a
 * prototype, the same key on a prototype further up the chain is skipped. This keeps subclass
 * overrides in effect (an inherited implementation never replaces the wrapper around the
 * override) and leaves accessors defined by a subclass untouched.
 *
 * The `AsyncLocalStorage` context propagates through the entire sync/async call tree of each
 * wrapped method — including `super` calls, since the prototype methods execute within the
 * context established by the instance-level wrapper.
 *
 * @internal
 * @param serviceLocator The service locator that wrapped methods and the returned scope functions activate
 * @param target The object whose prototype methods are wrapped in place
 * @returns Scope control functions: `run` executes a callback within the scoped context,
 *   `enterScope`/`exitScope` allow entering/leaving the scope imperatively (e.g., for constructor bodies).
 */
export function bindMethodsToServiceLocator(serviceLocator, target) {
    // Keys already encountered on a more-derived prototype. Normal property lookup resolves to
    // the first definition found while walking up the chain, so later ones must be ignored.
    const shadowedKeys = new Set();
    let proto = Object.getPrototypeOf(target);
    while (proto !== null && proto !== Object.prototype) {
        const propertyKeys = [...Object.getOwnPropertyNames(proto), ...Object.getOwnPropertySymbols(proto)];
        for (const propertyKey of propertyKeys) {
            if (shadowedKeys.has(propertyKey)) {
                continue;
            }
            shadowedKeys.add(propertyKey);
            // We use property descriptors rather than accessing target[propertyKey] directly,
            // because that would trigger getters and cause unwanted side effects.
            const descriptor = Object.getOwnPropertyDescriptor(proto, propertyKey);
            // Skip getters, setters, and constructors — only wrap regular methods.
            if (propertyKey === 'constructor' ||
                !descriptor ||
                descriptor.get ||
                descriptor.set ||
                typeof descriptor.value !== 'function') {
                continue;
            }
            const original = descriptor.value;
            target[propertyKey] = (...args) => {
                return serviceLocatorStorage.run(serviceLocator, () => {
                    return original.apply(target, args);
                });
            };
        }
        proto = Object.getPrototypeOf(proto);
    }
    // Store that was active when `enterScope` was last called; restored by `exitScope`.
    let previousStore;
    return {
        run: (fn) => serviceLocatorStorage.run(serviceLocator, fn),
        enterScope: () => {
            previousStore = serviceLocatorStorage.getStore();
            serviceLocatorStorage.enterWith(serviceLocator);
        },
        exitScope: () => {
            // Re-enter with whatever was active before `enterScope`; when that was `undefined`,
            // this "unsets" the AsyncLocalStorage.
            serviceLocatorStorage.enterWith(previousStore);
        },
    };
}
|
|
236
|
+
/**
 * Proxy that forwards every property read to the service locator stored in
 * `serviceLocatorStorage` for the current async context, or to `globalServiceLocator`
 * when no store is active. Function-valued properties are returned bound to that locator.
 * Assigning a property on the proxy always throws a `TypeError`.
 */
export const serviceLocator = new Proxy({}, {
    get(_target, prop) {
        const locator = serviceLocatorStorage.getStore() ?? globalServiceLocator;
        const member = Reflect.get(locator, prop, locator);
        // Bind methods so they keep working when detached from the proxy.
        return typeof member === 'function' ? member.bind(locator) : member;
    },
    set(_target, prop) {
        const propertyName = String(prop);
        throw new TypeError(`Cannot set property '${propertyName}' on serviceLocator directly. Use the setter methods (e.g. setConfiguration(), setStorageClient()) instead.`);
    },
});
|
|
249
|
+
//# sourceMappingURL=service_locator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"service_locator.js","sourceRoot":"","sources":["../src/service_locator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AAErD,OAAO,EAAE,aAAa,EAAE,MAAM,yBAAyB,CAAC;AAGxD,OAAO,GAAG,MAAM,YAAY,CAAC;AAE7B,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AAEpE,OAAO,EAAE,eAAe,EAAE,MAAM,UAAU,CAAC;AAqF3C;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACH,iGAAiG;AACjG,2EAA2E;AAC3E,MAAM,WAAW,GAAG,IAAI,eAAe,CAAC,GAAG,CAAC,CAAC;AAE7C,MAAM,OAAO,cAAc;IACf,aAAa,CAAiB;IAC9B,YAAY,CAAgB;IAC5B,aAAa,CAAiB;IAC9B,MAAM,CAAiB;IAE/B;;;OAGG;IACK,eAAe,GAAG,IAAI,GAAG,EAAyC,CAAC;IAE3E;;;;;;;OAOG;IACH,YACI,aAA6B,EAC7B,YAA2B,EAC3B,aAA6B,EAC7B,MAAsB;QAEtB,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;QACnC,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;QACjC,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;QACnC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACzB,CAAC;IAED,gBAAgB;QACZ,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,CAAC;YACtB,CAAC,IAAI,CAAC,MAAM,IAAI,WAAW,CAAC,CAAC,KAAK,CAC9B,4EAA4E,CAC/E,CAAC;YACF,IAAI,CAAC,aAAa,GAAG,IAAI,aAAa,EAAE,CAAC;QAC7C,CAAC;QACD,OAAO,IAAI,CAAC,aAAa,CAAC;IAC9B,CAAC;IAED,gBAAgB,CAAC,aAA4B;QACzC,wCAAwC;QACxC,IAAI,IAAI,CAAC,aAAa,KAAK,aAAa,EAAE,CAAC;YACvC,OAAO;QACX,CAAC;QAED,4DAA4D;QAC5D,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACrB,MAAM,IAAI,oBAAoB,CAAC,eAAe,EAAE,aAAa,EAAE,IAAI,CAAC,aAAa,CAAC,CAAC;QACvF,CAAC;QAED,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;IACvC,CAAC;IAED,eAAe;QACX,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;YACrB,CAAC,IAAI,CAAC,MAAM,IAAI,WAAW,CAAC,CAAC,KAAK,CAC9B,gFAAgF,CACnF,CAAC;YACF,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,CAAC;gBACtB,CAAC,IAAI,CAAC,MAAM,IAAI,WAAW,CAAC,CAAC,OAAO,CAChC,uFAAuF;oBACnF,kEAAkE,CACzE,CAAC;YACN,CAAC;YACD,IAAI,CAAC,YAAY,GAAG,iBAAiB,CAAC,UAAU,CAAC,IAAI,CAAC,gBAAgB,EAAE,CAAC,CAAC;QAC9E,CAAC;QACD,OAAO,IAAI,CAAC,YAAY,CAAC;IAC7B,CAAC;IAED,eAAe,CAAC,YAA0B;QACtC,wCAAwC;QACxC,IAAI,IAAI,CAAC,YAAY,KAAK,YAAY,EAAE,CAAC;YACrC,OAAO;QACX,CAAC;QAED,4DAA4D;QAC5D,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;YACpB,MAAM,IAAI,oBAAoB,CAAC,cAAc,EAAE,YAAY,EAAE,IAAI,CAAC,YA
AY,CAAC,CAAC;QACpF,CAAC;QAED,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;IACrC,CAAC;IAED,gBAAgB;QACZ,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,CAAC;YACtB,CAAC,IAAI,CAAC,MAAM,IAAI,WAAW,CAAC,CAAC,KAAK,CAC9B,6EAA6E,CAChF,CAAC;YACF,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,CAAC;gBACtB,CAAC,IAAI,CAAC,MAAM,IAAI,WAAW,CAAC,CAAC,OAAO,CAChC,wFAAwF;oBACpF,kEAAkE,CACzE,CAAC;YACN,CAAC;YACD,MAAM,MAAM,GAAG,IAAI,CAAC,gBAAgB,EAAE,CAAC;YACvC,IAAI,CAAC,aAAa,GAAG,IAAI,aAAa,CAAC;gBACnC,cAAc,EAAE,MAAM,CAAC,GAAG,CAAC,gBAAgB,CAAC;aAC/C,CAAC,CAAC;QACP,CAAC;QACD,OAAO,IAAI,CAAC,aAAa,CAAC;IAC9B,CAAC;IAED,gBAAgB,CAAC,aAA4B;QACzC,wCAAwC;QACxC,IAAI,IAAI,CAAC,aAAa,KAAK,aAAa,EAAE,CAAC;YACvC,OAAO;QACX,CAAC;QAED,6DAA6D;QAC7D,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACrB,MAAM,IAAI,oBAAoB,CAAC,eAAe,EAAE,aAAa,EAAE,IAAI,CAAC,aAAa,CAAC,CAAC;QACvF,CAAC;QAED,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;IACvC,CAAC;IAED,SAAS;QACL,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;YACf,IAAI,CAAC,MAAM,GAAG,IAAI,eAAe,CAAC,GAAG,CAAC,CAAC;QAC3C,CAAC;QACD,OAAO,IAAI,CAAC,MAAM,CAAC;IACvB,CAAC;IAED,SAAS,CAAC,MAAqB;QAC3B,IAAI,IAAI,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;YACzB,OAAO;QACX,CAAC;QAED,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YACd,MAAM,IAAI,oBAAoB,CAAC,QAAQ,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QAClE,CAAC;QAED,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACzB,CAAC;IAED,WAAW,CAAC,MAAc;QACtB,OAAO,IAAI,CAAC,SAAS,EAAE,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IAC9C,CAAC;IAED,iBAAiB,CAAC,WAAkC;QAChD,OAAO,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;IACjD,CAAC;IAED,iBAAiB,CAAC,WAAkC,EAAE,cAA8B;QAChF,IAAI,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,CAAC;YACxC,MAAM,IAAI,oBAAoB,CAC1B,kBAAkB,WAAW,CAAC,IAAI,GAAG,EACrC,cAAc,EACd,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,WAAW,CAAC,CACxC,CAAC;QACN,CAAC;QAED,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,WAAW,EAAE,cAAc,CAAC,CAAC;IAC1D,CAAC;IAED,wBAAwB;QACpB,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;YACrC,yDAAyD;YACzD,iGAAiG;YACjG,6GAA6G;YAC7G,IAAK,OAAe,CAAC,IAAI,KAAK,eAAe,EAAE,CAAC;gBAC3C,OAAe,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC,IAAS,EAAE,EAAE;oBAC1C,IAAI,CAAC,UAAU,EAAE,EAAE,CAAC;gBACxB,CAAC,CAAC,CAAC;YACP,CAAC
;QACL,CAAC,CAAC,CAAC;QACH,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,CAAC;IACjC,CAAC;IAED,KAAK;QACD,IAAI,CAAC,aAAa,GAAG,SAAS,CAAC;QAC/B,IAAI,CAAC,YAAY,GAAG,SAAS,CAAC;QAC9B,IAAI,CAAC,aAAa,GAAG,SAAS,CAAC;QAC/B,IAAI,CAAC,MAAM,GAAG,SAAS,CAAC;QACxB,IAAI,CAAC,wBAAwB,EAAE,CAAC;IACpC,CAAC;CACJ;AAED;;GAEG;AACH,MAAM,oBAAoB,GAAG,IAAI,cAAc,EAAE,CAAC;AAElD,MAAM,qBAAqB,GAAG,IAAI,iBAAiB,EAA2B,CAAC;AAE/E;;;;;;;;;;;;;;GAcG;AACH,MAAM,UAAU,2BAA2B,CACvC,cAA8B,EAC9B,MAAU;IAEV,IAAI,KAAK,GAAG,MAAM,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC;IAE1C,OAAO,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,MAAM,CAAC,SAAS,EAAE,CAAC;QAClD,MAAM,YAAY,GAAG,CAAC,GAAG,MAAM,CAAC,mBAAmB,CAAC,KAAK,CAAC,EAAE,GAAG,MAAM,CAAC,qBAAqB,CAAC,KAAK,CAAC,CAAC,CAAC;QAEpG,KAAK,MAAM,WAAW,IAAI,YAAY,EAAE,CAAC;YACrC,MAAM,UAAU,GAAG,MAAM,CAAC,wBAAwB,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;YAEvE,kFAAkF;YAClF,sEAAsE;YACtE,uEAAuE;YACvE,IACI,WAAW,KAAK,aAAa;gBAC7B,CAAC,UAAU;gBACX,UAAU,CAAC,GAAG;gBACd,UAAU,CAAC,GAAG;gBACd,OAAO,UAAU,CAAC,KAAK,KAAK,UAAU;gBAEtC,SAAS;YAEb,MAAM,QAAQ,GAAG,UAAU,CAAC,KAAK,CAAC;YACjC,MAA2C,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,IAAW,EAAE,EAAE;gBAC3E,OAAO,qBAAqB,CAAC,GAAG,CAAC,cAAc,EAAE,GAAG,EAAE;oBAClD,OAAO,QAAQ,CAAC,KAAK,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;gBACxC,CAAC,CAAC,CAAC;YACP,CAAC,CAAC;QACN,CAAC;QAED,KAAK,GAAG,MAAM,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC;IACzC,CAAC;IAED,IAAI,aAAkD,CAAC;IAEvD,OAAO;QACH,GAAG,EAAE,CAAI,EAAW,EAAK,EAAE,CAAC,qBAAqB,CAAC,GAAG,CAAC,cAAc,EAAE,EAAE,CAAC;QACzE,UAAU,EAAE,GAAG,EAAE;YACb,aAAa,GAAG,qBAAqB,CAAC,QAAQ,EAAE,CAAC;YACjD,qBAAqB,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC;QACpD,CAAC;QACD,SAAS,EAAE,GAAG,EAAE;YACZ,qBAAqB,CAAC,SAAS,CAAC,aAAoB,CAAC,CAAC,CAAC,uFAAuF;QAClJ,CAAC;KACJ,CAAC;AACN,CAAC;AAED,MAAM,CAAC,MAAM,cAAc,GAAG,IAAI,KAAK,CAAC,EAA6B,EAAE;IACnE,GAAG,CAAC,OAAO,EAAE,IAAI;QACb,MAAM,MAAM,GAAG,qBAAqB,CAAC,QAAQ,EAAE,IAAI,oBAAoB,CAAC;QACxE,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC;QAChD,IAAI,OAAO,KAAK,KAAK,UAAU,EAAE,CAAC;YAC9B,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC9B,CAAC;QACD,OAAO,KAAK,CAAC;IACjB,CAAC;IACD,GAAG,CAAC,OAAO,EAAE,IA
AI;QACb,MAAM,IAAI,SAAS,CACf,wBAAwB,MAAM,CAAC,IAAI,CAAC,6GAA6G,CACpJ,CAAC;IACN,CAAC;CACJ,CAAC,CAAC"}
|
|
@@ -1,23 +1,7 @@
|
|
|
1
|
-
import type { Cookie as CookieObject, Dictionary } from '@crawlee/types';
|
|
2
|
-
import type { Cookie
|
|
1
|
+
import type { Cookie as CookieObject, Dictionary, ISession, ProxyInfo, SessionState } from '@crawlee/types';
|
|
2
|
+
import type { Cookie } from 'tough-cookie';
|
|
3
3
|
import { CookieJar } from 'tough-cookie';
|
|
4
|
-
import type {
|
|
5
|
-
import type { ResponseLike } from '../cookie_utils.js';
|
|
6
|
-
/**
|
|
7
|
-
* Persistable {@link Session} state.
|
|
8
|
-
*/
|
|
9
|
-
export interface SessionState {
|
|
10
|
-
id: string;
|
|
11
|
-
cookieJar: SerializedCookieJar;
|
|
12
|
-
userData: object;
|
|
13
|
-
errorScore: number;
|
|
14
|
-
maxErrorScore: number;
|
|
15
|
-
errorScoreDecrement: number;
|
|
16
|
-
usageCount: number;
|
|
17
|
-
maxUsageCount: number;
|
|
18
|
-
expiresAt: string;
|
|
19
|
-
createdAt: string;
|
|
20
|
-
}
|
|
4
|
+
import type { CrawleeLogger } from '../log.js';
|
|
21
5
|
export interface SessionOptions {
|
|
22
6
|
/** Id of session used for generating fingerprints. It is used as proxy session name. */
|
|
23
7
|
id?: string;
|
|
@@ -60,9 +44,10 @@ export interface SessionOptions {
|
|
|
60
44
|
maxUsageCount?: number;
|
|
61
45
|
/** SessionPool instance. Session will emit the `sessionRetired` event on this instance. */
|
|
62
46
|
sessionPool?: import('./session_pool.js').SessionPool;
|
|
63
|
-
log?:
|
|
47
|
+
log?: CrawleeLogger;
|
|
64
48
|
errorScore?: number;
|
|
65
49
|
cookieJar?: CookieJar;
|
|
50
|
+
proxyInfo?: ProxyInfo;
|
|
66
51
|
}
|
|
67
52
|
/**
|
|
68
53
|
* Sessions are used to store information such as cookies and can be used for generating fingerprints and proxy sessions.
|
|
@@ -70,7 +55,7 @@ export interface SessionOptions {
|
|
|
70
55
|
* Session internal state can be enriched with custom user data for example some authorization tokens and specific headers in general.
|
|
71
56
|
* @category Scaling
|
|
72
57
|
*/
|
|
73
|
-
export declare class Session {
|
|
58
|
+
export declare class Session implements ISession {
|
|
74
59
|
readonly id: string;
|
|
75
60
|
private maxAgeSecs;
|
|
76
61
|
userData: Dictionary;
|
|
@@ -82,6 +67,7 @@ export declare class Session {
|
|
|
82
67
|
private _maxUsageCount;
|
|
83
68
|
private sessionPool;
|
|
84
69
|
private _errorScore;
|
|
70
|
+
private _proxyInfo?;
|
|
85
71
|
private _cookieJar;
|
|
86
72
|
private log;
|
|
87
73
|
get errorScore(): number;
|
|
@@ -92,6 +78,7 @@ export declare class Session {
|
|
|
92
78
|
get createdAt(): Date;
|
|
93
79
|
get maxUsageCount(): number;
|
|
94
80
|
get cookieJar(): CookieJar;
|
|
81
|
+
get proxyInfo(): ProxyInfo | undefined;
|
|
95
82
|
/**
|
|
96
83
|
* Session configuration.
|
|
97
84
|
*/
|
|
@@ -140,15 +127,6 @@ export declare class Session {
|
|
|
140
127
|
* Should be used when the session has been used unsuccessfully. For example because of timeouts.
|
|
141
128
|
*/
|
|
142
129
|
markBad(): void;
|
|
143
|
-
/**
|
|
144
|
-
* With certain status codes: `401`, `403` or `429` we can be certain
|
|
145
|
-
* that the target website is blocking us. This function helps to do this conveniently
|
|
146
|
-
* by retiring the session when such code is received. Optionally, the default status
|
|
147
|
-
* codes can be extended in the second parameter.
|
|
148
|
-
* @param statusCode HTTP status code.
|
|
149
|
-
* @returns Whether the session was retired.
|
|
150
|
-
*/
|
|
151
|
-
retireOnBlockedStatusCodes(statusCode: number): boolean;
|
|
152
130
|
/**
|
|
153
131
|
* Saves cookies from an HTTP response to be used with the session.
|
|
154
132
|
* It expects an object with a `headers` property that's either an `Object`
|
|
@@ -156,7 +134,7 @@ export declare class Session {
|
|
|
156
134
|
*
|
|
157
135
|
* It then parses and saves the cookies from the `set-cookie` header, if available.
|
|
158
136
|
*/
|
|
159
|
-
setCookiesFromResponse(response:
|
|
137
|
+
setCookiesFromResponse(response: Response): void;
|
|
160
138
|
/**
|
|
161
139
|
* Saves an array with cookie objects to be used with the session.
|
|
162
140
|
* The objects should be in the format that
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"session.d.ts","sourceRoot":"","sources":["../../src/session_pool/session.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,MAAM,IAAI,YAAY,EAAE,UAAU,EAAE,
|
|
1
|
+
{"version":3,"file":"session.d.ts","sourceRoot":"","sources":["../../src/session_pool/session.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,MAAM,IAAI,YAAY,EAAE,UAAU,EAAE,QAAQ,EAAE,SAAS,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAE5G,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAC3C,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAUzC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC;AAI/C,MAAM,WAAW,cAAc;IAC3B,wFAAwF;IACxF,EAAE,CAAC,EAAE,MAAM,CAAC;IAEZ;;;OAGG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;IAEpB,+EAA+E;IAC/E,QAAQ,CAAC,EAAE,UAAU,CAAC;IAEtB;;;;;;OAMG;IACH,aAAa,CAAC,EAAE,MAAM,CAAC;IAEvB;;;;;OAKG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAE7B,wBAAwB;IACxB,SAAS,CAAC,EAAE,IAAI,CAAC;IAEjB,0BAA0B;IAC1B,SAAS,CAAC,EAAE,IAAI,CAAC;IAEjB;;;OAGG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;IAEpB;;;;OAIG;IACH,aAAa,CAAC,EAAE,MAAM,CAAC;IAEvB,2FAA2F;IAC3F,WAAW,CAAC,EAAE,OAAO,mBAAmB,EAAE,WAAW,CAAC;IAEtD,GAAG,CAAC,EAAE,aAAa,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB,SAAS,CAAC,EAAE,SAAS,CAAC;CACzB;AAED;;;;;GAKG;AACH,qBAAa,OAAQ,YAAW,QAAQ;IACpC,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,OAAO,CAAC,UAAU,CAAS;IAC3B,QAAQ,EAAE,UAAU,CAAC;IACrB,OAAO,CAAC,cAAc,CAAS;IAC/B,OAAO,CAAC,oBAAoB,CAAS;IACrC,OAAO,CAAC,UAAU,CAAO;IACzB,OAAO,CAAC,UAAU,CAAO;IACzB,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,cAAc,CAAS;IAC/B,OAAO,CAAC,WAAW,CAA0C;IAC7D,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,UAAU,CAAC,CAAY;IAC/B,OAAO,CAAC,UAAU,CAAY;IAC9B,OAAO,CAAC,GAAG,CAAgB;IAE3B,IAAI,UAAU,WAEb;IAED,IAAI,UAAU,WAEb;IAED,IAAI,aAAa,WAEhB;IAED,IAAI,mBAAmB,WAEtB;IAED,IAAI,SAAS,SAEZ;IAED,IAAI,SAAS,SAEZ;IAED,IAAI,aAAa,WAEhB;IAED,IAAI,SAAS,cAEZ;IAED,IAAI,SAAS,0BAEZ;IAED;;OAEG;gBACS,OAAO,EAAE,cAAc;IA0DnC;;;OAGG;IACH,SAAS,IAAI,OAAO;IAIpB;;;;OAIG;IACH,SAAS,IAAI,OAAO;IAIpB;;;OAGG;IACH,sBAAsB,IAAI,OAAO;IAIjC;;;OAGG;IACH,QAAQ,IAAI,OAAO;IAInB;;;OAGG;IACH,QAAQ;IAUR;;;OAGG;IACH,QAAQ,IAAI,YAAY;IAgBxB;;;;;;OAMG;IACH,MAAM;IASN;;;OAGG;IACH,OAAO;IAOP;;;;;;OAMG;IACH,sBAAsB,CAAC,QAAQ,EAAE,QAAQ;IAWzC;;;;;;;;;;;;OAYG;IACH,UAAU,CAAC,OAAO,EAAE,YAAY,EAAE,EAAE,GAAG,EAAE,MAAM;IAK/C;;;OAGG;IACH,UAAU,CAAC,GAAG
,EAAE,MAAM,GAAG,YAAY,EAAE;IAKvC;;;;;OAKG;IACH,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM;IAIpC;;OAEG;IACH,SAAS,CAAC,SAAS,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,IAAI;IAQ/C;;OAEG;IACH,SAAS,CAAC,WAAW,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,MAAM,GAAG,IAAI;IAkB3D;;OAEG;IACH,SAAS,CAAC,gBAAgB,IAAI,IAAI;CAKrC"}
|
package/session_pool/session.js
CHANGED
|
@@ -3,7 +3,7 @@ import ow from 'ow';
|
|
|
3
3
|
import { CookieJar } from 'tough-cookie';
|
|
4
4
|
import { cryptoRandomObjectId } from '@apify/utilities';
|
|
5
5
|
import { browserPoolCookieToToughCookie, getCookiesFromResponse, getDefaultCookieExpirationDate, toughCookieToBrowserPoolCookie, } from '../cookie_utils.js';
|
|
6
|
-
import {
|
|
6
|
+
import { serviceLocator } from '../service_locator.js';
|
|
7
7
|
import { EVENT_SESSION_RETIRED } from './events.js';
|
|
8
8
|
/**
|
|
9
9
|
* Sessions are used to store information such as cookies and can be used for generating fingerprints and proxy sessions.
|
|
@@ -23,6 +23,7 @@ export class Session {
|
|
|
23
23
|
_maxUsageCount;
|
|
24
24
|
sessionPool;
|
|
25
25
|
_errorScore;
|
|
26
|
+
_proxyInfo;
|
|
26
27
|
_cookieJar;
|
|
27
28
|
log;
|
|
28
29
|
get errorScore() {
|
|
@@ -49,6 +50,9 @@ export class Session {
|
|
|
49
50
|
get cookieJar() {
|
|
50
51
|
return this._cookieJar;
|
|
51
52
|
}
|
|
53
|
+
get proxyInfo() {
|
|
54
|
+
return this._proxyInfo;
|
|
55
|
+
}
|
|
52
56
|
/**
|
|
53
57
|
* Session configuration.
|
|
54
58
|
*/
|
|
@@ -57,6 +61,7 @@ export class Session {
|
|
|
57
61
|
sessionPool: ow.object.instanceOf(EventEmitter),
|
|
58
62
|
id: ow.optional.string,
|
|
59
63
|
cookieJar: ow.optional.object,
|
|
64
|
+
proxyInfo: ow.optional.object,
|
|
60
65
|
maxAgeSecs: ow.optional.number,
|
|
61
66
|
userData: ow.optional.object,
|
|
62
67
|
maxErrorScore: ow.optional.number,
|
|
@@ -68,10 +73,11 @@ export class Session {
|
|
|
68
73
|
maxUsageCount: ow.optional.number,
|
|
69
74
|
log: ow.optional.object,
|
|
70
75
|
}));
|
|
71
|
-
const { sessionPool, id = `session_${cryptoRandomObjectId(10)}`, cookieJar = new CookieJar(), maxAgeSecs = 3000, userData = {}, maxErrorScore = 3, errorScoreDecrement = 0.5, createdAt = new Date(), usageCount = 0, errorScore = 0, maxUsageCount = 50, log =
|
|
76
|
+
const { sessionPool, id = `session_${cryptoRandomObjectId(10)}`, cookieJar = new CookieJar(), proxyInfo = undefined, maxAgeSecs = 3000, userData = {}, maxErrorScore = 3, errorScoreDecrement = 0.5, createdAt = new Date(), usageCount = 0, errorScore = 0, maxUsageCount = 50, log = serviceLocator.getLogger(), } = options;
|
|
72
77
|
const { expiresAt = getDefaultCookieExpirationDate(maxAgeSecs) } = options;
|
|
73
78
|
this.log = log.child({ prefix: 'Session' });
|
|
74
79
|
this._cookieJar = cookieJar.setCookie ? cookieJar : CookieJar.fromJSON(JSON.stringify(cookieJar));
|
|
80
|
+
this._proxyInfo = proxyInfo;
|
|
75
81
|
this.id = id;
|
|
76
82
|
this.maxAgeSecs = maxAgeSecs;
|
|
77
83
|
this.userData = userData;
|
|
@@ -133,6 +139,7 @@ export class Session {
|
|
|
133
139
|
return {
|
|
134
140
|
id: this.id,
|
|
135
141
|
cookieJar: this.cookieJar.toJSON(),
|
|
142
|
+
proxyInfo: this._proxyInfo,
|
|
136
143
|
userData: this.userData,
|
|
137
144
|
maxErrorScore: this.maxErrorScore,
|
|
138
145
|
errorScoreDecrement: this.errorScoreDecrement,
|
|
@@ -166,22 +173,6 @@ export class Session {
|
|
|
166
173
|
this._usageCount += 1;
|
|
167
174
|
this._maybeSelfRetire();
|
|
168
175
|
}
|
|
169
|
-
/**
|
|
170
|
-
* With certain status codes: `401`, `403` or `429` we can be certain
|
|
171
|
-
* that the target website is blocking us. This function helps to do this conveniently
|
|
172
|
-
* by retiring the session when such code is received. Optionally, the default status
|
|
173
|
-
* codes can be extended in the second parameter.
|
|
174
|
-
* @param statusCode HTTP status code.
|
|
175
|
-
* @returns Whether the session was retired.
|
|
176
|
-
*/
|
|
177
|
-
retireOnBlockedStatusCodes(statusCode) {
|
|
178
|
-
// eslint-disable-next-line dot-notation -- accessing private property
|
|
179
|
-
const isBlocked = this.sessionPool['blockedStatusCodes'].includes(statusCode);
|
|
180
|
-
if (isBlocked) {
|
|
181
|
-
this.retire();
|
|
182
|
-
}
|
|
183
|
-
return isBlocked;
|
|
184
|
-
}
|
|
185
176
|
/**
|
|
186
177
|
* Saves cookies from an HTTP response to be used with the session.
|
|
187
178
|
* It expects an object with a `headers` property that's either an `Object`
|
|
@@ -192,7 +183,7 @@ export class Session {
|
|
|
192
183
|
setCookiesFromResponse(response) {
|
|
193
184
|
try {
|
|
194
185
|
const cookies = getCookiesFromResponse(response).filter((c) => c);
|
|
195
|
-
this._setCookies(cookies,
|
|
186
|
+
this._setCookies(cookies, response.url);
|
|
196
187
|
}
|
|
197
188
|
catch (e) {
|
|
198
189
|
const err = e;
|
|
@@ -238,7 +229,12 @@ export class Session {
|
|
|
238
229
|
* Sets a cookie within this session for the specific URL.
|
|
239
230
|
*/
|
|
240
231
|
setCookie(rawCookie, url) {
|
|
241
|
-
|
|
232
|
+
try {
|
|
233
|
+
this.cookieJar.setCookieSync(rawCookie, url);
|
|
234
|
+
}
|
|
235
|
+
catch (e) {
|
|
236
|
+
this.log.warning('Could not set cookie.', { url, error: e.message });
|
|
237
|
+
}
|
|
242
238
|
}
|
|
243
239
|
/**
|
|
244
240
|
* Sets cookies.
|
|
@@ -256,7 +252,7 @@ export class Session {
|
|
|
256
252
|
}
|
|
257
253
|
// if invalid cookies are provided just log the exception. No need to retry the request automatically.
|
|
258
254
|
if (errorMessages.length) {
|
|
259
|
-
this.log.
|
|
255
|
+
this.log.warning('Could not set cookies.', { errorMessages });
|
|
260
256
|
}
|
|
261
257
|
}
|
|
262
258
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"session.js","sourceRoot":"","sources":["../../src/session_pool/session.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAG3C,OAAO,EAAE,MAAM,IAAI,CAAC;AAEpB,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;
|
|
1
|
+
{"version":3,"file":"session.js","sourceRoot":"","sources":["../../src/session_pool/session.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAG3C,OAAO,EAAE,MAAM,IAAI,CAAC;AAEpB,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAEzC,OAAO,EAAE,oBAAoB,EAAE,MAAM,kBAAkB,CAAC;AAExD,OAAO,EACH,8BAA8B,EAC9B,sBAAsB,EACtB,8BAA8B,EAC9B,8BAA8B,GACjC,MAAM,oBAAoB,CAAC;AAE5B,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,EAAE,qBAAqB,EAAE,MAAM,aAAa,CAAC;AA4DpD;;;;;GAKG;AACH,MAAM,OAAO,OAAO;IACP,EAAE,CAAS;IACZ,UAAU,CAAS;IAC3B,QAAQ,CAAa;IACb,cAAc,CAAS;IACvB,oBAAoB,CAAS;IAC7B,UAAU,CAAO;IACjB,UAAU,CAAO;IACjB,WAAW,CAAS;IACpB,cAAc,CAAS;IACvB,WAAW,CAA0C;IACrD,WAAW,CAAS;IACpB,UAAU,CAAa;IACvB,UAAU,CAAY;IACtB,GAAG,CAAgB;IAE3B,IAAI,UAAU;QACV,OAAO,IAAI,CAAC,WAAW,CAAC;IAC5B,CAAC;IAED,IAAI,UAAU;QACV,OAAO,IAAI,CAAC,WAAW,CAAC;IAC5B,CAAC;IAED,IAAI,aAAa;QACb,OAAO,IAAI,CAAC,cAAc,CAAC;IAC/B,CAAC;IAED,IAAI,mBAAmB;QACnB,OAAO,IAAI,CAAC,oBAAoB,CAAC;IACrC,CAAC;IAED,IAAI,SAAS;QACT,OAAO,IAAI,CAAC,UAAU,CAAC;IAC3B,CAAC;IAED,IAAI,SAAS;QACT,OAAO,IAAI,CAAC,UAAU,CAAC;IAC3B,CAAC;IAED,IAAI,aAAa;QACb,OAAO,IAAI,CAAC,cAAc,CAAC;IAC/B,CAAC;IAED,IAAI,SAAS;QACT,OAAO,IAAI,CAAC,UAAU,CAAC;IAC3B,CAAC;IAED,IAAI,SAAS;QACT,OAAO,IAAI,CAAC,UAAU,CAAC;IAC3B,CAAC;IAED;;OAEG;IACH,YAAY,OAAuB;QAC/B,EAAE,CACE,OAAO,EACP,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC;YACjB,WAAW,EAAE,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,YAAY,CAAC;YAC/C,EAAE,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;YACtB,SAAS,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;YAC7B,SAAS,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;YAC7B,UAAU,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;YAC9B,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;YAC5B,aAAa,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;YACjC,mBAAmB,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;YACvC,SAAS,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI;YAC3B,SAAS,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI;YAC3B,UAAU,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;YAC9B,UAAU,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;YAC9B,aAAa,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;YACjC,GAAG,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;SAC1B,CAAC,CACL,CAAC;QAEF,MAAM,EACF,WAAW,EACX,EAAE,GAAG,WAAW,oBAAoB,CAAC,EAAE,CAAC,EAAE,EAC1C,SAAS,GAAG,IAA
I,SAAS,EAAE,EAC3B,SAAS,GAAG,SAAS,EACrB,UAAU,GAAG,IAAI,EACjB,QAAQ,GAAG,EAAE,EACb,aAAa,GAAG,CAAC,EACjB,mBAAmB,GAAG,GAAG,EACzB,SAAS,GAAG,IAAI,IAAI,EAAE,EACtB,UAAU,GAAG,CAAC,EACd,UAAU,GAAG,CAAC,EACd,aAAa,GAAG,EAAE,EAClB,GAAG,GAAG,cAAc,CAAC,SAAS,EAAE,GACnC,GAAG,OAAO,CAAC;QAEZ,MAAM,EAAE,SAAS,GAAG,8BAA8B,CAAC,UAAU,CAAC,EAAE,GAAG,OAAO,CAAC;QAE3E,IAAI,CAAC,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC;QAE5C,IAAI,CAAC,UAAU,GAAI,SAAS,CAAC,SAAqB,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC,CAAC;QAC/G,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAC5B,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC;QACb,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;QAC7B,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,cAAc,GAAG,aAAa,CAAC;QACpC,IAAI,CAAC,oBAAoB,GAAG,mBAAmB,CAAC;QAEhD,WAAW;QACX,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAC5B,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAC5B,IAAI,CAAC,WAAW,GAAG,UAAU,CAAC,CAAC,qDAAqD;QACpF,IAAI,CAAC,WAAW,GAAG,UAAU,CAAC,CAAC,yDAAyD;QACxF,IAAI,CAAC,cAAc,GAAG,aAAa,CAAC;QACpC,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;IACnC,CAAC;IAED;;;OAGG;IACH,SAAS;QACL,OAAO,IAAI,CAAC,UAAU,IAAI,IAAI,CAAC,aAAa,CAAC;IACjD,CAAC;IAED;;;;OAIG;IACH,SAAS;QACL,OAAO,IAAI,CAAC,SAAS,IAAI,IAAI,IAAI,EAAE,CAAC;IACxC,CAAC;IAED;;;OAGG;IACH,sBAAsB;QAClB,OAAO,IAAI,CAAC,UAAU,IAAI,IAAI,CAAC,aAAa,CAAC;IACjD,CAAC;IAED;;;OAGG;IACH,QAAQ;QACJ,OAAO,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,IAAI,CAAC,sBAAsB,EAAE,CAAC;IACpF,CAAC;IAED;;;OAGG;IACH,QAAQ;QACJ,IAAI,CAAC,WAAW,IAAI,CAAC,CAAC;QAEtB,IAAI,IAAI,CAAC,WAAW,GAAG,CAAC,EAAE,CAAC;YACvB,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC,oBAAoB,CAAC;QAClD,CAAC;QAED,IAAI,CAAC,gBAAgB,EAAE,CAAC;IAC5B,CAAC;IAED;;;OAGG;IACH,QAAQ;QACJ,OAAO;YACH,EAAE,EAAE,IAAI,CAAC,EAAE;YACX,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAG;YACnC,SAAS,EAAE,IAAI,CAAC,UAAU;YAC1B,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,aAAa,EAAE,IAAI,CAAC,aAAa;YACjC,mBAAmB,EAAE,IAAI,CAAC,mBAAmB;YAC7C,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE;YACvC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE;YACvC,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,aAAa,EAAE,I
AAI,CAAC,aAAa;YACjC,UAAU,EAAE,IAAI,CAAC,UAAU;SAC9B,CAAC;IACN,CAAC;IAED;;;;;;OAMG;IACH,MAAM;QACF,6DAA6D;QAC7D,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC,cAAc,CAAC;QACxC,IAAI,CAAC,WAAW,IAAI,CAAC,CAAC;QAEtB,wDAAwD;QACxD,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,qBAAqB,EAAE,IAAI,CAAC,CAAC;IACvD,CAAC;IAED;;;OAGG;IACH,OAAO;QACH,IAAI,CAAC,WAAW,IAAI,CAAC,CAAC;QACtB,IAAI,CAAC,WAAW,IAAI,CAAC,CAAC;QAEtB,IAAI,CAAC,gBAAgB,EAAE,CAAC;IAC5B,CAAC;IAED;;;;;;OAMG;IACH,sBAAsB,CAAC,QAAkB;QACrC,IAAI,CAAC;YACD,MAAM,OAAO,GAAG,sBAAsB,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;YAClE,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,QAAQ,CAAC,GAAG,CAAC,CAAC;QAC5C,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACT,MAAM,GAAG,GAAG,CAAU,CAAC;YACvB,+DAA+D;YAC/D,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE,qCAAqC,CAAC,CAAC;QACnE,CAAC;IACL,CAAC;IAED;;;;;;;;;;;;OAYG;IACH,UAAU,CAAC,OAAuB,EAAE,GAAW;QAC3C,MAAM,iBAAiB,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,8BAA8B,CAAC,CAAC,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;QACjG,IAAI,CAAC,WAAW,CAAC,iBAAiB,EAAE,GAAG,CAAC,CAAC;IAC7C,CAAC;IAED;;;OAGG;IACH,UAAU,CAAC,GAAW;QAClB,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;QACnD,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,8BAA8B,CAAC,CAAC,CAAC,CAAC,CAAC;IACjE,CAAC;IAED;;;;;OAKG;IACH,eAAe,CAAC,GAAW;QACvB,OAAO,IAAI,CAAC,SAAS,CAAC,mBAAmB,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;IACvD,CAAC;IAED;;OAEG;IACH,SAAS,CAAC,SAAiB,EAAE,GAAW;QACpC,IAAI,CAAC;YACD,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;QACjD,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACT,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,uBAAuB,EAAE,EAAE,GAAG,EAAE,KAAK,EAAG,CAAW,CAAC,OAAO,EAAE,CAAC,CAAC;QACpF,CAAC;IACL,CAAC;IAED;;OAEG;IACO,WAAW,CAAC,OAAiB,EAAE,GAAW;QAChD,MAAM,aAAa,GAAa,EAAE,CAAC;QAEnC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC3B,IAAI,CAAC;gBACD,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,WAAW,EAAE,KAAK,EAAE,CAAC,CAAC;YACtE,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACT,MAAM,GAAG,GAAG,CAAU,CAAC;gBACvB,aAAa,CAAC,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;YACpC,CAAC;QACL,CAAC;QAED,sGAAsG;QACtG,IAAI,aAAa,CAAC
,MAAM,EAAE,CAAC;YACvB,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,wBAAwB,EAAE,EAAE,aAAa,EAAE,CAAC,CAAC;QAClE,CAAC;IACL,CAAC;IAED;;OAEG;IACO,gBAAgB;QACtB,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,EAAE,CAAC;YACnB,IAAI,CAAC,MAAM,EAAE,CAAC;QAClB,CAAC;IACL,CAAC;CACJ"}
|