@crawlee/core 4.0.0-beta.6 → 4.0.0-beta.61
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -5
- package/autoscaling/autoscaled_pool.d.ts +3 -5
- package/autoscaling/autoscaled_pool.d.ts.map +1 -1
- package/autoscaling/autoscaled_pool.js +3 -9
- package/autoscaling/autoscaled_pool.js.map +1 -1
- package/autoscaling/snapshotter.d.ts +3 -13
- package/autoscaling/snapshotter.d.ts.map +1 -1
- package/autoscaling/snapshotter.js +18 -29
- package/autoscaling/snapshotter.js.map +1 -1
- package/autoscaling/system_status.d.ts +0 -3
- package/autoscaling/system_status.d.ts.map +1 -1
- package/autoscaling/system_status.js +2 -3
- package/autoscaling/system_status.js.map +1 -1
- package/configuration.d.ts +85 -227
- package/configuration.d.ts.map +1 -1
- package/configuration.js +159 -223
- package/configuration.js.map +1 -1
- package/cookie_utils.d.ts +4 -2
- package/cookie_utils.d.ts.map +1 -1
- package/cookie_utils.js +18 -12
- package/cookie_utils.js.map +1 -1
- package/crawlers/context_pipeline.d.ts +71 -0
- package/crawlers/context_pipeline.d.ts.map +1 -0
- package/crawlers/context_pipeline.js +123 -0
- package/crawlers/context_pipeline.js.map +1 -0
- package/crawlers/crawler_commons.d.ts +19 -28
- package/crawlers/crawler_commons.d.ts.map +1 -1
- package/crawlers/crawler_commons.js +12 -20
- package/crawlers/crawler_commons.js.map +1 -1
- package/crawlers/crawler_utils.d.ts +2 -2
- package/crawlers/crawler_utils.d.ts.map +1 -1
- package/crawlers/crawler_utils.js +1 -1
- package/crawlers/crawler_utils.js.map +1 -1
- package/crawlers/error_snapshotter.d.ts +3 -2
- package/crawlers/error_snapshotter.d.ts.map +1 -1
- package/crawlers/error_snapshotter.js +2 -2
- package/crawlers/error_snapshotter.js.map +1 -1
- package/crawlers/error_tracker.d.ts +2 -1
- package/crawlers/error_tracker.d.ts.map +1 -1
- package/crawlers/error_tracker.js.map +1 -1
- package/crawlers/index.d.ts +1 -1
- package/crawlers/index.d.ts.map +1 -1
- package/crawlers/index.js +1 -1
- package/crawlers/index.js.map +1 -1
- package/crawlers/internals/types.d.ts +8 -0
- package/crawlers/internals/types.d.ts.map +1 -0
- package/crawlers/internals/types.js +2 -0
- package/crawlers/internals/types.js.map +1 -0
- package/crawlers/statistics.d.ts +15 -15
- package/crawlers/statistics.d.ts.map +1 -1
- package/crawlers/statistics.js +21 -24
- package/crawlers/statistics.js.map +1 -1
- package/enqueue_links/enqueue_links.d.ts +32 -18
- package/enqueue_links/enqueue_links.d.ts.map +1 -1
- package/enqueue_links/enqueue_links.js +45 -24
- package/enqueue_links/enqueue_links.js.map +1 -1
- package/enqueue_links/shared.d.ts +25 -8
- package/enqueue_links/shared.d.ts.map +1 -1
- package/enqueue_links/shared.js +69 -37
- package/enqueue_links/shared.js.map +1 -1
- package/errors.d.ts +33 -3
- package/errors.d.ts.map +1 -1
- package/errors.js +48 -4
- package/errors.js.map +1 -1
- package/events/event_manager.d.ts +8 -5
- package/events/event_manager.d.ts.map +1 -1
- package/events/event_manager.js +7 -9
- package/events/event_manager.js.map +1 -1
- package/events/local_event_manager.d.ts +14 -4
- package/events/local_event_manager.d.ts.map +1 -1
- package/events/local_event_manager.js +33 -39
- package/events/local_event_manager.js.map +1 -1
- package/index.d.ts +3 -2
- package/index.d.ts.map +1 -1
- package/index.js +2 -1
- package/index.js.map +1 -1
- package/log.d.ts +82 -2
- package/log.d.ts.map +1 -1
- package/log.js +102 -0
- package/log.js.map +1 -1
- package/package.json +9 -10
- package/proxy_configuration.d.ts +14 -148
- package/proxy_configuration.d.ts.map +1 -1
- package/proxy_configuration.js +19 -167
- package/proxy_configuration.js.map +1 -1
- package/recoverable_state.d.ts +121 -0
- package/recoverable_state.d.ts.map +1 -0
- package/recoverable_state.js +142 -0
- package/recoverable_state.js.map +1 -0
- package/request.d.ts +74 -10
- package/request.d.ts.map +1 -1
- package/request.js +85 -23
- package/request.js.map +1 -1
- package/router.d.ts.map +1 -1
- package/router.js.map +1 -1
- package/serialization.js +1 -1
- package/serialization.js.map +1 -1
- package/service_locator.d.ts +157 -0
- package/service_locator.d.ts.map +1 -0
- package/service_locator.js +234 -0
- package/service_locator.js.map +1 -0
- package/session_pool/index.d.ts +0 -1
- package/session_pool/index.d.ts.map +1 -1
- package/session_pool/index.js +0 -1
- package/session_pool/index.js.map +1 -1
- package/session_pool/session.d.ts +26 -72
- package/session_pool/session.d.ts.map +1 -1
- package/session_pool/session.js +36 -98
- package/session_pool/session.js.map +1 -1
- package/session_pool/session_pool.d.ts +65 -71
- package/session_pool/session_pool.d.ts.map +1 -1
- package/session_pool/session_pool.js +101 -100
- package/session_pool/session_pool.js.map +1 -1
- package/storages/dataset.d.ts +90 -46
- package/storages/dataset.d.ts.map +1 -1
- package/storages/dataset.js +149 -121
- package/storages/dataset.js.map +1 -1
- package/storages/index.d.ts +3 -1
- package/storages/index.d.ts.map +1 -1
- package/storages/index.js +3 -1
- package/storages/index.js.map +1 -1
- package/storages/key_value_store.d.ts +104 -22
- package/storages/key_value_store.d.ts.map +1 -1
- package/storages/key_value_store.js +166 -51
- package/storages/key_value_store.js.map +1 -1
- package/storages/request_list.d.ts +9 -9
- package/storages/request_list.d.ts.map +1 -1
- package/storages/request_list.js +13 -8
- package/storages/request_list.js.map +1 -1
- package/storages/request_list_adapter.d.ts +58 -0
- package/storages/request_list_adapter.d.ts.map +1 -0
- package/storages/request_list_adapter.js +81 -0
- package/storages/request_list_adapter.js.map +1 -0
- package/storages/request_manager_tandem.d.ts +68 -0
- package/storages/request_manager_tandem.d.ts.map +1 -0
- package/storages/request_manager_tandem.js +124 -0
- package/storages/request_manager_tandem.js.map +1 -0
- package/storages/request_provider.d.ts +87 -22
- package/storages/request_provider.d.ts.map +1 -1
- package/storages/request_provider.js +127 -77
- package/storages/request_provider.js.map +1 -1
- package/storages/request_queue.d.ts +1 -3
- package/storages/request_queue.d.ts.map +1 -1
- package/storages/request_queue.js +2 -4
- package/storages/request_queue.js.map +1 -1
- package/storages/request_queue_v2.d.ts +3 -3
- package/storages/request_queue_v2.d.ts.map +1 -1
- package/storages/request_queue_v2.js +4 -5
- package/storages/request_queue_v2.js.map +1 -1
- package/storages/sitemap_request_list.d.ts +5 -5
- package/storages/sitemap_request_list.d.ts.map +1 -1
- package/storages/sitemap_request_list.js +10 -7
- package/storages/sitemap_request_list.js.map +1 -1
- package/storages/storage_instance_manager.d.ts +91 -0
- package/storages/storage_instance_manager.d.ts.map +1 -0
- package/storages/storage_instance_manager.js +236 -0
- package/storages/storage_instance_manager.js.map +1 -0
- package/storages/utils.d.ts +47 -1
- package/storages/utils.d.ts.map +1 -1
- package/storages/utils.js +57 -5
- package/storages/utils.js.map +1 -1
- package/typedefs.d.ts +1 -1
- package/typedefs.d.ts.map +1 -1
- package/validators.d.ts +4 -0
- package/validators.d.ts.map +1 -1
- package/validators.js +4 -0
- package/validators.js.map +1 -1
- package/crawlers/crawler_extension.d.ts +0 -12
- package/crawlers/crawler_extension.d.ts.map +0 -1
- package/crawlers/crawler_extension.js +0 -14
- package/crawlers/crawler_extension.js.map +0 -1
- package/http_clients/base-http-client.d.ts +0 -134
- package/http_clients/base-http-client.d.ts.map +0 -1
- package/http_clients/base-http-client.js +0 -33
- package/http_clients/base-http-client.js.map +0 -1
- package/http_clients/form-data-like.d.ts +0 -67
- package/http_clients/form-data-like.d.ts.map +0 -1
- package/http_clients/form-data-like.js +0 -5
- package/http_clients/form-data-like.js.map +0 -1
- package/http_clients/got-scraping-http-client.d.ts +0 -15
- package/http_clients/got-scraping-http-client.d.ts.map +0 -1
- package/http_clients/got-scraping-http-client.js +0 -69
- package/http_clients/got-scraping-http-client.js.map +0 -1
- package/http_clients/index.d.ts +0 -3
- package/http_clients/index.d.ts.map +0 -1
- package/http_clients/index.js +0 -3
- package/http_clients/index.js.map +0 -1
- package/session_pool/events.d.ts +0 -3
- package/session_pool/events.d.ts.map +0 -1
- package/session_pool/events.js +0 -3
- package/session_pool/events.js.map +0 -1
- package/storages/storage_manager.d.ts +0 -58
- package/storages/storage_manager.d.ts.map +0 -1
- package/storages/storage_manager.js +0 -105
- package/storages/storage_manager.js.map +0 -1
- package/tsconfig.build.tsbuildinfo +0 -1
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
import type { StorageClient } from '@crawlee/types';
|
|
2
|
+
import { Configuration } from './configuration.js';
|
|
3
|
+
import type { EventManager } from './events/event_manager.js';
|
|
4
|
+
import type { CrawleeLogger } from './log.js';
|
|
5
|
+
import { StorageInstanceManager } from './storages/storage_instance_manager.js';
|
|
6
|
+
interface ServiceLocatorInterface {
|
|
7
|
+
/**
|
|
8
|
+
* Get the configuration.
|
|
9
|
+
* Creates a default Configuration instance if none has been set.
|
|
10
|
+
*/
|
|
11
|
+
getConfiguration(): Configuration;
|
|
12
|
+
/**
|
|
13
|
+
* Set the configuration.
|
|
14
|
+
*
|
|
15
|
+
* @param configuration The configuration to set
|
|
16
|
+
* @throws {ServiceConflictError} If a different configuration has already been set or retrieved
|
|
17
|
+
*/
|
|
18
|
+
setConfiguration(configuration: Configuration): void;
|
|
19
|
+
/**
|
|
20
|
+
* Get the event manager.
|
|
21
|
+
* Creates a default LocalEventManager instance if none has been set.
|
|
22
|
+
*/
|
|
23
|
+
getEventManager(): EventManager;
|
|
24
|
+
/**
|
|
25
|
+
* Set the event manager.
|
|
26
|
+
*
|
|
27
|
+
* @param eventManager The event manager to set
|
|
28
|
+
* @throws {ServiceConflictError} If a different event manager has already been set or retrieved
|
|
29
|
+
*/
|
|
30
|
+
setEventManager(eventManager: EventManager): void;
|
|
31
|
+
/**
|
|
32
|
+
* Get the storage client.
|
|
33
|
+
* Creates a default MemoryStorage instance if none has been set.
|
|
34
|
+
*/
|
|
35
|
+
getStorageClient(): StorageClient;
|
|
36
|
+
/**
|
|
37
|
+
* Set the storage client.
|
|
38
|
+
*
|
|
39
|
+
* @param storageClient The storage client to set
|
|
40
|
+
* @throws {ServiceConflictError} If a different storage client has already been set or retrieved
|
|
41
|
+
*/
|
|
42
|
+
setStorageClient(storageClient: StorageClient): void;
|
|
43
|
+
/**
|
|
44
|
+
* Get the logger.
|
|
45
|
+
* Falls back to the default `@apify/log` logger (wrapped in `ApifyLogAdapter`) if none has been set.
|
|
46
|
+
*/
|
|
47
|
+
getLogger(): CrawleeLogger;
|
|
48
|
+
/**
|
|
49
|
+
* Set the logger.
|
|
50
|
+
*
|
|
51
|
+
* @param logger The logger to set
|
|
52
|
+
* @throws {ServiceConflictError} If a different logger has already been set or retrieved
|
|
53
|
+
*/
|
|
54
|
+
setLogger(logger: CrawleeLogger): void;
|
|
55
|
+
/**
|
|
56
|
+
* Get a child logger with the given prefix.
|
|
57
|
+
* Equivalent to `getLogger().child({ prefix })`.
|
|
58
|
+
*/
|
|
59
|
+
getChildLog(prefix: string): CrawleeLogger;
|
|
60
|
+
/**
|
|
61
|
+
* Get the storage instance manager (shared across all storage types).
|
|
62
|
+
*/
|
|
63
|
+
getStorageInstanceManager(): StorageInstanceManager;
|
|
64
|
+
/**
|
|
65
|
+
* Resets the service locator to its initial state.
|
|
66
|
+
* Used mainly for testing purposes.
|
|
67
|
+
* @internal
|
|
68
|
+
*/
|
|
69
|
+
reset(): void;
|
|
70
|
+
}
|
|
71
|
+
/**
|
|
72
|
+
* Service locator for managing the services used by Crawlee.
|
|
73
|
+
*
|
|
74
|
+
* All services are initialized to their default value lazily.
|
|
75
|
+
*
|
|
76
|
+
* There are two primary usage patterns:
|
|
77
|
+
*
|
|
78
|
+
* **1. Global service locator (for default services):**
|
|
79
|
+
* ```typescript
|
|
80
|
+
* import { serviceLocator, BasicCrawler } from 'crawlee';
|
|
81
|
+
*
|
|
82
|
+
* // Optionally configure global services before creating crawlers
|
|
83
|
+
* serviceLocator.setStorageClient(myCustomClient);
|
|
84
|
+
*
|
|
85
|
+
* // Crawler uses global services
|
|
86
|
+
* const crawler = new BasicCrawler({ ... });
|
|
87
|
+
* ```
|
|
88
|
+
*
|
|
89
|
+
* **2. Per-crawler services (recommended for isolation):**
|
|
90
|
+
* ```typescript
|
|
91
|
+
* import { BasicCrawler, Configuration, LocalEventManager } from 'crawlee';
|
|
92
|
+
* import { MemoryStorage } from '@crawlee/memory-storage';
|
|
93
|
+
*
|
|
94
|
+
* const crawler = new BasicCrawler({
|
|
95
|
+
* requestHandler: async ({ request }) => { ... },
|
|
96
|
+
* configuration: new Configuration({ ... }), // custom config
|
|
97
|
+
* storageClient: new MemoryStorage(), // custom storage
|
|
98
|
+
* eventManager: LocalEventManager.fromConfig(), // custom events
|
|
99
|
+
* });
|
|
100
|
+
* // Crawler has its own isolated ServiceLocator instance
|
|
101
|
+
* ```
|
|
102
|
+
*/
|
|
103
|
+
export declare class ServiceLocator implements ServiceLocatorInterface {
|
|
104
|
+
private configuration?;
|
|
105
|
+
private eventManager?;
|
|
106
|
+
private storageClient?;
|
|
107
|
+
private logger?;
|
|
108
|
+
/**
|
|
109
|
+
* Unified storage instance manager for Dataset, KeyValueStore, and RequestQueue.
|
|
110
|
+
* Shared across all ServiceLocator instances (global singleton), matching crawlee-python.
|
|
111
|
+
* Per-crawler isolation is achieved via `clientCacheKey`, not separate manager instances.
|
|
112
|
+
*/
|
|
113
|
+
private static storageInstanceManager?;
|
|
114
|
+
/**
|
|
115
|
+
* Creates a new ServiceLocator instance.
|
|
116
|
+
*
|
|
117
|
+
* @param configuration Optional configuration instance to use
|
|
118
|
+
* @param eventManager Optional event manager instance to use
|
|
119
|
+
* @param storageClient Optional storage client instance to use
|
|
120
|
+
* @param logger Optional logger instance to use
|
|
121
|
+
*/
|
|
122
|
+
constructor(configuration?: Configuration, eventManager?: EventManager, storageClient?: StorageClient, logger?: CrawleeLogger);
|
|
123
|
+
getConfiguration(): Configuration;
|
|
124
|
+
setConfiguration(configuration: Configuration): void;
|
|
125
|
+
getEventManager(): EventManager;
|
|
126
|
+
setEventManager(eventManager: EventManager): void;
|
|
127
|
+
getStorageClient(): StorageClient;
|
|
128
|
+
setStorageClient(storageClient: StorageClient): void;
|
|
129
|
+
getLogger(): CrawleeLogger;
|
|
130
|
+
setLogger(logger: CrawleeLogger): void;
|
|
131
|
+
getChildLog(prefix: string): CrawleeLogger;
|
|
132
|
+
getStorageInstanceManager(): StorageInstanceManager;
|
|
133
|
+
reset(): void;
|
|
134
|
+
}
|
|
135
|
+
/**
|
|
136
|
+
* Wraps all methods on `target` so that any code they invoke will see the given
|
|
137
|
+
* `serviceLocator` via `AsyncLocalStorage`, rather than the global one.
|
|
138
|
+
*
|
|
139
|
+
* Walks the prototype chain and replaces each method on the *instance* (not the prototype)
|
|
140
|
+
* with a wrapper that calls `serviceLocatorStorage.run(serviceLocator, originalMethod)`.
|
|
141
|
+
*
|
|
142
|
+
* The `AsyncLocalStorage` context propagates through the entire sync/async call tree of each
|
|
143
|
+
* wrapped method — including `super` calls, since the prototype methods execute within the
|
|
144
|
+
* context established by the instance-level wrapper.
|
|
145
|
+
*
|
|
146
|
+
* @internal
|
|
147
|
+
* @returns Scope control functions: `run` executes a callback within the scoped context,
|
|
148
|
+
* `enterScope`/`exitScope` allow entering/leaving the scope imperatively (e.g., for constructor bodies).
|
|
149
|
+
*/
|
|
150
|
+
export declare function bindMethodsToServiceLocator(serviceLocator: ServiceLocator, target: {}): {
|
|
151
|
+
run: <T>(fn: () => T) => T;
|
|
152
|
+
enterScope: () => void;
|
|
153
|
+
exitScope: () => void;
|
|
154
|
+
};
|
|
155
|
+
export declare const serviceLocator: ServiceLocatorInterface;
|
|
156
|
+
export {};
|
|
157
|
+
//# sourceMappingURL=service_locator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"service_locator.d.ts","sourceRoot":"","sources":["../src/service_locator.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAIpD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAEnD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,2BAA2B,CAAC;AAE9D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAE9C,OAAO,EAAE,sBAAsB,EAAE,MAAM,wCAAwC,CAAC;AAEhF,UAAU,uBAAuB;IAC7B;;;OAGG;IACH,gBAAgB,IAAI,aAAa,CAAC;IAElC;;;;;OAKG;IACH,gBAAgB,CAAC,aAAa,EAAE,aAAa,GAAG,IAAI,CAAC;IAErD;;;OAGG;IACH,eAAe,IAAI,YAAY,CAAC;IAEhC;;;;;OAKG;IACH,eAAe,CAAC,YAAY,EAAE,YAAY,GAAG,IAAI,CAAC;IAElD;;;OAGG;IACH,gBAAgB,IAAI,aAAa,CAAC;IAElC;;;;;OAKG;IACH,gBAAgB,CAAC,aAAa,EAAE,aAAa,GAAG,IAAI,CAAC;IAErD;;;OAGG;IACH,SAAS,IAAI,aAAa,CAAC;IAE3B;;;;;OAKG;IACH,SAAS,CAAC,MAAM,EAAE,aAAa,GAAG,IAAI,CAAC;IAEvC;;;OAGG;IACH,WAAW,CAAC,MAAM,EAAE,MAAM,GAAG,aAAa,CAAC;IAE3C;;OAEG;IACH,yBAAyB,IAAI,sBAAsB,CAAC;IAEpD;;;;OAIG;IACH,KAAK,IAAI,IAAI,CAAC;CACjB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACH,qBAAa,cAAe,YAAW,uBAAuB;IAC1D,OAAO,CAAC,aAAa,CAAC,CAAgB;IACtC,OAAO,CAAC,YAAY,CAAC,CAAe;IACpC,OAAO,CAAC,aAAa,CAAC,CAAgB;IACtC,OAAO,CAAC,MAAM,CAAC,CAAgB;IAE/B;;;;OAIG;IACH,OAAO,CAAC,MAAM,CAAC,sBAAsB,CAAC,CAAyB;IAE/D;;;;;;;OAOG;gBAEC,aAAa,CAAC,EAAE,aAAa,EAC7B,YAAY,CAAC,EAAE,YAAY,EAC3B,aAAa,CAAC,EAAE,aAAa,EAC7B,MAAM,CAAC,EAAE,aAAa;IAQ1B,gBAAgB,IAAI,aAAa;IAQjC,gBAAgB,CAAC,aAAa,EAAE,aAAa,GAAG,IAAI;IAcpD,eAAe,IAAI,YAAY;IAc/B,eAAe,CAAC,YAAY,EAAE,YAAY,GAAG,IAAI;IAcjD,gBAAgB,IAAI,aAAa;IAkBjC,gBAAgB,CAAC,aAAa,EAAE,aAAa,GAAG,IAAI;IAcpD,SAAS,IAAI,aAAa;IAO1B,SAAS,CAAC,MAAM,EAAE,aAAa,GAAG,IAAI;IAYtC,WAAW,CAAC,MAAM,EAAE,MAAM,GAAG,aAAa;IAI1C,yBAAyB,IAAI,sBAAsB;IAOnD,KAAK,IAAI,IAAI;CAQhB;AASD;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,2BAA2B,CACvC,cAAc,EAAE,cAAc,EAC9B,MAAM,EAAE,EAAE,GACX;IAAE,GAAG,EAAE,CAAC,CAAC,EAAE,EAAE,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;IAAC,UAAU,EAAE,MAAM,IAAI,CAAC;IAAC,SAAS,EAAE,MAAM,IAAI,CAAA;CAAE,CA4C/E;AAED,eAAO,MAAM,cAAc,yBAczB,CAAC"}
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
import { AsyncLocalStorage } from 'node:async_hooks';
|
|
2
|
+
import { MemoryStorage } from '@crawlee/memory-storage';
|
|
3
|
+
import log from '@apify/log';
|
|
4
|
+
import { Configuration } from './configuration.js';
|
|
5
|
+
import { ServiceConflictError } from './errors.js';
|
|
6
|
+
import { LocalEventManager } from './events/local_event_manager.js';
|
|
7
|
+
import { ApifyLogAdapter } from './log.js';
|
|
8
|
+
import { StorageInstanceManager } from './storages/storage_instance_manager.js';
|
|
9
|
+
/**
|
|
10
|
+
* Service locator for managing the services used by Crawlee.
|
|
11
|
+
*
|
|
12
|
+
* All services are initialized to their default value lazily.
|
|
13
|
+
*
|
|
14
|
+
* There are two primary usage patterns:
|
|
15
|
+
*
|
|
16
|
+
* **1. Global service locator (for default services):**
|
|
17
|
+
* ```typescript
|
|
18
|
+
* import { serviceLocator, BasicCrawler } from 'crawlee';
|
|
19
|
+
*
|
|
20
|
+
* // Optionally configure global services before creating crawlers
|
|
21
|
+
* serviceLocator.setStorageClient(myCustomClient);
|
|
22
|
+
*
|
|
23
|
+
* // Crawler uses global services
|
|
24
|
+
* const crawler = new BasicCrawler({ ... });
|
|
25
|
+
* ```
|
|
26
|
+
*
|
|
27
|
+
* **2. Per-crawler services (recommended for isolation):**
|
|
28
|
+
* ```typescript
|
|
29
|
+
* import { BasicCrawler, Configuration, LocalEventManager } from 'crawlee';
|
|
30
|
+
* import { MemoryStorage } from '@crawlee/memory-storage';
|
|
31
|
+
*
|
|
32
|
+
* const crawler = new BasicCrawler({
|
|
33
|
+
* requestHandler: async ({ request }) => { ... },
|
|
34
|
+
* configuration: new Configuration({ ... }), // custom config
|
|
35
|
+
* storageClient: new MemoryStorage(), // custom storage
|
|
36
|
+
* eventManager: LocalEventManager.fromConfig(), // custom events
|
|
37
|
+
* });
|
|
38
|
+
* // Crawler has its own isolated ServiceLocator instance
|
|
39
|
+
* ```
|
|
40
|
+
*/
|
|
41
|
+
export class ServiceLocator {
    /** Configuration held by this locator; `undefined` until provided or lazily created. */
    configuration;
    /** Event manager held by this locator; `undefined` until provided or lazily created. */
    eventManager;
    /** Storage client held by this locator; `undefined` until provided or lazily created. */
    storageClient;
    /** Logger held by this locator; `undefined` until provided or lazily created. */
    logger;
    /**
     * Unified storage instance manager for Dataset, KeyValueStore, and RequestQueue.
     * Shared across all ServiceLocator instances (global singleton), matching crawlee-python.
     * Per-crawler isolation is achieved via `clientCacheKey`, not separate manager instances.
     */
    static storageInstanceManager;

    /**
     * Creates a new ServiceLocator instance.
     *
     * @param configuration Optional configuration instance to use
     * @param eventManager Optional event manager instance to use
     * @param storageClient Optional storage client instance to use
     * @param logger Optional logger instance to use
     */
    constructor(configuration, eventManager, storageClient, logger) {
        this.configuration = configuration;
        this.eventManager = eventManager;
        this.storageClient = storageClient;
        this.logger = logger;
    }

    /**
     * Returns the configuration, creating and caching a default `Configuration`
     * on first access when none is held yet.
     */
    getConfiguration() {
        if (!this.configuration) {
            this.getLogger().debug('No configuration set, implicitly creating and using default Configuration.');
            this.configuration = new Configuration();
        }
        return this.configuration;
    }

    /**
     * Stores the configuration.
     *
     * @param configuration The configuration to set
     * @throws {ServiceConflictError} If a different configuration is already held
     *   (passed to the constructor, set earlier, or implicitly created by a getter)
     */
    setConfiguration(configuration) {
        assignServiceOnce(this, 'configuration', 'Configuration', configuration);
    }

    /**
     * Returns the event manager, creating a `LocalEventManager` from the
     * configuration on first access when none is held yet.
     */
    getEventManager() {
        if (!this.eventManager) {
            this.getLogger().debug('No event manager set, implicitly creating and using default LocalEventManager.');
            // Building the default event manager needs a configuration, so one is
            // created implicitly below if missing; warn because that locks it in.
            if (!this.configuration) {
                this.getLogger().warning('Implicit creation of event manager will implicitly set configuration as side effect. ' +
                    'It is advised to explicitly first set the configuration instead.');
            }
            this.eventManager = LocalEventManager.fromConfig(this.getConfiguration());
        }
        return this.eventManager;
    }

    /**
     * Stores the event manager.
     *
     * @param eventManager The event manager to set
     * @throws {ServiceConflictError} If a different event manager is already held
     */
    setEventManager(eventManager) {
        assignServiceOnce(this, 'eventManager', 'EventManager', eventManager);
    }

    /**
     * Returns the storage client, creating a `MemoryStorage` on first access
     * when none is held yet. The default client takes `persistStorage` from the
     * configuration and a child logger prefixed with `MemoryStorage`.
     */
    getStorageClient() {
        if (!this.storageClient) {
            this.getLogger().debug('No storage client set, implicitly creating and using default MemoryStorage.');
            // Same side effect as in getEventManager(): the configuration gets created implicitly.
            if (!this.configuration) {
                this.getLogger().warning('Implicit creation of storage client will implicitly set configuration as side effect. ' +
                    'It is advised to explicitly first set the configuration instead.');
            }
            const config = this.getConfiguration();
            this.storageClient = new MemoryStorage({
                persistStorage: config.persistStorage,
                logger: this.getChildLog('MemoryStorage'),
            });
        }
        return this.storageClient;
    }

    /**
     * Stores the storage client.
     *
     * @param storageClient The storage client to set
     * @throws {ServiceConflictError} If a different storage client is already held
     */
    setStorageClient(storageClient) {
        assignServiceOnce(this, 'storageClient', 'StorageClient', storageClient);
    }

    /**
     * Returns the logger, wrapping the default `@apify/log` logger in an
     * `ApifyLogAdapter` on first access when none is held yet.
     */
    getLogger() {
        if (!this.logger) {
            this.logger = new ApifyLogAdapter(log);
        }
        return this.logger;
    }

    /**
     * Stores the logger.
     *
     * @param logger The logger to set
     * @throws {ServiceConflictError} If a different logger is already held
     */
    setLogger(logger) {
        assignServiceOnce(this, 'logger', 'Logger', logger);
    }

    /**
     * Returns a child logger with the given prefix.
     * Equivalent to `getLogger().child({ prefix })`.
     */
    getChildLog(prefix) {
        return this.getLogger().child({ prefix });
    }

    /**
     * Returns the storage instance manager shared by every ServiceLocator,
     * creating it on first access.
     */
    getStorageInstanceManager() {
        if (!ServiceLocator.storageInstanceManager) {
            ServiceLocator.storageInstanceManager = new StorageInstanceManager();
        }
        return ServiceLocator.storageInstanceManager;
    }

    /**
     * Resets the service locator to its initial state.
     * Also clears and drops the shared (static) storage instance manager.
     * Used mainly for testing purposes.
     * @internal
     */
    reset() {
        this.configuration = undefined;
        this.eventManager = undefined;
        this.storageClient = undefined;
        this.logger = undefined;
        ServiceLocator.storageInstanceManager?.clearCache();
        ServiceLocator.storageInstanceManager = undefined;
    }
}

/**
 * Shared implementation of the `set*` methods: a service slot may be filled once.
 *
 * - Passing the instance that is already held is a no-op.
 * - Passing a different instance while one is held throws, regardless of whether
 *   the held one was provided explicitly or created implicitly by a getter.
 *
 * @param locator The ServiceLocator whose slot is being written
 * @param field Name of the instance property backing the slot
 * @param serviceName Human-readable service name passed to `ServiceConflictError`
 * @param candidate The new service instance
 * @throws {ServiceConflictError} If a different instance already occupies the slot
 */
function assignServiceOnce(locator, field, serviceName, candidate) {
    const current = locator[field];
    if (current === candidate) {
        return;
    }
    if (current) {
        throw new ServiceConflictError(serviceName, candidate, current);
    }
    locator[field] = candidate;
}
|
|
165
|
+
/**
|
|
166
|
+
* Used as the default service provider when crawlers don't specify custom services.
|
|
167
|
+
*/
|
|
168
|
+
const globalServiceLocator = new ServiceLocator(); // fallback the exported `serviceLocator` proxy resolves to when no scoped locator is active in AsyncLocalStorage
|
|
169
|
+
const serviceLocatorStorage = new AsyncLocalStorage();
/**
 * Wraps all methods on `target` so that any code they invoke will see the given
 * `serviceLocator` via `AsyncLocalStorage`, rather than the global one.
 *
 * Walks the prototype chain and replaces each method on the *instance* (not the prototype)
 * with a wrapper that calls `serviceLocatorStorage.run(serviceLocator, originalMethod)`.
 * The walk goes from the most-derived prototype towards the base, and the first definition
 * found for a given key wins, so subclass overrides (and subclass getters/fields that shadow
 * a base method) keep precedence over base-class methods of the same name.
 *
 * The `AsyncLocalStorage` context propagates through the entire sync/async call tree of each
 * wrapped method — including `super` calls, since the prototype methods execute within the
 * context established by the instance-level wrapper.
 *
 * @internal
 * @param serviceLocator The locator that wrapped methods and the returned scope functions activate
 * @param target The instance whose inherited methods get instance-level wrappers
 * @returns Scope control functions: `run` executes a callback within the scoped context,
 * `enterScope`/`exitScope` allow entering/leaving the scope imperatively (e.g., for constructor bodies).
 */
export function bindMethodsToServiceLocator(serviceLocator, target) {
    // Keys already resolved on a more-derived prototype. Without this, the base-class
    // iteration would overwrite the wrapper of a subclass override with the base method.
    const resolvedKeys = new Set();
    let proto = Object.getPrototypeOf(target);
    while (proto !== null && proto !== Object.prototype) {
        const propertyKeys = [...Object.getOwnPropertyNames(proto), ...Object.getOwnPropertySymbols(proto)];
        for (const propertyKey of propertyKeys) {
            if (resolvedKeys.has(propertyKey)) {
                continue;
            }
            resolvedKeys.add(propertyKey);
            const descriptor = Object.getOwnPropertyDescriptor(proto, propertyKey);
            // We use property descriptors rather than accessing target[propertyKey] directly,
            // because that would trigger getters and cause unwanted side effects.
            // Skip getters, setters, and constructors — only wrap regular methods.
            if (propertyKey === 'constructor' ||
                !descriptor ||
                descriptor.get ||
                descriptor.set ||
                typeof descriptor.value !== 'function') {
                continue;
            }
            const original = descriptor.value;
            target[propertyKey] = (...args) => {
                return serviceLocatorStorage.run(serviceLocator, () => {
                    return original.apply(target, args);
                });
            };
        }
        proto = Object.getPrototypeOf(proto);
    }
    // Store that was active when `enterScope` was last called; restored by `exitScope`.
    let previousStore;
    return {
        run: (fn) => serviceLocatorStorage.run(serviceLocator, fn),
        enterScope: () => {
            previousStore = serviceLocatorStorage.getStore();
            serviceLocatorStorage.enterWith(serviceLocator);
        },
        exitScope: () => {
            // `previousStore` may be `undefined`, in which case this "unsets" the AsyncLocalStorage.
            serviceLocatorStorage.enterWith(previousStore);
        },
    };
}
|
|
221
|
+
export const serviceLocator = new Proxy({}, {
    // Every property read is forwarded to the locator stored in the current
    // AsyncLocalStorage context, or to the global locator when there is none.
    get(_target, prop) {
        const scoped = serviceLocatorStorage.getStore();
        const locator = scoped ?? globalServiceLocator;
        const member = Reflect.get(locator, prop, locator);
        // Methods are bound so that `this` is the real locator rather than the proxy.
        return typeof member === 'function' ? member.bind(locator) : member;
    },
    // Direct assignment is rejected; callers must go through the setter methods.
    set(_target, prop) {
        const message = `Cannot set property '${String(prop)}' on serviceLocator directly. Use the setter methods (e.g. setConfiguration(), setStorageClient()) instead.`;
        throw new TypeError(message);
    },
});
|
|
234
|
+
//# sourceMappingURL=service_locator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"service_locator.js","sourceRoot":"","sources":["../src/service_locator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AAErD,OAAO,EAAE,aAAa,EAAE,MAAM,yBAAyB,CAAC;AAGxD,OAAO,GAAG,MAAM,YAAY,CAAC;AAE7B,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AAEpE,OAAO,EAAE,eAAe,EAAE,MAAM,UAAU,CAAC;AAC3C,OAAO,EAAE,sBAAsB,EAAE,MAAM,wCAAwC,CAAC;AA8EhF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACH,MAAM,OAAO,cAAc;IACf,aAAa,CAAiB;IAC9B,YAAY,CAAgB;IAC5B,aAAa,CAAiB;IAC9B,MAAM,CAAiB;IAE/B;;;;OAIG;IACK,MAAM,CAAC,sBAAsB,CAA0B;IAE/D;;;;;;;OAOG;IACH,YACI,aAA6B,EAC7B,YAA2B,EAC3B,aAA6B,EAC7B,MAAsB;QAEtB,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;QACnC,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;QACjC,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;QACnC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACzB,CAAC;IAED,gBAAgB;QACZ,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,CAAC;YACtB,IAAI,CAAC,SAAS,EAAE,CAAC,KAAK,CAAC,4EAA4E,CAAC,CAAC;YACrG,IAAI,CAAC,aAAa,GAAG,IAAI,aAAa,EAAE,CAAC;QAC7C,CAAC;QACD,OAAO,IAAI,CAAC,aAAa,CAAC;IAC9B,CAAC;IAED,gBAAgB,CAAC,aAA4B;QACzC,wCAAwC;QACxC,IAAI,IAAI,CAAC,aAAa,KAAK,aAAa,EAAE,CAAC;YACvC,OAAO;QACX,CAAC;QAED,4DAA4D;QAC5D,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACrB,MAAM,IAAI,oBAAoB,CAAC,eAAe,EAAE,aAAa,EAAE,IAAI,CAAC,aAAa,CAAC,CAAC;QACvF,CAAC;QAED,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;IACvC,CAAC;IAED,eAAe;QACX,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;YACrB,IAAI,CAAC,SAAS,EAAE,CAAC,KAAK,CAAC,gFAAgF,CAAC,CAAC;YACzG,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,CAAC;gBACtB,IAAI,CAAC,SAAS,EAAE,CAAC,OAAO,CACpB,uFAAuF;oBACnF,kEAAkE,CACzE,CAAC;YACN,CAAC;YACD,IAAI,CAAC,YAAY,GAAG,iBAAiB,CAAC,UAAU,CAAC,IAAI,CAAC,gBAAgB,EAAE,CAAC,CAAC;QAC9E,CAAC;QACD,OAAO,IAAI,CAAC,YAAY,CAAC;IAC7B,CAAC;IAED,eAAe,CAAC,YAA0B;QACtC,wCAAwC;QACxC,IAAI,IAAI,CAAC,YAAY,KAAK,YAAY,EAAE,CAAC;YACrC,OAAO;QACX,CAAC;QAED,4DAA4D;QAC5D,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;YACpB,MAAM,IAAI,oBAAoB,CAAC,cAAc,EAAE,YAAY,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC;QACpF,CAAC;QAED,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;IACrC,CAAC;IAED,gBAAgB;QACZ
,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,CAAC;YACtB,IAAI,CAAC,SAAS,EAAE,CAAC,KAAK,CAAC,6EAA6E,CAAC,CAAC;YACtG,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,CAAC;gBACtB,IAAI,CAAC,SAAS,EAAE,CAAC,OAAO,CACpB,wFAAwF;oBACpF,kEAAkE,CACzE,CAAC;YACN,CAAC;YACD,MAAM,MAAM,GAAG,IAAI,CAAC,gBAAgB,EAAE,CAAC;YACvC,IAAI,CAAC,aAAa,GAAG,IAAI,aAAa,CAAC;gBACnC,cAAc,EAAE,MAAM,CAAC,cAAc;gBACrC,MAAM,EAAE,IAAI,CAAC,SAAS,EAAE,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,eAAe,EAAE,CAAC;aAC9D,CAAC,CAAC;QACP,CAAC;QACD,OAAO,IAAI,CAAC,aAAa,CAAC;IAC9B,CAAC;IAED,gBAAgB,CAAC,aAA4B;QACzC,wCAAwC;QACxC,IAAI,IAAI,CAAC,aAAa,KAAK,aAAa,EAAE,CAAC;YACvC,OAAO;QACX,CAAC;QAED,6DAA6D;QAC7D,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACrB,MAAM,IAAI,oBAAoB,CAAC,eAAe,EAAE,aAAa,EAAE,IAAI,CAAC,aAAa,CAAC,CAAC;QACvF,CAAC;QAED,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;IACvC,CAAC;IAED,SAAS;QACL,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;YACf,IAAI,CAAC,MAAM,GAAG,IAAI,eAAe,CAAC,GAAG,CAAC,CAAC;QAC3C,CAAC;QACD,OAAO,IAAI,CAAC,MAAM,CAAC;IACvB,CAAC;IAED,SAAS,CAAC,MAAqB;QAC3B,IAAI,IAAI,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;YACzB,OAAO;QACX,CAAC;QAED,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YACd,MAAM,IAAI,oBAAoB,CAAC,QAAQ,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QAClE,CAAC;QAED,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACzB,CAAC;IAED,WAAW,CAAC,MAAc;QACtB,OAAO,IAAI,CAAC,SAAS,EAAE,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IAC9C,CAAC;IAED,yBAAyB;QACrB,IAAI,CAAC,cAAc,CAAC,sBAAsB,EAAE,CAAC;YACzC,cAAc,CAAC,sBAAsB,GAAG,IAAI,sBAAsB,EAAE,CAAC;QACzE,CAAC;QACD,OAAO,cAAc,CAAC,sBAAsB,CAAC;IACjD,CAAC;IAED,KAAK;QACD,IAAI,CAAC,aAAa,GAAG,SAAS,CAAC;QAC/B,IAAI,CAAC,YAAY,GAAG,SAAS,CAAC;QAC9B,IAAI,CAAC,aAAa,GAAG,SAAS,CAAC;QAC/B,IAAI,CAAC,MAAM,GAAG,SAAS,CAAC;QACxB,cAAc,CAAC,sBAAsB,EAAE,UAAU,EAAE,CAAC;QACpD,cAAc,CAAC,sBAAsB,GAAG,SAAS,CAAC;IACtD,CAAC;CACJ;AAED;;GAEG;AACH,MAAM,oBAAoB,GAAG,IAAI,cAAc,EAAE,CAAC;AAElD,MAAM,qBAAqB,GAAG,IAAI,iBAAiB,EAA2B,CAAC;AAE/E;;;;;;;;;;;;;;GAcG;AACH,MAAM,UAAU,2BAA2B,CACvC,cAA8B,EAC9B,MAAU;IAEV,IAAI,KAAK,GAAG,MAAM,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC;IAE1C,OAAO,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,MAAM,CAAC,SAAS,EAAE,CAAC;QAClD,MAAM,YAAY,GAAG,
CAAC,GAAG,MAAM,CAAC,mBAAmB,CAAC,KAAK,CAAC,EAAE,GAAG,MAAM,CAAC,qBAAqB,CAAC,KAAK,CAAC,CAAC,CAAC;QAEpG,KAAK,MAAM,WAAW,IAAI,YAAY,EAAE,CAAC;YACrC,MAAM,UAAU,GAAG,MAAM,CAAC,wBAAwB,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;YAEvE,kFAAkF;YAClF,sEAAsE;YACtE,uEAAuE;YACvE,IACI,WAAW,KAAK,aAAa;gBAC7B,CAAC,UAAU;gBACX,UAAU,CAAC,GAAG;gBACd,UAAU,CAAC,GAAG;gBACd,OAAO,UAAU,CAAC,KAAK,KAAK,UAAU;gBAEtC,SAAS;YAEb,MAAM,QAAQ,GAAG,UAAU,CAAC,KAAK,CAAC;YACjC,MAA2C,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,IAAW,EAAE,EAAE;gBAC3E,OAAO,qBAAqB,CAAC,GAAG,CAAC,cAAc,EAAE,GAAG,EAAE;oBAClD,OAAO,QAAQ,CAAC,KAAK,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;gBACxC,CAAC,CAAC,CAAC;YACP,CAAC,CAAC;QACN,CAAC;QAED,KAAK,GAAG,MAAM,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC;IACzC,CAAC;IAED,IAAI,aAAkD,CAAC;IAEvD,OAAO;QACH,GAAG,EAAE,CAAI,EAAW,EAAK,EAAE,CAAC,qBAAqB,CAAC,GAAG,CAAC,cAAc,EAAE,EAAE,CAAC;QACzE,UAAU,EAAE,GAAG,EAAE;YACb,aAAa,GAAG,qBAAqB,CAAC,QAAQ,EAAE,CAAC;YACjD,qBAAqB,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC;QACpD,CAAC;QACD,SAAS,EAAE,GAAG,EAAE;YACZ,qBAAqB,CAAC,SAAS,CAAC,aAAoB,CAAC,CAAC,CAAC,uFAAuF;QAClJ,CAAC;KACJ,CAAC;AACN,CAAC;AAED,MAAM,CAAC,MAAM,cAAc,GAAG,IAAI,KAAK,CAAC,EAA6B,EAAE;IACnE,GAAG,CAAC,OAAO,EAAE,IAAI;QACb,MAAM,MAAM,GAAG,qBAAqB,CAAC,QAAQ,EAAE,IAAI,oBAAoB,CAAC;QACxE,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC;QAChD,IAAI,OAAO,KAAK,KAAK,UAAU,EAAE,CAAC;YAC9B,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC9B,CAAC;QACD,OAAO,KAAK,CAAC;IACjB,CAAC;IACD,GAAG,CAAC,OAAO,EAAE,IAAI;QACb,MAAM,IAAI,SAAS,CACf,wBAAwB,MAAM,CAAC,IAAI,CAAC,6GAA6G,CACpJ,CAAC;IACN,CAAC;CACJ,CAAC,CAAC"}
|
package/session_pool/index.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/session_pool/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAC;AAC5B,cAAc,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/session_pool/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAC;AAC5B,cAAc,cAAc,CAAC;AAC7B,cAAc,mBAAmB,CAAC;AAClC,cAAc,aAAa,CAAC"}
|
package/session_pool/index.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/session_pool/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAC;AAC5B,cAAc,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/session_pool/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAC;AAC5B,cAAc,cAAc,CAAC;AAC7B,cAAc,mBAAmB,CAAC;AAClC,cAAc,aAAa,CAAC"}
|
|
@@ -1,23 +1,6 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
import type { Cookie, SerializedCookieJar } from 'tough-cookie';
|
|
1
|
+
import type { Dictionary, ISession, ProxyInfo, SessionState } from '@crawlee/types';
|
|
3
2
|
import { CookieJar } from 'tough-cookie';
|
|
4
|
-
import type {
|
|
5
|
-
import type { ResponseLike } from '../cookie_utils.js';
|
|
6
|
-
/**
|
|
7
|
-
* Persistable {@link Session} state.
|
|
8
|
-
*/
|
|
9
|
-
export interface SessionState {
|
|
10
|
-
id: string;
|
|
11
|
-
cookieJar: SerializedCookieJar;
|
|
12
|
-
userData: object;
|
|
13
|
-
errorScore: number;
|
|
14
|
-
maxErrorScore: number;
|
|
15
|
-
errorScoreDecrement: number;
|
|
16
|
-
usageCount: number;
|
|
17
|
-
maxUsageCount: number;
|
|
18
|
-
expiresAt: string;
|
|
19
|
-
createdAt: string;
|
|
20
|
-
}
|
|
3
|
+
import type { CrawleeLogger } from '../log.js';
|
|
21
4
|
export interface SessionOptions {
|
|
22
5
|
/** Id of session used for generating fingerprints. It is used as proxy session name. */
|
|
23
6
|
id?: string;
|
|
@@ -58,11 +41,16 @@ export interface SessionOptions {
|
|
|
58
41
|
* @default 50
|
|
59
42
|
*/
|
|
60
43
|
maxUsageCount?: number;
|
|
61
|
-
/**
|
|
62
|
-
|
|
63
|
-
|
|
44
|
+
/**
|
|
45
|
+
* Marks the session as already retired. Used when restoring a previously persisted session
|
|
46
|
+
* so that `isUsable()` reflects the terminal state regardless of error score or usage count.
|
|
47
|
+
* @default false
|
|
48
|
+
*/
|
|
49
|
+
retired?: boolean;
|
|
50
|
+
log?: CrawleeLogger;
|
|
64
51
|
errorScore?: number;
|
|
65
52
|
cookieJar?: CookieJar;
|
|
53
|
+
proxyInfo?: ProxyInfo;
|
|
66
54
|
}
|
|
67
55
|
/**
|
|
68
56
|
* Sessions are used to store information such as cookies and can be used for generating fingerprints and proxy sessions.
|
|
@@ -70,9 +58,8 @@ export interface SessionOptions {
|
|
|
70
58
|
* Session internal state can be enriched with custom user data for example some authorization tokens and specific headers in general.
|
|
71
59
|
* @category Scaling
|
|
72
60
|
*/
|
|
73
|
-
export declare class Session {
|
|
61
|
+
export declare class Session implements ISession {
|
|
74
62
|
readonly id: string;
|
|
75
|
-
private maxAgeSecs;
|
|
76
63
|
userData: Dictionary;
|
|
77
64
|
private _maxErrorScore;
|
|
78
65
|
private _errorScoreDecrement;
|
|
@@ -80,8 +67,9 @@ export declare class Session {
|
|
|
80
67
|
private _expiresAt;
|
|
81
68
|
private _usageCount;
|
|
82
69
|
private _maxUsageCount;
|
|
83
|
-
private sessionPool;
|
|
84
70
|
private _errorScore;
|
|
71
|
+
private _retired;
|
|
72
|
+
private _proxyInfo?;
|
|
85
73
|
private _cookieJar;
|
|
86
74
|
private log;
|
|
87
75
|
get errorScore(): number;
|
|
@@ -92,10 +80,16 @@ export declare class Session {
|
|
|
92
80
|
get createdAt(): Date;
|
|
93
81
|
get maxUsageCount(): number;
|
|
94
82
|
get cookieJar(): CookieJar;
|
|
83
|
+
get proxyInfo(): ProxyInfo | undefined;
|
|
84
|
+
/**
|
|
85
|
+
* `true` once {@link Session.retire|`retire()`} has been called. Retirement is terminal:
|
|
86
|
+
* a retired session is never picked by the pool and cannot be revived via `markGood()`.
|
|
87
|
+
*/
|
|
88
|
+
get retired(): boolean;
|
|
95
89
|
/**
|
|
96
90
|
* Session configuration.
|
|
97
91
|
*/
|
|
98
|
-
constructor(options
|
|
92
|
+
constructor(options?: SessionOptions);
|
|
99
93
|
/**
|
|
100
94
|
* Indicates whether the session is blocked.
|
|
101
95
|
* Session is blocked once it reaches the `maxErrorScore`.
|
|
@@ -114,7 +108,7 @@ export declare class Session {
|
|
|
114
108
|
isMaxUsageCountReached(): boolean;
|
|
115
109
|
/**
|
|
116
110
|
* Indicates whether the session can be used for next requests.
|
|
117
|
-
* Session is usable when it is not expired, not blocked and the maximum usage count has not be reached.
|
|
111
|
+
* Session is usable when it is not retired, not expired, not blocked and the maximum usage count has not be reached.
|
|
118
112
|
*/
|
|
119
113
|
isUsable(): boolean;
|
|
120
114
|
/**
|
|
@@ -128,11 +122,11 @@ export declare class Session {
|
|
|
128
122
|
*/
|
|
129
123
|
getState(): SessionState;
|
|
130
124
|
/**
|
|
131
|
-
*
|
|
132
|
-
*
|
|
133
|
-
*
|
|
134
|
-
*
|
|
135
|
-
*
|
|
125
|
+
* Permanently retires the session — `isUsable()` will return `false` from here on,
|
|
126
|
+
* and no `markGood()` / `markBad()` can revive it. Calling `retire()` again is a no-op.
|
|
127
|
+
*
|
|
128
|
+
* Use this when you're confident the session itself is the problem (e.g. a `403` response).
|
|
129
|
+
* For transient external failures (such as `5XX` responses), use `markBad()` instead.
|
|
136
130
|
*/
|
|
137
131
|
retire(): void;
|
|
138
132
|
/**
|
|
@@ -140,42 +134,6 @@ export declare class Session {
|
|
|
140
134
|
* Should be used when the session has been used unsuccessfully. For example because of timeouts.
|
|
141
135
|
*/
|
|
142
136
|
markBad(): void;
|
|
143
|
-
/**
|
|
144
|
-
* With certain status codes: `401`, `403` or `429` we can be certain
|
|
145
|
-
* that the target website is blocking us. This function helps to do this conveniently
|
|
146
|
-
* by retiring the session when such code is received. Optionally, the default status
|
|
147
|
-
* codes can be extended in the second parameter.
|
|
148
|
-
* @param statusCode HTTP status code.
|
|
149
|
-
* @returns Whether the session was retired.
|
|
150
|
-
*/
|
|
151
|
-
retireOnBlockedStatusCodes(statusCode: number): boolean;
|
|
152
|
-
/**
|
|
153
|
-
* Saves cookies from an HTTP response to be used with the session.
|
|
154
|
-
* It expects an object with a `headers` property that's either an `Object`
|
|
155
|
-
* (typical Node.js responses) or a `Function` (Puppeteer Response).
|
|
156
|
-
*
|
|
157
|
-
* It then parses and saves the cookies from the `set-cookie` header, if available.
|
|
158
|
-
*/
|
|
159
|
-
setCookiesFromResponse(response: ResponseLike): void;
|
|
160
|
-
/**
|
|
161
|
-
* Saves an array with cookie objects to be used with the session.
|
|
162
|
-
* The objects should be in the format that
|
|
163
|
-
* [Puppeteer uses](https://pptr.dev/#?product=Puppeteer&version=v2.0.0&show=api-pagecookiesurls),
|
|
164
|
-
* but you can also use this function to set cookies manually:
|
|
165
|
-
*
|
|
166
|
-
* ```
|
|
167
|
-
* [
|
|
168
|
-
* { name: 'cookie1', value: 'my-cookie' },
|
|
169
|
-
* { name: 'cookie2', value: 'your-cookie' }
|
|
170
|
-
* ]
|
|
171
|
-
* ```
|
|
172
|
-
*/
|
|
173
|
-
setCookies(cookies: CookieObject[], url: string): void;
|
|
174
|
-
/**
|
|
175
|
-
* Returns cookies in a format compatible with puppeteer/playwright and ready to be used with `page.setCookie`.
|
|
176
|
-
* @param url website url. Only cookies stored for this url will be returned
|
|
177
|
-
*/
|
|
178
|
-
getCookies(url: string): CookieObject[];
|
|
179
137
|
/**
|
|
180
138
|
* Returns cookies saved with the session in the typical
|
|
181
139
|
* key1=value1; key2=value2 format, ready to be used in
|
|
@@ -187,10 +145,6 @@ export declare class Session {
|
|
|
187
145
|
* Sets a cookie within this session for the specific URL.
|
|
188
146
|
*/
|
|
189
147
|
setCookie(rawCookie: string, url: string): void;
|
|
190
|
-
/**
|
|
191
|
-
* Sets cookies.
|
|
192
|
-
*/
|
|
193
|
-
protected _setCookies(cookies: Cookie[], url: string): void;
|
|
194
148
|
/**
|
|
195
149
|
* Checks if session is not usable. if it is not retires the session.
|
|
196
150
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"session.d.ts","sourceRoot":"","sources":["../../src/session_pool/session.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"session.d.ts","sourceRoot":"","sources":["../../src/session_pool/session.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,QAAQ,EAAE,SAAS,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAEpF,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAKzC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC;AAG/C,MAAM,WAAW,cAAc;IAC3B,wFAAwF;IACxF,EAAE,CAAC,EAAE,MAAM,CAAC;IAEZ;;;OAGG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;IAEpB,+EAA+E;IAC/E,QAAQ,CAAC,EAAE,UAAU,CAAC;IAEtB;;;;;;OAMG;IACH,aAAa,CAAC,EAAE,MAAM,CAAC;IAEvB;;;;;OAKG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAE7B,wBAAwB;IACxB,SAAS,CAAC,EAAE,IAAI,CAAC;IAEjB,0BAA0B;IAC1B,SAAS,CAAC,EAAE,IAAI,CAAC;IAEjB;;;OAGG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;IAEpB;;;;OAIG;IACH,aAAa,CAAC,EAAE,MAAM,CAAC;IAEvB;;;;OAIG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAElB,GAAG,CAAC,EAAE,aAAa,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB,SAAS,CAAC,EAAE,SAAS,CAAC;CACzB;AAED;;;;;GAKG;AACH,qBAAa,OAAQ,YAAW,QAAQ;IACpC,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,UAAU,CAAC;IACrB,OAAO,CAAC,cAAc,CAAS;IAC/B,OAAO,CAAC,oBAAoB,CAAS;IACrC,OAAO,CAAC,UAAU,CAAO;IACzB,OAAO,CAAC,UAAU,CAAO;IACzB,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,cAAc,CAAS;IAC/B,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,UAAU,CAAC,CAAY;IAC/B,OAAO,CAAC,UAAU,CAAY;IAC9B,OAAO,CAAC,GAAG,CAAgB;IAE3B,IAAI,UAAU,WAEb;IAED,IAAI,UAAU,WAEb;IAED,IAAI,aAAa,WAEhB;IAED,IAAI,mBAAmB,WAEtB;IAED,IAAI,SAAS,SAEZ;IAED,IAAI,SAAS,SAEZ;IAED,IAAI,aAAa,WAEhB;IAED,IAAI,SAAS,cAEZ;IAED,IAAI,SAAS,0BAEZ;IAED;;;OAGG;IACH,IAAI,OAAO,YAEV;IAED;;OAEG;gBACS,OAAO,GAAE,cAAmB;IAyDxC;;;OAGG;IACH,SAAS,IAAI,OAAO;IAIpB;;;;OAIG;IACH,SAAS,IAAI,OAAO;IAIpB;;;OAGG;IACH,sBAAsB,IAAI,OAAO;IAIjC;;;OAGG;IACH,QAAQ,IAAI,OAAO;IAInB;;;OAGG;IACH,QAAQ;IAUR;;;OAGG;IACH,QAAQ,IAAI,YAAY;IAiBxB;;;;;;OAMG;IACH,MAAM;IAON;;;OAGG;IACH,OAAO;IAOP;;;;;OAKG;IACH,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM;IAIpC;;OAEG;IACH,SAAS,CAAC,SAAS,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,IAAI;IAQ/C;;OAEG;IACH,SAAS,CAAC,gBAAgB,IAAI,IAAI;CAKrC"}
|