@crawlee/core 4.0.0-beta.2 → 4.0.0-beta.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -1
- package/autoscaling/snapshotter.d.ts.map +1 -1
- package/autoscaling/snapshotter.js +4 -11
- package/autoscaling/snapshotter.js.map +1 -1
- package/configuration.d.ts +0 -7
- package/configuration.d.ts.map +1 -1
- package/configuration.js +0 -4
- package/configuration.js.map +1 -1
- package/cookie_utils.d.ts +1 -1
- package/cookie_utils.d.ts.map +1 -1
- package/cookie_utils.js +4 -6
- package/cookie_utils.js.map +1 -1
- package/crawlers/context_pipeline.d.ts +61 -0
- package/crawlers/context_pipeline.d.ts.map +1 -0
- package/crawlers/context_pipeline.js +99 -0
- package/crawlers/context_pipeline.js.map +1 -0
- package/crawlers/crawler_commons.d.ts +12 -16
- package/crawlers/crawler_commons.d.ts.map +1 -1
- package/crawlers/crawler_commons.js +0 -8
- package/crawlers/crawler_commons.js.map +1 -1
- package/crawlers/error_snapshotter.d.ts +3 -2
- package/crawlers/error_snapshotter.d.ts.map +1 -1
- package/crawlers/error_snapshotter.js +2 -2
- package/crawlers/error_snapshotter.js.map +1 -1
- package/crawlers/error_tracker.d.ts +2 -1
- package/crawlers/error_tracker.d.ts.map +1 -1
- package/crawlers/error_tracker.js.map +1 -1
- package/crawlers/index.d.ts +1 -1
- package/crawlers/index.d.ts.map +1 -1
- package/crawlers/index.js +1 -1
- package/crawlers/index.js.map +1 -1
- package/crawlers/internals/types.d.ts +8 -0
- package/crawlers/internals/types.d.ts.map +1 -0
- package/crawlers/internals/types.js +2 -0
- package/crawlers/internals/types.js.map +1 -0
- package/enqueue_links/enqueue_links.d.ts +22 -15
- package/enqueue_links/enqueue_links.d.ts.map +1 -1
- package/enqueue_links/enqueue_links.js +33 -14
- package/enqueue_links/enqueue_links.js.map +1 -1
- package/enqueue_links/shared.d.ts +8 -2
- package/enqueue_links/shared.d.ts.map +1 -1
- package/enqueue_links/shared.js +40 -18
- package/enqueue_links/shared.js.map +1 -1
- package/errors.d.ts +12 -0
- package/errors.d.ts.map +1 -1
- package/errors.js +20 -0
- package/errors.js.map +1 -1
- package/events/local_event_manager.d.ts +0 -2
- package/events/local_event_manager.d.ts.map +1 -1
- package/events/local_event_manager.js +3 -32
- package/events/local_event_manager.js.map +1 -1
- package/http_clients/base-http-client.d.ts +26 -69
- package/http_clients/base-http-client.d.ts.map +1 -1
- package/http_clients/base-http-client.js +20 -9
- package/http_clients/base-http-client.js.map +1 -1
- package/http_clients/got-scraping-http-client.d.ts +10 -3
- package/http_clients/got-scraping-http-client.d.ts.map +1 -1
- package/http_clients/got-scraping-http-client.js +86 -39
- package/http_clients/got-scraping-http-client.js.map +1 -1
- package/index.d.ts +1 -0
- package/index.d.ts.map +1 -1
- package/index.js +1 -0
- package/index.js.map +1 -1
- package/log.d.ts +4 -2
- package/log.d.ts.map +1 -1
- package/log.js.map +1 -1
- package/package.json +6 -6
- package/proxy_configuration.d.ts +21 -40
- package/proxy_configuration.d.ts.map +1 -1
- package/proxy_configuration.js +15 -51
- package/proxy_configuration.js.map +1 -1
- package/recoverable_state.d.ts +123 -0
- package/recoverable_state.d.ts.map +1 -0
- package/recoverable_state.js +140 -0
- package/recoverable_state.js.map +1 -0
- package/request.d.ts +31 -5
- package/request.d.ts.map +1 -1
- package/request.js +35 -12
- package/request.js.map +1 -1
- package/session_pool/session.d.ts +6 -2
- package/session_pool/session.d.ts.map +1 -1
- package/session_pool/session.js +9 -2
- package/session_pool/session.js.map +1 -1
- package/session_pool/session_pool.d.ts +9 -2
- package/session_pool/session_pool.d.ts.map +1 -1
- package/session_pool/session_pool.js +13 -1
- package/session_pool/session_pool.js.map +1 -1
- package/storages/dataset.d.ts +7 -0
- package/storages/dataset.d.ts.map +1 -1
- package/storages/dataset.js +10 -1
- package/storages/dataset.js.map +1 -1
- package/storages/index.d.ts +2 -0
- package/storages/index.d.ts.map +1 -1
- package/storages/index.js +2 -0
- package/storages/index.js.map +1 -1
- package/storages/key_value_store.d.ts +13 -1
- package/storages/key_value_store.d.ts.map +1 -1
- package/storages/key_value_store.js +11 -6
- package/storages/key_value_store.js.map +1 -1
- package/storages/request_list.d.ts +0 -6
- package/storages/request_list.d.ts.map +1 -1
- package/storages/request_list.js.map +1 -1
- package/storages/request_list_adapter.d.ts +58 -0
- package/storages/request_list_adapter.d.ts.map +1 -0
- package/storages/request_list_adapter.js +81 -0
- package/storages/request_list_adapter.js.map +1 -0
- package/storages/request_manager_tandem.d.ts +68 -0
- package/storages/request_manager_tandem.d.ts.map +1 -0
- package/storages/request_manager_tandem.js +124 -0
- package/storages/request_manager_tandem.js.map +1 -0
- package/storages/request_provider.d.ts +68 -4
- package/storages/request_provider.d.ts.map +1 -1
- package/storages/request_provider.js +74 -44
- package/storages/request_provider.js.map +1 -1
- package/storages/request_queue_v2.d.ts +2 -2
- package/storages/request_queue_v2.d.ts.map +1 -1
- package/storages/request_queue_v2.js.map +1 -1
- package/storages/sitemap_request_list.d.ts.map +1 -1
- package/storages/sitemap_request_list.js.map +1 -1
- package/storages/storage_manager.d.ts.map +1 -1
- package/storages/storage_manager.js +1 -1
- package/storages/storage_manager.js.map +1 -1
- package/typedefs.d.ts +1 -1
- package/typedefs.d.ts.map +1 -1
- package/crawlers/crawler_extension.d.ts +0 -12
- package/crawlers/crawler_extension.d.ts.map +0 -1
- package/crawlers/crawler_extension.js +0 -14
- package/crawlers/crawler_extension.js.map +0 -1
- package/tsconfig.build.tsbuildinfo +0 -1
package/log.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"log.js","sourceRoot":"","sources":["../src/log.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"log.js","sourceRoot":"","sources":["../src/log.ts"],"names":[],"mappings":"AACA,OAAO,GAAG,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,UAAU,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAEhF,OAAO,EAAE,GAAG,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,EAAE,UAAU,EAAE,UAAU,EAAE,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@crawlee/core",
|
|
3
|
-
"version": "4.0.0-beta.
|
|
3
|
+
"version": "4.0.0-beta.20",
|
|
4
4
|
"description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
|
|
5
5
|
"engines": {
|
|
6
6
|
"node": ">=22.0.0"
|
|
@@ -53,9 +53,9 @@
|
|
|
53
53
|
"@apify/pseudo_url": "^2.0.59",
|
|
54
54
|
"@apify/timeout": "^0.3.2",
|
|
55
55
|
"@apify/utilities": "^2.15.5",
|
|
56
|
-
"@crawlee/memory-storage": "4.0.0-beta.
|
|
57
|
-
"@crawlee/types": "4.0.0-beta.
|
|
58
|
-
"@crawlee/utils": "4.0.0-beta.
|
|
56
|
+
"@crawlee/memory-storage": "4.0.0-beta.20",
|
|
57
|
+
"@crawlee/types": "4.0.0-beta.20",
|
|
58
|
+
"@crawlee/utils": "4.0.0-beta.20",
|
|
59
59
|
"@sapphire/async-queue": "^1.5.5",
|
|
60
60
|
"@vladfrangu/async_event_emitter": "^2.4.6",
|
|
61
61
|
"csv-stringify": "^6.5.2",
|
|
@@ -66,7 +66,7 @@
|
|
|
66
66
|
"ow": "^2.0.0",
|
|
67
67
|
"stream-json": "^1.9.1",
|
|
68
68
|
"tldts": "^7.0.6",
|
|
69
|
-
"tough-cookie": "^
|
|
69
|
+
"tough-cookie": "^6.0.0",
|
|
70
70
|
"tslib": "^2.8.1",
|
|
71
71
|
"type-fest": "^4.41.0"
|
|
72
72
|
},
|
|
@@ -77,5 +77,5 @@
|
|
|
77
77
|
}
|
|
78
78
|
}
|
|
79
79
|
},
|
|
80
|
-
"gitHead": "
|
|
80
|
+
"gitHead": "8e04fea33d3b7ca5a386a941c990e9212275e221"
|
|
81
81
|
}
|
package/proxy_configuration.d.ts
CHANGED
|
@@ -1,18 +1,19 @@
|
|
|
1
1
|
import type { Request } from './request.js';
|
|
2
2
|
export interface ProxyConfigurationFunction {
|
|
3
|
-
(
|
|
3
|
+
(options?: {
|
|
4
4
|
request?: Request;
|
|
5
5
|
}): string | null | Promise<string | null>;
|
|
6
6
|
}
|
|
7
|
+
type UrlList = (string | null)[];
|
|
7
8
|
export interface ProxyConfigurationOptions {
|
|
8
9
|
/**
|
|
9
10
|
* An array of custom proxy URLs to be rotated.
|
|
10
11
|
* Custom proxies are not compatible with Apify Proxy and an attempt to use both
|
|
11
12
|
* configuration options will cause an error to be thrown on initialize.
|
|
12
13
|
*/
|
|
13
|
-
proxyUrls?:
|
|
14
|
+
proxyUrls?: UrlList;
|
|
14
15
|
/**
|
|
15
|
-
* Custom function that allows you to generate the new proxy URL dynamically. It gets
|
|
16
|
+
* Custom function that allows you to generate the new proxy URL dynamically. It gets an optional parameter with the `Request` object when applicable.
|
|
16
17
|
* Can return either stringified proxy URL or `null` if the proxy should not be used. Can be asynchronous.
|
|
17
18
|
*
|
|
18
19
|
* This function is used to generate the URL when {@link ProxyConfiguration.newUrl} or {@link ProxyConfiguration.newProxyInfo} is called.
|
|
@@ -29,7 +30,7 @@ export interface ProxyConfigurationOptions {
|
|
|
29
30
|
*
|
|
30
31
|
* Use `null` as a proxy URL to disable the proxy for the given tier.
|
|
31
32
|
*/
|
|
32
|
-
tieredProxyUrls?:
|
|
33
|
+
tieredProxyUrls?: UrlList[];
|
|
33
34
|
}
|
|
34
35
|
export interface TieredProxy {
|
|
35
36
|
proxyUrl: string | null;
|
|
@@ -57,19 +58,12 @@ export interface TieredProxy {
|
|
|
57
58
|
* requestHandler({ proxyInfo }) {
|
|
58
59
|
* // Getting used proxy URL
|
|
59
60
|
* const proxyUrl = proxyInfo.url;
|
|
60
|
-
*
|
|
61
|
-
* // Getting ID of used Session
|
|
62
|
-
* const sessionIdentifier = proxyInfo.sessionId;
|
|
63
61
|
* }
|
|
64
62
|
* })
|
|
65
63
|
*
|
|
66
64
|
* ```
|
|
67
65
|
*/
|
|
68
66
|
export interface ProxyInfo {
|
|
69
|
-
/**
|
|
70
|
-
* The identifier of used {@link Session}, if used.
|
|
71
|
-
*/
|
|
72
|
-
sessionId?: string;
|
|
73
67
|
/**
|
|
74
68
|
* The URL of the proxy.
|
|
75
69
|
*/
|
|
@@ -94,6 +88,12 @@ export interface ProxyInfo {
|
|
|
94
88
|
* Proxy tier for the current proxy, if applicable (only for `tieredProxyUrls`).
|
|
95
89
|
*/
|
|
96
90
|
proxyTier?: number;
|
|
91
|
+
/**
|
|
92
|
+
* When `true`, the proxy is likely intercepting HTTPS traffic and is able to view and modify its content.
|
|
93
|
+
*
|
|
94
|
+
* @default false
|
|
95
|
+
*/
|
|
96
|
+
ignoreTlsErrors?: boolean;
|
|
97
97
|
}
|
|
98
98
|
interface TieredProxyOptions {
|
|
99
99
|
request?: Request;
|
|
@@ -156,9 +156,9 @@ declare class ProxyTierTracker {
|
|
|
156
156
|
export declare class ProxyConfiguration {
|
|
157
157
|
isManInTheMiddle: boolean;
|
|
158
158
|
protected nextCustomUrlIndex: number;
|
|
159
|
-
protected proxyUrls?:
|
|
160
|
-
protected tieredProxyUrls?:
|
|
161
|
-
protected usedProxyUrls: Map<string, string>;
|
|
159
|
+
protected proxyUrls?: UrlList;
|
|
160
|
+
protected tieredProxyUrls?: UrlList[];
|
|
161
|
+
protected usedProxyUrls: Map<string, string | null>;
|
|
162
162
|
protected newUrlFunction?: ProxyConfigurationFunction;
|
|
163
163
|
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
164
164
|
protected log: import("@apify/log").Log;
|
|
@@ -190,25 +190,16 @@ export declare class ProxyConfiguration {
|
|
|
190
190
|
* the currently used proxy via the requestHandler parameter `proxyInfo`.
|
|
191
191
|
* Use it if you want to work with a rich representation of a proxy URL.
|
|
192
192
|
* If you need the URL string only, use {@link ProxyConfiguration.newUrl}.
|
|
193
|
-
* @param [sessionId]
|
|
194
|
-
* Represents the identifier of user {@link Session} that can be managed by the {@link SessionPool} or
|
|
195
|
-
* you can use the Apify Proxy [Session](https://docs.apify.com/proxy#sessions) identifier.
|
|
196
|
-
* When the provided sessionId is a number, it's converted to a string. Property sessionId of
|
|
197
|
-
* {@link ProxyInfo} is always returned as a type string.
|
|
198
193
|
*
|
|
199
|
-
* All the HTTP requests going through the proxy with the same session identifier
|
|
200
|
-
* will use the same target proxy server (i.e. the same IP address).
|
|
201
|
-
* The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`.
|
|
202
194
|
* @return Represents information about used proxy and its configuration.
|
|
203
195
|
*/
|
|
204
|
-
newProxyInfo(
|
|
196
|
+
newProxyInfo(options?: TieredProxyOptions): Promise<ProxyInfo | undefined>;
|
|
205
197
|
/**
|
|
206
|
-
* Given a
|
|
207
|
-
* @param _sessionId Session identifier
|
|
198
|
+
* Given a request / proxy tier, this function returns a new proxy URL based on the provided configuration options.
|
|
208
199
|
* @param options Options for the tiered proxy rotation
|
|
209
200
|
* @returns An object with the proxy URL and the proxy tier used.
|
|
210
201
|
*/
|
|
211
|
-
protected _handleTieredUrl(
|
|
202
|
+
protected _handleTieredUrl(options?: TieredProxyOptions): TieredProxy;
|
|
212
203
|
/**
|
|
213
204
|
* Given a `Request` object, this function returns the tier of the proxy that should be used for the request.
|
|
214
205
|
*
|
|
@@ -216,27 +207,17 @@ export declare class ProxyConfiguration {
|
|
|
216
207
|
*/
|
|
217
208
|
protected predictProxyTier(request: Request): number | null;
|
|
218
209
|
/**
|
|
219
|
-
* Returns a new proxy URL based on provided configuration options
|
|
220
|
-
* @param [sessionId]
|
|
221
|
-
* Represents the identifier of user {@link Session} that can be managed by the {@link SessionPool} or
|
|
222
|
-
* you can use the Apify Proxy [Session](https://docs.apify.com/proxy#sessions) identifier.
|
|
223
|
-
* When the provided sessionId is a number, it's converted to a string.
|
|
210
|
+
* Returns a new proxy URL based on provided configuration options.
|
|
224
211
|
*
|
|
225
|
-
* All the HTTP requests going through the proxy with the same session identifier
|
|
226
|
-
* will use the same target proxy server (i.e. the same IP address).
|
|
227
|
-
* The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`.
|
|
228
212
|
* @return A string with a proxy URL, including authentication credentials and port number.
|
|
229
213
|
* For example, `http://bob:password123@proxy.example.com:8000`
|
|
230
214
|
*/
|
|
231
|
-
newUrl(
|
|
232
|
-
|
|
233
|
-
* Handles custom url rotation with session
|
|
234
|
-
*/
|
|
235
|
-
protected _handleCustomUrl(sessionId?: string): string;
|
|
215
|
+
newUrl(options?: TieredProxyOptions): Promise<string | undefined>;
|
|
216
|
+
protected _handleProxyUrlsList(): string | null;
|
|
236
217
|
/**
|
|
237
218
|
* Calls the custom newUrlFunction and checks format of its return value
|
|
238
219
|
*/
|
|
239
|
-
protected _callNewUrlFunction(
|
|
220
|
+
protected _callNewUrlFunction(options?: {
|
|
240
221
|
request?: Request;
|
|
241
222
|
}): Promise<string | null>;
|
|
242
223
|
protected _throwCannotCombineCustomMethods(): never;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"proxy_configuration.d.ts","sourceRoot":"","sources":["../src/proxy_configuration.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"proxy_configuration.d.ts","sourceRoot":"","sources":["../src/proxy_configuration.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAE5C,MAAM,WAAW,0BAA0B;IACvC,CAAC,OAAO,CAAC,EAAE;QAAE,OAAO,CAAC,EAAE,OAAO,CAAA;KAAE,GAAG,MAAM,GAAG,IAAI,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;CAC7E;AAED,KAAK,OAAO,GAAG,CAAC,MAAM,GAAG,IAAI,CAAC,EAAE,CAAC;AAEjC,MAAM,WAAW,yBAAyB;IACtC;;;;OAIG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB;;;;;OAKG;IACH,cAAc,CAAC,EAAE,0BAA0B,CAAC;IAE5C;;;;;;;;;;OAUG;IACH,eAAe,CAAC,EAAE,OAAO,EAAE,CAAC;CAC/B;AAED,MAAM,WAAW,WAAW;IACxB,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,MAAM,WAAW,SAAS;IACtB;;OAEG;IACH,GAAG,EAAE,MAAM,CAAC;IAEZ;;OAEG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB;;OAEG;IACH,QAAQ,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,QAAQ,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,IAAI,EAAE,MAAM,GAAG,MAAM,CAAC;IAEtB;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB;;;;OAIG;IACH,eAAe,CAAC,EAAE,OAAO,CAAC;CAC7B;AAED,UAAU,kBAAkB;IACxB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;;;GAIG;AACH,cAAM,gBAAgB;IAClB,OAAO,CAAC,SAAS,CAAW;IAC5B,OAAO,CAAC,WAAW,CAAS;gBAEhB,eAAe,EAAE,CAAC,MAAM,GAAG,IAAI,CAAC,EAAE,EAAE;IAKhD;;OAEG;IACH,OAAO,CAAC,WAAW;IAgBnB;;;;;OAKG;IACH,QAAQ,CAAC,IAAI,EAAE,MAAM;IAIrB;;;OAGG;IACH,WAAW;CAId;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH,qBAAa,kBAAkB;IAC3B,gBAAgB,UAAS;IACzB,SAAS,CAAC,kBAAkB,SAAK;IACjC,SAAS,CAAC,SAAS,CAAC,EAAE,OAAO,CAAC;IAC9B,SAAS,CAAC,eAAe,CAAC,EAAE,OAAO,EAAE,CAAC;IACtC,SAAS,CAAC,aAAa,6BAAoC;IAC3D,SAAS,CAAC,cAAc,CAAC,EAAE,0BAA0B,CAAC;IACtD,SAAS,CAAC,GAAG,2BAA+C;IAC5D,SAAS,CAAC,WAAW,gCAAuC;IAE5D;;;;;;;;;;;;;;;;;;;OAmBG;gBACS,OAAO,GAAE,yBAA8B;IAwBnD;;;;;;;;OAQG;IACG,YAAY,CAAC,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,SAAS,GAAG,SAAS,CAAC;IAyBhF;;;;OAIG;IACH,SAAS,CAAC,gBAAgB,CAAC,OAAO,CAAC,EAAE,kBAAkB,GAAG,WAAW;IAwBrE;;;;OAIG;IACH,SAAS,CAAC,gBAAgB,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM,GAAG,IAAI;IAiC3D;;;;;OAKG;IACG,MAAM,CAAC,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,MA
AM,GAAG,SAAS,CAAC;IAYvE,SAAS,CAAC,oBAAoB,IAAI,MAAM,GAAG,IAAI;IAI/C;;OAEG;cACa,mBAAmB,CAAC,OAAO,CAAC,EAAE;QAAE,OAAO,CAAC,EAAE,OAAO,CAAA;KAAE;IAcnE,SAAS,CAAC,gCAAgC,IAAI,KAAK;IAMnD,SAAS,CAAC,uBAAuB,IAAI,KAAK;CAG7C"}
|
package/proxy_configuration.js
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import ow from 'ow';
|
|
2
2
|
import log from '@apify/log';
|
|
3
|
-
import { cryptoRandomObjectId } from '@apify/utilities';
|
|
4
3
|
/**
|
|
5
4
|
* Internal class for tracking the proxy tier history for a specific domain.
|
|
6
5
|
*
|
|
@@ -110,7 +109,7 @@ export class ProxyConfiguration {
|
|
|
110
109
|
constructor(options = {}) {
|
|
111
110
|
const { validateRequired, ...rest } = options;
|
|
112
111
|
ow(rest, ow.object.exactShape({
|
|
113
|
-
proxyUrls: ow.optional.array.nonEmpty.ofType(ow.string.url),
|
|
112
|
+
proxyUrls: ow.optional.array.nonEmpty.ofType(ow.any(ow.string.url, ow.null)),
|
|
114
113
|
newUrlFunction: ow.optional.function,
|
|
115
114
|
tieredProxyUrls: ow.optional.array.nonEmpty.ofType(ow.array.nonEmpty.ofType(ow.any(ow.string.url, ow.null))),
|
|
116
115
|
}));
|
|
@@ -129,35 +128,24 @@ export class ProxyConfiguration {
|
|
|
129
128
|
* the currently used proxy via the requestHandler parameter `proxyInfo`.
|
|
130
129
|
* Use it if you want to work with a rich representation of a proxy URL.
|
|
131
130
|
* If you need the URL string only, use {@link ProxyConfiguration.newUrl}.
|
|
132
|
-
* @param [sessionId]
|
|
133
|
-
* Represents the identifier of user {@link Session} that can be managed by the {@link SessionPool} or
|
|
134
|
-
* you can use the Apify Proxy [Session](https://docs.apify.com/proxy#sessions) identifier.
|
|
135
|
-
* When the provided sessionId is a number, it's converted to a string. Property sessionId of
|
|
136
|
-
* {@link ProxyInfo} is always returned as a type string.
|
|
137
131
|
*
|
|
138
|
-
* All the HTTP requests going through the proxy with the same session identifier
|
|
139
|
-
* will use the same target proxy server (i.e. the same IP address).
|
|
140
|
-
* The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`.
|
|
141
132
|
* @return Represents information about used proxy and its configuration.
|
|
142
133
|
*/
|
|
143
|
-
async newProxyInfo(
|
|
144
|
-
if (typeof sessionId === 'number')
|
|
145
|
-
sessionId = `${sessionId}`;
|
|
134
|
+
async newProxyInfo(options) {
|
|
146
135
|
let url;
|
|
147
136
|
let tier;
|
|
148
137
|
if (this.tieredProxyUrls) {
|
|
149
|
-
const { proxyUrl, proxyTier } = this._handleTieredUrl(
|
|
138
|
+
const { proxyUrl, proxyTier } = this._handleTieredUrl(options);
|
|
150
139
|
url = proxyUrl ?? undefined;
|
|
151
140
|
tier = proxyTier;
|
|
152
141
|
}
|
|
153
142
|
else {
|
|
154
|
-
url = await this.newUrl(
|
|
143
|
+
url = await this.newUrl(options);
|
|
155
144
|
}
|
|
156
145
|
if (!url)
|
|
157
146
|
return undefined;
|
|
158
147
|
const { username, password, port, hostname } = new URL(url);
|
|
159
148
|
return {
|
|
160
|
-
sessionId,
|
|
161
149
|
url,
|
|
162
150
|
username: decodeURIComponent(username),
|
|
163
151
|
password: decodeURIComponent(password),
|
|
@@ -167,12 +155,11 @@ export class ProxyConfiguration {
|
|
|
167
155
|
};
|
|
168
156
|
}
|
|
169
157
|
/**
|
|
170
|
-
* Given a
|
|
171
|
-
* @param _sessionId Session identifier
|
|
158
|
+
* Given a request / proxy tier, this function returns a new proxy URL based on the provided configuration options.
|
|
172
159
|
* @param options Options for the tiered proxy rotation
|
|
173
160
|
* @returns An object with the proxy URL and the proxy tier used.
|
|
174
161
|
*/
|
|
175
|
-
_handleTieredUrl(
|
|
162
|
+
_handleTieredUrl(options) {
|
|
176
163
|
if (!this.tieredProxyUrls)
|
|
177
164
|
throw new Error('Tiered proxy URLs are not set');
|
|
178
165
|
if (!options || (!options?.request && options?.proxyTier === undefined)) {
|
|
@@ -218,51 +205,28 @@ export class ProxyConfiguration {
|
|
|
218
205
|
return tierPrediction;
|
|
219
206
|
}
|
|
220
207
|
/**
|
|
221
|
-
* Returns a new proxy URL based on provided configuration options
|
|
222
|
-
* @param [sessionId]
|
|
223
|
-
* Represents the identifier of user {@link Session} that can be managed by the {@link SessionPool} or
|
|
224
|
-
* you can use the Apify Proxy [Session](https://docs.apify.com/proxy#sessions) identifier.
|
|
225
|
-
* When the provided sessionId is a number, it's converted to a string.
|
|
208
|
+
* Returns a new proxy URL based on provided configuration options.
|
|
226
209
|
*
|
|
227
|
-
* All the HTTP requests going through the proxy with the same session identifier
|
|
228
|
-
* will use the same target proxy server (i.e. the same IP address).
|
|
229
|
-
* The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`.
|
|
230
210
|
* @return A string with a proxy URL, including authentication credentials and port number.
|
|
231
211
|
* For example, `http://bob:password123@proxy.example.com:8000`
|
|
232
212
|
*/
|
|
233
|
-
async newUrl(
|
|
234
|
-
if (typeof sessionId === 'number')
|
|
235
|
-
sessionId = `${sessionId}`;
|
|
213
|
+
async newUrl(options) {
|
|
236
214
|
if (this.newUrlFunction) {
|
|
237
|
-
return (await this._callNewUrlFunction(
|
|
215
|
+
return (await this._callNewUrlFunction({ request: options?.request })) ?? undefined;
|
|
238
216
|
}
|
|
239
217
|
if (this.tieredProxyUrls) {
|
|
240
|
-
return this._handleTieredUrl(
|
|
218
|
+
return this._handleTieredUrl(options).proxyUrl ?? undefined;
|
|
241
219
|
}
|
|
242
|
-
return this.
|
|
220
|
+
return this._handleProxyUrlsList() ?? undefined;
|
|
243
221
|
}
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
*/
|
|
247
|
-
_handleCustomUrl(sessionId) {
|
|
248
|
-
let customUrlToUse;
|
|
249
|
-
if (!sessionId) {
|
|
250
|
-
return this.proxyUrls[this.nextCustomUrlIndex++ % this.proxyUrls.length];
|
|
251
|
-
}
|
|
252
|
-
if (this.usedProxyUrls.has(sessionId)) {
|
|
253
|
-
customUrlToUse = this.usedProxyUrls.get(sessionId);
|
|
254
|
-
}
|
|
255
|
-
else {
|
|
256
|
-
customUrlToUse = this.proxyUrls[this.nextCustomUrlIndex++ % this.proxyUrls.length];
|
|
257
|
-
this.usedProxyUrls.set(sessionId, customUrlToUse);
|
|
258
|
-
}
|
|
259
|
-
return customUrlToUse;
|
|
222
|
+
_handleProxyUrlsList() {
|
|
223
|
+
return this.proxyUrls[this.nextCustomUrlIndex++ % this.proxyUrls.length];
|
|
260
224
|
}
|
|
261
225
|
/**
|
|
262
226
|
* Calls the custom newUrlFunction and checks format of its return value
|
|
263
227
|
*/
|
|
264
|
-
async _callNewUrlFunction(
|
|
265
|
-
const proxyUrl = await this.newUrlFunction(
|
|
228
|
+
async _callNewUrlFunction(options) {
|
|
229
|
+
const proxyUrl = await this.newUrlFunction(options);
|
|
266
230
|
try {
|
|
267
231
|
if (proxyUrl) {
|
|
268
232
|
new URL(proxyUrl); // eslint-disable-line no-new
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"proxy_configuration.js","sourceRoot":"","sources":["../src/proxy_configuration.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,IAAI,CAAC;AAEpB,OAAO,GAAG,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"proxy_configuration.js","sourceRoot":"","sources":["../src/proxy_configuration.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,IAAI,CAAC;AAEpB,OAAO,GAAG,MAAM,YAAY,CAAC;AAoH7B;;;;GAIG;AACH,MAAM,gBAAgB;IACV,SAAS,CAAW;IACpB,WAAW,CAAS;IAE5B,YAAY,eAAoC;QAC5C,IAAI,CAAC,SAAS,GAAG,eAAe,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;QAC9C,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC;IACzB,CAAC;IAED;;OAEG;IACK,WAAW;QACf,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YAC5B,IAAI,IAAI,CAAC,WAAW,KAAK,CAAC;gBAAE,OAAO;YACnC,IAAI,CAAC,GAAG,CAAC;gBAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC;QACnC,CAAC,CAAC,CAAC;QAEH,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;QACpF,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;QAE7G,IAAI,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,CAAC;YAC3D,IAAI,CAAC,WAAW,GAAG,IAAI,IAAI,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC;QACnF,CAAC;aAAM,IAAI,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,WAAW,CAAC,KAAK,IAAI,EAAE,CAAC;YACnD,IAAI,CAAC,WAAW,EAAE,CAAC;QACvB,CAAC;IACL,CAAC;IAED;;;;;OAKG;IACH,QAAQ,CAAC,IAAY;QACjB,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;IAC/B,CAAC;IAED;;;OAGG;IACH,WAAW;QACP,IAAI,CAAC,WAAW,EAAE,CAAC;QACnB,OAAO,IAAI,CAAC,WAAW,CAAC;IAC5B,CAAC;CACJ;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH,MAAM,OAAO,kBAAkB;IAC3B,gBAAgB,GAAG,KAAK,CAAC;IACf,kBAAkB,GAAG,CAAC,CAAC;IACvB,SAAS,CAAW;IACpB,eAAe,CAAa;IAC5B,aAAa,GAAG,IAAI,GAAG,EAAyB,CAAC;IACjD,cAAc,CAA8B;IAC5C,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,oBAAoB,EAAE,CAAC,CAAC;IAClD,WAAW,GAAG,IAAI,GAAG,EAA4B,CAAC;IAE5D;;;;;;;;;;;;;;;;;;;OAmBG;IACH,YAAY,UAAqC,EAAE;QAC/C,MAAM,EAAE,gBAAgB,EAAE,GAAG,IAAI,EAAE,GAAG,OAAqB,CAAC;QAC5D,EAAE,CACE,IAAI,EACJ,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC;YACjB,SAAS,EAAE,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC
,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,MAAM,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;YAC5E,cAAc,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ;YACpC,eAAe,EAAE,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAC9C,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,MAAM,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,CAC3D;SACJ,CAAC,CACL,CAAC;QAEF,MAAM,EAAE,SAAS,EAAE,cAAc,EAAE,eAAe,EAAE,GAAG,OAAO,CAAC;QAE/D,IAAI,CAAC,SAAS,EAAE,cAAc,EAAE,eAAe,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC;YACxE,IAAI,CAAC,gCAAgC,EAAE,CAAC;QAC5C,IAAI,CAAC,SAAS,IAAI,CAAC,cAAc,IAAI,gBAAgB;YAAE,IAAI,CAAC,uBAAuB,EAAE,CAAC;QAEtF,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,cAAc,GAAG,cAAc,CAAC;QACrC,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IAC3C,CAAC;IAED;;;;;;;;OAQG;IACH,KAAK,CAAC,YAAY,CAAC,OAA4B;QAC3C,IAAI,GAAuB,CAAC;QAC5B,IAAI,IAAwB,CAAC;QAC7B,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACvB,MAAM,EAAE,QAAQ,EAAE,SAAS,EAAE,GAAG,IAAI,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;YAC/D,GAAG,GAAG,QAAQ,IAAI,SAAS,CAAC;YAC5B,IAAI,GAAG,SAAS,CAAC;QACrB,CAAC;aAAM,CAAC;YACJ,GAAG,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACrC,CAAC;QAED,IAAI,CAAC,GAAG;YAAE,OAAO,SAAS,CAAC;QAE3B,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAE5D,OAAO;YACH,GAAG;YACH,QAAQ,EAAE,kBAAkB,CAAC,QAAQ,CAAC;YACtC,QAAQ,EAAE,kBAAkB,CAAC,QAAQ,CAAC;YACtC,QAAQ;YACR,IAAI,EAAE,IAAK;YACX,SAAS,EAAE,IAAI;SAClB,CAAC;IACN,CAAC;IAED;;;;OAIG;IACO,gBAAgB,CAAC,OAA4B;QACnD,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,+BAA+B,CAAC,CAAC;QAE5E,IAAI,CAAC,OAAO,IAAI,CAAC,CAAC,OAAO,EAAE,OAAO,IAAI,OAAO,EAAE,SAAS,KAAK,SAAS,CAAC,EAAE,CAAC;YACtE,MAAM,YAAY,GAAG,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;YACjD,OAAO;gBACH,QAAQ,EAAE,YAAY,CAAC,IAAI,CAAC,kBAAkB,EAAE,GAAG,YAAY,CAAC,MAAM,CAAC;aAC1E,CAAC;QACN,CAAC;QAED,IAAI,cAAc,GAAG,OAAO,CAAC,SAAU,CAAC;QAExC,IAAI,OAAO,cAAc,KAAK,QAAQ,EAAE,CAAC;YACrC,cAAc,GAAG,IAAI,CAAC,gBAAgB,CAAC,OAAO,CAAC,OAAQ,CAAE,CAAC;QAC9D,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,eAAgB,CAAC,cAAc,CAAC,CAAC;QAExD,OAAO;YACH,
QAAQ,EAAE,SAAS,CAAC,IAAI,CAAC,kBAAkB,EAAE,GAAG,SAAS,CAAC,MAAM,CAAC;YACjE,SAAS,EAAE,cAAc;SAC5B,CAAC;IACN,CAAC;IAED;;;;OAIG;IACO,gBAAgB,CAAC,OAAgB;QACvC,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,OAAO,IAAI,CAAC;QAEvC,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;QAC7C,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;YAChC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,MAAM,EAAE,IAAI,gBAAgB,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC;QAC7E,CAAC;QAED,OAAO,CAAC,QAAQ,CAAC,SAAS,KAAK,EAAE,CAAC;QAElC,MAAM,OAAO,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,MAAM,CAAE,CAAC;QAE9C,IAAI,OAAO,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,aAAa,KAAK,QAAQ,EAAE,CAAC;YAC/D,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC;QAC/D,CAAC;QAED,MAAM,cAAc,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;QAE7C,IACI,OAAO,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,aAAa,KAAK,QAAQ;YAC5D,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,aAAa,KAAK,cAAc,EAC7D,CAAC;YACC,GAAG,CAAC,KAAK,CACL,mCAAmC,MAAM,UAAU,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,aAAa,OAAO,cAAc,GAAG,CACtH,CAAC;QACN,CAAC;QAED,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,aAAa,GAAG,cAAc,CAAC;QAC1D,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,SAAS,GAAG,IAAI,CAAC;QAE5C,OAAO,cAAc,CAAC;IAC1B,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,MAAM,CAAC,OAA4B;QACrC,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACtB,OAAO,CAAC,MAAM,IAAI,CAAC,mBAAmB,CAAC,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC,CAAC,IAAI,SAAS,CAAC;QACxF,CAAC;QAED,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACvB,OAAO,IAAI,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC,QAAQ,IAAI,SAAS,CAAC;QAChE,CAAC;QAED,OAAO,IAAI,CAAC,oBAAoB,EAAE,IAAI,SAAS,CAAC;IACpD,CAAC;IAES,oBAAoB;QAC1B,OAAO,IAAI,CAAC,SAAU,CAAC,IAAI,CAAC,kBAAkB,EAAE,GAAG,IAAI,CAAC,SAAU,CAAC,MAAM,CAAC,CAAC;IAC/E,CAAC;IAED;;OAEG;IACO,KAAK,CAAC,mBAAmB,CAAC,OAA+B;QAC/D,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,cAAe,CAAC,OAAO,CAAC,CAAC;QACrD,IAAI,CAAC;YACD,IAAI,QAAQ,EAAE,CAAC;gBACX,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,6BAA6B;YACpD,CAAC;YACD,OAAO,QAAQ,CAAC;QACpB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACX,MAAM,IAAI,KAAK,CACX,mEAAoE,GAAa,CAAC,OAAO,EAAE,CAC9F,CAAC;QACN,CAAC;IACL,CAAC;IAES,gCAAgC;QACtC,MAAM,IAAI,KAAK,CACX,6GAA6G,CAC
hH,CAAC;IACN,CAAC;IAES,uBAAuB;QAC7B,MAAM,IAAI,KAAK,CAAC,8EAA8E,CAAC,CAAC;IACpG,CAAC;CACJ"}
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import { Configuration } from '@crawlee/core';
|
|
2
|
+
import type { Log } from '@apify/log';
|
|
3
|
+
export interface RecoverableStatePersistenceOptions {
|
|
4
|
+
/**
|
|
5
|
+
* The key under which the state is stored in the KeyValueStore
|
|
6
|
+
*/
|
|
7
|
+
persistStateKey: string;
|
|
8
|
+
/**
|
|
9
|
+
* Flag to enable or disable state persistence
|
|
10
|
+
*/
|
|
11
|
+
persistenceEnabled?: boolean;
|
|
12
|
+
/**
|
|
13
|
+
* The name of the KeyValueStore to use for persistence.
|
|
14
|
+
* If neither a name nor an id are supplied, the default store will be used.
|
|
15
|
+
*/
|
|
16
|
+
persistStateKvsName?: string;
|
|
17
|
+
/**
|
|
18
|
+
* The identifier of the KeyValueStore to use for persistence.
|
|
19
|
+
* If neither a name nor an id are supplied, the default store will be used.
|
|
20
|
+
*/
|
|
21
|
+
persistStateKvsId?: string;
|
|
22
|
+
}
|
|
23
|
+
/**
 * Full set of options accepted by the `RecoverableState` constructor:
 * the persistence settings plus the state model, logging, configuration
 * and (de)serialization hooks.
 */
export interface RecoverableStateOptions<TStateModel = Record<string, unknown>> extends RecoverableStatePersistenceOptions {
    /**
     * State to start from when nothing has been persisted yet.
     * The live state is always a copy produced by running this value through
     * `serialize` and `deserialize`, so the object passed here is not mutated.
     */
    defaultState: TStateModel;
    /**
     * Logger used for messages about state persistence.
     */
    logger?: Log;
    /**
     * Configuration instance to use instead of the global one.
     */
    config?: Configuration;
    /**
     * Converts the state into the string that gets persisted.
     * `JSON.stringify` is used when this is omitted.
     */
    serialize?: (state: TStateModel) => string;
    /**
     * Converts a persisted string back into the state model.
     * `JSON.parse` is used when this is omitted.
     * This is a good place to validate the data and throw if it is malformed.
     */
    deserialize?: (serializedState: string) => TStateModel;
}
|
|
52
|
+
/**
 * Keeps a plain, JSON-serializable JavaScript object as recoverable state.
 *
 * The state can optionally be persisted to a `KeyValueStore`, which lets it survive
 * migrations and restarts. The class takes care of loading, saving and resetting
 * the state, and subscribes to the event system so that the state is persisted
 * whenever a PERSIST_STATE event is emitted.
 */
export declare class RecoverableState<TStateModel = Record<string, unknown>> {
    private readonly defaultState;
    private state;
    private readonly persistenceEnabled;
    private readonly persistStateKey;
    private readonly persistStateKvsName?;
    private readonly persistStateKvsId?;
    private keyValueStore;
    private readonly log;
    private readonly config;
    private readonly serialize;
    private readonly deserialize;
    /**
     * Create a new recoverable state object. No state is loaded until `initialize()` is called.
     *
     * @param options Configuration options for the recoverable state
     */
    constructor(options: RecoverableStateOptions<TStateModel>);
    /**
     * Prepare the recoverable state for use.
     *
     * Must be called before the state is accessed. When persistence is enabled, the saved
     * state is loaded and the object starts listening for PERSIST_STATE events.
     *
     * @returns The loaded state object
     */
    initialize(): Promise<TStateModel>;
    /**
     * Release the resources held by the recoverable state.
     *
     * When persistence is enabled, the object stops listening for PERSIST_STATE events
     * and the current state is persisted one final time.
     */
    teardown(): Promise<void>;
    /**
     * The current state object.
     */
    get currentValue(): TStateModel;
    /**
     * Restore the default state and discard whatever was persisted.
     *
     * The current state is replaced by the default state and, when persistence is enabled,
     * the persisted record is cleared from the KeyValueStore.
     */
    reset(): Promise<void>;
    /**
     * Write the current state to the KeyValueStore.
     *
     * Usually invoked as the PERSIST_STATE event handler, but it may also be called
     * directly whenever an immediate save is needed.
     *
     * @param eventData Optional data associated with a PERSIST_STATE event
     */
    persistState(eventData?: {
        isMigrating: boolean;
    }): Promise<void>;
    /**
     * Read the saved state from the KeyValueStore.
     */
    private loadSavedState;
}
|
|
123
|
+
//# sourceMappingURL=recoverable_state.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"recoverable_state.d.ts","sourceRoot":"","sources":["../src/recoverable_state.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAA4B,MAAM,eAAe,CAAC;AAExE,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,YAAY,CAAC;AAGtC,MAAM,WAAW,kCAAkC;IAC/C;;OAEG;IACH,eAAe,EAAE,MAAM,CAAC;IAExB;;OAEG;IACH,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAE7B;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAE7B;;;OAGG;IACH,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC9B;AAED;;GAEG;AACH,MAAM,WAAW,uBAAuB,CAAC,WAAW,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAC1E,SAAQ,kCAAkC;IAC1C;;;OAGG;IACH,YAAY,EAAE,WAAW,CAAC;IAE1B;;OAEG;IACH,MAAM,CAAC,EAAE,GAAG,CAAC;IAEb;;OAEG;IACH,MAAM,CAAC,EAAE,aAAa,CAAC;IAEvB;;;OAGG;IACH,SAAS,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,KAAK,MAAM,CAAC;IAE3C;;;;OAIG;IACH,WAAW,CAAC,EAAE,CAAC,eAAe,EAAE,MAAM,KAAK,WAAW,CAAC;CAC1D;AAED;;;;;;;;;GASG;AACH,qBAAa,gBAAgB,CAAC,WAAW,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC;IAC/D,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAc;IAC3C,OAAO,CAAC,KAAK,CAA4B;IACzC,OAAO,CAAC,QAAQ,CAAC,kBAAkB,CAAU;IAC7C,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAS;IACzC,OAAO,CAAC,QAAQ,CAAC,mBAAmB,CAAC,CAAS;IAC9C,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAS;IAC5C,OAAO,CAAC,aAAa,CAA8B;IACnD,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAM;IAC1B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAgB;IACvC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAiC;IAC3D,OAAO,CAAC,QAAQ,CAAC,WAAW,CAA2C;IAEvE;;;;OAIG;gBACS,OAAO,EAAE,uBAAuB,CAAC,WAAW,CAAC;IAczD;;;;;;;OAOG;IACG,UAAU,IAAI,OAAO,CAAC,WAAW,CAAC;IAuBxC;;;;;OAKG;IACG,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC;IAU/B;;OAEG;IACH,IAAI,YAAY,IAAI,WAAW,CAM9B;IAED;;;;;OAKG;IACG,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAY5B;;;;;;;OAOG;IACG,YAAY,CAAC,SAAS,CAAC,EAAE;QAAE,WAAW,EAAE,OAAO,CAAA;KAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAcvE;;OAEG;YACW,cAAc;CAY/B"}
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
import { Configuration, KeyValueStore } from '@crawlee/core';
|
|
2
|
+
import log from '@apify/log';
|
|
3
|
+
/**
 * A class for managing persistent recoverable state using a plain JavaScript object.
 *
 * This class facilitates state persistence to a `KeyValueStore`, allowing data to be saved and retrieved
 * across migrations or restarts. It manages the loading, saving, and resetting of state data,
 * with optional persistence capabilities.
 *
 * The state is represented by a plain JavaScript object that can be serialized to and deserialized from JSON.
 * When persistence is enabled, the class hooks into the event system to persist state when needed.
 */
export class RecoverableState {
    defaultState;
    state = null;
    persistenceEnabled;
    persistStateKey;
    persistStateKvsName;
    persistStateKvsId;
    keyValueStore = null;
    log;
    config;
    serialize;
    deserialize;
    /**
     * Initialize a new recoverable state object.
     *
     * Nothing is loaded here; call `initialize()` before reading the state.
     *
     * @param options Configuration options for the recoverable state
     */
    constructor(options) {
        this.defaultState = options.defaultState;
        this.persistStateKey = options.persistStateKey;
        this.persistenceEnabled = options.persistenceEnabled ?? false;
        this.persistStateKvsName = options.persistStateKvsName;
        this.persistStateKvsId = options.persistStateKvsId;
        this.log = options.logger ?? log.child({ prefix: 'RecoverableState' });
        this.config = options.config ?? Configuration.getGlobalConfig();
        this.serialize = options.serialize ?? JSON.stringify;
        this.deserialize = options.deserialize ?? JSON.parse;
        // Bound once so the very same function reference can be passed to both `on` and `off`.
        this.persistState = this.persistState.bind(this);
    }
    /**
     * Initialize the recoverable state.
     *
     * This method must be called before using the recoverable state. It loads the saved state
     * if persistence is enabled and registers the object to listen for PERSIST_STATE events.
     * Calling it again once a state is loaded just returns the current state.
     *
     * @returns The loaded state object
     */
    async initialize() {
        if (this.state !== null && this.state !== undefined) {
            return this.currentValue;
        }
        if (!this.persistenceEnabled) {
            // A serialize/deserialize round trip gives us a copy, so `defaultState` itself is never mutated.
            this.state = this.deserialize(this.serialize(this.defaultState));
            return this.currentValue;
        }
        // The store name takes precedence over the id; with neither, `undefined` is passed to `open`.
        this.keyValueStore = await KeyValueStore.open(this.persistStateKvsName ?? this.persistStateKvsId, {
            config: this.config,
        });
        await this.loadSavedState();
        // Register for persist state events
        const eventManager = this.config.getEventManager();
        eventManager.on("persistState" /* EventType.PERSIST_STATE */, this.persistState);
        return this.currentValue;
    }
    /**
     * Clean up resources used by the recoverable state.
     *
     * If persistence is enabled, this method deregisters the object from PERSIST_STATE events
     * and persists the current state one last time. Without persistence it does nothing.
     *
     * @throws {Error} If persistence is enabled but the state has not been initialized
     */
    async teardown() {
        // `persistState` is always a bound function here, so only the persistence flag needs checking.
        if (!this.persistenceEnabled) {
            return;
        }
        const eventManager = this.config.getEventManager();
        eventManager.off("persistState" /* EventType.PERSIST_STATE */, this.persistState);
        await this.persistState();
    }
    /**
     * Get the current state.
     *
     * @throws {Error} If no state has been loaded yet
     */
    get currentValue() {
        if (this.state === null) {
            throw new Error('Recoverable state has not yet been loaded');
        }
        return this.state;
    }
    /**
     * Reset the state to the default values and clear any persisted state.
     *
     * Resets the current state to the default state and, if persistence is enabled,
     * clears the persisted state from the KeyValueStore.
     *
     * @throws {Error} If persistence is enabled but the state has not been initialized
     */
    async reset() {
        // Validate BEFORE touching `this.state`: otherwise a failed reset would leave a non-null
        // state behind and a later `initialize()` would return early without ever opening the
        // store or registering the PERSIST_STATE listener.
        if (this.persistenceEnabled && this.keyValueStore === null) {
            throw new Error('Recoverable state has not yet been initialized');
        }
        this.state = this.deserialize(this.serialize(this.defaultState));
        if (this.persistenceEnabled) {
            await this.keyValueStore.setValue(this.persistStateKey, null);
        }
    }
    /**
     * Persist the current state to the KeyValueStore.
     *
     * This method is typically called in response to a PERSIST_STATE event, but can also be called
     * directly when needed. When persistence is disabled it is a no-op on an initialized state.
     *
     * @param eventData Optional data associated with a PERSIST_STATE event
     * @throws {Error} If the state has not been initialized
     */
    async persistState(eventData) {
        this.log.debug(`Persisting state of the RecoverableState (eventData=${JSON.stringify(eventData)}).`);
        // The store only exists when persistence is enabled, so it must only be required in that mode.
        const storeMissing = this.persistenceEnabled && this.keyValueStore === null;
        if (this.state === null || storeMissing) {
            throw new Error('Recoverable state has not yet been initialized');
        }
        if (!this.persistenceEnabled) {
            return;
        }
        await this.keyValueStore.setValue(this.persistStateKey, this.serialize(this.state), {
            contentType: 'text/plain', // HACK - the result is expected to be JSON, but we do this to avoid the implicit JSON.parse in `KeyValueStore.getValue`
        });
    }
    /**
     * Load the saved state from the KeyValueStore, falling back to a copy of the
     * default state when nothing is stored under the persistence key.
     *
     * @throws {Error} If the KeyValueStore has not been opened yet
     */
    async loadSavedState() {
        if (this.keyValueStore === null) {
            throw new Error('Recoverable state has not yet been initialized');
        }
        const storedState = await this.keyValueStore.getValue(this.persistStateKey);
        if (storedState === null || storedState === undefined) {
            this.state = this.deserialize(this.serialize(this.defaultState));
        }
        else {
            this.state = this.deserialize(storedState);
        }
    }
}
|
|
140
|
+
//# sourceMappingURL=recoverable_state.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"recoverable_state.js","sourceRoot":"","sources":["../src/recoverable_state.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAa,aAAa,EAAE,MAAM,eAAe,CAAC;AAGxE,OAAO,GAAG,MAAM,YAAY,CAAC;AA6D7B;;;;;;;;;GASG;AACH,MAAM,OAAO,gBAAgB;IACR,YAAY,CAAc;IACnC,KAAK,GAAuB,IAAI,CAAC;IACxB,kBAAkB,CAAU;IAC5B,eAAe,CAAS;IACxB,mBAAmB,CAAU;IAC7B,iBAAiB,CAAU;IACpC,aAAa,GAAyB,IAAI,CAAC;IAClC,GAAG,CAAM;IACT,MAAM,CAAgB;IACtB,SAAS,CAAiC;IAC1C,WAAW,CAA2C;IAEvE;;;;OAIG;IACH,YAAY,OAA6C;QACrD,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;QACzC,IAAI,CAAC,eAAe,GAAG,OAAO,CAAC,eAAe,CAAC;QAC/C,IAAI,CAAC,kBAAkB,GAAG,OAAO,CAAC,kBAAkB,IAAI,KAAK,CAAC;QAC9D,IAAI,CAAC,mBAAmB,GAAG,OAAO,CAAC,mBAAmB,CAAC;QACvD,IAAI,CAAC,iBAAiB,GAAG,OAAO,CAAC,iBAAiB,CAAC;QACnD,IAAI,CAAC,GAAG,GAAG,OAAO,CAAC,MAAM,IAAI,GAAG,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,kBAAkB,EAAE,CAAC,CAAC;QACvE,IAAI,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,IAAI,aAAa,CAAC,eAAe,EAAE,CAAC;QAChE,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,IAAI,CAAC,SAAS,CAAC;QACrD,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,IAAI,CAAC,KAAK,CAAC;QAErD,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACrD,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,UAAU;QACZ,IAAI,IAAI,CAAC,KAAK,KAAK,IAAI,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;YAClD,OAAO,IAAI,CAAC,YAAY,CAAC;QAC7B,CAAC;QAED,IAAI,CAAC,IAAI,CAAC,kBAAkB,EAAE,CAAC;YAC3B,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC;YACjE,OAAO,IAAI,CAAC,YAAY,CAAC;QAC7B,CAAC;QAED,IAAI,CAAC,aAAa,GAAG,MAAM,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,mBAAmB,IAAI,IAAI,CAAC,iBAAiB,EAAE;YAC9F,MAAM,EAAE,IAAI,CAAC,MAAM;SACtB,CAAC,CAAC;QAEH,MAAM,IAAI,CAAC,cAAc,EAAE,CAAC;QAE5B,oCAAoC;QACpC,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,EAAE,CAAC;QACnD,YAAY,CAAC,EAAE,+CAA0B,IAAI,CAAC,YAAY,CAAC,CAAC;QAE5D,OAAO,IAAI,CAAC,YAAY,CAAC;IAC7B,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,QAAQ;QACV,IAAI,CAAC,IAAI,CAAC,kBAAkB,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;YACjD,OAAO;QACX,CAAC;QAED,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,EAAE,CAAC;QACnD,YAAY,CAAC,GAAG,+CAA0B,
IAAI,CAAC,YAAY,CAAC,CAAC;QAC7D,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;IAC9B,CAAC;IAED;;OAEG;IACH,IAAI,YAAY;QACZ,IAAI,IAAI,CAAC,KAAK,KAAK,IAAI,EAAE,CAAC;YACtB,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAC;QACjE,CAAC;QAED,OAAO,IAAI,CAAC,KAAK,CAAC;IACtB,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,KAAK;QACP,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC;QAEjE,IAAI,IAAI,CAAC,kBAAkB,EAAE,CAAC;YAC1B,IAAI,IAAI,CAAC,aAAa,KAAK,IAAI,EAAE,CAAC;gBAC9B,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;YACtE,CAAC;YAED,MAAM,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,IAAI,CAAC,eAAe,EAAE,IAAI,CAAC,CAAC;QAClE,CAAC;IACL,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,YAAY,CAAC,SAAoC;QACnD,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,uDAAuD,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QAErG,IAAI,IAAI,CAAC,aAAa,KAAK,IAAI,IAAI,IAAI,CAAC,KAAK,KAAK,IAAI,EAAE,CAAC;YACrD,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;QACtE,CAAC;QAED,IAAI,IAAI,CAAC,kBAAkB,EAAE,CAAC;YAC1B,MAAM,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,IAAI,CAAC,eAAe,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE;gBAChF,WAAW,EAAE,YAAY,EAAE,wHAAwH;aACtJ,CAAC,CAAC;QACP,CAAC;IACL,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,cAAc;QACxB,IAAI,IAAI,CAAC,aAAa,KAAK,IAAI,EAAE,CAAC;YAC9B,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;QACtE,CAAC;QAED,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QAC5E,IAAI,WAAW,KAAK,IAAI,IAAI,WAAW,KAAK,SAAS,EAAE,CAAC;YACpD,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC;QACrE,CAAC;aAAM,CAAC;YACJ,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,WAAW,CAAC,WAAqB,CAAC,CAAC;QACzD,CAAC;IACL,CAAC;CACJ"}
|