@crawlee/core 4.0.0-beta.5 → 4.0.0-beta.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. package/README.md +9 -5
  2. package/autoscaling/autoscaled_pool.d.ts +3 -5
  3. package/autoscaling/autoscaled_pool.d.ts.map +1 -1
  4. package/autoscaling/autoscaled_pool.js +3 -9
  5. package/autoscaling/autoscaled_pool.js.map +1 -1
  6. package/autoscaling/snapshotter.d.ts +3 -13
  7. package/autoscaling/snapshotter.d.ts.map +1 -1
  8. package/autoscaling/snapshotter.js +18 -29
  9. package/autoscaling/snapshotter.js.map +1 -1
  10. package/autoscaling/system_status.d.ts +0 -3
  11. package/autoscaling/system_status.d.ts.map +1 -1
  12. package/autoscaling/system_status.js +2 -3
  13. package/autoscaling/system_status.js.map +1 -1
  14. package/configuration.d.ts +85 -227
  15. package/configuration.d.ts.map +1 -1
  16. package/configuration.js +159 -223
  17. package/configuration.js.map +1 -1
  18. package/cookie_utils.d.ts +1 -1
  19. package/cookie_utils.d.ts.map +1 -1
  20. package/cookie_utils.js +8 -8
  21. package/cookie_utils.js.map +1 -1
  22. package/crawlers/context_pipeline.d.ts +71 -0
  23. package/crawlers/context_pipeline.d.ts.map +1 -0
  24. package/crawlers/context_pipeline.js +123 -0
  25. package/crawlers/context_pipeline.js.map +1 -0
  26. package/crawlers/crawler_commons.d.ts +19 -27
  27. package/crawlers/crawler_commons.d.ts.map +1 -1
  28. package/crawlers/crawler_commons.js +12 -20
  29. package/crawlers/crawler_commons.js.map +1 -1
  30. package/crawlers/crawler_utils.js +1 -1
  31. package/crawlers/crawler_utils.js.map +1 -1
  32. package/crawlers/error_snapshotter.d.ts +3 -2
  33. package/crawlers/error_snapshotter.d.ts.map +1 -1
  34. package/crawlers/error_snapshotter.js +2 -2
  35. package/crawlers/error_snapshotter.js.map +1 -1
  36. package/crawlers/error_tracker.d.ts +2 -1
  37. package/crawlers/error_tracker.d.ts.map +1 -1
  38. package/crawlers/error_tracker.js.map +1 -1
  39. package/crawlers/index.d.ts +1 -1
  40. package/crawlers/index.d.ts.map +1 -1
  41. package/crawlers/index.js +1 -1
  42. package/crawlers/index.js.map +1 -1
  43. package/crawlers/internals/types.d.ts +8 -0
  44. package/crawlers/internals/types.d.ts.map +1 -0
  45. package/crawlers/internals/types.js +2 -0
  46. package/crawlers/internals/types.js.map +1 -0
  47. package/crawlers/statistics.d.ts +15 -15
  48. package/crawlers/statistics.d.ts.map +1 -1
  49. package/crawlers/statistics.js +21 -18
  50. package/crawlers/statistics.js.map +1 -1
  51. package/enqueue_links/enqueue_links.d.ts +32 -18
  52. package/enqueue_links/enqueue_links.d.ts.map +1 -1
  53. package/enqueue_links/enqueue_links.js +45 -24
  54. package/enqueue_links/enqueue_links.js.map +1 -1
  55. package/enqueue_links/shared.d.ts +25 -8
  56. package/enqueue_links/shared.d.ts.map +1 -1
  57. package/enqueue_links/shared.js +69 -37
  58. package/enqueue_links/shared.js.map +1 -1
  59. package/errors.d.ts +30 -0
  60. package/errors.d.ts.map +1 -1
  61. package/errors.js +44 -0
  62. package/errors.js.map +1 -1
  63. package/events/event_manager.d.ts +8 -5
  64. package/events/event_manager.d.ts.map +1 -1
  65. package/events/event_manager.js +7 -9
  66. package/events/event_manager.js.map +1 -1
  67. package/events/local_event_manager.d.ts +14 -4
  68. package/events/local_event_manager.d.ts.map +1 -1
  69. package/events/local_event_manager.js +33 -39
  70. package/events/local_event_manager.js.map +1 -1
  71. package/index.d.ts +3 -2
  72. package/index.d.ts.map +1 -1
  73. package/index.js +2 -1
  74. package/index.js.map +1 -1
  75. package/log.d.ts +82 -2
  76. package/log.d.ts.map +1 -1
  77. package/log.js +102 -0
  78. package/log.js.map +1 -1
  79. package/package.json +9 -10
  80. package/proxy_configuration.d.ts +14 -148
  81. package/proxy_configuration.d.ts.map +1 -1
  82. package/proxy_configuration.js +19 -167
  83. package/proxy_configuration.js.map +1 -1
  84. package/recoverable_state.d.ts +121 -0
  85. package/recoverable_state.d.ts.map +1 -0
  86. package/recoverable_state.js +142 -0
  87. package/recoverable_state.js.map +1 -0
  88. package/request.d.ts +74 -8
  89. package/request.d.ts.map +1 -1
  90. package/request.js +87 -18
  91. package/request.js.map +1 -1
  92. package/router.d.ts.map +1 -1
  93. package/router.js.map +1 -1
  94. package/serialization.js +1 -1
  95. package/serialization.js.map +1 -1
  96. package/service_locator.d.ts +162 -0
  97. package/service_locator.d.ts.map +1 -0
  98. package/service_locator.js +247 -0
  99. package/service_locator.js.map +1 -0
  100. package/session_pool/session.d.ts +9 -31
  101. package/session_pool/session.d.ts.map +1 -1
  102. package/session_pool/session.js +17 -21
  103. package/session_pool/session.js.map +1 -1
  104. package/session_pool/session_pool.d.ts +51 -57
  105. package/session_pool/session_pool.d.ts.map +1 -1
  106. package/session_pool/session_pool.js +86 -79
  107. package/session_pool/session_pool.js.map +1 -1
  108. package/storages/dataset.d.ts +63 -19
  109. package/storages/dataset.d.ts.map +1 -1
  110. package/storages/dataset.js +86 -22
  111. package/storages/dataset.js.map +1 -1
  112. package/storages/index.d.ts +2 -0
  113. package/storages/index.d.ts.map +1 -1
  114. package/storages/index.js +2 -0
  115. package/storages/index.js.map +1 -1
  116. package/storages/key_value_store.d.ts +79 -10
  117. package/storages/key_value_store.d.ts.map +1 -1
  118. package/storages/key_value_store.js +104 -23
  119. package/storages/key_value_store.js.map +1 -1
  120. package/storages/request_list.d.ts +9 -9
  121. package/storages/request_list.d.ts.map +1 -1
  122. package/storages/request_list.js +13 -8
  123. package/storages/request_list.js.map +1 -1
  124. package/storages/request_list_adapter.d.ts +58 -0
  125. package/storages/request_list_adapter.d.ts.map +1 -0
  126. package/storages/request_list_adapter.js +81 -0
  127. package/storages/request_list_adapter.js.map +1 -0
  128. package/storages/request_manager_tandem.d.ts +68 -0
  129. package/storages/request_manager_tandem.d.ts.map +1 -0
  130. package/storages/request_manager_tandem.js +124 -0
  131. package/storages/request_manager_tandem.js.map +1 -0
  132. package/storages/request_provider.d.ts +86 -23
  133. package/storages/request_provider.d.ts.map +1 -1
  134. package/storages/request_provider.js +114 -74
  135. package/storages/request_provider.js.map +1 -1
  136. package/storages/request_queue.d.ts +1 -3
  137. package/storages/request_queue.d.ts.map +1 -1
  138. package/storages/request_queue.js +2 -4
  139. package/storages/request_queue.js.map +1 -1
  140. package/storages/request_queue_v2.d.ts +3 -3
  141. package/storages/request_queue_v2.d.ts.map +1 -1
  142. package/storages/request_queue_v2.js +4 -5
  143. package/storages/request_queue_v2.js.map +1 -1
  144. package/storages/sitemap_request_list.d.ts +5 -5
  145. package/storages/sitemap_request_list.d.ts.map +1 -1
  146. package/storages/sitemap_request_list.js +10 -7
  147. package/storages/sitemap_request_list.js.map +1 -1
  148. package/storages/storage_manager.d.ts +22 -17
  149. package/storages/storage_manager.d.ts.map +1 -1
  150. package/storages/storage_manager.js +58 -52
  151. package/storages/storage_manager.js.map +1 -1
  152. package/storages/utils.d.ts.map +1 -1
  153. package/storages/utils.js +6 -5
  154. package/storages/utils.js.map +1 -1
  155. package/typedefs.d.ts +1 -1
  156. package/typedefs.d.ts.map +1 -1
  157. package/crawlers/crawler_extension.d.ts +0 -12
  158. package/crawlers/crawler_extension.d.ts.map +0 -1
  159. package/crawlers/crawler_extension.js +0 -14
  160. package/crawlers/crawler_extension.js.map +0 -1
  161. package/http_clients/base-http-client.d.ts +0 -134
  162. package/http_clients/base-http-client.d.ts.map +0 -1
  163. package/http_clients/base-http-client.js +0 -33
  164. package/http_clients/base-http-client.js.map +0 -1
  165. package/http_clients/form-data-like.d.ts +0 -67
  166. package/http_clients/form-data-like.d.ts.map +0 -1
  167. package/http_clients/form-data-like.js +0 -5
  168. package/http_clients/form-data-like.js.map +0 -1
  169. package/http_clients/got-scraping-http-client.d.ts +0 -15
  170. package/http_clients/got-scraping-http-client.d.ts.map +0 -1
  171. package/http_clients/got-scraping-http-client.js +0 -69
  172. package/http_clients/got-scraping-http-client.js.map +0 -1
  173. package/http_clients/index.d.ts +0 -3
  174. package/http_clients/index.d.ts.map +0 -1
  175. package/http_clients/index.js +0 -3
  176. package/http_clients/index.js.map +0 -1
  177. package/tsconfig.build.tsbuildinfo +0 -1
@@ -1,55 +1,5 @@
1
1
  import ow from 'ow';
2
- import log from '@apify/log';
3
- import { cryptoRandomObjectId } from '@apify/utilities';
4
- /**
5
- * Internal class for tracking the proxy tier history for a specific domain.
6
- *
7
- * Predicts the best proxy tier for the next request based on the error history for different proxy tiers.
8
- */
9
- class ProxyTierTracker {
10
- histogram;
11
- currentTier;
12
- constructor(tieredProxyUrls) {
13
- this.histogram = tieredProxyUrls.map(() => 0);
14
- this.currentTier = 0;
15
- }
16
- /**
17
- * Processes a single step of the algorithm and updates the current tier prediction based on the error history.
18
- */
19
- processStep() {
20
- this.histogram.forEach((x, i) => {
21
- if (this.currentTier === i)
22
- return;
23
- if (x > 0)
24
- this.histogram[i]--;
25
- });
26
- const left = this.currentTier > 0 ? this.histogram[this.currentTier - 1] : Infinity;
27
- const right = this.currentTier < this.histogram.length - 1 ? this.histogram[this.currentTier + 1] : Infinity;
28
- if (this.histogram[this.currentTier] > Math.min(left, right)) {
29
- this.currentTier = left <= right ? this.currentTier - 1 : this.currentTier + 1;
30
- }
31
- else if (this.histogram[this.currentTier] === left) {
32
- this.currentTier--;
33
- }
34
- }
35
- /**
36
- * Increases the error score for the given proxy tier. This raises the chance of picking a different proxy tier for the subsequent requests.
37
- *
38
- * The error score is increased by 10 for the given tier. This means that this tier will be disadvantaged for the next 10 requests (every new request prediction decreases the error score by 1).
39
- * @param tier The proxy tier to mark as problematic.
40
- */
41
- addError(tier) {
42
- this.histogram[tier] += 10;
43
- }
44
- /**
45
- * Returns the best proxy tier for the next request based on the error history for different proxy tiers.
46
- * @returns The proxy tier prediction
47
- */
48
- predictTier() {
49
- this.processStep();
50
- return this.currentTier;
51
- }
52
- }
2
+ import { serviceLocator } from './service_locator.js';
53
3
  /**
54
4
  * Configures connection to a proxy server with the provided options. Proxy servers are used to prevent target websites from blocking
55
5
  * your crawlers based on IP address rate limits or blacklists. Setting proxy configuration in your crawlers automatically configures
@@ -82,11 +32,9 @@ export class ProxyConfiguration {
82
32
  isManInTheMiddle = false;
83
33
  nextCustomUrlIndex = 0;
84
34
  proxyUrls;
85
- tieredProxyUrls;
86
35
  usedProxyUrls = new Map();
87
36
  newUrlFunction;
88
- log = log.child({ prefix: 'ProxyConfiguration' });
89
- domainTiers = new Map();
37
+ log = serviceLocator.getLogger().child({ prefix: 'ProxyConfiguration' });
90
38
  /**
91
39
  * Creates a {@link ProxyConfiguration} instance based on the provided options. Proxy servers are used to prevent target websites from
92
40
  * blocking your crawlers based on IP address rate limits or blacklists. Setting proxy configuration in your crawlers automatically configures
@@ -109,19 +57,21 @@ export class ProxyConfiguration {
109
57
  */
110
58
  constructor(options = {}) {
111
59
  const { validateRequired, ...rest } = options;
60
+ if ('tieredProxyUrls' in rest) {
61
+ throw new Error('The `tieredProxyUrls` option has been removed in Crawlee v4. ' +
62
+ 'See the v4 upgrading guide for the recommended migration to named sessions.');
63
+ }
112
64
  ow(rest, ow.object.exactShape({
113
- proxyUrls: ow.optional.array.nonEmpty.ofType(ow.string.url),
65
+ proxyUrls: ow.optional.array.nonEmpty.ofType(ow.any(ow.string.url, ow.null)),
114
66
  newUrlFunction: ow.optional.function,
115
- tieredProxyUrls: ow.optional.array.nonEmpty.ofType(ow.array.nonEmpty.ofType(ow.any(ow.string.url, ow.null))),
116
67
  }));
117
- const { proxyUrls, newUrlFunction, tieredProxyUrls } = options;
118
- if ([proxyUrls, newUrlFunction, tieredProxyUrls].filter((x) => x).length > 1)
68
+ const { proxyUrls, newUrlFunction } = options;
69
+ if (proxyUrls && newUrlFunction)
119
70
  this._throwCannotCombineCustomMethods();
120
71
  if (!proxyUrls && !newUrlFunction && validateRequired)
121
72
  this._throwNoOptionsProvided();
122
73
  this.proxyUrls = proxyUrls;
123
74
  this.newUrlFunction = newUrlFunction;
124
- this.tieredProxyUrls = tieredProxyUrls;
125
75
  }
126
76
  /**
127
77
  * This function creates a new {@link ProxyInfo} info object.
@@ -129,140 +79,42 @@ export class ProxyConfiguration {
129
79
  * the currently used proxy via the requestHandler parameter `proxyInfo`.
130
80
  * Use it if you want to work with a rich representation of a proxy URL.
131
81
  * If you need the URL string only, use {@link ProxyConfiguration.newUrl}.
132
- * @param [sessionId]
133
- * Represents the identifier of user {@link Session} that can be managed by the {@link SessionPool} or
134
- * you can use the Apify Proxy [Session](https://docs.apify.com/proxy#sessions) identifier.
135
- * When the provided sessionId is a number, it's converted to a string. Property sessionId of
136
- * {@link ProxyInfo} is always returned as a type string.
137
82
  *
138
- * All the HTTP requests going through the proxy with the same session identifier
139
- * will use the same target proxy server (i.e. the same IP address).
140
- * The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`.
141
83
  * @return Represents information about used proxy and its configuration.
142
84
  */
143
- async newProxyInfo(sessionId, options) {
144
- if (typeof sessionId === 'number')
145
- sessionId = `${sessionId}`;
146
- let url;
147
- let tier;
148
- if (this.tieredProxyUrls) {
149
- const { proxyUrl, proxyTier } = this._handleTieredUrl(sessionId ?? cryptoRandomObjectId(6), options);
150
- url = proxyUrl ?? undefined;
151
- tier = proxyTier;
152
- }
153
- else {
154
- url = await this.newUrl(sessionId, options);
155
- }
85
+ async newProxyInfo(options) {
86
+ const url = await this.newUrl(options);
156
87
  if (!url)
157
88
  return undefined;
158
89
  const { username, password, port, hostname } = new URL(url);
159
90
  return {
160
- sessionId,
161
91
  url,
162
92
  username: decodeURIComponent(username),
163
93
  password: decodeURIComponent(password),
164
94
  hostname,
165
95
  port: port,
166
- proxyTier: tier,
167
96
  };
168
97
  }
169
98
  /**
170
- * Given a session identifier and a request / proxy tier, this function returns a new proxy URL based on the provided configuration options.
171
- * @param _sessionId Session identifier
172
- * @param options Options for the tiered proxy rotation
173
- * @returns An object with the proxy URL and the proxy tier used.
174
- */
175
- _handleTieredUrl(_sessionId, options) {
176
- if (!this.tieredProxyUrls)
177
- throw new Error('Tiered proxy URLs are not set');
178
- if (!options || (!options?.request && options?.proxyTier === undefined)) {
179
- const allProxyUrls = this.tieredProxyUrls.flat();
180
- return {
181
- proxyUrl: allProxyUrls[this.nextCustomUrlIndex++ % allProxyUrls.length],
182
- };
183
- }
184
- let tierPrediction = options.proxyTier;
185
- if (typeof tierPrediction !== 'number') {
186
- tierPrediction = this.predictProxyTier(options.request);
187
- }
188
- const proxyTier = this.tieredProxyUrls[tierPrediction];
189
- return {
190
- proxyUrl: proxyTier[this.nextCustomUrlIndex++ % proxyTier.length],
191
- proxyTier: tierPrediction,
192
- };
193
- }
194
- /**
195
- * Given a `Request` object, this function returns the tier of the proxy that should be used for the request.
196
- *
197
- * This returns `null` if `tieredProxyUrls` option is not set.
198
- */
199
- predictProxyTier(request) {
200
- if (!this.tieredProxyUrls)
201
- return null;
202
- const domain = new URL(request.url).hostname;
203
- if (!this.domainTiers.has(domain)) {
204
- this.domainTiers.set(domain, new ProxyTierTracker(this.tieredProxyUrls));
205
- }
206
- request.userData.__crawlee ??= {};
207
- const tracker = this.domainTiers.get(domain);
208
- if (typeof request.userData.__crawlee.lastProxyTier === 'number') {
209
- tracker.addError(request.userData.__crawlee.lastProxyTier);
210
- }
211
- const tierPrediction = tracker.predictTier();
212
- if (typeof request.userData.__crawlee.lastProxyTier === 'number' &&
213
- request.userData.__crawlee.lastProxyTier !== tierPrediction) {
214
- log.debug(`Changing proxy tier for domain "${domain}" from ${request.userData.__crawlee.lastProxyTier} to ${tierPrediction}.`);
215
- }
216
- request.userData.__crawlee.lastProxyTier = tierPrediction;
217
- request.userData.__crawlee.forefront = true;
218
- return tierPrediction;
219
- }
220
- /**
221
- * Returns a new proxy URL based on provided configuration options and the `sessionId` parameter.
222
- * @param [sessionId]
223
- * Represents the identifier of user {@link Session} that can be managed by the {@link SessionPool} or
224
- * you can use the Apify Proxy [Session](https://docs.apify.com/proxy#sessions) identifier.
225
- * When the provided sessionId is a number, it's converted to a string.
99
+ * Returns a new proxy URL based on provided configuration options.
226
100
  *
227
- * All the HTTP requests going through the proxy with the same session identifier
228
- * will use the same target proxy server (i.e. the same IP address).
229
- * The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`.
230
101
  * @return A string with a proxy URL, including authentication credentials and port number.
231
102
  * For example, `http://bob:password123@proxy.example.com:8000`
232
103
  */
233
- async newUrl(sessionId, options) {
234
- if (typeof sessionId === 'number')
235
- sessionId = `${sessionId}`;
104
+ async newUrl(options) {
236
105
  if (this.newUrlFunction) {
237
- return (await this._callNewUrlFunction(sessionId, { request: options?.request })) ?? undefined;
238
- }
239
- if (this.tieredProxyUrls) {
240
- return this._handleTieredUrl(sessionId ?? cryptoRandomObjectId(6), options).proxyUrl ?? undefined;
106
+ return (await this._callNewUrlFunction({ request: options?.request })) ?? undefined;
241
107
  }
242
- return this._handleCustomUrl(sessionId);
108
+ return this._handleProxyUrlsList() ?? undefined;
243
109
  }
244
- /**
245
- * Handles custom url rotation with session
246
- */
247
- _handleCustomUrl(sessionId) {
248
- let customUrlToUse;
249
- if (!sessionId) {
250
- return this.proxyUrls[this.nextCustomUrlIndex++ % this.proxyUrls.length];
251
- }
252
- if (this.usedProxyUrls.has(sessionId)) {
253
- customUrlToUse = this.usedProxyUrls.get(sessionId);
254
- }
255
- else {
256
- customUrlToUse = this.proxyUrls[this.nextCustomUrlIndex++ % this.proxyUrls.length];
257
- this.usedProxyUrls.set(sessionId, customUrlToUse);
258
- }
259
- return customUrlToUse;
110
+ _handleProxyUrlsList() {
111
+ return this.proxyUrls[this.nextCustomUrlIndex++ % this.proxyUrls.length];
260
112
  }
261
113
  /**
262
114
  * Calls the custom newUrlFunction and checks format of its return value
263
115
  */
264
- async _callNewUrlFunction(sessionId, options) {
265
- const proxyUrl = await this.newUrlFunction(sessionId, options);
116
+ async _callNewUrlFunction(options) {
117
+ const proxyUrl = await this.newUrlFunction(options);
266
118
  try {
267
119
  if (proxyUrl) {
268
120
  new URL(proxyUrl); // eslint-disable-line no-new
@@ -1 +1 @@
1
- {"version":3,"file":"proxy_configuration.js","sourceRoot":"","sources":["../src/proxy_configuration.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,IAAI,CAAC;AAEpB,OAAO,GAAG,MAAM,YAAY,CAAC;AAC7B,OAAO,EAAE,oBAAoB,EAAE,MAAM,kBAAkB,CAAC;AAmHxD;;;;GAIG;AACH,MAAM,gBAAgB;IACV,SAAS,CAAW;IACpB,WAAW,CAAS;IAE5B,YAAY,eAAoC;QAC5C,IAAI,CAAC,SAAS,GAAG,eAAe,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;QAC9C,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC;IACzB,CAAC;IAED;;OAEG;IACK,WAAW;QACf,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YAC5B,IAAI,IAAI,CAAC,WAAW,KAAK,CAAC;gBAAE,OAAO;YACnC,IAAI,CAAC,GAAG,CAAC;gBAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC;QACnC,CAAC,CAAC,CAAC;QAEH,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;QACpF,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;QAE7G,IAAI,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,CAAC;YAC3D,IAAI,CAAC,WAAW,GAAG,IAAI,IAAI,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC;QACnF,CAAC;aAAM,IAAI,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,WAAW,CAAC,KAAK,IAAI,EAAE,CAAC;YACnD,IAAI,CAAC,WAAW,EAAE,CAAC;QACvB,CAAC;IACL,CAAC;IAED;;;;;OAKG;IACH,QAAQ,CAAC,IAAY;QACjB,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;IAC/B,CAAC;IAED;;;OAGG;IACH,WAAW;QACP,IAAI,CAAC,WAAW,EAAE,CAAC;QACnB,OAAO,IAAI,CAAC,WAAW,CAAC;IAC5B,CAAC;CACJ;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH,MAAM,OAAO,kBAAkB;IAC3B,gBAAgB,GAAG,KAAK,CAAC;IACf,kBAAkB,GAAG,CAAC,CAAC;IACvB,SAAS,CAAY;IACrB,eAAe,CAAuB;IACtC,aAAa,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC1C,cAAc,CAA8B;IAC5C,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,oBAAoB,EAAE,CAAC,CAAC;IAClD,WAAW,GAAG,IAAI,GAAG,EAA4B,CAAC;IAE5D;;;;;;;;;;;;;;;;;;;OAmBG;IACH,YAAY,UAAqC,EAAE;QAC/C,MAAM,EAAE,gBAAgB,EAAE,GAAG,IAAI,EAAE,GAAG,OAAqB,CAAC;QAC5D,EAAE,CACE,IAAI,EACJ,EAAE,CAAC,MAAM,CAAC,UAAU,CA
AC;YACjB,SAAS,EAAE,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC;YAC3D,cAAc,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ;YACpC,eAAe,EAAE,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAC9C,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,MAAM,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,CAC3D;SACJ,CAAC,CACL,CAAC;QAEF,MAAM,EAAE,SAAS,EAAE,cAAc,EAAE,eAAe,EAAE,GAAG,OAAO,CAAC;QAE/D,IAAI,CAAC,SAAS,EAAE,cAAc,EAAE,eAAe,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC;YACxE,IAAI,CAAC,gCAAgC,EAAE,CAAC;QAC5C,IAAI,CAAC,SAAS,IAAI,CAAC,cAAc,IAAI,gBAAgB;YAAE,IAAI,CAAC,uBAAuB,EAAE,CAAC;QAEtF,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,cAAc,GAAG,cAAc,CAAC;QACrC,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IAC3C,CAAC;IAED;;;;;;;;;;;;;;;;OAgBG;IACH,KAAK,CAAC,YAAY,CAAC,SAA2B,EAAE,OAA4B;QACxE,IAAI,OAAO,SAAS,KAAK,QAAQ;YAAE,SAAS,GAAG,GAAG,SAAS,EAAE,CAAC;QAE9D,IAAI,GAAuB,CAAC;QAC5B,IAAI,IAAwB,CAAC;QAC7B,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACvB,MAAM,EAAE,QAAQ,EAAE,SAAS,EAAE,GAAG,IAAI,CAAC,gBAAgB,CAAC,SAAS,IAAI,oBAAoB,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;YACrG,GAAG,GAAG,QAAQ,IAAI,SAAS,CAAC;YAC5B,IAAI,GAAG,SAAS,CAAC;QACrB,CAAC;aAAM,CAAC;YACJ,GAAG,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;QAChD,CAAC;QAED,IAAI,CAAC,GAAG;YAAE,OAAO,SAAS,CAAC;QAE3B,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAE5D,OAAO;YACH,SAAS;YACT,GAAG;YACH,QAAQ,EAAE,kBAAkB,CAAC,QAAQ,CAAC;YACtC,QAAQ,EAAE,kBAAkB,CAAC,QAAQ,CAAC;YACtC,QAAQ;YACR,IAAI,EAAE,IAAK;YACX,SAAS,EAAE,IAAI;SAClB,CAAC;IACN,CAAC;IAED;;;;;OAKG;IACO,gBAAgB,CAAC,UAAkB,EAAE,OAA4B;QACvE,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,+BAA+B,CAAC,CAAC;QAE5E,IAAI,CAAC,OAAO,IAAI,CAAC,CAAC,OAAO,EAAE,OAAO,IAAI,OAAO,EAAE,SAAS,KAAK,SAAS,CAAC,EAAE,CAAC;YACtE,MAAM,YAAY,GAAG,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;YACjD,OAAO;gBACH,QAAQ,EAAE,YAAY,CAAC,IAAI,CAAC,kBAAkB,EAAE,GAAG,YAAY,CAAC,MAAM,CAAC;aAC1E,CAAC;QACN,CAAC;QAED,IAAI,cAAc,GAAG,OAAO,CAAC,SAAU,CAAC;QAExC,IAAI,OAAO,cAAc,KAAK,QAA
Q,EAAE,CAAC;YACrC,cAAc,GAAG,IAAI,CAAC,gBAAgB,CAAC,OAAO,CAAC,OAAQ,CAAE,CAAC;QAC9D,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,eAAgB,CAAC,cAAc,CAAC,CAAC;QAExD,OAAO;YACH,QAAQ,EAAE,SAAS,CAAC,IAAI,CAAC,kBAAkB,EAAE,GAAG,SAAS,CAAC,MAAM,CAAC;YACjE,SAAS,EAAE,cAAc;SAC5B,CAAC;IACN,CAAC;IAED;;;;OAIG;IACO,gBAAgB,CAAC,OAAgB;QACvC,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,OAAO,IAAI,CAAC;QAEvC,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;QAC7C,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;YAChC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,MAAM,EAAE,IAAI,gBAAgB,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC;QAC7E,CAAC;QAED,OAAO,CAAC,QAAQ,CAAC,SAAS,KAAK,EAAE,CAAC;QAElC,MAAM,OAAO,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,MAAM,CAAE,CAAC;QAE9C,IAAI,OAAO,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,aAAa,KAAK,QAAQ,EAAE,CAAC;YAC/D,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC;QAC/D,CAAC;QAED,MAAM,cAAc,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;QAE7C,IACI,OAAO,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,aAAa,KAAK,QAAQ;YAC5D,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,aAAa,KAAK,cAAc,EAC7D,CAAC;YACC,GAAG,CAAC,KAAK,CACL,mCAAmC,MAAM,UAAU,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,aAAa,OAAO,cAAc,GAAG,CACtH,CAAC;QACN,CAAC;QAED,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,aAAa,GAAG,cAAc,CAAC;QAC1D,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,SAAS,GAAG,IAAI,CAAC;QAE5C,OAAO,cAAc,CAAC;IAC1B,CAAC;IAED;;;;;;;;;;;;OAYG;IACH,KAAK,CAAC,MAAM,CAAC,SAA2B,EAAE,OAA4B;QAClE,IAAI,OAAO,SAAS,KAAK,QAAQ;YAAE,SAAS,GAAG,GAAG,SAAS,EAAE,CAAC;QAE9D,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACtB,OAAO,CAAC,MAAM,IAAI,CAAC,mBAAmB,CAAC,SAAS,EAAE,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC,CAAC,IAAI,SAAS,CAAC;QACnG,CAAC;QAED,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACvB,OAAO,IAAI,CAAC,gBAAgB,CAAC,SAAS,IAAI,oBAAoB,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,QAAQ,IAAI,SAAS,CAAC;QACtG,CAAC;QAED,OAAO,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC;IAC5C,CAAC;IAED;;OAEG;IACO,gBAAgB,CAAC,SAAkB;QACzC,IAAI,cAAsB,CAAC;QAE3B,IAAI,CAAC,SAAS,EAAE,CAAC;YACb,OAAO,IAAI,CAAC,SAAU,CAAC,IAAI,CAAC,kBAAkB,EAAE,GAAG,IAAI,CAAC,SAAU,CAAC,MAAM,CAAC,CAAC;QAC/E,CAAC;QAED,IAAI,IAAI,CAAC,aAAa,CAAC
,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC;YACpC,cAAc,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,SAAS,CAAE,CAAC;QACxD,CAAC;aAAM,CAAC;YACJ,cAAc,GAAG,IAAI,CAAC,SAAU,CAAC,IAAI,CAAC,kBAAkB,EAAE,GAAG,IAAI,CAAC,SAAU,CAAC,MAAM,CAAC,CAAC;YACrF,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,SAAS,EAAE,cAAc,CAAC,CAAC;QACtD,CAAC;QAED,OAAO,cAAc,CAAC;IAC1B,CAAC;IAED;;OAEG;IACO,KAAK,CAAC,mBAAmB,CAAC,SAAkB,EAAE,OAA+B;QACnF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,cAAe,CAAC,SAAU,EAAE,OAAO,CAAC,CAAC;QACjE,IAAI,CAAC;YACD,IAAI,QAAQ,EAAE,CAAC;gBACX,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,6BAA6B;YACpD,CAAC;YACD,OAAO,QAAQ,CAAC;QACpB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACX,MAAM,IAAI,KAAK,CACX,mEAAoE,GAAa,CAAC,OAAO,EAAE,CAC9F,CAAC;QACN,CAAC;IACL,CAAC;IAES,gCAAgC;QACtC,MAAM,IAAI,KAAK,CACX,6GAA6G,CAChH,CAAC;IACN,CAAC;IAES,uBAAuB;QAC7B,MAAM,IAAI,KAAK,CAAC,8EAA8E,CAAC,CAAC;IACpG,CAAC;CACJ"}
1
+ {"version":3,"file":"proxy_configuration.js","sourceRoot":"","sources":["../src/proxy_configuration.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,IAAI,CAAC;AAGpB,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AA6BtD;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH,MAAM,OAAO,kBAAkB;IAC3B,gBAAgB,GAAG,KAAK,CAAC;IACf,kBAAkB,GAAG,CAAC,CAAC;IACvB,SAAS,CAAW;IACpB,aAAa,GAAG,IAAI,GAAG,EAAyB,CAAC;IACjD,cAAc,CAA8B;IAC5C,GAAG,GAAG,cAAc,CAAC,SAAS,EAAE,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,oBAAoB,EAAE,CAAC,CAAC;IAEnF;;;;;;;;;;;;;;;;;;;OAmBG;IACH,YAAY,UAAqC,EAAE;QAC/C,MAAM,EAAE,gBAAgB,EAAE,GAAG,IAAI,EAAE,GAAG,OAAqB,CAAC;QAE5D,IAAI,iBAAiB,IAAI,IAAI,EAAE,CAAC;YAC5B,MAAM,IAAI,KAAK,CACX,+DAA+D;gBAC3D,6EAA6E,CACpF,CAAC;QACN,CAAC;QAED,EAAE,CACE,IAAI,EACJ,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC;YACjB,SAAS,EAAE,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,MAAM,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;YAC5E,cAAc,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ;SACvC,CAAC,CACL,CAAC;QAEF,MAAM,EAAE,SAAS,EAAE,cAAc,EAAE,GAAG,OAAO,CAAC;QAE9C,IAAI,SAAS,IAAI,cAAc;YAAE,IAAI,CAAC,gCAAgC,EAAE,CAAC;QACzE,IAAI,CAAC,SAAS,IAAI,CAAC,cAAc,IAAI,gBAAgB;YAAE,IAAI,CAAC,uBAAuB,EAAE,CAAC;QAEtF,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,cAAc,GAAG,cAAc,CAAC;IACzC,CAAC;IAED;;;;;;;;OAQG;IACH,KAAK,CAAC,YAAY,CAAC,OAAuB;QACtC,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACvC,IAAI,CAAC,GAAG;YAAE,OAAO,SAAS,CAAC;QAE3B,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAE5D,OAAO;YACH,GAAG;YACH,QAAQ,EAAE,kBAAkB,CAAC,QAAQ,CAAC;YACtC,QAAQ,EAAE,kBAAkB,CAAC,QAAQ,CAAC;YACtC,QAAQ;YACR,IAAI,EAAE,IAAK;SACd,CAAC;IACN,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,MAAM,CAAC,OAAuB;QAChC,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACtB,OAAO,CAAC,MAAM,IAAI,CAAC,mBAAmB,CAAC,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC,CAAC,IAAI,SAAS,CAAC;QACxF,CAAC;QAED,OAAO,IAAI,CAAC,oBAAoB,EAAE,IAAI,SAAS,CAAC;IACpD,CAAC;IAES,oBAAoB;QAC1B,OAAO,IAAI,CAAC,SAAU,CAAC,IAAI,CAAC,kBAAkB,EAAE,GAAG,IAAI,CAAC,SAAU,CAAC,MAAM,CAAC,CAAC;IAC/E,CAAC;IAED;;OAEG;IACO,KAAK,
CAAC,mBAAmB,CAAC,OAA+B;QAC/D,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,cAAe,CAAC,OAAO,CAAC,CAAC;QACrD,IAAI,CAAC;YACD,IAAI,QAAQ,EAAE,CAAC;gBACX,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,6BAA6B;YACpD,CAAC;YACD,OAAO,QAAQ,CAAC;QACpB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACX,MAAM,IAAI,KAAK,CACX,mEAAoE,GAAa,CAAC,OAAO,EAAE,CAC9F,CAAC;QACN,CAAC;IACL,CAAC;IAES,gCAAgC;QACtC,MAAM,IAAI,KAAK,CACX,6GAA6G,CAChH,CAAC;IACN,CAAC;IAES,uBAAuB;QAC7B,MAAM,IAAI,KAAK,CAAC,8EAA8E,CAAC,CAAC;IACpG,CAAC;CACJ"}
@@ -0,0 +1,121 @@
1
+ import type { Configuration, CrawleeLogger } from '@crawlee/core';
2
/**
 * Settings that decide whether a recoverable state is persisted and which
 * KeyValueStore record it is written to.
 */
export interface RecoverableStatePersistenceOptions {
    /** Key of the KeyValueStore record under which the state is stored. */
    persistStateKey: string;

    /** Switches state persistence on or off. */
    persistenceEnabled?: boolean;

    /**
     * Name of the KeyValueStore used for persistence.
     * The default store is used when neither a name nor an id is supplied.
     */
    persistStateKvsName?: string;

    /**
     * Identifier of the KeyValueStore used for persistence.
     * The default store is used when neither a name nor an id is supplied.
     */
    persistStateKvsId?: string;
}
22
/**
 * Everything needed to construct a `RecoverableState`: the persistence settings
 * plus the default state and optional logging / (de)serialization hooks.
 */
export interface RecoverableStateOptions<TStateModel = Record<string, unknown>>
    extends RecoverableStatePersistenceOptions {
    /**
     * State used when no persisted state is found.
     * The object is deep-copied before use, so the instance passed here is not handed out directly.
     */
    defaultState: TStateModel;

    /** Logger used for messages related to state persistence. */
    logger?: CrawleeLogger;

    /** Configuration instance to use. */
    config?: Configuration;

    /**
     * Converts the state to a JSON string before it is persisted.
     * `JSON.stringify` is used when omitted.
     */
    serialize?: (state: TStateModel) => string;

    /**
     * Converts a serialized state string back to the state model.
     * `JSON.parse` is used when omitted.
     * This is a good place to validate the data; throw an exception if validation fails.
     */
    deserialize?: (serializedState: string) => TStateModel;
}
51
/**
 * Manages a recoverable state object that can be persisted to a `KeyValueStore`.
 *
 * The state is a plain JavaScript object that can be serialized to and deserialized from JSON.
 * With persistence enabled it is loaded from the store on initialization, saved again whenever
 * a PERSIST_STATE event fires, and saved one last time on teardown, so it can be recovered
 * across migrations or restarts.
 */
export declare class RecoverableState<TStateModel = Record<string, unknown>> {
    private readonly persistStateKey;
    private readonly persistStateKvsName?;
    private readonly persistStateKvsId?;
    private readonly persistenceEnabled;
    private readonly defaultState;
    private readonly serialize;
    private readonly deserialize;
    private readonly log;
    private state;
    private keyValueStore;

    /**
     * Creates a recoverable state object; nothing is loaded until `initialize()` is called.
     *
     * @param options Configuration options for the recoverable state
     */
    constructor(options: RecoverableStateOptions<TStateModel>);

    /**
     * Prepares the recoverable state for use and must be called before the state is accessed.
     *
     * Loads the saved state if persistence is enabled and subscribes to PERSIST_STATE events.
     *
     * @returns The loaded state object
     */
    initialize(): Promise<TStateModel>;

    /**
     * Releases resources held by the recoverable state.
     *
     * With persistence enabled, unsubscribes from PERSIST_STATE events and persists
     * the current state one final time.
     */
    teardown(): Promise<void>;

    /** The current state object. */
    get currentValue(): TStateModel;

    /**
     * Restores the default state and discards anything persisted.
     *
     * The in-memory state is replaced by the default state and, if persistence is enabled,
     * the persisted state is cleared from the KeyValueStore.
     */
    reset(): Promise<void>;

    /**
     * Writes the current state to the KeyValueStore.
     *
     * Usually invoked as a PERSIST_STATE event handler, but it can also be called directly.
     *
     * @param eventData Optional data associated with a PERSIST_STATE event
     */
    persistState(eventData?: { isMigrating: boolean }): Promise<void>;

    /** Reads the saved state from the KeyValueStore. */
    private loadSavedState;
}
121
+ //# sourceMappingURL=recoverable_state.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"recoverable_state.d.ts","sourceRoot":"","sources":["../src/recoverable_state.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAGlE,MAAM,WAAW,kCAAkC;IAC/C;;OAEG;IACH,eAAe,EAAE,MAAM,CAAC;IAExB;;OAEG;IACH,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAE7B;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAE7B;;;OAGG;IACH,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC9B;AAED;;GAEG;AACH,MAAM,WAAW,uBAAuB,CACpC,WAAW,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CACvC,SAAQ,kCAAkC;IACxC;;;OAGG;IACH,YAAY,EAAE,WAAW,CAAC;IAE1B;;OAEG;IACH,MAAM,CAAC,EAAE,aAAa,CAAC;IAEvB;;OAEG;IACH,MAAM,CAAC,EAAE,aAAa,CAAC;IAEvB;;;OAGG;IACH,SAAS,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,KAAK,MAAM,CAAC;IAE3C;;;;OAIG;IACH,WAAW,CAAC,EAAE,CAAC,eAAe,EAAE,MAAM,KAAK,WAAW,CAAC;CAC1D;AAED;;;;;;;;;GASG;AACH,qBAAa,gBAAgB,CAAC,WAAW,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC;IAC/D,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAc;IAC3C,OAAO,CAAC,KAAK,CAA4B;IACzC,OAAO,CAAC,QAAQ,CAAC,kBAAkB,CAAU;IAC7C,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAS;IACzC,OAAO,CAAC,QAAQ,CAAC,mBAAmB,CAAC,CAAS;IAC9C,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAS;IAC5C,OAAO,CAAC,aAAa,CAA8B;IACnD,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAgB;IACpC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAiC;IAC3D,OAAO,CAAC,QAAQ,CAAC,WAAW,CAA2C;IAEvE;;;;OAIG;gBACS,OAAO,EAAE,uBAAuB,CAAC,WAAW,CAAC;IAazD;;;;;;;OAOG;IACG,UAAU,IAAI,OAAO,CAAC,WAAW,CAAC;IA6BxC;;;;;OAKG;IACG,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC;IAU/B;;OAEG;IACH,IAAI,YAAY,IAAI,WAAW,CAM9B;IAED;;;;;OAKG;IACG,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAY5B;;;;;;;OAOG;IACG,YAAY,CAAC,SAAS,CAAC,EAAE;QAAE,WAAW,EAAE,OAAO,CAAA;KAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAcvE;;OAEG;YACW,cAAc;CAY/B"}
@@ -0,0 +1,142 @@
1
+ import { KeyValueStore, serviceLocator } from '@crawlee/core';
2
/**
 * Manages a recoverable state object that can be persisted to a `KeyValueStore`.
 *
 * The state is a plain JavaScript object that round-trips through `serialize` / `deserialize`
 * (`JSON.stringify` / `JSON.parse` unless overridden). With persistence enabled, the state is
 * loaded from the store in `initialize()`, written back on every `persistState` event and once
 * more in `teardown()`. With persistence disabled, the state lives in memory only and no store
 * is opened.
 *
 * NOTE(review): the typings declare a `config` option, but this class does not read it; the
 * configuration handed to `KeyValueStore.open` always comes from `serviceLocator`. Confirm that
 * this is intended.
 */
export class RecoverableState {
    /** State used when nothing has been persisted; only copies of it are ever used as the live state. */
    defaultState;
    /** Current in-memory state, or `null` until it has been loaded. */
    state = null;
    persistenceEnabled;
    persistStateKey;
    persistStateKvsName;
    persistStateKvsId;
    /** Store opened by `initialize()`; stays `null` while persistence is disabled. */
    keyValueStore = null;
    log;
    serialize;
    deserialize;

    /**
     * Initialize a new recoverable state object.
     *
     * @param options Configuration options for the recoverable state
     */
    constructor(options) {
        this.defaultState = options.defaultState;
        this.persistStateKey = options.persistStateKey;
        this.persistenceEnabled = options.persistenceEnabled ?? false;
        this.persistStateKvsName = options.persistStateKvsName;
        this.persistStateKvsId = options.persistStateKvsId;
        this.log = options.logger ?? serviceLocator.getLogger().child({ prefix: 'RecoverableState' });
        this.serialize = options.serialize ?? JSON.stringify;
        this.deserialize = options.deserialize ?? JSON.parse;
        // Bound once so that the very same function reference can be passed to both `on` and `off`.
        this.persistState = this.persistState.bind(this);
    }

    /**
     * Initialize the recoverable state.
     *
     * This method must be called before using the recoverable state. It loads the saved state
     * if persistence is enabled and registers the object to listen for PERSIST_STATE events.
     * Calling it again once a state is loaded returns that state without reloading it.
     *
     * @returns The loaded state object
     */
    async initialize() {
        if (this.state !== null && this.state !== undefined) {
            return this.currentValue;
        }
        if (!this.persistenceEnabled) {
            this.state = this.cloneDefaultState();
            return this.currentValue;
        }

        // A store name takes precedence over an id; with neither, `null` is passed to `KeyValueStore.open`.
        let kvsIdentifier = null;
        if (this.persistStateKvsName) {
            kvsIdentifier = { name: this.persistStateKvsName };
        } else if (this.persistStateKvsId) {
            kvsIdentifier = { id: this.persistStateKvsId };
        }

        this.keyValueStore = await KeyValueStore.open(kvsIdentifier, { config: serviceLocator.getConfiguration() });
        await this.loadSavedState();

        // Register for persist state events
        const eventManager = serviceLocator.getEventManager();
        eventManager.on("persistState" /* EventType.PERSIST_STATE */, this.persistState);
        return this.currentValue;
    }

    /**
     * Clean up resources used by the recoverable state.
     *
     * If persistence is enabled, this method deregisters the object from PERSIST_STATE events
     * and persists the current state one last time. It does nothing when persistence is disabled.
     *
     * @throws {Error} If persistence is enabled but the state has not been initialized
     */
    async teardown() {
        if (!this.persistenceEnabled) {
            return;
        }
        const eventManager = serviceLocator.getEventManager();
        eventManager.off("persistState" /* EventType.PERSIST_STATE */, this.persistState);
        await this.persistState();
    }

    /**
     * Get the current state.
     *
     * @throws {Error} If the state has not been loaded yet
     */
    get currentValue() {
        if (this.state === null) {
            throw new Error('Recoverable state has not yet been loaded');
        }
        return this.state;
    }

    /**
     * Reset the state to the default values and clear any persisted state.
     *
     * Resets the current state to the default state and, if persistence is enabled,
     * clears the persisted state from the KeyValueStore.
     *
     * @throws {Error} If persistence is enabled but the store has not been opened yet
     */
    async reset() {
        this.state = this.cloneDefaultState();
        if (this.persistenceEnabled) {
            if (this.keyValueStore === null) {
                throw new Error('Recoverable state has not yet been initialized');
            }
            await this.keyValueStore.setValue(this.persistStateKey, null);
        }
    }

    /**
     * Persist the current state to the KeyValueStore.
     *
     * This method is typically called in response to a PERSIST_STATE event, but can also be called
     * directly when needed. When persistence is disabled it only checks that a state is loaded and
     * then returns without writing anything.
     *
     * @param eventData Optional data associated with a PERSIST_STATE event
     * @throws {Error} If the state has not been loaded, or persistence is enabled and the store is not open
     */
    async persistState(eventData) {
        this.log.debug(`Persisting state of the RecoverableState (eventData=${JSON.stringify(eventData)}).`);
        if (this.state === null) {
            throw new Error('Recoverable state has not yet been initialized');
        }
        // No store is opened while persistence is disabled, so there is nothing to write
        // and the missing store must not be reported as an error.
        if (!this.persistenceEnabled) {
            return;
        }
        if (this.keyValueStore === null) {
            throw new Error('Recoverable state has not yet been initialized');
        }
        await this.keyValueStore.setValue(this.persistStateKey, this.serialize(this.state), {
            contentType: 'text/plain', // HACK - the result is expected to be JSON, but we do this to avoid the implicit JSON.parse in `KeyValueStore.getValue`
        });
    }

    /**
     * Load the saved state from the KeyValueStore, falling back to a copy of the default state
     * when nothing is stored under the key.
     *
     * @throws {Error} If the store has not been opened yet
     */
    async loadSavedState() {
        if (this.keyValueStore === null) {
            throw new Error('Recoverable state has not yet been initialized');
        }
        const storedState = await this.keyValueStore.getValue(this.persistStateKey);
        if (storedState === null || storedState === undefined) {
            this.state = this.cloneDefaultState();
        } else {
            this.state = this.deserialize(storedState);
        }
    }

    /**
     * Internal helper: produce a fresh copy of the default state by passing it through
     * `serialize` and `deserialize`, so the live state never aliases `defaultState`.
     */
    cloneDefaultState() {
        return this.deserialize(this.serialize(this.defaultState));
    }
}
142
+ //# sourceMappingURL=recoverable_state.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"recoverable_state.js","sourceRoot":"","sources":["../src/recoverable_state.ts"],"names":[],"mappings":"AACA,OAAO,EAAa,aAAa,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AA8DzE;;;;;;;;;GASG;AACH,MAAM,OAAO,gBAAgB;IACR,YAAY,CAAc;IACnC,KAAK,GAAuB,IAAI,CAAC;IACxB,kBAAkB,CAAU;IAC5B,eAAe,CAAS;IACxB,mBAAmB,CAAU;IAC7B,iBAAiB,CAAU;IACpC,aAAa,GAAyB,IAAI,CAAC;IAClC,GAAG,CAAgB;IACnB,SAAS,CAAiC;IAC1C,WAAW,CAA2C;IAEvE;;;;OAIG;IACH,YAAY,OAA6C;QACrD,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;QACzC,IAAI,CAAC,eAAe,GAAG,OAAO,CAAC,eAAe,CAAC;QAC/C,IAAI,CAAC,kBAAkB,GAAG,OAAO,CAAC,kBAAkB,IAAI,KAAK,CAAC;QAC9D,IAAI,CAAC,mBAAmB,GAAG,OAAO,CAAC,mBAAmB,CAAC;QACvD,IAAI,CAAC,iBAAiB,GAAG,OAAO,CAAC,iBAAiB,CAAC;QACnD,IAAI,CAAC,GAAG,GAAG,OAAO,CAAC,MAAM,IAAI,cAAc,CAAC,SAAS,EAAE,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,kBAAkB,EAAE,CAAC,CAAC;QAC9F,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,IAAI,CAAC,SAAS,CAAC;QACrD,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,IAAI,CAAC,KAAK,CAAC;QAErD,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACrD,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,UAAU;QACZ,IAAI,IAAI,CAAC,KAAK,KAAK,IAAI,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;YAClD,OAAO,IAAI,CAAC,YAAY,CAAC;QAC7B,CAAC;QAED,IAAI,CAAC,IAAI,CAAC,kBAAkB,EAAE,CAAC;YAC3B,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC;YACjE,OAAO,IAAI,CAAC,YAAY,CAAC;QAC7B,CAAC;QAED,IAAI,aAAa,GAA6C,IAAI,CAAC;QAEnE,IAAI,IAAI,CAAC,mBAAmB,EAAE,CAAC;YAC3B,aAAa,GAAG,EAAE,IAAI,EAAE,IAAI,CAAC,mBAAmB,EAAE,CAAC;QACvD,CAAC;aAAM,IAAI,IAAI,CAAC,iBAAiB,EAAE,CAAC;YAChC,aAAa,GAAG,EAAE,EAAE,EAAE,IAAI,CAAC,iBAAiB,EAAE,CAAC;QACnD,CAAC;QAED,IAAI,CAAC,aAAa,GAAG,MAAM,aAAa,CAAC,IAAI,CAAC,aAAa,EAAE,EAAE,MAAM,EAAE,cAAc,CAAC,gBAAgB,EAAE,EAAE,CAAC,CAAC;QAE5G,MAAM,IAAI,CAAC,cAAc,EAAE,CAAC;QAE5B,oCAAoC;QACpC,MAAM,YAAY,GAAG,cAAc,CAAC,eAAe,EAAE,CAAC;QACtD,YAAY,CAAC,EAAE,+CAA0B,IAAI,CAAC,YAAY,CAAC,CAAC;QAE5D,OAAO,IAAI,CAAC,YAAY,CAAC;IAC7B,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,QAAQ;QACV,IAAI,CAAC,IAAI,CAAC,kBAAkB,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAA
C;YACjD,OAAO;QACX,CAAC;QAED,MAAM,YAAY,GAAG,cAAc,CAAC,eAAe,EAAE,CAAC;QACtD,YAAY,CAAC,GAAG,+CAA0B,IAAI,CAAC,YAAY,CAAC,CAAC;QAC7D,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;IAC9B,CAAC;IAED;;OAEG;IACH,IAAI,YAAY;QACZ,IAAI,IAAI,CAAC,KAAK,KAAK,IAAI,EAAE,CAAC;YACtB,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAC;QACjE,CAAC;QAED,OAAO,IAAI,CAAC,KAAK,CAAC;IACtB,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,KAAK;QACP,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC;QAEjE,IAAI,IAAI,CAAC,kBAAkB,EAAE,CAAC;YAC1B,IAAI,IAAI,CAAC,aAAa,KAAK,IAAI,EAAE,CAAC;gBAC9B,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;YACtE,CAAC;YAED,MAAM,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,IAAI,CAAC,eAAe,EAAE,IAAI,CAAC,CAAC;QAClE,CAAC;IACL,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,YAAY,CAAC,SAAoC;QACnD,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,uDAAuD,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QAErG,IAAI,IAAI,CAAC,aAAa,KAAK,IAAI,IAAI,IAAI,CAAC,KAAK,KAAK,IAAI,EAAE,CAAC;YACrD,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;QACtE,CAAC;QAED,IAAI,IAAI,CAAC,kBAAkB,EAAE,CAAC;YAC1B,MAAM,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,IAAI,CAAC,eAAe,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE;gBAChF,WAAW,EAAE,YAAY,EAAE,wHAAwH;aACtJ,CAAC,CAAC;QACP,CAAC;IACL,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,cAAc;QACxB,IAAI,IAAI,CAAC,aAAa,KAAK,IAAI,EAAE,CAAC;YAC9B,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;QACtE,CAAC;QAED,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QAC5E,IAAI,WAAW,KAAK,IAAI,IAAI,WAAW,KAAK,SAAS,EAAE,CAAC;YACpD,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC;QACrE,CAAC;aAAM,CAAC;YACJ,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,WAAW,CAAC,WAAqB,CAAC,CAAC;QACzD,CAAC;IACL,CAAC;CACJ"}