@crawlee/core 3.13.5 → 4.0.0-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/autoscaling/autoscaled_pool.d.ts +3 -3
- package/autoscaling/autoscaled_pool.d.ts.map +1 -1
- package/autoscaling/autoscaled_pool.js +76 -225
- package/autoscaling/autoscaled_pool.js.map +1 -1
- package/autoscaling/index.d.ts +3 -3
- package/autoscaling/index.d.ts.map +1 -1
- package/autoscaling/index.js +3 -6
- package/autoscaling/index.js.map +1 -1
- package/autoscaling/snapshotter.d.ts +3 -3
- package/autoscaling/snapshotter.d.ts.map +1 -1
- package/autoscaling/snapshotter.js +42 -137
- package/autoscaling/snapshotter.js.map +1 -1
- package/autoscaling/system_status.d.ts +2 -2
- package/autoscaling/system_status.d.ts.map +1 -1
- package/autoscaling/system_status.js +20 -55
- package/autoscaling/system_status.js.map +1 -1
- package/configuration.d.ts +4 -4
- package/configuration.d.ts.map +1 -1
- package/configuration.js +79 -126
- package/configuration.js.map +1 -1
- package/cookie_utils.js +17 -25
- package/cookie_utils.js.map +1 -1
- package/crawlers/crawler_commons.d.ts +9 -8
- package/crawlers/crawler_commons.d.ts.map +1 -1
- package/crawlers/crawler_commons.js +47 -121
- package/crawlers/crawler_commons.js.map +1 -1
- package/crawlers/crawler_extension.d.ts +1 -1
- package/crawlers/crawler_extension.d.ts.map +1 -1
- package/crawlers/crawler_extension.js +4 -20
- package/crawlers/crawler_extension.js.map +1 -1
- package/crawlers/crawler_utils.d.ts +1 -1
- package/crawlers/crawler_utils.d.ts.map +1 -1
- package/crawlers/crawler_utils.js +3 -6
- package/crawlers/crawler_utils.js.map +1 -1
- package/crawlers/error_snapshotter.d.ts +3 -3
- package/crawlers/error_snapshotter.d.ts.map +1 -1
- package/crawlers/error_snapshotter.js +8 -38
- package/crawlers/error_snapshotter.js.map +1 -1
- package/crawlers/error_tracker.d.ts +2 -2
- package/crawlers/error_tracker.d.ts.map +1 -1
- package/crawlers/error_tracker.js +18 -40
- package/crawlers/error_tracker.js.map +1 -1
- package/crawlers/index.d.ts +6 -6
- package/crawlers/index.d.ts.map +1 -1
- package/crawlers/index.js +6 -9
- package/crawlers/index.js.map +1 -1
- package/crawlers/statistics.d.ts +3 -3
- package/crawlers/statistics.d.ts.map +1 -1
- package/crawlers/statistics.js +59 -167
- package/crawlers/statistics.js.map +1 -1
- package/enqueue_links/enqueue_links.d.ts +2 -2
- package/enqueue_links/enqueue_links.d.ts.map +1 -1
- package/enqueue_links/enqueue_links.js +41 -47
- package/enqueue_links/enqueue_links.js.map +1 -1
- package/enqueue_links/index.d.ts +2 -2
- package/enqueue_links/index.d.ts.map +1 -1
- package/enqueue_links/index.js +2 -5
- package/enqueue_links/index.js.map +1 -1
- package/enqueue_links/shared.d.ts +4 -4
- package/enqueue_links/shared.d.ts.map +1 -1
- package/enqueue_links/shared.js +23 -35
- package/enqueue_links/shared.js.map +1 -1
- package/errors.js +5 -13
- package/errors.js.map +1 -1
- package/events/event_manager.d.ts +1 -1
- package/events/event_manager.d.ts.map +1 -1
- package/events/event_manager.js +16 -45
- package/events/event_manager.js.map +1 -1
- package/events/index.d.ts +2 -2
- package/events/index.d.ts.map +1 -1
- package/events/index.js +2 -5
- package/events/index.js.map +1 -1
- package/events/local_event_manager.d.ts +1 -1
- package/events/local_event_manager.d.ts.map +1 -1
- package/events/local_event_manager.js +15 -28
- package/events/local_event_manager.js.map +1 -1
- package/http_clients/base-http-client.d.ts +1 -1
- package/http_clients/base-http-client.d.ts.map +1 -1
- package/http_clients/base-http-client.js +5 -8
- package/http_clients/base-http-client.js.map +1 -1
- package/http_clients/form-data-like.js +1 -2
- package/http_clients/form-data-like.js.map +1 -1
- package/http_clients/got-scraping-http-client.d.ts +1 -1
- package/http_clients/got-scraping-http-client.d.ts.map +1 -1
- package/http_clients/got-scraping-http-client.js +6 -11
- package/http_clients/got-scraping-http-client.js.map +1 -1
- package/http_clients/index.d.ts +2 -2
- package/http_clients/index.d.ts.map +1 -1
- package/http_clients/index.js +2 -5
- package/http_clients/index.js.map +1 -1
- package/index.d.ts +16 -16
- package/index.d.ts.map +1 -1
- package/index.js +17 -22
- package/index.js.map +1 -1
- package/log.js +2 -11
- package/log.js.map +1 -1
- package/package.json +27 -33
- package/proxy_configuration.d.ts +1 -1
- package/proxy_configuration.d.ts.map +1 -1
- package/proxy_configuration.js +22 -78
- package/proxy_configuration.js.map +1 -1
- package/request.d.ts +2 -2
- package/request.d.ts.map +1 -1
- package/request.js +76 -147
- package/request.js.map +1 -1
- package/router.d.ts +3 -3
- package/router.d.ts.map +1 -1
- package/router.js +6 -21
- package/router.js.map +1 -1
- package/serialization.d.ts.map +1 -1
- package/serialization.js +22 -44
- package/serialization.js.map +1 -1
- package/session_pool/consts.js +3 -6
- package/session_pool/consts.js.map +1 -1
- package/session_pool/errors.js +3 -11
- package/session_pool/errors.js.map +1 -1
- package/session_pool/events.js +1 -4
- package/session_pool/events.js.map +1 -1
- package/session_pool/index.d.ts +5 -5
- package/session_pool/index.d.ts.map +1 -1
- package/session_pool/index.js +5 -8
- package/session_pool/index.js.map +1 -1
- package/session_pool/session.d.ts +3 -17
- package/session_pool/session.d.ts.map +1 -1
- package/session_pool/session.js +52 -116
- package/session_pool/session.js.map +1 -1
- package/session_pool/session_pool.d.ts +7 -7
- package/session_pool/session_pool.d.ts.map +1 -1
- package/session_pool/session_pool.js +42 -126
- package/session_pool/session_pool.js.map +1 -1
- package/storages/access_checking.d.ts +1 -1
- package/storages/access_checking.d.ts.map +1 -1
- package/storages/access_checking.js +4 -9
- package/storages/access_checking.js.map +1 -1
- package/storages/dataset.d.ts +4 -4
- package/storages/dataset.d.ts.map +1 -1
- package/storages/dataset.js +45 -76
- package/storages/dataset.js.map +1 -1
- package/storages/index.d.ts +11 -11
- package/storages/index.d.ts.map +1 -1
- package/storages/index.js +11 -18
- package/storages/index.js.map +1 -1
- package/storages/key_value_store.d.ts +3 -3
- package/storages/key_value_store.d.ts.map +1 -1
- package/storages/key_value_store.js +62 -102
- package/storages/key_value_store.js.map +1 -1
- package/storages/request_list.d.ts +3 -3
- package/storages/request_list.d.ts.map +1 -1
- package/storages/request_list.js +83 -183
- package/storages/request_list.js.map +1 -1
- package/storages/request_provider.d.ts +5 -5
- package/storages/request_provider.d.ts.map +1 -1
- package/storages/request_provider.js +97 -201
- package/storages/request_provider.js.map +1 -1
- package/storages/request_queue.d.ts +4 -4
- package/storages/request_queue.d.ts.map +1 -1
- package/storages/request_queue.js +27 -40
- package/storages/request_queue.js.map +1 -1
- package/storages/request_queue_v2.d.ts +4 -4
- package/storages/request_queue_v2.d.ts.map +1 -1
- package/storages/request_queue_v2.js +18 -42
- package/storages/request_queue_v2.js.map +1 -1
- package/storages/sitemap_request_list.d.ts +4 -4
- package/storages/sitemap_request_list.d.ts.map +1 -1
- package/storages/sitemap_request_list.js +96 -181
- package/storages/sitemap_request_list.js.map +1 -1
- package/storages/storage_manager.d.ts +3 -3
- package/storages/storage_manager.d.ts.map +1 -1
- package/storages/storage_manager.js +14 -42
- package/storages/storage_manager.js.map +1 -1
- package/storages/utils.d.ts +1 -1
- package/storages/utils.d.ts.map +1 -1
- package/storages/utils.js +16 -23
- package/storages/utils.js.map +1 -1
- package/tsconfig.build.tsbuildinfo +1 -1
- package/typedefs.js +2 -6
- package/typedefs.js.map +1 -1
- package/validators.js +6 -10
- package/validators.js.map +1 -1
- package/index.mjs +0 -88
package/storages/utils.js
CHANGED
|
@@ -1,21 +1,14 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
exports.getRequestId = getRequestId;
|
|
7
|
-
const tslib_1 = require("tslib");
|
|
8
|
-
const node_crypto_1 = tslib_1.__importDefault(require("node:crypto"));
|
|
9
|
-
const configuration_1 = require("../configuration");
|
|
10
|
-
const key_value_store_1 = require("./key_value_store");
|
|
11
|
-
async function purgeDefaultStorages(configOrOptions, client) {
|
|
12
|
-
const options = configOrOptions instanceof configuration_1.Configuration
|
|
1
|
+
import crypto from 'node:crypto';
|
|
2
|
+
import { Configuration } from '../configuration.js';
|
|
3
|
+
import { KeyValueStore } from './key_value_store.js';
|
|
4
|
+
export async function purgeDefaultStorages(configOrOptions, client) {
|
|
5
|
+
const options = configOrOptions instanceof Configuration
|
|
13
6
|
? {
|
|
14
7
|
client,
|
|
15
8
|
config: configOrOptions,
|
|
16
9
|
}
|
|
17
10
|
: (configOrOptions ?? {});
|
|
18
|
-
const { config =
|
|
11
|
+
const { config = Configuration.getGlobalConfig(), onlyPurgeOnce = false } = options;
|
|
19
12
|
({ client = config.getStorageClient() } = options);
|
|
20
13
|
const casted = client;
|
|
21
14
|
// if `onlyPurgeOnce` is true, will purge anytime this function is called, otherwise - only on start
|
|
@@ -33,9 +26,9 @@ async function purgeDefaultStorages(configOrOptions, client) {
|
|
|
33
26
|
* @param defaultValue If the store does not yet have a value in it, the value will be initialized with the `defaultValue` you provide.
|
|
34
27
|
* @param options An optional object parameter where a custom `keyValueStoreName` and `config` can be passed in.
|
|
35
28
|
*/
|
|
36
|
-
async function useState(name, defaultValue = {}, options) {
|
|
37
|
-
const kvStore = await
|
|
38
|
-
config: options?.config ||
|
|
29
|
+
export async function useState(name, defaultValue = {}, options) {
|
|
30
|
+
const kvStore = await KeyValueStore.open(options?.keyValueStoreName, {
|
|
31
|
+
config: options?.config || Configuration.getGlobalConfig(),
|
|
39
32
|
});
|
|
40
33
|
return kvStore.getAutoSavedValue(name || 'CRAWLEE_GLOBAL_STATE', defaultValue);
|
|
41
34
|
}
|
|
@@ -48,32 +41,32 @@ async function useState(name, defaultValue = {}, options) {
|
|
|
48
41
|
*
|
|
49
42
|
* @internal
|
|
50
43
|
*/
|
|
51
|
-
function getRequestId(uniqueKey) {
|
|
52
|
-
const str =
|
|
44
|
+
export function getRequestId(uniqueKey) {
|
|
45
|
+
const str = crypto.createHash('sha256').update(uniqueKey).digest('base64').replace(/[+/=]/g, '');
|
|
53
46
|
return str.slice(0, 15);
|
|
54
47
|
}
|
|
55
48
|
/**
|
|
56
49
|
* When requesting queue head we always fetch requestsInProgressCount * QUERY_HEAD_BUFFER number of requests.
|
|
57
50
|
* @internal
|
|
58
51
|
*/
|
|
59
|
-
|
|
52
|
+
export const QUERY_HEAD_MIN_LENGTH = 100;
|
|
60
53
|
/**
|
|
61
54
|
* Indicates how long it usually takes for the underlying storage to propagate all writes
|
|
62
55
|
* to be available to subsequent reads.
|
|
63
56
|
* @internal
|
|
64
57
|
*/
|
|
65
|
-
|
|
58
|
+
export const STORAGE_CONSISTENCY_DELAY_MILLIS = 3000;
|
|
66
59
|
/** @internal */
|
|
67
|
-
|
|
60
|
+
export const QUERY_HEAD_BUFFER = 3;
|
|
68
61
|
/**
|
|
69
62
|
* If queue was modified (request added/updated/deleted) before more than API_PROCESSED_REQUESTS_DELAY_MILLIS
|
|
70
63
|
* then we assume the get head operation to be consistent.
|
|
71
64
|
* @internal
|
|
72
65
|
*/
|
|
73
|
-
|
|
66
|
+
export const API_PROCESSED_REQUESTS_DELAY_MILLIS = 10_000;
|
|
74
67
|
/**
|
|
75
68
|
* How many times we try to get queue head with queueModifiedAt older than API_PROCESSED_REQUESTS_DELAY_MILLIS.
|
|
76
69
|
* @internal
|
|
77
70
|
*/
|
|
78
|
-
|
|
71
|
+
export const MAX_QUERIES_FOR_CONSISTENCY = 6;
|
|
79
72
|
//# sourceMappingURL=utils.js.map
|
package/storages/utils.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"utils.js","sourceRoot":"","sources":["../../src/storages/utils.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"utils.js","sourceRoot":"","sources":["../../src/storages/utils.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,aAAa,CAAC;AAIjC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACpD,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAwCrD,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACtC,eAA4D,EAC5D,MAAsB;IAEtB,MAAM,OAAO,GACT,eAAe,YAAY,aAAa;QACpC,CAAC,CAAC;YACI,MAAM;YACN,MAAM,EAAE,eAAe;SAC1B;QACH,CAAC,CAAC,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC;IAClC,MAAM,EAAE,MAAM,GAAG,aAAa,CAAC,eAAe,EAAE,EAAE,aAAa,GAAG,KAAK,EAAE,GAAG,OAAO,CAAC;IACpF,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC,gBAAgB,EAAE,EAAE,GAAG,OAAO,CAAC,CAAC;IAEnD,MAAM,MAAM,GAAG,MAAgD,CAAC;IAEhE,oGAAoG;IACpG,IAAI,CAAC,aAAa,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;QACrE,MAAM,CAAC,QAAQ,GAAG,IAAI,CAAC;QACvB,MAAM,MAAM,CAAC,KAAK,EAAE,EAAE,CAAC;IAC3B,CAAC;AACL,CAAC;AAWD;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC1B,IAAa,EACb,eAAe,EAAW,EAC1B,OAAyB;IAEzB,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC,IAAI,CAAC,OAAO,EAAE,iBAAiB,EAAE;QACjE,MAAM,EAAE,OAAO,EAAE,MAAM,IAAI,aAAa,CAAC,eAAe,EAAE;KAC7D,CAAC,CAAC;IACH,OAAO,OAAO,CAAC,iBAAiB,CAAQ,IAAI,IAAI,sBAAsB,EAAE,YAAY,CAAC,CAAC;AAC1F,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,YAAY,CAAC,SAAiB;IAC1C,MAAM,GAAG,GAAG,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;IAEjG,OAAO,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AAC5B,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAEzC;;;;GAIG;AACH,MAAM,CAAC,MAAM,gCAAgC,GAAG,IAAI,CAAC;AAErD,gBAAgB;AAChB,MAAM,CAAC,MAAM,iBAAiB,GAAG,CAAC,CAAC;AAEnC;;;;GAIG;AACH,MAAM,CAAC,MAAM,mCAAmC,GAAG,MAAM,CAAC;AAE1D;;;GAGG;AACH,MAAM,CAAC,MAAM,2BAA2B,GAAG,CAAC,CAAC"}
|