@crawlee/core 4.0.0-beta.5 → 4.0.0-beta.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -5
- package/autoscaling/autoscaled_pool.d.ts +3 -5
- package/autoscaling/autoscaled_pool.d.ts.map +1 -1
- package/autoscaling/autoscaled_pool.js +3 -9
- package/autoscaling/autoscaled_pool.js.map +1 -1
- package/autoscaling/snapshotter.d.ts +3 -13
- package/autoscaling/snapshotter.d.ts.map +1 -1
- package/autoscaling/snapshotter.js +18 -29
- package/autoscaling/snapshotter.js.map +1 -1
- package/autoscaling/system_status.d.ts +0 -3
- package/autoscaling/system_status.d.ts.map +1 -1
- package/autoscaling/system_status.js +2 -3
- package/autoscaling/system_status.js.map +1 -1
- package/configuration.d.ts +85 -227
- package/configuration.d.ts.map +1 -1
- package/configuration.js +159 -223
- package/configuration.js.map +1 -1
- package/cookie_utils.d.ts +1 -1
- package/cookie_utils.d.ts.map +1 -1
- package/cookie_utils.js +8 -8
- package/cookie_utils.js.map +1 -1
- package/crawlers/context_pipeline.d.ts +71 -0
- package/crawlers/context_pipeline.d.ts.map +1 -0
- package/crawlers/context_pipeline.js +123 -0
- package/crawlers/context_pipeline.js.map +1 -0
- package/crawlers/crawler_commons.d.ts +19 -27
- package/crawlers/crawler_commons.d.ts.map +1 -1
- package/crawlers/crawler_commons.js +12 -20
- package/crawlers/crawler_commons.js.map +1 -1
- package/crawlers/crawler_utils.js +1 -1
- package/crawlers/crawler_utils.js.map +1 -1
- package/crawlers/error_snapshotter.d.ts +3 -2
- package/crawlers/error_snapshotter.d.ts.map +1 -1
- package/crawlers/error_snapshotter.js +2 -2
- package/crawlers/error_snapshotter.js.map +1 -1
- package/crawlers/error_tracker.d.ts +2 -1
- package/crawlers/error_tracker.d.ts.map +1 -1
- package/crawlers/error_tracker.js.map +1 -1
- package/crawlers/index.d.ts +1 -1
- package/crawlers/index.d.ts.map +1 -1
- package/crawlers/index.js +1 -1
- package/crawlers/index.js.map +1 -1
- package/crawlers/internals/types.d.ts +8 -0
- package/crawlers/internals/types.d.ts.map +1 -0
- package/crawlers/internals/types.js +2 -0
- package/crawlers/internals/types.js.map +1 -0
- package/crawlers/statistics.d.ts +15 -15
- package/crawlers/statistics.d.ts.map +1 -1
- package/crawlers/statistics.js +21 -18
- package/crawlers/statistics.js.map +1 -1
- package/enqueue_links/enqueue_links.d.ts +32 -18
- package/enqueue_links/enqueue_links.d.ts.map +1 -1
- package/enqueue_links/enqueue_links.js +45 -24
- package/enqueue_links/enqueue_links.js.map +1 -1
- package/enqueue_links/shared.d.ts +25 -8
- package/enqueue_links/shared.d.ts.map +1 -1
- package/enqueue_links/shared.js +69 -37
- package/enqueue_links/shared.js.map +1 -1
- package/errors.d.ts +30 -0
- package/errors.d.ts.map +1 -1
- package/errors.js +44 -0
- package/errors.js.map +1 -1
- package/events/event_manager.d.ts +8 -5
- package/events/event_manager.d.ts.map +1 -1
- package/events/event_manager.js +7 -9
- package/events/event_manager.js.map +1 -1
- package/events/local_event_manager.d.ts +14 -4
- package/events/local_event_manager.d.ts.map +1 -1
- package/events/local_event_manager.js +33 -39
- package/events/local_event_manager.js.map +1 -1
- package/index.d.ts +3 -2
- package/index.d.ts.map +1 -1
- package/index.js +2 -1
- package/index.js.map +1 -1
- package/log.d.ts +82 -2
- package/log.d.ts.map +1 -1
- package/log.js +102 -0
- package/log.js.map +1 -1
- package/package.json +9 -10
- package/proxy_configuration.d.ts +14 -148
- package/proxy_configuration.d.ts.map +1 -1
- package/proxy_configuration.js +19 -167
- package/proxy_configuration.js.map +1 -1
- package/recoverable_state.d.ts +121 -0
- package/recoverable_state.d.ts.map +1 -0
- package/recoverable_state.js +142 -0
- package/recoverable_state.js.map +1 -0
- package/request.d.ts +74 -8
- package/request.d.ts.map +1 -1
- package/request.js +87 -18
- package/request.js.map +1 -1
- package/router.d.ts.map +1 -1
- package/router.js.map +1 -1
- package/serialization.js +1 -1
- package/serialization.js.map +1 -1
- package/service_locator.d.ts +162 -0
- package/service_locator.d.ts.map +1 -0
- package/service_locator.js +247 -0
- package/service_locator.js.map +1 -0
- package/session_pool/session.d.ts +9 -31
- package/session_pool/session.d.ts.map +1 -1
- package/session_pool/session.js +17 -21
- package/session_pool/session.js.map +1 -1
- package/session_pool/session_pool.d.ts +51 -57
- package/session_pool/session_pool.d.ts.map +1 -1
- package/session_pool/session_pool.js +86 -79
- package/session_pool/session_pool.js.map +1 -1
- package/storages/dataset.d.ts +63 -19
- package/storages/dataset.d.ts.map +1 -1
- package/storages/dataset.js +86 -22
- package/storages/dataset.js.map +1 -1
- package/storages/index.d.ts +2 -0
- package/storages/index.d.ts.map +1 -1
- package/storages/index.js +2 -0
- package/storages/index.js.map +1 -1
- package/storages/key_value_store.d.ts +79 -10
- package/storages/key_value_store.d.ts.map +1 -1
- package/storages/key_value_store.js +104 -23
- package/storages/key_value_store.js.map +1 -1
- package/storages/request_list.d.ts +9 -9
- package/storages/request_list.d.ts.map +1 -1
- package/storages/request_list.js +13 -8
- package/storages/request_list.js.map +1 -1
- package/storages/request_list_adapter.d.ts +58 -0
- package/storages/request_list_adapter.d.ts.map +1 -0
- package/storages/request_list_adapter.js +81 -0
- package/storages/request_list_adapter.js.map +1 -0
- package/storages/request_manager_tandem.d.ts +68 -0
- package/storages/request_manager_tandem.d.ts.map +1 -0
- package/storages/request_manager_tandem.js +124 -0
- package/storages/request_manager_tandem.js.map +1 -0
- package/storages/request_provider.d.ts +86 -23
- package/storages/request_provider.d.ts.map +1 -1
- package/storages/request_provider.js +114 -74
- package/storages/request_provider.js.map +1 -1
- package/storages/request_queue.d.ts +1 -3
- package/storages/request_queue.d.ts.map +1 -1
- package/storages/request_queue.js +2 -4
- package/storages/request_queue.js.map +1 -1
- package/storages/request_queue_v2.d.ts +3 -3
- package/storages/request_queue_v2.d.ts.map +1 -1
- package/storages/request_queue_v2.js +4 -5
- package/storages/request_queue_v2.js.map +1 -1
- package/storages/sitemap_request_list.d.ts +5 -5
- package/storages/sitemap_request_list.d.ts.map +1 -1
- package/storages/sitemap_request_list.js +10 -7
- package/storages/sitemap_request_list.js.map +1 -1
- package/storages/storage_manager.d.ts +22 -17
- package/storages/storage_manager.d.ts.map +1 -1
- package/storages/storage_manager.js +58 -52
- package/storages/storage_manager.js.map +1 -1
- package/storages/utils.d.ts.map +1 -1
- package/storages/utils.js +6 -5
- package/storages/utils.js.map +1 -1
- package/typedefs.d.ts +1 -1
- package/typedefs.d.ts.map +1 -1
- package/crawlers/crawler_extension.d.ts +0 -12
- package/crawlers/crawler_extension.d.ts.map +0 -1
- package/crawlers/crawler_extension.js +0 -14
- package/crawlers/crawler_extension.js.map +0 -1
- package/http_clients/base-http-client.d.ts +0 -134
- package/http_clients/base-http-client.d.ts.map +0 -1
- package/http_clients/base-http-client.js +0 -33
- package/http_clients/base-http-client.js.map +0 -1
- package/http_clients/form-data-like.d.ts +0 -67
- package/http_clients/form-data-like.d.ts.map +0 -1
- package/http_clients/form-data-like.js +0 -5
- package/http_clients/form-data-like.js.map +0 -1
- package/http_clients/got-scraping-http-client.d.ts +0 -15
- package/http_clients/got-scraping-http-client.d.ts.map +0 -1
- package/http_clients/got-scraping-http-client.js +0 -69
- package/http_clients/got-scraping-http-client.js.map +0 -1
- package/http_clients/index.d.ts +0 -3
- package/http_clients/index.d.ts.map +0 -1
- package/http_clients/index.js +0 -3
- package/http_clients/index.js.map +0 -1
- package/tsconfig.build.tsbuildinfo +0 -1
package/configuration.js
CHANGED
|
@@ -1,12 +1,76 @@
|
|
|
1
|
-
import { AsyncLocalStorage } from 'node:async_hooks';
|
|
2
1
|
import { EventEmitter } from 'node:events';
|
|
3
2
|
import { readFileSync } from 'node:fs';
|
|
4
3
|
import { join } from 'node:path';
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
7
|
-
import
|
|
8
|
-
|
|
9
|
-
|
|
4
|
+
import { z } from 'zod';
|
|
5
|
+
import { log, LogLevel } from './log.js';
|
|
6
|
+
import { serviceLocator } from './service_locator.js';
|
|
7
|
+
// Crawlee attaches many listeners to shared EventEmitters (one per crawler/session/autoscaled pool),
|
|
8
|
+
// which can exceed Node's default limit of 10 and trigger spurious MaxListenersExceededWarning logs.
|
|
9
|
+
// Raising the global default avoids false positives; real leaks will still manifest as unbounded growth.
|
|
10
|
+
// TODO: tracked in https://github.com/apify/crawlee/issues/3615 — find a less side-effecting place for this.
|
|
11
|
+
EventEmitter.defaultMaxListeners = 50;
|
|
12
|
+
export function field(schema, envVar) {
|
|
13
|
+
return { schema, envVar };
|
|
14
|
+
}
|
|
15
|
+
// --- Zod preprocessors ---
|
|
16
|
+
/** Zod preprocessor treating `'0'` and `'false'` as falsy. */
|
|
17
|
+
export const coerceBoolean = z.preprocess((val) => {
|
|
18
|
+
if (typeof val === 'string') {
|
|
19
|
+
return !['0', 'false'].includes(val.toLowerCase());
|
|
20
|
+
}
|
|
21
|
+
return val;
|
|
22
|
+
}, z.boolean());
|
|
23
|
+
export const coerceNumber = z.preprocess((val) => {
|
|
24
|
+
if (typeof val === 'string')
|
|
25
|
+
return Number(val);
|
|
26
|
+
return val;
|
|
27
|
+
}, z.number());
|
|
28
|
+
/** Zod schema accepting both LogLevel enum values and string names (case-insensitive). */
|
|
29
|
+
const logLevelSchema = z.preprocess((val) => {
|
|
30
|
+
if (val == null)
|
|
31
|
+
return val;
|
|
32
|
+
const s = String(val);
|
|
33
|
+
if (Number.isFinite(+s))
|
|
34
|
+
return +s;
|
|
35
|
+
const key = s.toUpperCase();
|
|
36
|
+
if (key in LogLevel)
|
|
37
|
+
return LogLevel[key];
|
|
38
|
+
return val;
|
|
39
|
+
}, z.nativeEnum(LogLevel));
|
|
40
|
+
// --- Crawlee config field definitions ---
|
|
41
|
+
export const crawleeConfigFields = {
|
|
42
|
+
/** @default 'default' */
|
|
43
|
+
defaultDatasetId: field(z.string().default('default'), 'CRAWLEE_DEFAULT_DATASET_ID'),
|
|
44
|
+
/** @default true */
|
|
45
|
+
purgeOnStart: field(coerceBoolean.default(true), 'CRAWLEE_PURGE_ON_START'),
|
|
46
|
+
/** @default 'default' */
|
|
47
|
+
defaultKeyValueStoreId: field(z.string().default('default'), 'CRAWLEE_DEFAULT_KEY_VALUE_STORE_ID'),
|
|
48
|
+
/** @default 'default' */
|
|
49
|
+
defaultRequestQueueId: field(z.string().default('default'), 'CRAWLEE_DEFAULT_REQUEST_QUEUE_ID'),
|
|
50
|
+
/** @default 0.95 */
|
|
51
|
+
maxUsedCpuRatio: field(coerceNumber.default(0.95)),
|
|
52
|
+
/** @default 0.25 */
|
|
53
|
+
availableMemoryRatio: field(coerceNumber.default(0.25), 'CRAWLEE_AVAILABLE_MEMORY_RATIO'),
|
|
54
|
+
memoryMbytes: field(coerceNumber.optional(), 'CRAWLEE_MEMORY_MBYTES'),
|
|
55
|
+
/** @default 60_000 */
|
|
56
|
+
persistStateIntervalMillis: field(coerceNumber.default(60_000), 'CRAWLEE_PERSIST_STATE_INTERVAL_MILLIS'),
|
|
57
|
+
/** @default 1_000 */
|
|
58
|
+
systemInfoIntervalMillis: field(coerceNumber.default(1_000)),
|
|
59
|
+
/** @default 'INPUT' */
|
|
60
|
+
inputKey: field(z.string().default('INPUT'), 'CRAWLEE_INPUT_KEY'),
|
|
61
|
+
/** @default true */
|
|
62
|
+
headless: field(coerceBoolean.default(true), 'CRAWLEE_HEADLESS'),
|
|
63
|
+
/** @default false */
|
|
64
|
+
xvfb: field(coerceBoolean.default(false), 'CRAWLEE_XVFB'),
|
|
65
|
+
chromeExecutablePath: field(z.string().optional(), 'CRAWLEE_CHROME_EXECUTABLE_PATH'),
|
|
66
|
+
defaultBrowserPath: field(z.string().optional(), 'CRAWLEE_DEFAULT_BROWSER_PATH'),
|
|
67
|
+
/** @default false */
|
|
68
|
+
disableBrowserSandbox: field(coerceBoolean.default(false), 'CRAWLEE_DISABLE_BROWSER_SANDBOX'),
|
|
69
|
+
logLevel: field(logLevelSchema.optional(), 'CRAWLEE_LOG_LEVEL'),
|
|
70
|
+
/** @default true */
|
|
71
|
+
persistStorage: field(coerceBoolean.default(true), 'CRAWLEE_PERSIST_STORAGE'),
|
|
72
|
+
containerized: field(coerceBoolean.optional(), 'CRAWLEE_CONTAINERIZED'),
|
|
73
|
+
};
|
|
10
74
|
/**
|
|
11
75
|
* `Configuration` is a value object holding Crawlee configuration. By default, there is a
|
|
12
76
|
* global singleton instance of this class available via `Configuration.getGlobalConfig()`.
|
|
@@ -19,13 +83,9 @@ import { entries } from './typedefs.js';
|
|
|
19
83
|
*
|
|
20
84
|
* // Get the global configuration
|
|
21
85
|
* const config = Configuration.getGlobalConfig();
|
|
22
|
-
* //
|
|
23
|
-
*
|
|
24
|
-
* config.
|
|
25
|
-
*
|
|
26
|
-
* // No need to pass the configuration to the crawler,
|
|
27
|
-
* // as it's using the global configuration by default
|
|
28
|
-
* const crawler = new BasicCrawler();
|
|
86
|
+
* // Access configuration values directly as properties
|
|
87
|
+
* console.log(config.headless);
|
|
88
|
+
* console.log(config.persistStateIntervalMillis);
|
|
29
89
|
* ```
|
|
30
90
|
*
|
|
31
91
|
* *Using custom configuration:*
|
|
@@ -35,15 +95,14 @@ import { entries } from './typedefs.js';
|
|
|
35
95
|
* // Create a new configuration
|
|
36
96
|
* const config = new Configuration({ persistStateIntervalMillis: 30_000 });
|
|
37
97
|
* // Pass the configuration to the crawler
|
|
38
|
-
* const crawler = new BasicCrawler({
|
|
98
|
+
* const crawler = new BasicCrawler({ configuration: config });
|
|
39
99
|
* ```
|
|
40
100
|
*
|
|
41
|
-
*
|
|
42
|
-
*
|
|
43
|
-
* so the options provided in constructor will override those. In other words, the precedence is:
|
|
101
|
+
* Configuration is immutable — values are set via the constructor and cannot be changed afterwards.
|
|
102
|
+
* The priority order for resolving values is (highest to lowest):
|
|
44
103
|
*
|
|
45
104
|
* ```text
|
|
46
|
-
*
|
|
105
|
+
* constructor options > environment variables > crawlee.json > schema defaults
|
|
47
106
|
* ```
|
|
48
107
|
*
|
|
49
108
|
* ## Supported Configuration Options
|
|
@@ -65,242 +124,119 @@ import { entries } from './typedefs.js';
|
|
|
65
124
|
* Key | Environment Variable | Default Value
|
|
66
125
|
* ---|---|---
|
|
67
126
|
* `inputKey` | `CRAWLEE_INPUT_KEY` | `'INPUT'`
|
|
68
|
-
* `xvfb` | `CRAWLEE_XVFB` |
|
|
127
|
+
* `xvfb` | `CRAWLEE_XVFB` | `false`
|
|
69
128
|
* `chromeExecutablePath` | `CRAWLEE_CHROME_EXECUTABLE_PATH` | -
|
|
70
129
|
* `defaultBrowserPath` | `CRAWLEE_DEFAULT_BROWSER_PATH` | -
|
|
71
130
|
* `disableBrowserSandbox` | `CRAWLEE_DISABLE_BROWSER_SANDBOX` | -
|
|
72
131
|
* `availableMemoryRatio` | `CRAWLEE_AVAILABLE_MEMORY_RATIO` | `0.25`
|
|
73
|
-
* `
|
|
74
|
-
* `containerized | `CRAWLEE_CONTAINERIZED | -
|
|
132
|
+
* `containerized` | `CRAWLEE_CONTAINERIZED` | -
|
|
75
133
|
*/
|
|
134
|
+
// eslint-disable-next-line @typescript-eslint/no-unsafe-declaration-merging
|
|
76
135
|
export class Configuration {
|
|
77
136
|
/**
|
|
78
|
-
*
|
|
79
|
-
|
|
80
|
-
static ENV_MAP = {
|
|
81
|
-
CRAWLEE_AVAILABLE_MEMORY_RATIO: 'availableMemoryRatio',
|
|
82
|
-
CRAWLEE_PURGE_ON_START: 'purgeOnStart',
|
|
83
|
-
CRAWLEE_MEMORY_MBYTES: 'memoryMbytes',
|
|
84
|
-
CRAWLEE_DEFAULT_DATASET_ID: 'defaultDatasetId',
|
|
85
|
-
CRAWLEE_DEFAULT_KEY_VALUE_STORE_ID: 'defaultKeyValueStoreId',
|
|
86
|
-
CRAWLEE_DEFAULT_REQUEST_QUEUE_ID: 'defaultRequestQueueId',
|
|
87
|
-
CRAWLEE_INPUT_KEY: 'inputKey',
|
|
88
|
-
CRAWLEE_PERSIST_STATE_INTERVAL_MILLIS: 'persistStateIntervalMillis',
|
|
89
|
-
CRAWLEE_HEADLESS: 'headless',
|
|
90
|
-
CRAWLEE_XVFB: 'xvfb',
|
|
91
|
-
CRAWLEE_CHROME_EXECUTABLE_PATH: 'chromeExecutablePath',
|
|
92
|
-
CRAWLEE_DEFAULT_BROWSER_PATH: 'defaultBrowserPath',
|
|
93
|
-
CRAWLEE_DISABLE_BROWSER_SANDBOX: 'disableBrowserSandbox',
|
|
94
|
-
CRAWLEE_LOG_LEVEL: 'logLevel',
|
|
95
|
-
CRAWLEE_PERSIST_STORAGE: 'persistStorage',
|
|
96
|
-
CRAWLEE_SYSTEM_INFO_V2: 'systemInfoV2',
|
|
97
|
-
CRAWLEE_CONTAINERIZED: 'containerized',
|
|
98
|
-
};
|
|
99
|
-
static BOOLEAN_VARS = [
|
|
100
|
-
'purgeOnStart',
|
|
101
|
-
'headless',
|
|
102
|
-
'xvfb',
|
|
103
|
-
'disableBrowserSandbox',
|
|
104
|
-
'persistStorage',
|
|
105
|
-
'systemInfoV2',
|
|
106
|
-
'containerized',
|
|
107
|
-
];
|
|
108
|
-
static INTEGER_VARS = ['memoryMbytes', 'persistStateIntervalMillis', 'systemInfoIntervalMillis'];
|
|
109
|
-
static COMMA_SEPARATED_LIST_VARS = [];
|
|
110
|
-
static DEFAULTS = {
|
|
111
|
-
defaultKeyValueStoreId: 'default',
|
|
112
|
-
defaultDatasetId: 'default',
|
|
113
|
-
defaultRequestQueueId: 'default',
|
|
114
|
-
inputKey: 'INPUT',
|
|
115
|
-
maxUsedCpuRatio: 0.95,
|
|
116
|
-
availableMemoryRatio: 0.25,
|
|
117
|
-
storageClientOptions: {},
|
|
118
|
-
purgeOnStart: true,
|
|
119
|
-
headless: true,
|
|
120
|
-
persistStateIntervalMillis: 60_000,
|
|
121
|
-
systemInfoIntervalMillis: 1_000,
|
|
122
|
-
persistStorage: true,
|
|
123
|
-
systemInfoV2: false,
|
|
124
|
-
};
|
|
125
|
-
/**
|
|
126
|
-
* Provides access to the current-instance-scoped Configuration without passing it around in parameters.
|
|
127
|
-
* @internal
|
|
137
|
+
* Field definitions for this configuration class.
|
|
138
|
+
* Subclasses override this to register additional fields.
|
|
128
139
|
*/
|
|
129
|
-
static
|
|
130
|
-
|
|
131
|
-
services = new Map();
|
|
132
|
-
/** @internal */
|
|
133
|
-
static globalConfig;
|
|
134
|
-
storageManagers = new Map();
|
|
140
|
+
static fields = crawleeConfigFields;
|
|
141
|
+
resolvedValues;
|
|
135
142
|
/**
|
|
136
|
-
* Creates new `Configuration` instance with provided options.
|
|
143
|
+
* Creates new `Configuration` instance with provided options.
|
|
144
|
+
* Constructor options take precedence over environment variables, which take precedence
|
|
145
|
+
* over crawlee.json values, which take precedence over schema defaults.
|
|
137
146
|
*/
|
|
138
147
|
constructor(options = {}) {
|
|
139
|
-
this.
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
: LogLevel[String(logLevel).toUpperCase()];
|
|
148
|
-
log.setLevel(level);
|
|
149
|
-
}
|
|
150
|
-
}
|
|
151
|
-
/**
|
|
152
|
-
* Returns configured value. First checks the environment variables, then provided configuration,
|
|
153
|
-
* fallbacks to the `defaultValue` argument if provided, otherwise uses the default value as described
|
|
154
|
-
* in the above section.
|
|
155
|
-
*/
|
|
156
|
-
get(key, defaultValue) {
|
|
157
|
-
// prefer env vars, always iterate through the whole map as there might be duplicate env vars for the same option
|
|
158
|
-
let envValue;
|
|
159
|
-
for (const [k, v] of entries(Configuration.ENV_MAP)) {
|
|
160
|
-
if (key === v) {
|
|
161
|
-
envValue = process.env[k];
|
|
162
|
-
if (envValue) {
|
|
163
|
-
break;
|
|
164
|
-
}
|
|
165
|
-
}
|
|
166
|
-
}
|
|
167
|
-
if (envValue != null) {
|
|
168
|
-
return this._castEnvValue(key, envValue);
|
|
169
|
-
}
|
|
170
|
-
// check instance level options
|
|
171
|
-
if (this.options.has(key)) {
|
|
172
|
-
return this.options.get(key);
|
|
148
|
+
const fields = this.constructor.fields;
|
|
149
|
+
const fileOptions = Configuration.loadFileOptions();
|
|
150
|
+
this.resolvedValues = Configuration.resolveAll(fields, options, fileOptions);
|
|
151
|
+
this.registerAccessors();
|
|
152
|
+
// Set the log level
|
|
153
|
+
const logLevel = this.logLevel;
|
|
154
|
+
if (logLevel != null) {
|
|
155
|
+
log.setLevel(logLevel);
|
|
173
156
|
}
|
|
174
|
-
// fallback to defaults
|
|
175
|
-
return (defaultValue ?? Configuration.DEFAULTS[key] ?? envValue);
|
|
176
|
-
}
|
|
177
|
-
_castEnvValue(key, value) {
|
|
178
|
-
if (Configuration.INTEGER_VARS.includes(key)) {
|
|
179
|
-
return +value;
|
|
180
|
-
}
|
|
181
|
-
if (Configuration.BOOLEAN_VARS.includes(key)) {
|
|
182
|
-
// 0, false and empty string are considered falsy values
|
|
183
|
-
return !['0', 'false', ''].includes(String(value).toLowerCase());
|
|
184
|
-
}
|
|
185
|
-
if (Configuration.COMMA_SEPARATED_LIST_VARS.includes(key)) {
|
|
186
|
-
if (!value)
|
|
187
|
-
return [];
|
|
188
|
-
return String(value)
|
|
189
|
-
.split(',')
|
|
190
|
-
.map((v) => v.trim());
|
|
191
|
-
}
|
|
192
|
-
return value;
|
|
193
157
|
}
|
|
194
158
|
/**
|
|
195
|
-
*
|
|
196
|
-
* To reset a value, we can omit the `value` argument or pass `undefined` there.
|
|
197
|
-
*/
|
|
198
|
-
set(key, value) {
|
|
199
|
-
this.options.set(key, value);
|
|
200
|
-
}
|
|
201
|
-
/**
|
|
202
|
-
* Sets value for given option. Only affects the global `Configuration` instance, the value will not be propagated down to the env var.
|
|
203
|
-
* To reset a value, we can omit the `value` argument or pass `undefined` there.
|
|
204
|
-
*/
|
|
205
|
-
static set(key, value) {
|
|
206
|
-
this.getGlobalConfig().set(key, value);
|
|
207
|
-
}
|
|
208
|
-
/**
|
|
209
|
-
* Returns cached instance of {@link StorageClient} using options as defined in the environment variables or in
|
|
210
|
-
* this {@link Configuration} instance. Only first call of this method will create the client, following calls will
|
|
211
|
-
* return the same client instance.
|
|
159
|
+
* Returns the global configuration instance. It will respect the environment variables.
|
|
212
160
|
*
|
|
213
|
-
*
|
|
214
|
-
* multiple instances, one for each variant of the options.
|
|
215
|
-
* @internal
|
|
161
|
+
* Delegates to the global ServiceLocator, making it the single source of truth for service management.
|
|
216
162
|
*/
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
return this.options.get('storageClient');
|
|
220
|
-
}
|
|
221
|
-
const options = this.options.get('storageClientOptions');
|
|
222
|
-
return this.createMemoryStorage(options);
|
|
223
|
-
}
|
|
224
|
-
getEventManager() {
|
|
225
|
-
if (this.options.has('eventManager')) {
|
|
226
|
-
return this.options.get('eventManager');
|
|
227
|
-
}
|
|
228
|
-
if (this.services.has('eventManager')) {
|
|
229
|
-
return this.services.get('eventManager');
|
|
230
|
-
}
|
|
231
|
-
const eventManager = new LocalEventManager(this);
|
|
232
|
-
this.services.set('eventManager', eventManager);
|
|
233
|
-
return eventManager;
|
|
163
|
+
static getGlobalConfig() {
|
|
164
|
+
return serviceLocator.getConfiguration();
|
|
234
165
|
}
|
|
235
166
|
/**
|
|
236
|
-
*
|
|
237
|
-
*
|
|
167
|
+
* Resolves all field values once using the priority chain:
|
|
168
|
+
* constructor options > env vars > crawlee.json > schema defaults.
|
|
238
169
|
*/
|
|
239
|
-
|
|
240
|
-
const
|
|
241
|
-
|
|
242
|
-
|
|
170
|
+
static resolveAll(fields, userOptions, fileOptions) {
|
|
171
|
+
const values = {};
|
|
172
|
+
for (const [key, fieldDef] of Object.entries(fields)) {
|
|
173
|
+
// 1. Constructor options (highest priority)
|
|
174
|
+
if (key in userOptions && userOptions[key] !== undefined) {
|
|
175
|
+
values[key] = fieldDef.schema.parse(userOptions[key]);
|
|
176
|
+
continue;
|
|
177
|
+
}
|
|
178
|
+
// 2. Environment variables
|
|
179
|
+
const envValue = Configuration.readEnvVar(fieldDef);
|
|
180
|
+
if (envValue != null) {
|
|
181
|
+
values[key] = fieldDef.schema.parse(envValue);
|
|
182
|
+
continue;
|
|
183
|
+
}
|
|
184
|
+
// 3. crawlee.json file options
|
|
185
|
+
if (key in fileOptions && fileOptions[key] !== undefined) {
|
|
186
|
+
values[key] = fieldDef.schema.parse(fileOptions[key]);
|
|
187
|
+
continue;
|
|
188
|
+
}
|
|
189
|
+
// 4. Schema default (by parsing undefined through the schema)
|
|
190
|
+
const result = fieldDef.schema.safeParse(undefined);
|
|
191
|
+
values[key] = result.success ? result.data : undefined;
|
|
243
192
|
}
|
|
244
|
-
|
|
245
|
-
persistStorage: this.get('persistStorage'),
|
|
246
|
-
// Override persistStorage if user provides it via storageClientOptions
|
|
247
|
-
...options,
|
|
248
|
-
});
|
|
249
|
-
this.services.set(cacheKey, storage);
|
|
250
|
-
return storage;
|
|
251
|
-
}
|
|
252
|
-
useStorageClient(client) {
|
|
253
|
-
this.options.set('storageClient', client);
|
|
254
|
-
}
|
|
255
|
-
static useStorageClient(client) {
|
|
256
|
-
this.getGlobalConfig().useStorageClient(client);
|
|
257
|
-
}
|
|
258
|
-
useEventManager(events) {
|
|
259
|
-
this.options.set('eventManager', events);
|
|
193
|
+
return values;
|
|
260
194
|
}
|
|
261
195
|
/**
|
|
262
|
-
*
|
|
196
|
+
* Registers getters (and throwing setters) on the instance for each field.
|
|
263
197
|
*/
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
198
|
+
registerAccessors() {
|
|
199
|
+
const fields = this.constructor.fields;
|
|
200
|
+
const descriptors = {};
|
|
201
|
+
for (const key of Object.keys(fields)) {
|
|
202
|
+
descriptors[key] = {
|
|
203
|
+
get: () => this.resolvedValues[key],
|
|
204
|
+
set() {
|
|
205
|
+
throw new TypeError('Configuration is immutable. Pass options via the constructor instead.');
|
|
206
|
+
},
|
|
207
|
+
enumerable: true,
|
|
208
|
+
configurable: false,
|
|
209
|
+
};
|
|
267
210
|
}
|
|
268
|
-
|
|
269
|
-
return Configuration.globalConfig;
|
|
270
|
-
}
|
|
271
|
-
/**
|
|
272
|
-
* Gets default {@link StorageClient} instance.
|
|
273
|
-
*/
|
|
274
|
-
static getStorageClient() {
|
|
275
|
-
return this.getGlobalConfig().getStorageClient();
|
|
211
|
+
Object.defineProperties(this, descriptors);
|
|
276
212
|
}
|
|
277
213
|
/**
|
|
278
|
-
*
|
|
214
|
+
* Reads the first defined env var value for a field definition.
|
|
215
|
+
* Empty strings are treated as unset, falling through to crawlee.json or schema defaults.
|
|
216
|
+
* (Crawlee v3 coerced `''` to `false`/`0`/`''` per type — v4 drops that for consistency.)
|
|
279
217
|
*/
|
|
280
|
-
static
|
|
281
|
-
|
|
218
|
+
static readEnvVar(fieldDef) {
|
|
219
|
+
if (!fieldDef.envVar)
|
|
220
|
+
return undefined;
|
|
221
|
+
const envVars = Array.isArray(fieldDef.envVar) ? fieldDef.envVar : [fieldDef.envVar];
|
|
222
|
+
for (const envVar of envVars) {
|
|
223
|
+
const value = process.env[envVar];
|
|
224
|
+
if (value != null && value !== '')
|
|
225
|
+
return value;
|
|
226
|
+
}
|
|
227
|
+
return undefined;
|
|
282
228
|
}
|
|
283
229
|
/**
|
|
284
|
-
*
|
|
285
|
-
* if we want to change them, we need to first reset the global state. Used mainly for testing purposes.
|
|
230
|
+
* Loads config options from crawlee.json in the current working directory.
|
|
286
231
|
*/
|
|
287
|
-
static
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
try {
|
|
295
|
-
const file = readFileSync(path);
|
|
296
|
-
const optionsFromFileConfig = JSON.parse(file.toString());
|
|
297
|
-
Object.assign(options, optionsFromFileConfig);
|
|
298
|
-
}
|
|
299
|
-
catch {
|
|
300
|
-
// ignore
|
|
301
|
-
}
|
|
232
|
+
static loadFileOptions() {
|
|
233
|
+
try {
|
|
234
|
+
const file = readFileSync(join(process.cwd(), 'crawlee.json'));
|
|
235
|
+
return JSON.parse(file.toString());
|
|
236
|
+
}
|
|
237
|
+
catch {
|
|
238
|
+
return {};
|
|
302
239
|
}
|
|
303
|
-
this.options = new Map(entries(options));
|
|
304
240
|
}
|
|
305
241
|
}
|
|
306
242
|
//# sourceMappingURL=configuration.js.map
|
package/configuration.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"configuration.js","sourceRoot":"","sources":["../src/configuration.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"configuration.js","sourceRoot":"","sources":["../src/configuration.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,OAAO,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAEtD,qGAAqG;AACrG,qGAAqG;AACrG,yGAAyG;AACzG,6GAA6G;AAC7G,YAAY,CAAC,mBAAmB,GAAG,EAAE,CAAC;AAStC,MAAM,UAAU,KAAK,CAAsB,MAAS,EAAE,MAA0B;IAC5E,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC;AAC9B,CAAC;AAED,4BAA4B;AAE5B,8DAA8D;AAC9D,MAAM,CAAC,MAAM,aAAa,GAAG,CAAC,CAAC,UAAU,CAAC,CAAC,GAAG,EAAE,EAAE;IAC9C,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE,CAAC;QAC1B,OAAO,CAAC,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,CAAC;IACvD,CAAC;IACD,OAAO,GAAG,CAAC;AACf,CAAC,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;AAEhB,MAAM,CAAC,MAAM,YAAY,GAAG,CAAC,CAAC,UAAU,CAAC,CAAC,GAAG,EAAE,EAAE;IAC7C,IAAI,OAAO,GAAG,KAAK,QAAQ;QAAE,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC;IAChD,OAAO,GAAG,CAAC;AACf,CAAC,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;AAEf,0FAA0F;AAC1F,MAAM,cAAc,GAAG,CAAC,CAAC,UAAU,CAAC,CAAC,GAAG,EAAE,EAAE;IACxC,IAAI,GAAG,IAAI,IAAI;QAAE,OAAO,GAAG,CAAC;IAC5B,MAAM,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC;IACtB,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;QAAE,OAAO,CAAC,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,CAAC,CAAC,WAAW,EAA2B,CAAC;IACrD,IAAI,GAAG,IAAI,QAAQ;QAAE,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC;IAC1C,OAAO,GAAG,CAAC;AACf,CAAC,EAAE,CAAC,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAC;AAE3B,2CAA2C;AAE3C,MAAM,CAAC,MAAM,mBAAmB,GAAG;IAC/B,yBAAyB;IACzB,gBAAgB,EAAE,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,4BAA4B,CAAC;IACpF,oBAAoB;IACpB,YAAY,EAAE,KAAK,CAAC,aAAa,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,wBAAwB,CAAC;IAC1E,yBAAyB;IACzB,sBAAsB,EAAE,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,oCAAoC,CAAC;IAClG,yBAAyB;IACzB,qBAAqB,EAAE,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,kCAAkC,CAAC;IAC/F,oBAAoB;IACpB,eAAe,EAAE,KAAK,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IAClD,oBAAoB;IACpB,oBAAoB,EAAE,KAAK,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,gCAAgC,CAAC;IACzF,YAAY,EAAE,KAAK,CAAC,YAAY,CAAC,QAAQ,EAAE,EAAE,uBAAuB,CAAC;IACrE,sBAAsB;IACtB,0BAA0B,EAAE,KAAK,CAAC,YAAY,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,uCAAuC,CAAC;IACxG,qBAAqB;IACrB,wBAAwB,EAAE,KAAK,CAAC,YAAY,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IAC5D,uBAAuB;IACvB,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,mBAAmB,CAAC;IACjE,oBAAoB;IACpB,QAAQ,EAAE,KAAK,CAAC,aAAa,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,kBAAkB,CAAC;IAChE,qBAAqB;IACrB,IAAI,EAAE,KAAK,CAAC,aAAa,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,cAAc,CAAC;IACzD,oBAAoB,EAAE,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,EAAE,gCAAgC,CAAC;IACpF,kBAAkB,EAAE,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,EAAE,8BAA8B,CAAC;IAChF,qBAAqB;IACrB,qBAAqB,EAAE,KAAK,CAAC,aAAa,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,iCAAiC,CAAC;IAC7F,QAAQ,EAAE,KAAK,CAAC,cAAc,CAAC,QAAQ,EAAE,EAAE,mBAAmB,CAAC;IAC/D,oBAAoB;IACpB,cAAc,EAAE,KAAK,CAAC,aAAa,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,yBAAyB,CAAC;IAC7E,aAAa,EAAE,KAAK,CAAC,aAAa,CAAC,QAAQ,EAAE,EAAE,uBAAuB,CAAC;CAC1E,CAAC;AAyBF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2DG;AACH,4EAA4E;AAC5E,MAAM,OAAO,aAAa;IACtB;;;OAGG;IACO,MAAM,CAAC,MAAM,GAAgC,mBAAmB,CAAC;IAEnE,cAAc,CAA0B;IAEhD;;;;OAIG;IACH,YAAY,UAA8B,EAAE;QACxC,MAAM,MAAM,GAAI,IAAI,CAAC,WAAoC,CAAC,MAAM,CAAC;QACjE,MAAM,WAAW,GAAG,aAAa,CAAC,eAAe,EAAE,CAAC;QACpD,IAAI,CAAC,cAAc,GAAG,aAAa,CAAC,UAAU,CAAC,MAAM,EAAE,OAAkC,EAAE,WAAW,CAAC,CAAC;QACxG,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAEzB,oBAAoB;QACpB,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC;QAC/B,IAAI,QAAQ,IAAI,IAAI,EAAE,CAAC;YACnB,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC3B,CAAC;IACL,CAAC;IAED;;;;OAIG;IACH,MAAM,CAAC,eAAe;QAClB,OAAO,cAAc,CAAC,gBAAgB,EAAE,CAAC;IAC7C,CAAC;IAED;;;OAGG;IACK,MAAM,CAAC,UAAU,CACrB,MAAmC,EACnC,WAAoC,EACpC,WAAoC;QAEpC,MAAM,MAAM,GAA4B,EAAE,CAAC;QAE3C,KAAK,MAAM,CAAC,GAAG,EAAE,QAAQ,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;YACnD,4CAA4C;YAC5C,IAAI,GAAG,IAAI,WAAW,IAAI,WAAW,CAAC,GAAG,CAAC,KAAK,SAAS,EAAE,CAAC;gBACvD,MAAM,CAAC,GAAG,CAAC,GAAG,QAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC;gBACtD,SAAS;YACb,CAAC;YAED,2BAA2B;YAC3B,MAAM,QAAQ,GAAG,aAAa,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;YACpD,IAAI,QAAQ,IAAI,IAAI,EAAE,CAAC;gBACnB,MAAM,CAAC,GAAG,CAAC,GAAG,QAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;gBAC9C,SAAS;YACb,CAAC;YAED,+BAA+B;YAC/B,IAAI,GAAG,IAAI,WAAW,IAAI,WAAW,CAAC,GAAG,CAAC,KAAK,SAAS,EAAE,CAAC;gBACvD,MAAM,CAAC,GAAG,CAAC,GAAG,QAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC;gBACtD,SAAS;YACb,CAAC;YAED,8DAA8D;YAC9D,MAAM,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;YACpD,MAAM,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC;QAC3D,CAAC;QAED,OAAO,MAAM,CAAC;IAClB,CAAC;IAED;;OAEG;IACK,iBAAiB;QACrB,MAAM,MAAM,GAAI,IAAI,CAAC,WAAoC,CAAC,MAAM,CAAC;QACjE,MAAM,WAAW,GAA0B,EAAE,CAAC;QAE9C,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC;YACpC,WAAW,CAAC,GAAG,CAAC,GAAG;gBACf,GAAG,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC;gBACnC,GAAG;oBACC,MAAM,IAAI,SAAS,CAAC,uEAAuE,CAAC,CAAC;gBACjG,CAAC;gBACD,UAAU,EAAE,IAAI;gBAChB,YAAY,EAAE,KAAK;aACtB,CAAC;QACN,CAAC;QAED,MAAM,CAAC,gBAAgB,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;IAC/C,CAAC;IAED;;;;OAIG;IACK,MAAM,CAAC,UAAU,CAAC,QAAqB;QAC3C,IAAI,CAAC,QAAQ,CAAC,MAAM;YAAE,OAAO,SAAS,CAAC;QACvC,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QACrF,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC3B,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YAClC,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,KAAK,EAAE;gBAAE,OAAO,KAAK,CAAC;QACpD,CAAC;QACD,OAAO,SAAS,CAAC;IACrB,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,eAAe;QAC1B,IAAI,CAAC;YACD,MAAM,IAAI,GAAG,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,cAAc,CAAC,CAAC,CAAC;YAC/D,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;QACvC,CAAC;QAAC,MAAM,CAAC;YACL,OAAO,EAAE,CAAC;QACd,CAAC;IACL,CAAC"}
|
package/cookie_utils.d.ts
CHANGED
|
@@ -7,7 +7,7 @@ export interface ResponseLike {
|
|
|
7
7
|
/**
|
|
8
8
|
* @internal
|
|
9
9
|
*/
|
|
10
|
-
export declare function getCookiesFromResponse(response:
|
|
10
|
+
export declare function getCookiesFromResponse(response: Response): Cookie[];
|
|
11
11
|
/**
|
|
12
12
|
* Calculate cookie expiration date
|
|
13
13
|
* @param maxAgeSecs
|
package/cookie_utils.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cookie_utils.d.ts","sourceRoot":"","sources":["../src/cookie_utils.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,IAAI,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC7D,OAAO,EAAE,MAAM,EAAa,MAAM,cAAc,CAAC;AAKjD,MAAM,WAAW,YAAY;IACzB,GAAG,CAAC,EAAE,MAAM,GAAG,CAAC,MAAM,MAAM,CAAC,CAAC;IAC9B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,SAAS,CAAC,GAAG,CAAC,MAAM,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,SAAS,CAAC,CAAC,CAAC;CACnH;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CAAC,QAAQ,EAAE,
|
|
1
|
+
{"version":3,"file":"cookie_utils.d.ts","sourceRoot":"","sources":["../src/cookie_utils.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,IAAI,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC7D,OAAO,EAAE,MAAM,EAAa,MAAM,cAAc,CAAC;AAKjD,MAAM,WAAW,YAAY;IACzB,GAAG,CAAC,EAAE,MAAM,GAAG,CAAC,MAAM,MAAM,CAAC,CAAC;IAC9B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,SAAS,CAAC,GAAG,CAAC,MAAM,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,SAAS,CAAC,CAAC,CAAC;CACnH;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,GAAG,MAAM,EAAE,CASnE;AAED;;;;;GAKG;AACH,wBAAgB,8BAA8B,CAAC,UAAU,EAAE,MAAM,QAEhE;AAED;;;;;GAKG;AACH,wBAAgB,8BAA8B,CAAC,WAAW,EAAE,MAAM,GAAG,YAAY,CAehF;AAED;;;;GAIG;AACH,wBAAgB,8BAA8B,CAAC,YAAY,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,UAiB5F;AAED;;;;GAIG;AACH,wBAAgB,yBAAyB,CAAC,YAAY,EAAE,MAAM,uBAQ7D;AAED;;;;GAIG;AACH,wBAAgB,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,aAAa,EAAE,MAAM,EAAE,GAAG,MAAM,CAgCzE"}
|
package/cookie_utils.js
CHANGED
|
@@ -1,19 +1,17 @@
|
|
|
1
1
|
import { Cookie, CookieJar } from 'tough-cookie';
|
|
2
|
-
import {
|
|
2
|
+
import { serviceLocator } from './service_locator.js';
|
|
3
3
|
import { CookieParseError } from './session_pool/errors.js';
|
|
4
4
|
/**
|
|
5
5
|
* @internal
|
|
6
6
|
*/
|
|
7
7
|
export function getCookiesFromResponse(response) {
|
|
8
|
-
const headers =
|
|
9
|
-
const
|
|
8
|
+
const headers = response.headers;
|
|
9
|
+
const cookieHeaders = headers.getSetCookie();
|
|
10
10
|
try {
|
|
11
|
-
return
|
|
12
|
-
? cookieHeader.map((cookie) => Cookie.parse(cookie))
|
|
13
|
-
: [Cookie.parse(cookieHeader)];
|
|
11
|
+
return cookieHeaders.map((cookie) => Cookie.parse(cookie));
|
|
14
12
|
}
|
|
15
13
|
catch (e) {
|
|
16
|
-
throw new CookieParseError(
|
|
14
|
+
throw new CookieParseError(cookieHeaders);
|
|
17
15
|
}
|
|
18
16
|
}
|
|
19
17
|
/**
|
|
@@ -103,7 +101,9 @@ export function mergeCookies(url, sourceCookies) {
|
|
|
103
101
|
return cookie.key !== c.key && cookie.key.toLowerCase() === c.key.toLowerCase();
|
|
104
102
|
});
|
|
105
103
|
if (similarKeyCookie) {
|
|
106
|
-
|
|
104
|
+
serviceLocator
|
|
105
|
+
.getLogger()
|
|
106
|
+
.warningOnce(`Found cookies with similar name during cookie merging: '${cookie.key}' and '${similarKeyCookie.key}'`);
|
|
107
107
|
}
|
|
108
108
|
jar.setCookieSync(cookie, url);
|
|
109
109
|
}
|
package/cookie_utils.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cookie_utils.js","sourceRoot":"","sources":["../src/cookie_utils.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAEjD,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"cookie_utils.js","sourceRoot":"","sources":["../src/cookie_utils.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAEjD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AAO5D;;GAEG;AACH,MAAM,UAAU,sBAAsB,CAAC,QAAkB;IACrD,MAAM,OAAO,GAAG,QAAQ,CAAC,OAAO,CAAC;IACjC,MAAM,aAAa,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAE7C,IAAI,CAAC;QACD,OAAO,aAAa,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAE,CAAC,CAAC;IAChE,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACT,MAAM,IAAI,gBAAgB,CAAC,aAAa,CAAC,CAAC;IAC9C,CAAC;AACL,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,8BAA8B,CAAC,UAAkB;IAC7D,OAAO,IAAI,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,UAAU,GAAG,IAAI,CAAC,CAAC;AACpD,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,8BAA8B,CAAC,WAAmB;IAC9D,OAAO;QACH,IAAI,EAAE,WAAW,CAAC,GAAG;QACrB,KAAK,EAAE,WAAW,CAAC,KAAK;QACxB,iFAAiF;QACjF,6FAA6F;QAC7F,OAAO,EACH,WAAW,CAAC,OAAO,IAAI,IAAI,IAAI,WAAW,CAAC,OAAO,KAAK,UAAU;YAC7D,CAAC,CAAC,SAAS;YACX,CAAC,CAAC,WAAW,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,IAAI;QAC9C,MAAM,EAAE,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,WAAW,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,SAAS;QAClG,IAAI,EAAE,WAAW,CAAC,IAAI,IAAI,SAAS;QACnC,MAAM,EAAE,WAAW,CAAC,MAAM;QAC1B,QAAQ,EAAE,WAAW,CAAC,QAAQ;KACjC,CAAC;AACN,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,8BAA8B,CAAC,YAA0B,EAAE,UAAkB;IACzF,MAAM,cAAc,GAAG,YAAY,CAAC,OAAO,IAAI,OAAO,YAAY,CAAC,OAAO,KAAK,QAAQ,IAAI,YAAY,CAAC,OAAO,GAAG,CAAC,CAAC;IACpH,MAAM,OAAO,GAAG,cAAc;QAC1B,CAAC,CAAC,IAAI,IAAI,CAAC,YAAY,CAAC,OAAQ,GAAG,IAAI,CAAC;QACxC,CAAC,CAAC,8BAA8B,CAAC,UAAU,CAAC,CAAC;IACjD,MAAM,mBAAmB,GAAG,YAAY,CAAC,MAAM,EAAE,UAAU,EAAE,CAAC,GAAG,CAAC,CAAC;IACnE,MAAM,MAAM,GAAG,mBAAmB,CAAC,CAAC,CAAC,YAAY,CAAC,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,MAAM,CAAC;IAC3F,OAAO,IAAI,MAAM,CAAC;QACd,GAAG,EAAE,YAAY,CAAC,IAAI;QACtB,KAAK,EAAE,YAAY,CAAC,KAAK;QACzB,OAAO;QACP,MAAM;QACN,IAAI,EAAE,YAAY,CAAC,IAAI;QACvB,MAAM,EAAE,YAAY,CAAC,MAAM;QAC3B,QAAQ,EAAE,YAAY,CAAC,QAAQ;QAC/B,QAAQ,EAAE,CAAC,mBAAmB;KACjC,CAAC,CAAC;AACP,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,yBAAyB,CAAC,YAAoB;IAC1D,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;IAE1C,IAAI,MAAM,EAAE,CAAC;QACT,OAAO,8BAA8B,CAAC,MAAM,CAAC,CAAC;IAClD,CAAC;IAED,OAAO,IAAI,CAAC;AAChB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,YAAY,CAAC,GAAW,EAAE,aAAuB;IAC7D,MAAM,GAAG,GAAG,IAAI,SAAS,EAAE,CAAC;IAE5B,uBAAuB;IACvB,KAAK,MAAM,kBAAkB,IAAI,aAAa,EAAE,CAAC;QAC7C,uBAAuB;QACvB,IAAI,CAAC,kBAAkB;YAAE,SAAS;QAElC,MAAM,OAAO,GAAG,kBAAkB,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAElD,KAAK,MAAM,YAAY,IAAI,OAAO,EAAE,CAAC;YACjC,sBAAsB;YACtB,IAAI,CAAC,YAAY;gBAAE,SAAS;YAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,YAAY,CAAE,CAAC;YAC3C,MAAM,gBAAgB,GAAG,GAAG,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE;gBACxD,OAAO,MAAM,CAAC,GAAG,KAAK,CAAC,CAAC,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,WAAW,EAAE,KAAK,CAAC,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC;YACpF,CAAC,CAAC,CAAC;YAEH,IAAI,gBAAgB,EAAE,CAAC;gBACnB,cAAc;qBACT,SAAS,EAAE;qBACX,WAAW,CACR,2DAA2D,MAAM,CAAC,GAAG,UAAU,gBAAgB,CAAC,GAAG,GAAG,CACzG,CAAC;YACV,CAAC;YAED,GAAG,CAAC,aAAa,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QACnC,CAAC;IACL,CAAC;IAED,OAAO,GAAG,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC;AACxC,CAAC"}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import type { Awaitable } from '@crawlee/types';
|
|
2
|
+
/**
|
|
3
|
+
* Represents a middleware step in the context pipeline.
|
|
4
|
+
*
|
|
5
|
+
* @template TCrawlingContext - The input context type for this middleware
|
|
6
|
+
* @template TCrawlingContextExtension - The enhanced output context type
|
|
7
|
+
*/
|
|
8
|
+
export interface ContextMiddleware<TCrawlingContext, TCrawlingContextExtension> {
|
|
9
|
+
/** The main middleware function that enhances the context */
|
|
10
|
+
action: (context: TCrawlingContext) => Awaitable<TCrawlingContextExtension>;
|
|
11
|
+
/** Optional cleanup function called after the consumer finishes or fails */
|
|
12
|
+
cleanup?: (context: TCrawlingContext & TCrawlingContextExtension, error?: unknown) => Awaitable<void>;
|
|
13
|
+
}
|
|
14
|
+
/**
|
|
15
|
+
* Encapsulates the logic of gradually enhancing the crawling context with additional information and utilities.
|
|
16
|
+
*
|
|
17
|
+
* The enhancement is done by a chain of middlewares that are added to the pipeline after its creation.
|
|
18
|
+
* This class provides a type-safe way to build a pipeline of context transformations where each step
|
|
19
|
+
* can enhance the context with additional properties or utilities.
|
|
20
|
+
*
|
|
21
|
+
* @template TContextBase - The base context type that serves as the starting point
|
|
22
|
+
* @template TCrawlingContext - The final context type after all middleware transformations
|
|
23
|
+
*/
|
|
24
|
+
export declare abstract class ContextPipeline<TContextBase, TCrawlingContext extends TContextBase> {
|
|
25
|
+
/**
|
|
26
|
+
* Creates a new empty context pipeline.
|
|
27
|
+
*
|
|
28
|
+
* @template TContextBase - The base context type for the pipeline
|
|
29
|
+
* @returns A new ContextPipeline instance with no transformations
|
|
30
|
+
*/
|
|
31
|
+
static create<TContextBase>(): ContextPipeline<TContextBase, TContextBase>;
|
|
32
|
+
/**
|
|
33
|
+
* Adds a middleware to the pipeline, creating a new pipeline instance.
|
|
34
|
+
*
|
|
35
|
+
* This method provides a fluent interface for building context transformation pipelines.
|
|
36
|
+
* Each middleware can enhance the context with additional properties or utilities.
|
|
37
|
+
*
|
|
38
|
+
* @template TCrawlingContextExtension - The enhanced context type produced by this middleware
|
|
39
|
+
* @param middleware - The middleware to add to the pipeline
|
|
40
|
+
* @returns A new ContextPipeline instance with the added middleware
|
|
41
|
+
*/
|
|
42
|
+
abstract compose<TCrawlingContextExtension>(middleware: ContextMiddleware<TCrawlingContext, TCrawlingContextExtension>): ContextPipeline<TContextBase, TCrawlingContext & TCrawlingContextExtension>;
|
|
43
|
+
/**
|
|
44
|
+
* Chains another pipeline onto this one. The other pipeline's base context must match
|
|
45
|
+
* this pipeline's output context. Returns a new pipeline that runs this pipeline's
|
|
46
|
+
* middlewares first, then the other pipeline's middlewares.
|
|
47
|
+
*
|
|
48
|
+
* @template TFinalContext - The final context type after the chained pipeline's transformations
|
|
49
|
+
* @param other - The pipeline to append after this one
|
|
50
|
+
* @returns A new ContextPipeline combining both pipelines' middlewares
|
|
51
|
+
*/
|
|
52
|
+
abstract chain<TFinalContext extends TCrawlingContext>(other: ContextPipeline<TCrawlingContext, TFinalContext>): ContextPipeline<TContextBase, TFinalContext>;
|
|
53
|
+
/**
|
|
54
|
+
* Executes the middleware pipeline and passes the final context to a consumer function.
|
|
55
|
+
*
|
|
56
|
+
* This method runs the crawling context through the entire middleware chain, enhancing it
|
|
57
|
+
* at each step, and then passes the final enhanced context to the provided consumer function.
|
|
58
|
+
* Proper cleanup is performed even if exceptions occur during processing.
|
|
59
|
+
*
|
|
60
|
+
* @param crawlingContext - The initial context to process through the pipeline
|
|
61
|
+
* @param finalContextConsumer - The function that will receive the final enhanced context
|
|
62
|
+
*
|
|
63
|
+
* @throws {ContextPipelineInitializationError} When a middleware fails during initialization
|
|
64
|
+
* @throws {ContextPipelineInterruptedError} When the pipeline is intentionally interrupted during initialization
|
|
65
|
+
* @throws {RequestHandlerError} When the final context consumer throws an exception
|
|
66
|
+
* @throws {ContextPipelineCleanupError} When cleanup operations fail
|
|
67
|
+
* @throws {SessionError} Session errors are re-thrown as-is for special handling
|
|
68
|
+
*/
|
|
69
|
+
abstract call(crawlingContext: TContextBase, finalContextConsumer: (finalContext: TCrawlingContext) => Awaitable<unknown>): Promise<void>;
|
|
70
|
+
}
|
|
71
|
+
//# sourceMappingURL=context_pipeline.d.ts.map
|