apify 3.7.3-beta.9 → 4.0.0-beta.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,37 +1,62 @@
1
- import type { ConfigurationOptions as CoreConfigurationOptions } from '@crawlee/core';
1
+ import type { ConfigField, FieldsInput, FieldsOutput } from '@crawlee/core';
2
2
  import { Configuration as CoreConfiguration } from '@crawlee/core';
3
- import type { META_ORIGINS } from '@apify/consts';
4
- export interface ConfigurationOptions extends CoreConfigurationOptions {
5
- metamorphAfterSleepMillis?: number;
6
- actorEventsWsUrl?: string;
7
- token?: string;
8
- actorId?: string;
9
- actorRunId?: string;
10
- actorTaskId?: string;
11
- apiBaseUrl?: string;
12
- apiPublicBaseUrl?: string;
13
- containerPort?: number;
14
- containerUrl?: string;
15
- proxyHostname?: string;
16
- proxyPassword?: string;
17
- proxyPort?: number;
18
- proxyStatusUrl?: string;
19
- /**
20
- * @deprecated use `containerPort` instead
21
- */
22
- standbyPort?: number;
23
- standbyUrl?: string;
24
- isAtHome?: boolean;
25
- userId?: string;
26
- inputSecretsPrivateKeyPassphrase?: string;
27
- inputSecretsPrivateKeyFile?: string;
28
- maxTotalChargeUsd?: number;
29
- metaOrigin?: (typeof META_ORIGINS)[keyof typeof META_ORIGINS];
30
- testPayPerEvent?: boolean;
31
- useChargingLogDataset?: boolean;
32
- actorPricingInfo?: string;
33
- chargedEventCounts?: string;
34
- actorStoragesJson?: string;
3
+ import { z } from 'zod';
4
+ export declare const apifyConfigFields: {
5
+ defaultDatasetId: ConfigField<z.ZodDefault<z.ZodString>>;
6
+ defaultKeyValueStoreId: ConfigField<z.ZodDefault<z.ZodString>>;
7
+ defaultRequestQueueId: ConfigField<z.ZodDefault<z.ZodString>>;
8
+ inputKey: ConfigField<z.ZodDefault<z.ZodString>>;
9
+ memoryMbytes: ConfigField<z.ZodOptional<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodNumber>>>;
10
+ availableMemoryRatio: ConfigField<any>;
11
+ disableBrowserSandbox: ConfigField<any>;
12
+ persistStateIntervalMillis: ConfigField<z.ZodDefault<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodNumber>>>;
13
+ headless: ConfigField<z.ZodDefault<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodBoolean>>>;
14
+ xvfb: ConfigField<z.ZodDefault<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodBoolean>>>;
15
+ chromeExecutablePath: ConfigField<z.ZodOptional<z.ZodString>>;
16
+ defaultBrowserPath: ConfigField<z.ZodOptional<z.ZodString>>;
17
+ purgeOnStart: ConfigField<z.ZodDefault<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodBoolean>>>;
18
+ metamorphAfterSleepMillis: ConfigField<any>;
19
+ actorEventsWsUrl: ConfigField<z.ZodOptional<z.ZodString>>;
20
+ token: ConfigField<z.ZodOptional<z.ZodString>>;
21
+ actorId: ConfigField<z.ZodOptional<z.ZodString>>;
22
+ actorRunId: ConfigField<z.ZodOptional<z.ZodString>>;
23
+ actorTaskId: ConfigField<z.ZodOptional<z.ZodString>>;
24
+ apiBaseUrl: ConfigField<z.ZodDefault<z.ZodString>>;
25
+ apiPublicBaseUrl: ConfigField<z.ZodDefault<z.ZodString>>;
26
+ containerPort: ConfigField<any>;
27
+ containerUrl: ConfigField<z.ZodDefault<z.ZodString>>;
28
+ proxyHostname: ConfigField<z.ZodDefault<z.ZodString>>;
29
+ proxyPassword: ConfigField<z.ZodOptional<z.ZodString>>;
30
+ proxyPort: ConfigField<any>;
31
+ proxyStatusUrl: ConfigField<z.ZodDefault<z.ZodString>>;
32
+ /** @deprecated use `containerPort` instead */
33
+ standbyPort: ConfigField<any>;
34
+ standbyUrl: ConfigField<z.ZodOptional<z.ZodString>>;
35
+ isAtHome: ConfigField<any>;
36
+ userId: ConfigField<z.ZodOptional<z.ZodString>>;
37
+ userIsPaying: ConfigField<z.ZodOptional<z.ZodString>>;
38
+ actorPermissionLevel: ConfigField<z.ZodOptional<z.ZodString>>;
39
+ inputSecretsPrivateKeyPassphrase: ConfigField<z.ZodOptional<z.ZodString>>;
40
+ inputSecretsPrivateKeyFile: ConfigField<z.ZodOptional<z.ZodString>>;
41
+ maxTotalChargeUsd: ConfigField<any>;
42
+ metaOrigin: ConfigField<z.ZodOptional<z.ZodString>>;
43
+ testPayPerEvent: ConfigField<any>;
44
+ useChargingLogDataset: ConfigField<any>;
45
+ actorPricingInfo: ConfigField<z.ZodOptional<z.ZodString>>;
46
+ chargedEventCounts: ConfigField<z.ZodOptional<z.ZodString>>;
47
+ actorStoragesJson: ConfigField<z.ZodOptional<z.ZodString>>;
48
+ storageClientOptions: ConfigField<z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
49
+ maxUsedCpuRatio: ConfigField<z.ZodDefault<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodNumber>>>;
50
+ systemInfoIntervalMillis: ConfigField<z.ZodDefault<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodNumber>>>;
51
+ logLevel: ConfigField<z.ZodOptional<z.ZodPipe<z.ZodTransform<{} | null | undefined, unknown>, z.ZodEnum<typeof import("@apify/log").LogLevel>>>>;
52
+ persistStorage: ConfigField<z.ZodDefault<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodBoolean>>>;
53
+ containerized: ConfigField<z.ZodOptional<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodBoolean>>>;
54
+ };
55
+ export type ApifyConfigurationInput = FieldsInput<typeof apifyConfigFields>;
56
+ export type ApifyResolvedConfigValues = FieldsOutput<typeof apifyConfigFields>;
57
+ /** @deprecated Use {@link ApifyConfigurationInput} instead. */
58
+ export type ConfigurationOptions = ApifyConfigurationInput;
59
+ export interface Configuration extends ApifyResolvedConfigValues {
35
60
  }
36
61
  /**
37
62
  * `Configuration` is a value object holding the SDK configuration. We can use it in two ways:
@@ -40,38 +65,34 @@ export interface ConfigurationOptions extends CoreConfigurationOptions {
40
65
  *
41
66
  * ```javascript
42
67
  * import { Actor } from 'apify';
43
- * import { BasicCrawler } from 'crawlee';
44
68
  *
45
69
  * const sdk = new Actor({ token: '123' });
46
- * console.log(sdk.config.get('token')); // '123'
47
- *
48
- * const crawler = new BasicCrawler({
49
- * // ... crawler options
50
- * }, sdk.config);
70
+ * console.log(sdk.config.token); // '123'
51
71
  * ```
52
72
  *
53
73
  * 2. To get the global configuration (singleton instance). It will respect the environment variables.
54
74
  *
55
75
  * ```javascript
56
- * import { BasicCrawler, Configuration } from 'crawlee';
76
+ * import { Configuration } from 'apify';
57
77
  *
58
- * // Get the global configuration
59
78
  * const config = Configuration.getGlobalConfig();
60
- * // Set the 'persistStateIntervalMillis' option
61
- * // of global configuration to 30 seconds
62
- * config.set('persistStateIntervalMillis', 30_000);
63
- *
64
- * // No need to pass the configuration to the crawler,
65
- * // as it's using the global configuration by default
66
- * const crawler = new BasicCrawler();
79
+ * console.log(config.headless);
80
+ * console.log(config.persistStateIntervalMillis);
67
81
  * ```
68
82
  *
83
+ * Configuration is immutable — values are set via the constructor and cannot be changed afterwards.
84
+ * The priority order for resolving values is (highest to lowest):
85
+ *
86
+ * ```text
87
+ * constructor options > environment variables > crawlee.json > schema defaults
88
+ * ```
89
+ *
69
90
  * ## Supported Configuration Options
70
91
  *
71
92
  * Key | Environment Variable | Default Value
72
93
  * ---|---|---
73
94
  * `memoryMbytes` | `ACTOR_MEMORY_MBYTES` | -
74
- * `headless` | `APIFY_HEADLESS` | -
95
+ * `headless` | `APIFY_HEADLESS` | `true`
75
96
  * `persistStateIntervalMillis` | `APIFY_PERSIST_STATE_INTERVAL_MILLIS` | `60e3`
76
97
  * `token` | `APIFY_TOKEN` | -
77
98
  * `isAtHome` | `APIFY_IS_AT_HOME` | -
@@ -85,7 +106,6 @@ export interface ConfigurationOptions extends CoreConfigurationOptions {
85
106
  * ---|---|---
86
107
  * `actorEventsWsUrl` | `ACTOR_EVENTS_WEBSOCKET_URL` | -
87
108
  * `actorId` | `ACTOR_ID` | -
88
- * `actorPermissionLevel` | `ACTOR_PERMISSION_LEVEL` | -
89
109
  * `actorRunId` | `ACTOR_RUN_ID` | -
90
110
  * `actorTaskId` | `ACTOR_TASK_ID` | -
91
111
  * `apiBaseUrl` | `APIFY_API_BASE_URL` | `'https://api.apify.com'`
@@ -99,7 +119,6 @@ export interface ConfigurationOptions extends CoreConfigurationOptions {
99
119
  * `proxyPort` | `APIFY_PROXY_PORT` | `8000`
100
120
  * `proxyStatusUrl` | `APIFY_PROXY_STATUS_URL` | `'http://proxy.apify.com'`
101
121
  * `userId` | `APIFY_USER_ID` | -
102
- * `userIsPaying` | `APIFY_USER_IS_PAYING` | -
103
122
  * `xvfb` | `APIFY_XVFB` | -
104
123
  * `standbyPort` | `ACTOR_STANDBY_PORT` | `4321`
105
124
  * `standbyUrl` | `ACTOR_STANDBY_URL` | -
@@ -107,99 +126,18 @@ export interface ConfigurationOptions extends CoreConfigurationOptions {
107
126
  * `defaultBrowserPath` | `APIFY_DEFAULT_BROWSER_PATH` | -
108
127
  */
109
128
  export declare class Configuration extends CoreConfiguration {
110
- /** @inheritDoc */
129
+ /** @internal */
111
130
  static globalConfig?: Configuration;
112
- protected static ENV_MAP: {
113
- APIFY_AVAILABLE_MEMORY_RATIO: string;
114
- APIFY_PURGE_ON_START: string;
115
- APIFY_MEMORY_MBYTES: string;
116
- APIFY_DEFAULT_DATASET_ID: string;
117
- APIFY_DEFAULT_KEY_VALUE_STORE_ID: string;
118
- APIFY_DEFAULT_REQUEST_QUEUE_ID: string;
119
- APIFY_INPUT_KEY: string;
120
- APIFY_PERSIST_STATE_INTERVAL_MILLIS: string;
121
- APIFY_HEADLESS: string;
122
- APIFY_XVFB: string;
123
- APIFY_CHROME_EXECUTABLE_PATH: string;
124
- APIFY_DEFAULT_BROWSER_PATH: string;
125
- APIFY_DISABLE_BROWSER_SANDBOX: string;
126
- APIFY_TOKEN: string;
127
- APIFY_METAMORPH_AFTER_SLEEP_MILLIS: string;
128
- APIFY_TEST_PERSIST_INTERVAL_MILLIS: string;
129
- APIFY_ACTOR_EVENTS_WS_URL: string;
130
- APIFY_ACTOR_ID: string;
131
- APIFY_API_BASE_URL: string;
132
- APIFY_API_PUBLIC_BASE_URL: string;
133
- APIFY_IS_AT_HOME: string;
134
- APIFY_ACTOR_RUN_ID: string;
135
- APIFY_ACTOR_TASK_ID: string;
136
- APIFY_CONTAINER_PORT: string;
137
- APIFY_CONTAINER_URL: string;
138
- APIFY_USER_ID: string;
139
- APIFY_USER_IS_PAYING: string;
140
- APIFY_PROXY_HOSTNAME: string;
141
- APIFY_PROXY_PASSWORD: string;
142
- APIFY_PROXY_STATUS_URL: string;
143
- APIFY_PROXY_PORT: string;
144
- APIFY_INPUT_SECRETS_PRIVATE_KEY_FILE: string;
145
- APIFY_INPUT_SECRETS_PRIVATE_KEY_PASSPHRASE: string;
146
- APIFY_META_ORIGIN: string;
147
- ACTOR_DEFAULT_DATASET_ID: string;
148
- ACTOR_DEFAULT_KEY_VALUE_STORE_ID: string;
149
- ACTOR_DEFAULT_REQUEST_QUEUE_ID: string;
150
- ACTOR_EVENTS_WEBSOCKET_URL: string;
151
- ACTOR_ID: string;
152
- ACTOR_INPUT_KEY: string;
153
- ACTOR_MEMORY_MBYTES: string;
154
- ACTOR_PERMISSION_LEVEL: string;
155
- ACTOR_RUN_ID: string;
156
- ACTOR_STANDBY_PORT: string;
157
- ACTOR_STANDBY_URL: string;
158
- ACTOR_TASK_ID: string;
159
- ACTOR_WEB_SERVER_PORT: string;
160
- ACTOR_WEB_SERVER_URL: string;
161
- ACTOR_MAX_TOTAL_CHARGE_USD: string;
162
- ACTOR_TEST_PAY_PER_EVENT: string;
163
- ACTOR_USE_CHARGING_LOG_DATASET: string;
164
- APIFY_ACTOR_PRICING_INFO: string;
165
- APIFY_CHARGED_ACTOR_EVENT_COUNTS: string;
166
- ACTOR_STORAGES_JSON: string;
167
- };
168
- protected static INTEGER_VARS: string[];
169
- protected static BOOLEAN_VARS: string[];
170
- protected static DEFAULTS: {
171
- defaultKeyValueStoreId: string;
172
- defaultDatasetId: string;
173
- defaultRequestQueueId: string;
174
- inputKey: string;
175
- apiBaseUrl: string;
176
- apiPublicBaseUrl: string;
177
- proxyStatusUrl: string;
178
- proxyHostname: string;
179
- proxyPort: number;
180
- containerPort: number;
181
- containerUrl: string;
182
- standbyPort: number;
183
- metamorphAfterSleepMillis: number;
184
- persistStateIntervalMillis: number;
185
- testPayPerEvent: boolean;
186
- useChargingLogDataset: boolean;
187
- };
188
- /**
189
- * @inheritDoc
190
- */
191
- get<T extends keyof ConfigurationOptions, U extends ConfigurationOptions[T]>(key: T, defaultValue?: U): U;
192
- /**
193
- * @inheritDoc
194
- */
195
- set(key: keyof ConfigurationOptions, value?: any): void;
131
+ protected static fields: Record<string, ConfigField>;
132
+ constructor(options?: ApifyConfigurationInput);
196
133
  /**
197
134
  * @inheritDoc
135
+ *
136
+ * Returns the SDK's global {@link Configuration} singleton (an
137
+ * Apify-typed default that parses `APIFY_*` env vars). During an Actor run
138
+ * the active configuration is held by crawlee's `serviceLocator`, which is
139
+ * what crawlee internals resolve against; this singleton is only the
140
+ * fallback for code reaching for a configuration without an explicit one.
198
141
  */
199
142
  static getGlobalConfig(): Configuration;
200
- /**
201
- * Resets global configuration instance. The default instance holds configuration based on env vars,
202
- * if we want to change them, we need to first reset the global state. Used mainly for testing purposes.
203
- */
204
- static resetGlobalState(): void;
205
143
  }
@@ -1,8 +1,91 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.Configuration = void 0;
4
- const core_1 = require("@crawlee/core");
5
- const consts_1 = require("@apify/consts");
1
+ /* eslint-disable no-use-before-define */
2
+ import { coerceBoolean, coerceNumber, Configuration as CoreConfiguration, crawleeConfigFields, field, } from '@crawlee/core';
3
+ import { z } from 'zod';
4
+ import { ACTOR_ENV_VARS, APIFY_ENV_VARS, LOCAL_ACTOR_ENV_VARS, LOCAL_APIFY_ENV_VARS } from '@apify/consts';
5
+ // Evaluated lazily (per Configuration construction, not at import) so the
6
+ // at-home defaults for `availableMemoryRatio` / `disableBrowserSandbox` below
7
+ // react to `APIFY_IS_AT_HOME` even if it changes after import (tests, embedding).
8
+ // Reads the env var, not the resolved `isAtHome` field, so an explicit
9
+ // `new Configuration({ isAtHome: true })` won't flip these — env is the source.
10
+ const isAtHome = () => !!process.env[APIFY_ENV_VARS.IS_AT_HOME];
11
+ function withApifyEnv(base, apifyEnvVars, schema) {
12
+ const crawleeVars = base.envVar == null ? [] : [base.envVar].flat();
13
+ return field((schema ?? base.schema), [...[apifyEnvVars].flat(), ...crawleeVars]);
14
+ }
15
+ // --- Apify config field definitions ---
16
+ export const apifyConfigFields = {
17
+ // Inherit all crawlee fields as-is.
18
+ ...crawleeConfigFields,
19
+ // Crawlee fields the SDK extends with ACTOR_/APIFY_ env-var aliases (which
20
+ // take precedence; crawlee's own CRAWLEE_* var is reused as the fallback,
21
+ // never re-typed). A schema is passed only where the SDK needs a different
22
+ // default than crawlee's.
23
+ defaultDatasetId: withApifyEnv(crawleeConfigFields.defaultDatasetId, [ACTOR_ENV_VARS.DEFAULT_DATASET_ID, APIFY_ENV_VARS.DEFAULT_DATASET_ID], z.string().default(LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.DEFAULT_DATASET_ID])),
24
+ defaultKeyValueStoreId: withApifyEnv(crawleeConfigFields.defaultKeyValueStoreId, [ACTOR_ENV_VARS.DEFAULT_KEY_VALUE_STORE_ID, APIFY_ENV_VARS.DEFAULT_KEY_VALUE_STORE_ID], z.string().default(LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.DEFAULT_KEY_VALUE_STORE_ID])),
25
+ defaultRequestQueueId: withApifyEnv(crawleeConfigFields.defaultRequestQueueId, [ACTOR_ENV_VARS.DEFAULT_REQUEST_QUEUE_ID, APIFY_ENV_VARS.DEFAULT_REQUEST_QUEUE_ID], z.string().default(LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.DEFAULT_REQUEST_QUEUE_ID])),
26
+ inputKey: withApifyEnv(crawleeConfigFields.inputKey, [ACTOR_ENV_VARS.INPUT_KEY, APIFY_ENV_VARS.INPUT_KEY]),
27
+ memoryMbytes: withApifyEnv(crawleeConfigFields.memoryMbytes, [
28
+ ACTOR_ENV_VARS.MEMORY_MBYTES,
29
+ APIFY_ENV_VARS.MEMORY_MBYTES,
30
+ ]),
31
+ availableMemoryRatio: withApifyEnv(crawleeConfigFields.availableMemoryRatio, 'APIFY_AVAILABLE_MEMORY_RATIO', coerceNumber.default(() => (isAtHome() ? 1 : 0.25))),
32
+ disableBrowserSandbox: withApifyEnv(crawleeConfigFields.disableBrowserSandbox, 'APIFY_DISABLE_BROWSER_SANDBOX', coerceBoolean.optional().default(() => (isAtHome() ? true : undefined))),
33
+ persistStateIntervalMillis: withApifyEnv(crawleeConfigFields.persistStateIntervalMillis, [
34
+ APIFY_ENV_VARS.PERSIST_STATE_INTERVAL_MILLIS,
35
+ 'APIFY_TEST_PERSIST_INTERVAL_MILLIS',
36
+ ]),
37
+ headless: withApifyEnv(crawleeConfigFields.headless, APIFY_ENV_VARS.HEADLESS),
38
+ xvfb: withApifyEnv(crawleeConfigFields.xvfb, APIFY_ENV_VARS.XVFB),
39
+ chromeExecutablePath: withApifyEnv(crawleeConfigFields.chromeExecutablePath, APIFY_ENV_VARS.CHROME_EXECUTABLE_PATH),
40
+ defaultBrowserPath: withApifyEnv(crawleeConfigFields.defaultBrowserPath, 'APIFY_DEFAULT_BROWSER_PATH'),
41
+ purgeOnStart: withApifyEnv(crawleeConfigFields.purgeOnStart, APIFY_ENV_VARS.PURGE_ON_START),
42
+ // Apify-specific fields
43
+ metamorphAfterSleepMillis: field(coerceNumber.default(300_000), APIFY_ENV_VARS.METAMORPH_AFTER_SLEEP_MILLIS),
44
+ actorEventsWsUrl: field(z.string().optional(), [
45
+ ACTOR_ENV_VARS.EVENTS_WEBSOCKET_URL,
46
+ APIFY_ENV_VARS.ACTOR_EVENTS_WS_URL,
47
+ ]),
48
+ token: field(z.string().optional(), APIFY_ENV_VARS.TOKEN),
49
+ actorId: field(z.string().optional(), [ACTOR_ENV_VARS.ID, APIFY_ENV_VARS.ACTOR_ID]),
50
+ actorRunId: field(z.string().optional(), [ACTOR_ENV_VARS.RUN_ID, APIFY_ENV_VARS.ACTOR_RUN_ID]),
51
+ actorTaskId: field(z.string().optional(), [ACTOR_ENV_VARS.TASK_ID, APIFY_ENV_VARS.ACTOR_TASK_ID]),
52
+ apiBaseUrl: field(z.string().default('https://api.apify.com'), APIFY_ENV_VARS.API_BASE_URL),
53
+ apiPublicBaseUrl: field(z.string().default('https://api.apify.com'), APIFY_ENV_VARS.API_PUBLIC_BASE_URL),
54
+ containerPort: field(coerceNumber.default(+LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.WEB_SERVER_PORT]), [
55
+ ACTOR_ENV_VARS.WEB_SERVER_PORT,
56
+ APIFY_ENV_VARS.CONTAINER_PORT,
57
+ ]),
58
+ containerUrl: field(z.string().default(LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.WEB_SERVER_URL]), [
59
+ ACTOR_ENV_VARS.WEB_SERVER_URL,
60
+ APIFY_ENV_VARS.CONTAINER_URL,
61
+ ]),
62
+ proxyHostname: field(z.string().default(LOCAL_APIFY_ENV_VARS[APIFY_ENV_VARS.PROXY_HOSTNAME]), APIFY_ENV_VARS.PROXY_HOSTNAME),
63
+ proxyPassword: field(z.string().optional(), APIFY_ENV_VARS.PROXY_PASSWORD),
64
+ proxyPort: field(coerceNumber.default(+LOCAL_APIFY_ENV_VARS[APIFY_ENV_VARS.PROXY_PORT]), APIFY_ENV_VARS.PROXY_PORT),
65
+ proxyStatusUrl: field(z.string().default('http://proxy.apify.com'), APIFY_ENV_VARS.PROXY_STATUS_URL),
66
+ /** @deprecated use `containerPort` instead */
67
+ standbyPort: field(coerceNumber.default(+LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.STANDBY_PORT]), ACTOR_ENV_VARS.STANDBY_PORT),
68
+ standbyUrl: field(z.string().optional(), ACTOR_ENV_VARS.STANDBY_URL),
69
+ isAtHome: field(coerceBoolean.default(false), APIFY_ENV_VARS.IS_AT_HOME),
70
+ userId: field(z.string().optional(), APIFY_ENV_VARS.USER_ID),
71
+ userIsPaying: field(z.string().optional(), APIFY_ENV_VARS.USER_IS_PAYING),
72
+ actorPermissionLevel: field(z.string().optional(), ACTOR_ENV_VARS.PERMISSION_LEVEL),
73
+ inputSecretsPrivateKeyPassphrase: field(z.string().optional(), APIFY_ENV_VARS.INPUT_SECRETS_PRIVATE_KEY_PASSPHRASE),
74
+ inputSecretsPrivateKeyFile: field(z.string().optional(), APIFY_ENV_VARS.INPUT_SECRETS_PRIVATE_KEY_FILE),
75
+ // `0` is treated as "no limit" (mirrors the Apify platform contract).
76
+ maxTotalChargeUsd: field(coerceNumber.transform((val) => (val === 0 ? Infinity : val)).default(Infinity), ACTOR_ENV_VARS.MAX_TOTAL_CHARGE_USD),
77
+ metaOrigin: field(z.string().optional(), APIFY_ENV_VARS.META_ORIGIN),
78
+ testPayPerEvent: field(coerceBoolean.default(false), 'ACTOR_TEST_PAY_PER_EVENT'),
79
+ useChargingLogDataset: field(coerceBoolean.default(false), 'ACTOR_USE_CHARGING_LOG_DATASET'),
80
+ // Pay-per-event charging metadata injected by the platform (JSON strings).
81
+ actorPricingInfo: field(z.string().optional(), 'APIFY_ACTOR_PRICING_INFO'),
82
+ chargedEventCounts: field(z.string().optional(), 'APIFY_CHARGED_ACTOR_EVENT_COUNTS'),
83
+ actorStoragesJson: field(z.string().optional(), 'ACTOR_STORAGES_JSON'),
84
+ // Grab-bag of ApifyClient constructor options; the `storageDir` key is
85
+ // pulled out separately for local storage emulation, the rest is spread
86
+ // into `new ApifyClient({...})` in `Actor.newClient()`. No env var alias.
87
+ storageClientOptions: field(z.record(z.string(), z.unknown()).optional()),
88
+ };
6
89
  /**
7
90
  * `Configuration` is a value object holding the SDK configuration. We can use it in two ways:
8
91
  *
@@ -10,38 +93,34 @@ const consts_1 = require("@apify/consts");
10
93
  *
11
94
  * ```javascript
12
95
  * import { Actor } from 'apify';
13
- * import { BasicCrawler } from 'crawlee';
14
96
  *
15
97
  * const sdk = new Actor({ token: '123' });
16
- * console.log(sdk.config.get('token')); // '123'
17
- *
18
- * const crawler = new BasicCrawler({
19
- * // ... crawler options
20
- * }, sdk.config);
98
+ * console.log(sdk.config.token); // '123'
21
99
  * ```
22
100
  *
23
101
  * 2. To get the global configuration (singleton instance). It will respect the environment variables.
24
102
  *
25
103
  * ```javascript
26
- * import { BasicCrawler, Configuration } from 'crawlee';
104
+ * import { Configuration } from 'apify';
27
105
  *
28
- * // Get the global configuration
29
106
  * const config = Configuration.getGlobalConfig();
30
- * // Set the 'persistStateIntervalMillis' option
31
- * // of global configuration to 30 seconds
32
- * config.set('persistStateIntervalMillis', 30_000);
33
- *
34
- * // No need to pass the configuration to the crawler,
35
- * // as it's using the global configuration by default
36
- * const crawler = new BasicCrawler();
107
+ * console.log(config.headless);
108
+ * console.log(config.persistStateIntervalMillis);
37
109
  * ```
38
110
  *
111
+ * Configuration is immutable — values are set via the constructor and cannot be changed afterwards.
112
+ * The priority order for resolving values is (highest to lowest):
113
+ *
114
+ * ```text
115
+ * constructor options > environment variables > crawlee.json > schema defaults
116
+ * ```
117
+ *
39
118
  * ## Supported Configuration Options
40
119
  *
41
120
  * Key | Environment Variable | Default Value
42
121
  * ---|---|---
43
122
  * `memoryMbytes` | `ACTOR_MEMORY_MBYTES` | -
44
- * `headless` | `APIFY_HEADLESS` | -
123
+ * `headless` | `APIFY_HEADLESS` | `true`
45
124
  * `persistStateIntervalMillis` | `APIFY_PERSIST_STATE_INTERVAL_MILLIS` | `60e3`
46
125
  * `token` | `APIFY_TOKEN` | -
47
126
  * `isAtHome` | `APIFY_IS_AT_HOME` | -
@@ -55,7 +134,6 @@ const consts_1 = require("@apify/consts");
55
134
  * ---|---|---
56
135
  * `actorEventsWsUrl` | `ACTOR_EVENTS_WEBSOCKET_URL` | -
57
136
  * `actorId` | `ACTOR_ID` | -
58
- * `actorPermissionLevel` | `ACTOR_PERMISSION_LEVEL` | -
59
137
  * `actorRunId` | `ACTOR_RUN_ID` | -
60
138
  * `actorTaskId` | `ACTOR_TASK_ID` | -
61
139
  * `apiBaseUrl` | `APIFY_API_BASE_URL` | `'https://api.apify.com'`
@@ -69,166 +147,34 @@ const consts_1 = require("@apify/consts");
69
147
  * `proxyPort` | `APIFY_PROXY_PORT` | `8000`
70
148
  * `proxyStatusUrl` | `APIFY_PROXY_STATUS_URL` | `'http://proxy.apify.com'`
71
149
  * `userId` | `APIFY_USER_ID` | -
72
- * `userIsPaying` | `APIFY_USER_IS_PAYING` | -
73
150
  * `xvfb` | `APIFY_XVFB` | -
74
151
  * `standbyPort` | `ACTOR_STANDBY_PORT` | `4321`
75
152
  * `standbyUrl` | `ACTOR_STANDBY_URL` | -
76
153
  * `chromeExecutablePath` | `APIFY_CHROME_EXECUTABLE_PATH` | -
77
154
  * `defaultBrowserPath` | `APIFY_DEFAULT_BROWSER_PATH` | -
78
155
  */
79
- class Configuration extends core_1.Configuration {
80
- /**
81
- * @inheritDoc
82
- */
83
- get(key, defaultValue) {
84
- return super.get(key, defaultValue);
85
- }
86
- /**
87
- * @inheritDoc
88
- */
89
- set(key, value) {
90
- super.set(key, value);
156
+ // eslint-disable-next-line @typescript-eslint/no-unsafe-declaration-merging
157
+ export class Configuration extends CoreConfiguration {
158
+ /** @internal */
159
+ static globalConfig;
160
+ static fields = apifyConfigFields;
161
+ constructor(options = {}) {
162
+ // `super` types its options against crawlee's field set; ours is a
163
+ // superset (apifyConfigFields spreads crawleeConfigFields), so the
164
+ // shapes are runtime-compatible but not TS-assignable — hence the cast.
165
+ super(options);
91
166
  }
92
167
  /**
93
168
  * @inheritDoc
169
+ *
170
+ * Returns the SDK's global {@link Configuration} singleton (an
171
+ * Apify-typed default that parses `APIFY_*` env vars). During an Actor run
172
+ * the active configuration is held by crawlee's `serviceLocator`, which is
173
+ * what crawlee internals resolve against; this singleton is only the
174
+ * fallback for code reaching for a configuration without an explicit one.
94
175
  */
95
176
  static getGlobalConfig() {
96
- if (Configuration.storage.getStore()) {
97
- return Configuration.storage.getStore();
98
- }
99
- Configuration.globalConfig ?? (Configuration.globalConfig = new Configuration());
177
+ Configuration.globalConfig ??= new Configuration();
100
178
  return Configuration.globalConfig;
101
179
  }
102
- /**
103
- * Resets global configuration instance. The default instance holds configuration based on env vars,
104
- * if we want to change them, we need to first reset the global state. Used mainly for testing purposes.
105
- */
106
- static resetGlobalState() {
107
- delete this.globalConfig;
108
- }
109
180
  }
110
- exports.Configuration = Configuration;
111
- // maps environment variables to config keys (e.g. `APIFY_MEMORY_MBYTES` to `memoryMbytes`)
112
- Object.defineProperty(Configuration, "ENV_MAP", {
113
- enumerable: true,
114
- configurable: true,
115
- writable: true,
116
- value: {
117
- // regular crawlee env vars are also supported
118
- ...core_1.Configuration.ENV_MAP,
119
- // support crawlee env vars prefixed with `APIFY_` too
120
- APIFY_AVAILABLE_MEMORY_RATIO: 'availableMemoryRatio',
121
- APIFY_PURGE_ON_START: 'purgeOnStart',
122
- APIFY_MEMORY_MBYTES: 'memoryMbytes',
123
- APIFY_DEFAULT_DATASET_ID: 'defaultDatasetId',
124
- APIFY_DEFAULT_KEY_VALUE_STORE_ID: 'defaultKeyValueStoreId',
125
- APIFY_DEFAULT_REQUEST_QUEUE_ID: 'defaultRequestQueueId',
126
- APIFY_INPUT_KEY: 'inputKey',
127
- APIFY_PERSIST_STATE_INTERVAL_MILLIS: 'persistStateIntervalMillis',
128
- APIFY_HEADLESS: 'headless',
129
- APIFY_XVFB: 'xvfb',
130
- APIFY_CHROME_EXECUTABLE_PATH: 'chromeExecutablePath',
131
- APIFY_DEFAULT_BROWSER_PATH: 'defaultBrowserPath',
132
- APIFY_DISABLE_BROWSER_SANDBOX: 'disableBrowserSandbox',
133
- // as well as apify specific ones
134
- APIFY_TOKEN: 'token',
135
- APIFY_METAMORPH_AFTER_SLEEP_MILLIS: 'metamorphAfterSleepMillis',
136
- APIFY_TEST_PERSIST_INTERVAL_MILLIS: 'persistStateIntervalMillis', // for BC, seems to be unused
137
- APIFY_ACTOR_EVENTS_WS_URL: 'actorEventsWsUrl',
138
- APIFY_ACTOR_ID: 'actorId',
139
- APIFY_API_BASE_URL: 'apiBaseUrl',
140
- APIFY_API_PUBLIC_BASE_URL: 'apiPublicBaseUrl',
141
- APIFY_IS_AT_HOME: 'isAtHome',
142
- APIFY_ACTOR_RUN_ID: 'actorRunId',
143
- APIFY_ACTOR_TASK_ID: 'actorTaskId',
144
- APIFY_CONTAINER_PORT: 'containerPort',
145
- APIFY_CONTAINER_URL: 'containerUrl',
146
- APIFY_USER_ID: 'userId',
147
- APIFY_USER_IS_PAYING: 'userIsPaying',
148
- APIFY_PROXY_HOSTNAME: 'proxyHostname',
149
- APIFY_PROXY_PASSWORD: 'proxyPassword',
150
- APIFY_PROXY_STATUS_URL: 'proxyStatusUrl',
151
- APIFY_PROXY_PORT: 'proxyPort',
152
- APIFY_INPUT_SECRETS_PRIVATE_KEY_FILE: 'inputSecretsPrivateKeyFile',
153
- APIFY_INPUT_SECRETS_PRIVATE_KEY_PASSPHRASE: 'inputSecretsPrivateKeyPassphrase',
154
- APIFY_META_ORIGIN: 'metaOrigin',
155
- // Actor env vars
156
- ACTOR_DEFAULT_DATASET_ID: 'defaultDatasetId',
157
- ACTOR_DEFAULT_KEY_VALUE_STORE_ID: 'defaultKeyValueStoreId',
158
- ACTOR_DEFAULT_REQUEST_QUEUE_ID: 'defaultRequestQueueId',
159
- ACTOR_EVENTS_WEBSOCKET_URL: 'actorEventsWsUrl',
160
- ACTOR_ID: 'actorId',
161
- ACTOR_INPUT_KEY: 'inputKey',
162
- ACTOR_MEMORY_MBYTES: 'memoryMbytes',
163
- ACTOR_PERMISSION_LEVEL: 'actorPermissionLevel',
164
- ACTOR_RUN_ID: 'actorRunId',
165
- ACTOR_STANDBY_PORT: 'standbyPort',
166
- ACTOR_STANDBY_URL: 'standbyUrl',
167
- ACTOR_TASK_ID: 'actorTaskId',
168
- ACTOR_WEB_SERVER_PORT: 'containerPort',
169
- ACTOR_WEB_SERVER_URL: 'containerUrl',
170
- ACTOR_MAX_TOTAL_CHARGE_USD: 'maxTotalChargeUsd',
171
- ACTOR_TEST_PAY_PER_EVENT: 'testPayPerEvent',
172
- ACTOR_USE_CHARGING_LOG_DATASET: 'useChargingLogDataset',
173
- APIFY_ACTOR_PRICING_INFO: 'actorPricingInfo',
174
- APIFY_CHARGED_ACTOR_EVENT_COUNTS: 'chargedEventCounts',
175
- ACTOR_STORAGES_JSON: 'actorStoragesJson',
176
- }
177
- });
178
- Object.defineProperty(Configuration, "INTEGER_VARS", {
179
- enumerable: true,
180
- configurable: true,
181
- writable: true,
182
- value: [
183
- ...core_1.Configuration.INTEGER_VARS,
184
- 'proxyPort',
185
- 'containerPort',
186
- 'metamorphAfterSleepMillis',
187
- 'maxTotalChargeUsd',
188
- ]
189
- });
190
- Object.defineProperty(Configuration, "BOOLEAN_VARS", {
191
- enumerable: true,
192
- configurable: true,
193
- writable: true,
194
- value: [
195
- ...core_1.Configuration.BOOLEAN_VARS,
196
- 'isAtHome',
197
- 'testPayPerEvent',
198
- 'useChargingLogDataset',
199
- ]
200
- });
201
- Object.defineProperty(Configuration, "DEFAULTS", {
202
- enumerable: true,
203
- configurable: true,
204
- writable: true,
205
- value: {
206
- ...core_1.Configuration.DEFAULTS,
207
- defaultKeyValueStoreId: consts_1.LOCAL_ACTOR_ENV_VARS[consts_1.ACTOR_ENV_VARS.DEFAULT_KEY_VALUE_STORE_ID],
208
- defaultDatasetId: consts_1.LOCAL_ACTOR_ENV_VARS[consts_1.ACTOR_ENV_VARS.DEFAULT_DATASET_ID],
209
- defaultRequestQueueId: consts_1.LOCAL_ACTOR_ENV_VARS[consts_1.ACTOR_ENV_VARS.DEFAULT_REQUEST_QUEUE_ID],
210
- inputKey: 'INPUT',
211
- apiBaseUrl: 'https://api.apify.com',
212
- apiPublicBaseUrl: 'https://api.apify.com',
213
- proxyStatusUrl: 'http://proxy.apify.com',
214
- proxyHostname: consts_1.LOCAL_APIFY_ENV_VARS[consts_1.APIFY_ENV_VARS.PROXY_HOSTNAME],
215
- proxyPort: +consts_1.LOCAL_APIFY_ENV_VARS[consts_1.APIFY_ENV_VARS.PROXY_PORT],
216
- containerPort: +consts_1.LOCAL_ACTOR_ENV_VARS[consts_1.ACTOR_ENV_VARS.WEB_SERVER_PORT],
217
- containerUrl: consts_1.LOCAL_ACTOR_ENV_VARS[consts_1.ACTOR_ENV_VARS.WEB_SERVER_URL],
218
- standbyPort: +consts_1.LOCAL_ACTOR_ENV_VARS[consts_1.ACTOR_ENV_VARS.STANDBY_PORT],
219
- metamorphAfterSleepMillis: 300e3,
220
- persistStateIntervalMillis: 60e3, // This value is mentioned in jsdoc in `events.js`, if you update it here, update it there too.
221
- testPayPerEvent: false,
222
- useChargingLogDataset: false,
223
- }
224
- });
225
- // monkey patch the core class so it respects the new options too
226
- core_1.Configuration.getGlobalConfig = Configuration.getGlobalConfig;
227
- // @ts-expect-error protected property
228
- core_1.Configuration.ENV_MAP = Configuration.ENV_MAP;
229
- // @ts-expect-error protected property
230
- core_1.Configuration.INTEGER_VARS = Configuration.INTEGER_VARS;
231
- // @ts-expect-error protected property
232
- core_1.Configuration.BOOLEAN_VARS = Configuration.BOOLEAN_VARS;
233
- // @ts-expect-error protected property
234
- core_1.Configuration.DEFAULTS = Configuration.DEFAULTS;
package/dist/index.js CHANGED
@@ -1,22 +1,8 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.ApifyClient = exports.LoggerText = exports.LoggerJson = exports.Logger = exports.LogLevel = exports.Log = exports.log = exports.RequestQueue = exports.Dataset = exports.ChargingManager = void 0;
4
- const tslib_1 = require("tslib");
5
- tslib_1.__exportStar(require("./actor.js"), exports);
6
- var charging_js_1 = require("./charging.js");
7
- Object.defineProperty(exports, "ChargingManager", { enumerable: true, get: function () { return charging_js_1.ChargingManager; } });
8
- tslib_1.__exportStar(require("./configuration.js"), exports);
9
- tslib_1.__exportStar(require("./proxy_configuration.js"), exports);
10
- tslib_1.__exportStar(require("./platform_event_manager.js"), exports);
11
- tslib_1.__exportStar(require("./key_value_store.js"), exports);
12
- var core_1 = require("@crawlee/core");
13
- Object.defineProperty(exports, "Dataset", { enumerable: true, get: function () { return core_1.Dataset; } });
14
- Object.defineProperty(exports, "RequestQueue", { enumerable: true, get: function () { return core_1.RequestQueue; } });
15
- Object.defineProperty(exports, "log", { enumerable: true, get: function () { return core_1.log; } });
16
- Object.defineProperty(exports, "Log", { enumerable: true, get: function () { return core_1.Log; } });
17
- Object.defineProperty(exports, "LogLevel", { enumerable: true, get: function () { return core_1.LogLevel; } });
18
- Object.defineProperty(exports, "Logger", { enumerable: true, get: function () { return core_1.Logger; } });
19
- Object.defineProperty(exports, "LoggerJson", { enumerable: true, get: function () { return core_1.LoggerJson; } });
20
- Object.defineProperty(exports, "LoggerText", { enumerable: true, get: function () { return core_1.LoggerText; } });
21
- var apify_client_1 = require("apify-client");
22
- Object.defineProperty(exports, "ApifyClient", { enumerable: true, get: function () { return apify_client_1.ApifyClient; } });
1
+ export * from './actor.js';
2
+ export { ChargingManager } from './charging.js';
3
+ export * from './configuration.js';
4
+ export * from './proxy_configuration.js';
5
+ export * from './platform_event_manager.js';
6
+ export * from './key_value_store.js';
7
+ export { Dataset, RequestQueue, log, Log, LogLevel, Logger, LoggerJson, LoggerText, } from '@crawlee/core';
8
+ export { ApifyClient } from 'apify-client';