apify 4.0.0-beta.12 → 4.0.0-beta.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/README.md +12 -48
  2. package/dist/actor.d.ts +157 -61
  3. package/dist/actor.js +278 -91
  4. package/dist/apify_storage_client.d.ts +54 -0
  5. package/dist/apify_storage_client.js +152 -0
  6. package/dist/charging.d.ts +43 -2
  7. package/dist/charging.js +196 -54
  8. package/dist/configuration.d.ts +79 -132
  9. package/dist/configuration.js +114 -141
  10. package/dist/index.d.ts +2 -2
  11. package/dist/index.js +1 -2
  12. package/dist/input-schemas.d.ts +7 -0
  13. package/dist/input-schemas.js +58 -0
  14. package/dist/key_value_store.d.ts +8 -4
  15. package/dist/key_value_store.js +19 -11
  16. package/dist/platform_event_manager.d.ts +0 -1
  17. package/dist/platform_event_manager.js +5 -5
  18. package/dist/proxy_configuration.d.ts +41 -44
  19. package/dist/proxy_configuration.js +65 -103
  20. package/dist/storage.d.ts +58 -0
  21. package/dist/storage.js +79 -0
  22. package/dist/utils.d.ts +0 -1
  23. package/dist/utils.js +2 -4
  24. package/package.json +123 -73
  25. package/.turbo/turbo-build.log +0 -26
  26. package/.turbo/turbo-copy.log +0 -4
  27. package/dist/LICENSE.md +0 -201
  28. package/dist/README.md +0 -98
  29. package/dist/actor.d.ts.map +0 -1
  30. package/dist/actor.js.map +0 -1
  31. package/dist/charging.d.ts.map +0 -1
  32. package/dist/charging.js.map +0 -1
  33. package/dist/configuration.d.ts.map +0 -1
  34. package/dist/configuration.js.map +0 -1
  35. package/dist/index.d.ts.map +0 -1
  36. package/dist/index.js.map +0 -1
  37. package/dist/key_value_store.d.ts.map +0 -1
  38. package/dist/key_value_store.js.map +0 -1
  39. package/dist/package.json +0 -75
  40. package/dist/platform_event_manager.d.ts.map +0 -1
  41. package/dist/platform_event_manager.js.map +0 -1
  42. package/dist/proxy_configuration.d.ts.map +0 -1
  43. package/dist/proxy_configuration.js.map +0 -1
  44. package/dist/utils.d.ts.map +0 -1
  45. package/dist/utils.js.map +0 -1
@@ -1,34 +1,62 @@
1
- import type { ConfigurationOptions as CoreConfigurationOptions } from '@crawlee/core';
1
+ import type { ConfigField, FieldsInput, FieldsOutput } from '@crawlee/core';
2
2
  import { Configuration as CoreConfiguration } from '@crawlee/core';
3
- import type { META_ORIGINS } from '@apify/consts';
4
- export interface ConfigurationOptions extends CoreConfigurationOptions {
5
- metamorphAfterSleepMillis?: number;
6
- actorEventsWsUrl?: string;
7
- token?: string;
8
- actorId?: string;
9
- actorRunId?: string;
10
- actorTaskId?: string;
11
- apiBaseUrl?: string;
12
- apiPublicBaseUrl?: string;
13
- containerPort?: number;
14
- containerUrl?: string;
15
- proxyHostname?: string;
16
- proxyPassword?: string;
17
- proxyPort?: number;
18
- proxyStatusUrl?: string;
19
- /**
20
- * @deprecated use `containerPort` instead
21
- */
22
- standbyPort?: number;
23
- standbyUrl?: string;
24
- isAtHome?: boolean;
25
- userId?: string;
26
- inputSecretsPrivateKeyPassphrase?: string;
27
- inputSecretsPrivateKeyFile?: string;
28
- maxTotalChargeUsd?: number;
29
- metaOrigin?: (typeof META_ORIGINS)[keyof typeof META_ORIGINS];
30
- testPayPerEvent?: boolean;
31
- useChargingLogDataset?: boolean;
3
+ import { z } from 'zod';
4
+ export declare const apifyConfigFields: {
5
+ defaultDatasetId: ConfigField<z.ZodDefault<z.ZodString>>;
6
+ defaultKeyValueStoreId: ConfigField<z.ZodDefault<z.ZodString>>;
7
+ defaultRequestQueueId: ConfigField<z.ZodDefault<z.ZodString>>;
8
+ inputKey: ConfigField<z.ZodDefault<z.ZodString>>;
9
+ memoryMbytes: ConfigField<z.ZodOptional<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodNumber>>>;
10
+ availableMemoryRatio: ConfigField<any>;
11
+ disableBrowserSandbox: ConfigField<any>;
12
+ persistStateIntervalMillis: ConfigField<z.ZodDefault<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodNumber>>>;
13
+ headless: ConfigField<z.ZodDefault<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodBoolean>>>;
14
+ xvfb: ConfigField<z.ZodDefault<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodBoolean>>>;
15
+ chromeExecutablePath: ConfigField<z.ZodOptional<z.ZodString>>;
16
+ defaultBrowserPath: ConfigField<z.ZodOptional<z.ZodString>>;
17
+ purgeOnStart: ConfigField<z.ZodDefault<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodBoolean>>>;
18
+ metamorphAfterSleepMillis: ConfigField<any>;
19
+ actorEventsWsUrl: ConfigField<z.ZodOptional<z.ZodString>>;
20
+ token: ConfigField<z.ZodOptional<z.ZodString>>;
21
+ actorId: ConfigField<z.ZodOptional<z.ZodString>>;
22
+ actorRunId: ConfigField<z.ZodOptional<z.ZodString>>;
23
+ actorTaskId: ConfigField<z.ZodOptional<z.ZodString>>;
24
+ apiBaseUrl: ConfigField<z.ZodDefault<z.ZodString>>;
25
+ apiPublicBaseUrl: ConfigField<z.ZodDefault<z.ZodString>>;
26
+ containerPort: ConfigField<any>;
27
+ containerUrl: ConfigField<z.ZodDefault<z.ZodString>>;
28
+ proxyHostname: ConfigField<z.ZodDefault<z.ZodString>>;
29
+ proxyPassword: ConfigField<z.ZodOptional<z.ZodString>>;
30
+ proxyPort: ConfigField<any>;
31
+ proxyStatusUrl: ConfigField<z.ZodDefault<z.ZodString>>;
32
+ /** @deprecated use `containerPort` instead */
33
+ standbyPort: ConfigField<any>;
34
+ standbyUrl: ConfigField<z.ZodOptional<z.ZodString>>;
35
+ isAtHome: ConfigField<any>;
36
+ userId: ConfigField<z.ZodOptional<z.ZodString>>;
37
+ userIsPaying: ConfigField<z.ZodOptional<z.ZodString>>;
38
+ actorPermissionLevel: ConfigField<z.ZodOptional<z.ZodString>>;
39
+ inputSecretsPrivateKeyPassphrase: ConfigField<z.ZodOptional<z.ZodString>>;
40
+ inputSecretsPrivateKeyFile: ConfigField<z.ZodOptional<z.ZodString>>;
41
+ maxTotalChargeUsd: ConfigField<any>;
42
+ metaOrigin: ConfigField<z.ZodOptional<z.ZodString>>;
43
+ testPayPerEvent: ConfigField<any>;
44
+ useChargingLogDataset: ConfigField<any>;
45
+ actorPricingInfo: ConfigField<z.ZodOptional<z.ZodString>>;
46
+ chargedEventCounts: ConfigField<z.ZodOptional<z.ZodString>>;
47
+ actorStoragesJson: ConfigField<z.ZodOptional<z.ZodString>>;
48
+ storageClientOptions: ConfigField<z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
49
+ maxUsedCpuRatio: ConfigField<z.ZodDefault<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodNumber>>>;
50
+ systemInfoIntervalMillis: ConfigField<z.ZodDefault<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodNumber>>>;
51
+ logLevel: ConfigField<z.ZodOptional<z.ZodPipe<z.ZodTransform<{} | null | undefined, unknown>, z.ZodEnum<typeof import("@apify/log").LogLevel>>>>;
52
+ persistStorage: ConfigField<z.ZodDefault<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodBoolean>>>;
53
+ containerized: ConfigField<z.ZodOptional<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodBoolean>>>;
54
+ };
55
+ export type ApifyConfigurationInput = FieldsInput<typeof apifyConfigFields>;
56
+ export type ApifyResolvedConfigValues = FieldsOutput<typeof apifyConfigFields>;
57
+ /** @deprecated Use {@link ApifyConfigurationInput} instead. */
58
+ export type ConfigurationOptions = ApifyConfigurationInput;
59
+ export interface Configuration extends ApifyResolvedConfigValues {
32
60
  }
33
61
  /**
34
62
  * `Configuration` is a value object holding the SDK configuration. We can use it in two ways:
@@ -37,38 +65,34 @@ export interface ConfigurationOptions extends CoreConfigurationOptions {
37
65
  *
38
66
  * ```javascript
39
67
  * import { Actor } from 'apify';
40
- * import { BasicCrawler } from 'crawlee';
41
68
  *
42
69
  * const sdk = new Actor({ token: '123' });
43
- * console.log(sdk.config.get('token')); // '123'
44
- *
45
- * const crawler = new BasicCrawler({
46
- * // ... crawler options
47
- * }, sdk.config);
70
+ * console.log(sdk.config.token); // '123'
48
71
  * ```
49
72
  *
50
73
  * 2. To get the global configuration (singleton instance). It will respect the environment variables.
51
74
  *
52
75
  * ```javascript
53
- * import { BasicCrawler, Configuration } from 'crawlee';
76
+ * import { Configuration } from 'apify';
54
77
  *
55
- * // Get the global configuration
56
78
  * const config = Configuration.getGlobalConfig();
57
- * // Set the 'persistStateIntervalMillis' option
58
- * // of global configuration to 30 seconds
59
- * config.set('persistStateIntervalMillis', 30_000);
60
- *
61
- * // No need to pass the configuration to the crawler,
62
- * // as it's using the global configuration by default
63
- * const crawler = new BasicCrawler();
79
+ * console.log(config.headless);
80
+ * console.log(config.persistStateIntervalMillis);
64
81
  * ```
65
82
  *
83
+ * Configuration is immutable — values are set via the constructor and cannot be changed afterwards.
84
+ * The priority order for resolving values is (highest to lowest):
85
+ *
86
+ * ```text
87
+ * constructor options > environment variables > crawlee.json > schema defaults
88
+ * ```
89
+ *
66
90
  * ## Supported Configuration Options
67
91
  *
68
92
  * Key | Environment Variable | Default Value
69
93
  * ---|---|---
70
94
  * `memoryMbytes` | `ACTOR_MEMORY_MBYTES` | -
71
- * `headless` | `APIFY_HEADLESS` | -
95
+ * `headless` | `APIFY_HEADLESS` | `true`
72
96
  * `persistStateIntervalMillis` | `APIFY_PERSIST_STATE_INTERVAL_MILLIS` | `60e3`
73
97
  * `token` | `APIFY_TOKEN` | -
74
98
  * `isAtHome` | `APIFY_IS_AT_HOME` | -
@@ -102,95 +126,18 @@ export interface ConfigurationOptions extends CoreConfigurationOptions {
102
126
  * `defaultBrowserPath` | `APIFY_DEFAULT_BROWSER_PATH` | -
103
127
  */
104
128
  export declare class Configuration extends CoreConfiguration {
105
- /** @inheritDoc */
129
+ /** @internal */
106
130
  static globalConfig?: Configuration;
107
- protected static ENV_MAP: {
108
- APIFY_AVAILABLE_MEMORY_RATIO: string;
109
- APIFY_PURGE_ON_START: string;
110
- APIFY_MEMORY_MBYTES: string;
111
- APIFY_DEFAULT_DATASET_ID: string;
112
- APIFY_DEFAULT_KEY_VALUE_STORE_ID: string;
113
- APIFY_DEFAULT_REQUEST_QUEUE_ID: string;
114
- APIFY_INPUT_KEY: string;
115
- APIFY_PERSIST_STATE_INTERVAL_MILLIS: string;
116
- APIFY_HEADLESS: string;
117
- APIFY_XVFB: string;
118
- APIFY_CHROME_EXECUTABLE_PATH: string;
119
- APIFY_DEFAULT_BROWSER_PATH: string;
120
- APIFY_DISABLE_BROWSER_SANDBOX: string;
121
- APIFY_TOKEN: string;
122
- APIFY_METAMORPH_AFTER_SLEEP_MILLIS: string;
123
- APIFY_TEST_PERSIST_INTERVAL_MILLIS: string;
124
- APIFY_ACTOR_EVENTS_WS_URL: string;
125
- APIFY_ACTOR_ID: string;
126
- APIFY_API_BASE_URL: string;
127
- APIFY_API_PUBLIC_BASE_URL: string;
128
- APIFY_IS_AT_HOME: string;
129
- APIFY_ACTOR_RUN_ID: string;
130
- APIFY_ACTOR_TASK_ID: string;
131
- APIFY_CONTAINER_PORT: string;
132
- APIFY_CONTAINER_URL: string;
133
- APIFY_USER_ID: string;
134
- APIFY_PROXY_HOSTNAME: string;
135
- APIFY_PROXY_PASSWORD: string;
136
- APIFY_PROXY_STATUS_URL: string;
137
- APIFY_PROXY_PORT: string;
138
- APIFY_INPUT_SECRETS_PRIVATE_KEY_FILE: string;
139
- APIFY_INPUT_SECRETS_PRIVATE_KEY_PASSPHRASE: string;
140
- APIFY_META_ORIGIN: string;
141
- ACTOR_DEFAULT_DATASET_ID: string;
142
- ACTOR_DEFAULT_KEY_VALUE_STORE_ID: string;
143
- ACTOR_DEFAULT_REQUEST_QUEUE_ID: string;
144
- ACTOR_EVENTS_WEBSOCKET_URL: string;
145
- ACTOR_ID: string;
146
- ACTOR_INPUT_KEY: string;
147
- ACTOR_MEMORY_MBYTES: string;
148
- ACTOR_RUN_ID: string;
149
- ACTOR_STANDBY_PORT: string;
150
- ACTOR_STANDBY_URL: string;
151
- ACTOR_TASK_ID: string;
152
- ACTOR_WEB_SERVER_PORT: string;
153
- ACTOR_WEB_SERVER_URL: string;
154
- ACTOR_MAX_TOTAL_CHARGE_USD: string;
155
- ACTOR_TEST_PAY_PER_EVENT: string;
156
- ACTOR_USE_CHARGING_LOG_DATASET: string;
157
- };
158
- protected static INTEGER_VARS: string[];
159
- protected static BOOLEAN_VARS: string[];
160
- protected static DEFAULTS: {
161
- defaultKeyValueStoreId: string;
162
- defaultDatasetId: string;
163
- defaultRequestQueueId: string;
164
- inputKey: string;
165
- apiBaseUrl: string;
166
- apiPublicBaseUrl: string;
167
- proxyStatusUrl: string;
168
- proxyHostname: string;
169
- proxyPort: number;
170
- containerPort: number;
171
- containerUrl: string;
172
- standbyPort: number;
173
- metamorphAfterSleepMillis: number;
174
- persistStateIntervalMillis: number;
175
- testPayPerEvent: boolean;
176
- useChargingLogDataset: boolean;
177
- };
178
- /**
179
- * @inheritDoc
180
- */
181
- get<T extends keyof ConfigurationOptions, U extends ConfigurationOptions[T]>(key: T, defaultValue?: U): U;
182
- /**
183
- * @inheritDoc
184
- */
185
- set(key: keyof ConfigurationOptions, value?: any): void;
131
+ protected static fields: Record<string, ConfigField>;
132
+ constructor(options?: ApifyConfigurationInput);
186
133
  /**
187
134
  * @inheritDoc
135
+ *
136
+ * Returns the SDK's global {@link Configuration} singleton (an
137
+ * Apify-typed default that parses `APIFY_*` env vars). During an Actor run
138
+ * the active configuration is held by crawlee's `serviceLocator`, which is
139
+ * what crawlee internals resolve against; this singleton is only the
140
+ * fallback for code reaching for a configuration without an explicit one.
188
141
  */
189
142
  static getGlobalConfig(): Configuration;
190
- /**
191
- * Resets global configuration instance. The default instance holds configuration based on env vars,
192
- * if we want to change them, we need to first reset the global state. Used mainly for testing purposes.
193
- */
194
- static resetGlobalState(): void;
195
143
  }
196
- //# sourceMappingURL=configuration.d.ts.map
@@ -1,5 +1,91 @@
1
- import { Configuration as CoreConfiguration } from '@crawlee/core';
2
- import { ACTOR_ENV_VARS, APIFY_ENV_VARS, LOCAL_ACTOR_ENV_VARS, LOCAL_APIFY_ENV_VARS, } from '@apify/consts';
1
+ /* eslint-disable no-use-before-define */
2
+ import { coerceBoolean, coerceNumber, Configuration as CoreConfiguration, crawleeConfigFields, field, } from '@crawlee/core';
3
+ import { z } from 'zod';
4
+ import { ACTOR_ENV_VARS, APIFY_ENV_VARS, LOCAL_ACTOR_ENV_VARS, LOCAL_APIFY_ENV_VARS } from '@apify/consts';
5
+ // Evaluated lazily (per Configuration construction, not at import) so the
6
+ // at-home defaults for `availableMemoryRatio` / `disableBrowserSandbox` below
7
+ // react to `APIFY_IS_AT_HOME` even if it changes after import (tests, embedding).
8
+ // Reads the env var, not the resolved `isAtHome` field, so an explicit
9
+ // `new Configuration({ isAtHome: true })` won't flip these — env is the source.
10
+ const isAtHome = () => !!process.env[APIFY_ENV_VARS.IS_AT_HOME];
11
+ function withApifyEnv(base, apifyEnvVars, schema) {
12
+ const crawleeVars = base.envVar == null ? [] : [base.envVar].flat();
13
+ return field((schema ?? base.schema), [...[apifyEnvVars].flat(), ...crawleeVars]);
14
+ }
15
+ // --- Apify config field definitions ---
16
+ export const apifyConfigFields = {
17
+ // Inherit all crawlee fields as-is.
18
+ ...crawleeConfigFields,
19
+ // Crawlee fields the SDK extends with ACTOR_/APIFY_ env-var aliases (which
20
+ // take precedence; crawlee's own CRAWLEE_* var is reused as the fallback,
21
+ // never re-typed). A schema is passed only where the SDK needs a different
22
+ // default than crawlee's.
23
+ defaultDatasetId: withApifyEnv(crawleeConfigFields.defaultDatasetId, [ACTOR_ENV_VARS.DEFAULT_DATASET_ID, APIFY_ENV_VARS.DEFAULT_DATASET_ID], z.string().default(LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.DEFAULT_DATASET_ID])),
24
+ defaultKeyValueStoreId: withApifyEnv(crawleeConfigFields.defaultKeyValueStoreId, [ACTOR_ENV_VARS.DEFAULT_KEY_VALUE_STORE_ID, APIFY_ENV_VARS.DEFAULT_KEY_VALUE_STORE_ID], z.string().default(LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.DEFAULT_KEY_VALUE_STORE_ID])),
25
+ defaultRequestQueueId: withApifyEnv(crawleeConfigFields.defaultRequestQueueId, [ACTOR_ENV_VARS.DEFAULT_REQUEST_QUEUE_ID, APIFY_ENV_VARS.DEFAULT_REQUEST_QUEUE_ID], z.string().default(LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.DEFAULT_REQUEST_QUEUE_ID])),
26
+ inputKey: withApifyEnv(crawleeConfigFields.inputKey, [ACTOR_ENV_VARS.INPUT_KEY, APIFY_ENV_VARS.INPUT_KEY]),
27
+ memoryMbytes: withApifyEnv(crawleeConfigFields.memoryMbytes, [
28
+ ACTOR_ENV_VARS.MEMORY_MBYTES,
29
+ APIFY_ENV_VARS.MEMORY_MBYTES,
30
+ ]),
31
+ availableMemoryRatio: withApifyEnv(crawleeConfigFields.availableMemoryRatio, 'APIFY_AVAILABLE_MEMORY_RATIO', coerceNumber.default(() => (isAtHome() ? 1 : 0.25))),
32
+ disableBrowserSandbox: withApifyEnv(crawleeConfigFields.disableBrowserSandbox, 'APIFY_DISABLE_BROWSER_SANDBOX', coerceBoolean.optional().default(() => (isAtHome() ? true : undefined))),
33
+ persistStateIntervalMillis: withApifyEnv(crawleeConfigFields.persistStateIntervalMillis, [
34
+ APIFY_ENV_VARS.PERSIST_STATE_INTERVAL_MILLIS,
35
+ 'APIFY_TEST_PERSIST_INTERVAL_MILLIS',
36
+ ]),
37
+ headless: withApifyEnv(crawleeConfigFields.headless, APIFY_ENV_VARS.HEADLESS),
38
+ xvfb: withApifyEnv(crawleeConfigFields.xvfb, APIFY_ENV_VARS.XVFB),
39
+ chromeExecutablePath: withApifyEnv(crawleeConfigFields.chromeExecutablePath, APIFY_ENV_VARS.CHROME_EXECUTABLE_PATH),
40
+ defaultBrowserPath: withApifyEnv(crawleeConfigFields.defaultBrowserPath, 'APIFY_DEFAULT_BROWSER_PATH'),
41
+ purgeOnStart: withApifyEnv(crawleeConfigFields.purgeOnStart, APIFY_ENV_VARS.PURGE_ON_START),
42
+ // Apify-specific fields
43
+ metamorphAfterSleepMillis: field(coerceNumber.default(300_000), APIFY_ENV_VARS.METAMORPH_AFTER_SLEEP_MILLIS),
44
+ actorEventsWsUrl: field(z.string().optional(), [
45
+ ACTOR_ENV_VARS.EVENTS_WEBSOCKET_URL,
46
+ APIFY_ENV_VARS.ACTOR_EVENTS_WS_URL,
47
+ ]),
48
+ token: field(z.string().optional(), APIFY_ENV_VARS.TOKEN),
49
+ actorId: field(z.string().optional(), [ACTOR_ENV_VARS.ID, APIFY_ENV_VARS.ACTOR_ID]),
50
+ actorRunId: field(z.string().optional(), [ACTOR_ENV_VARS.RUN_ID, APIFY_ENV_VARS.ACTOR_RUN_ID]),
51
+ actorTaskId: field(z.string().optional(), [ACTOR_ENV_VARS.TASK_ID, APIFY_ENV_VARS.ACTOR_TASK_ID]),
52
+ apiBaseUrl: field(z.string().default('https://api.apify.com'), APIFY_ENV_VARS.API_BASE_URL),
53
+ apiPublicBaseUrl: field(z.string().default('https://api.apify.com'), APIFY_ENV_VARS.API_PUBLIC_BASE_URL),
54
+ containerPort: field(coerceNumber.default(+LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.WEB_SERVER_PORT]), [
55
+ ACTOR_ENV_VARS.WEB_SERVER_PORT,
56
+ APIFY_ENV_VARS.CONTAINER_PORT,
57
+ ]),
58
+ containerUrl: field(z.string().default(LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.WEB_SERVER_URL]), [
59
+ ACTOR_ENV_VARS.WEB_SERVER_URL,
60
+ APIFY_ENV_VARS.CONTAINER_URL,
61
+ ]),
62
+ proxyHostname: field(z.string().default(LOCAL_APIFY_ENV_VARS[APIFY_ENV_VARS.PROXY_HOSTNAME]), APIFY_ENV_VARS.PROXY_HOSTNAME),
63
+ proxyPassword: field(z.string().optional(), APIFY_ENV_VARS.PROXY_PASSWORD),
64
+ proxyPort: field(coerceNumber.default(+LOCAL_APIFY_ENV_VARS[APIFY_ENV_VARS.PROXY_PORT]), APIFY_ENV_VARS.PROXY_PORT),
65
+ proxyStatusUrl: field(z.string().default('http://proxy.apify.com'), APIFY_ENV_VARS.PROXY_STATUS_URL),
66
+ /** @deprecated use `containerPort` instead */
67
+ standbyPort: field(coerceNumber.default(+LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.STANDBY_PORT]), ACTOR_ENV_VARS.STANDBY_PORT),
68
+ standbyUrl: field(z.string().optional(), ACTOR_ENV_VARS.STANDBY_URL),
69
+ isAtHome: field(coerceBoolean.default(false), APIFY_ENV_VARS.IS_AT_HOME),
70
+ userId: field(z.string().optional(), APIFY_ENV_VARS.USER_ID),
71
+ userIsPaying: field(z.string().optional(), APIFY_ENV_VARS.USER_IS_PAYING),
72
+ actorPermissionLevel: field(z.string().optional(), ACTOR_ENV_VARS.PERMISSION_LEVEL),
73
+ inputSecretsPrivateKeyPassphrase: field(z.string().optional(), APIFY_ENV_VARS.INPUT_SECRETS_PRIVATE_KEY_PASSPHRASE),
74
+ inputSecretsPrivateKeyFile: field(z.string().optional(), APIFY_ENV_VARS.INPUT_SECRETS_PRIVATE_KEY_FILE),
75
+ // `0` is treated as "no limit" (mirrors the Apify platform contract).
76
+ maxTotalChargeUsd: field(coerceNumber.transform((val) => (val === 0 ? Infinity : val)).default(Infinity), ACTOR_ENV_VARS.MAX_TOTAL_CHARGE_USD),
77
+ metaOrigin: field(z.string().optional(), APIFY_ENV_VARS.META_ORIGIN),
78
+ testPayPerEvent: field(coerceBoolean.default(false), 'ACTOR_TEST_PAY_PER_EVENT'),
79
+ useChargingLogDataset: field(coerceBoolean.default(false), 'ACTOR_USE_CHARGING_LOG_DATASET'),
80
+ // Pay-per-event charging metadata injected by the platform (JSON strings).
81
+ actorPricingInfo: field(z.string().optional(), 'APIFY_ACTOR_PRICING_INFO'),
82
+ chargedEventCounts: field(z.string().optional(), 'APIFY_CHARGED_ACTOR_EVENT_COUNTS'),
83
+ actorStoragesJson: field(z.string().optional(), 'ACTOR_STORAGES_JSON'),
84
+ // Grab-bag of ApifyClient constructor options; the `storageDir` key is
85
+ // pulled out separately for local storage emulation, the rest is spread
86
+ // into `new ApifyClient({...})` in `Actor.newClient()`. No env var alias.
87
+ storageClientOptions: field(z.record(z.string(), z.unknown()).optional()),
88
+ };
3
89
  /**
4
90
  * `Configuration` is a value object holding the SDK configuration. We can use it in two ways:
5
91
  *
@@ -7,38 +93,34 @@ import { ACTOR_ENV_VARS, APIFY_ENV_VARS, LOCAL_ACTOR_ENV_VARS, LOCAL_APIFY_ENV_V
7
93
  *
8
94
  * ```javascript
9
95
  * import { Actor } from 'apify';
10
- * import { BasicCrawler } from 'crawlee';
11
96
  *
12
97
  * const sdk = new Actor({ token: '123' });
13
- * console.log(sdk.config.get('token')); // '123'
14
- *
15
- * const crawler = new BasicCrawler({
16
- * // ... crawler options
17
- * }, sdk.config);
98
+ * console.log(sdk.config.token); // '123'
18
99
  * ```
19
100
  *
20
101
  * 2. To get the global configuration (singleton instance). It will respect the environment variables.
21
102
  *
22
103
  * ```javascript
23
- * import { BasicCrawler, Configuration } from 'crawlee';
104
+ * import { Configuration } from 'apify';
24
105
  *
25
- * // Get the global configuration
26
106
  * const config = Configuration.getGlobalConfig();
27
- * // Set the 'persistStateIntervalMillis' option
28
- * // of global configuration to 30 seconds
29
- * config.set('persistStateIntervalMillis', 30_000);
30
- *
31
- * // No need to pass the configuration to the crawler,
32
- * // as it's using the global configuration by default
33
- * const crawler = new BasicCrawler();
107
+ * console.log(config.headless);
108
+ * console.log(config.persistStateIntervalMillis);
34
109
  * ```
35
110
  *
111
+ * Configuration is immutable — values are set via the constructor and cannot be changed afterwards.
112
+ * The priority order for resolving values is (highest to lowest):
113
+ *
114
+ * ```text
115
+ * constructor options > environment variables > crawlee.json > schema defaults
116
+ * ```
117
+ *
36
118
  * ## Supported Configuration Options
37
119
  *
38
120
  * Key | Environment Variable | Default Value
39
121
  * ---|---|---
40
122
  * `memoryMbytes` | `ACTOR_MEMORY_MBYTES` | -
41
- * `headless` | `APIFY_HEADLESS` | -
123
+ * `headless` | `APIFY_HEADLESS` | `true`
42
124
  * `persistStateIntervalMillis` | `APIFY_PERSIST_STATE_INTERVAL_MILLIS` | `60e3`
43
125
  * `token` | `APIFY_TOKEN` | -
44
126
  * `isAtHome` | `APIFY_IS_AT_HOME` | -
@@ -71,137 +153,28 @@ import { ACTOR_ENV_VARS, APIFY_ENV_VARS, LOCAL_ACTOR_ENV_VARS, LOCAL_APIFY_ENV_V
71
153
  * `chromeExecutablePath` | `APIFY_CHROME_EXECUTABLE_PATH` | -
72
154
  * `defaultBrowserPath` | `APIFY_DEFAULT_BROWSER_PATH` | -
73
155
  */
156
+ // eslint-disable-next-line @typescript-eslint/no-unsafe-declaration-merging
74
157
  export class Configuration extends CoreConfiguration {
75
- /** @inheritDoc */
76
- // eslint-disable-next-line no-use-before-define -- Self-reference
158
+ /** @internal */
77
159
  static globalConfig;
78
- // maps environment variables to config keys (e.g. `APIFY_MEMORY_MBYTES` to `memoryMbytes`)
79
- static ENV_MAP = {
80
- // regular crawlee env vars are also supported
81
- ...CoreConfiguration.ENV_MAP,
82
- // support crawlee env vars prefixed with `APIFY_` too
83
- APIFY_AVAILABLE_MEMORY_RATIO: 'availableMemoryRatio',
84
- APIFY_PURGE_ON_START: 'purgeOnStart',
85
- APIFY_MEMORY_MBYTES: 'memoryMbytes',
86
- APIFY_DEFAULT_DATASET_ID: 'defaultDatasetId',
87
- APIFY_DEFAULT_KEY_VALUE_STORE_ID: 'defaultKeyValueStoreId',
88
- APIFY_DEFAULT_REQUEST_QUEUE_ID: 'defaultRequestQueueId',
89
- APIFY_INPUT_KEY: 'inputKey',
90
- APIFY_PERSIST_STATE_INTERVAL_MILLIS: 'persistStateIntervalMillis',
91
- APIFY_HEADLESS: 'headless',
92
- APIFY_XVFB: 'xvfb',
93
- APIFY_CHROME_EXECUTABLE_PATH: 'chromeExecutablePath',
94
- APIFY_DEFAULT_BROWSER_PATH: 'defaultBrowserPath',
95
- APIFY_DISABLE_BROWSER_SANDBOX: 'disableBrowserSandbox',
96
- // as well as apify specific ones
97
- APIFY_TOKEN: 'token',
98
- APIFY_METAMORPH_AFTER_SLEEP_MILLIS: 'metamorphAfterSleepMillis',
99
- APIFY_TEST_PERSIST_INTERVAL_MILLIS: 'persistStateIntervalMillis', // for BC, seems to be unused
100
- APIFY_ACTOR_EVENTS_WS_URL: 'actorEventsWsUrl',
101
- APIFY_ACTOR_ID: 'actorId',
102
- APIFY_API_BASE_URL: 'apiBaseUrl',
103
- APIFY_API_PUBLIC_BASE_URL: 'apiPublicBaseUrl',
104
- APIFY_IS_AT_HOME: 'isAtHome',
105
- APIFY_ACTOR_RUN_ID: 'actorRunId',
106
- APIFY_ACTOR_TASK_ID: 'actorTaskId',
107
- APIFY_CONTAINER_PORT: 'containerPort',
108
- APIFY_CONTAINER_URL: 'containerUrl',
109
- APIFY_USER_ID: 'userId',
110
- APIFY_PROXY_HOSTNAME: 'proxyHostname',
111
- APIFY_PROXY_PASSWORD: 'proxyPassword',
112
- APIFY_PROXY_STATUS_URL: 'proxyStatusUrl',
113
- APIFY_PROXY_PORT: 'proxyPort',
114
- APIFY_INPUT_SECRETS_PRIVATE_KEY_FILE: 'inputSecretsPrivateKeyFile',
115
- APIFY_INPUT_SECRETS_PRIVATE_KEY_PASSPHRASE: 'inputSecretsPrivateKeyPassphrase',
116
- APIFY_META_ORIGIN: 'metaOrigin',
117
- // Actor env vars
118
- ACTOR_DEFAULT_DATASET_ID: 'defaultDatasetId',
119
- ACTOR_DEFAULT_KEY_VALUE_STORE_ID: 'defaultKeyValueStoreId',
120
- ACTOR_DEFAULT_REQUEST_QUEUE_ID: 'defaultRequestQueueId',
121
- ACTOR_EVENTS_WEBSOCKET_URL: 'actorEventsWsUrl',
122
- ACTOR_ID: 'actorId',
123
- ACTOR_INPUT_KEY: 'inputKey',
124
- ACTOR_MEMORY_MBYTES: 'memoryMbytes',
125
- ACTOR_RUN_ID: 'actorRunId',
126
- ACTOR_STANDBY_PORT: 'standbyPort',
127
- ACTOR_STANDBY_URL: 'standbyUrl',
128
- ACTOR_TASK_ID: 'actorTaskId',
129
- ACTOR_WEB_SERVER_PORT: 'containerPort',
130
- ACTOR_WEB_SERVER_URL: 'containerUrl',
131
- ACTOR_MAX_TOTAL_CHARGE_USD: 'maxTotalChargeUsd',
132
- ACTOR_TEST_PAY_PER_EVENT: 'testPayPerEvent',
133
- ACTOR_USE_CHARGING_LOG_DATASET: 'useChargingLogDataset',
134
- };
135
- static INTEGER_VARS = [
136
- ...CoreConfiguration.INTEGER_VARS,
137
- 'proxyPort',
138
- 'containerPort',
139
- 'metamorphAfterSleepMillis',
140
- 'maxTotalChargeUsd',
141
- ];
142
- static BOOLEAN_VARS = [
143
- ...CoreConfiguration.BOOLEAN_VARS,
144
- 'isAtHome',
145
- 'testPayPerEvent',
146
- 'useChargingLogDataset',
147
- ];
148
- static DEFAULTS = {
149
- ...CoreConfiguration.DEFAULTS,
150
- defaultKeyValueStoreId: LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.DEFAULT_KEY_VALUE_STORE_ID],
151
- defaultDatasetId: LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.DEFAULT_DATASET_ID],
152
- defaultRequestQueueId: LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.DEFAULT_REQUEST_QUEUE_ID],
153
- inputKey: 'INPUT',
154
- apiBaseUrl: 'https://api.apify.com',
155
- apiPublicBaseUrl: 'https://api.apify.com',
156
- proxyStatusUrl: 'http://proxy.apify.com',
157
- proxyHostname: LOCAL_APIFY_ENV_VARS[APIFY_ENV_VARS.PROXY_HOSTNAME],
158
- proxyPort: +LOCAL_APIFY_ENV_VARS[APIFY_ENV_VARS.PROXY_PORT],
159
- containerPort: +LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.WEB_SERVER_PORT],
160
- containerUrl: LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.WEB_SERVER_URL],
161
- standbyPort: +LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.STANDBY_PORT],
162
- metamorphAfterSleepMillis: 300e3,
163
- persistStateIntervalMillis: 60e3, // This value is mentioned in jsdoc in `events.js`, if you update it here, update it there too.
164
- testPayPerEvent: false,
165
- useChargingLogDataset: false,
166
- };
167
- /**
168
- * @inheritDoc
169
- */
170
- get(key, defaultValue) {
171
- return super.get(key, defaultValue);
172
- }
173
- /**
174
- * @inheritDoc
175
- */
176
- set(key, value) {
177
- super.set(key, value);
160
+ static fields = apifyConfigFields;
161
+ constructor(options = {}) {
162
+ // `super` types its options against crawlee's field set; ours is a
163
+ // superset (apifyConfigFields spreads crawleeConfigFields), so the
164
+ // shapes are runtime-compatible but not TS-assignable hence the cast.
165
+ super(options);
178
166
  }
179
167
  /**
180
168
  * @inheritDoc
169
+ *
170
+ * Returns the SDK's global {@link Configuration} singleton (an
171
+ * Apify-typed default that parses `APIFY_*` env vars). During an Actor run
172
+ * the active configuration is held by crawlee's `serviceLocator`, which is
173
+ * what crawlee internals resolve against; this singleton is only the
174
+ * fallback for code reaching for a configuration without an explicit one.
181
175
  */
182
176
  static getGlobalConfig() {
183
- if (Configuration.storage.getStore()) {
184
- return Configuration.storage.getStore();
185
- }
186
177
  Configuration.globalConfig ??= new Configuration();
187
178
  return Configuration.globalConfig;
188
179
  }
189
- /**
190
- * Resets global configuration instance. The default instance holds configuration based on env vars,
191
- * if we want to change them, we need to first reset the global state. Used mainly for testing purposes.
192
- */
193
- static resetGlobalState() {
194
- delete this.globalConfig;
195
- }
196
180
  }
197
- // monkey patch the core class so it respects the new options too
198
- CoreConfiguration.getGlobalConfig = Configuration.getGlobalConfig;
199
- // @ts-expect-error protected property
200
- CoreConfiguration.ENV_MAP = Configuration.ENV_MAP;
201
- // @ts-expect-error protected property
202
- CoreConfiguration.INTEGER_VARS = Configuration.INTEGER_VARS;
203
- // @ts-expect-error protected property
204
- CoreConfiguration.BOOLEAN_VARS = Configuration.BOOLEAN_VARS;
205
- // @ts-expect-error protected property
206
- CoreConfiguration.DEFAULTS = Configuration.DEFAULTS;
207
- //# sourceMappingURL=configuration.js.map
package/dist/index.d.ts CHANGED
@@ -1,9 +1,9 @@
1
1
  export * from './actor.js';
2
- export * from './charging.js';
2
+ export type { OpenStorageOptions, StorageAlias, StorageId, StorageName, StorageIdentifier, StorageIdentifierWithoutAlias, } from './storage.js';
3
+ export { ChargeOptions, ChargeResult, ActorPricingInfo, ChargingManager } from './charging.js';
3
4
  export * from './configuration.js';
4
5
  export * from './proxy_configuration.js';
5
6
  export * from './platform_event_manager.js';
6
7
  export * from './key_value_store.js';
7
8
  export { Dataset, DatasetDataOptions, DatasetIteratorOptions, DatasetConsumer, DatasetMapper, DatasetReducer, DatasetOptions, DatasetContent, RequestQueue, QueueOperationInfo, RequestQueueOperationOptions, RequestQueueOptions, KeyConsumer, KeyValueStoreOptions, RecordOptions, KeyValueStoreIteratorOptions, log, Log, LoggerOptions, LogLevel, Logger, LoggerJson, LoggerText, } from '@crawlee/core';
8
9
  export { ApifyClient, ApifyClientOptions } from 'apify-client';
9
- //# sourceMappingURL=index.d.ts.map
package/dist/index.js CHANGED
@@ -1,9 +1,8 @@
1
1
  export * from './actor.js';
2
- export * from './charging.js';
2
+ export { ChargingManager } from './charging.js';
3
3
  export * from './configuration.js';
4
4
  export * from './proxy_configuration.js';
5
5
  export * from './platform_event_manager.js';
6
6
  export * from './key_value_store.js';
7
7
  export { Dataset, RequestQueue, log, Log, LogLevel, Logger, LoggerJson, LoggerText, } from '@crawlee/core';
8
8
  export { ApifyClient } from 'apify-client';
9
- //# sourceMappingURL=index.js.map
@@ -0,0 +1,7 @@
1
+ import type { Dictionary } from '@crawlee/utils';
2
+ /**
3
+ * @ignore
4
+ */
5
+ export declare const noActorInputSchemaDefinedMarker: unique symbol;
6
+ export declare const readInputSchema: () => Dictionary | null | typeof noActorInputSchemaDefinedMarker;
7
+ export declare const getDefaultsFromInputSchema: (inputSchema: any) => Record<string, unknown>;
@@ -0,0 +1,58 @@
1
+ // TODO: https://github.com/apify/apify-shared-js/issues/547
2
+ import { existsSync, readFileSync } from 'node:fs';
3
+ import { join } from 'node:path';
4
+ import process from 'node:process';
5
+ // These paths are used *if* there is no `input` field in the actor.json configuration file!
6
+ const DEFAULT_INPUT_SCHEMA_PATHS = [
7
+ ['.actor', 'INPUT_SCHEMA.json'],
8
+ ['INPUT_SCHEMA.json'],
9
+ ['.actor', 'input_schema.json'],
10
+ ['input_schema.json'],
11
+ ];
12
+ const ACTOR_SPECIFICATION_FOLDER = '.actor';
13
+ const LOCAL_CONFIG_NAME = 'actor.json';
14
+ const readJSONIfExists = (path) => {
15
+ if (existsSync(path)) {
16
+ const content = readFileSync(path, 'utf8');
17
+ return JSON.parse(content);
18
+ }
19
+ return null;
20
+ };
21
+ /**
22
+ * @ignore
23
+ */
24
+ export const noActorInputSchemaDefinedMarker = Symbol.for('apify.noActorInputSchemaDefined');
25
+ export const readInputSchema = () => {
26
+ const localConfig = readJSONIfExists(join(process.cwd(), ACTOR_SPECIFICATION_FOLDER, LOCAL_CONFIG_NAME));
27
+ // Input schema nested in the actor config
28
+ if (typeof localConfig?.input === 'object') {
29
+ return localConfig.input;
30
+ }
31
+ // Input schema path from the actor config
32
+ if (typeof localConfig?.input === 'string') {
33
+ const fullPath = join(process.cwd(), ACTOR_SPECIFICATION_FOLDER, localConfig.input);
34
+ return readJSONIfExists(fullPath);
35
+ }
36
+ // Try to find it from possible default paths
37
+ for (const path of DEFAULT_INPUT_SCHEMA_PATHS) {
38
+ const fullPath = join(process.cwd(), ...path);
39
+ const result = readJSONIfExists(fullPath);
40
+ if (result) {
41
+ return result;
42
+ }
43
+ }
44
+ // If we are in an Actor context, BUT we do not have an input schema defined, we want to skip the warning
45
+ if (!localConfig?.input) {
46
+ return noActorInputSchemaDefinedMarker;
47
+ }
48
+ return null;
49
+ };
50
+ export const getDefaultsFromInputSchema = (inputSchema) => {
51
+ const defaults = {};
52
+ for (const [key, fieldSchema] of Object.entries(inputSchema.properties)) {
53
+ if (fieldSchema.default !== undefined) {
54
+ defaults[key] = fieldSchema.default;
55
+ }
56
+ }
57
+ return defaults;
58
+ };
@@ -1,4 +1,4 @@
1
- import type { StorageManagerOptions } from '@crawlee/core';
1
+ import type { StorageOpenOptions } from '@crawlee/core';
2
2
  import { KeyValueStore as CoreKeyValueStore } from '@crawlee/core';
3
3
  /**
4
4
  * @inheritDoc
@@ -7,11 +7,15 @@ export declare class KeyValueStore extends CoreKeyValueStore {
7
7
  /**
8
8
  * Returns a URL for the given key that may be used to publicly
9
9
  * access the value in the remote key-value store.
10
+ *
11
+ * On the Apify platform the URL is signed with the store's
12
+ * `urlSigningSecretKey` so that anyone with the URL can read the record
13
+ * without authentication. Locally we delegate to crawlee's default
14
+ * implementation (which produces a `file://` URL or returns `undefined`).
10
15
  */
11
- getPublicUrl(key: string): string;
16
+ getPublicUrl(key: string): Promise<string | undefined>;
12
17
  /**
13
18
  * @inheritDoc
14
19
  */
15
- static open(storeIdOrName?: string | null, options?: StorageManagerOptions): Promise<KeyValueStore>;
20
+ static open(storeIdOrName?: string | null, options?: StorageOpenOptions): Promise<KeyValueStore>;
16
21
  }
17
- //# sourceMappingURL=key_value_store.d.ts.map