apify 4.0.0-beta.12 → 4.0.0-beta.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -48
- package/dist/actor.d.ts +157 -61
- package/dist/actor.js +278 -91
- package/dist/apify_storage_client.d.ts +54 -0
- package/dist/apify_storage_client.js +152 -0
- package/dist/charging.d.ts +43 -2
- package/dist/charging.js +196 -54
- package/dist/configuration.d.ts +79 -132
- package/dist/configuration.js +114 -141
- package/dist/index.d.ts +2 -2
- package/dist/index.js +1 -2
- package/dist/input-schemas.d.ts +7 -0
- package/dist/input-schemas.js +58 -0
- package/dist/key_value_store.d.ts +8 -4
- package/dist/key_value_store.js +19 -11
- package/dist/platform_event_manager.d.ts +0 -1
- package/dist/platform_event_manager.js +5 -5
- package/dist/proxy_configuration.d.ts +41 -44
- package/dist/proxy_configuration.js +65 -103
- package/dist/storage.d.ts +58 -0
- package/dist/storage.js +79 -0
- package/dist/utils.d.ts +0 -1
- package/dist/utils.js +2 -4
- package/package.json +123 -73
- package/.turbo/turbo-build.log +0 -26
- package/.turbo/turbo-copy.log +0 -4
- package/dist/LICENSE.md +0 -201
- package/dist/README.md +0 -98
- package/dist/actor.d.ts.map +0 -1
- package/dist/actor.js.map +0 -1
- package/dist/charging.d.ts.map +0 -1
- package/dist/charging.js.map +0 -1
- package/dist/configuration.d.ts.map +0 -1
- package/dist/configuration.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/key_value_store.d.ts.map +0 -1
- package/dist/key_value_store.js.map +0 -1
- package/dist/package.json +0 -75
- package/dist/platform_event_manager.d.ts.map +0 -1
- package/dist/platform_event_manager.js.map +0 -1
- package/dist/proxy_configuration.d.ts.map +0 -1
- package/dist/proxy_configuration.js.map +0 -1
- package/dist/utils.d.ts.map +0 -1
- package/dist/utils.js.map +0 -1
package/dist/configuration.d.ts
CHANGED
|
@@ -1,34 +1,62 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { ConfigField, FieldsInput, FieldsOutput } from '@crawlee/core';
|
|
2
2
|
import { Configuration as CoreConfiguration } from '@crawlee/core';
|
|
3
|
-
import
|
|
4
|
-
export
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
3
|
+
import { z } from 'zod';
|
|
4
|
+
export declare const apifyConfigFields: {
|
|
5
|
+
defaultDatasetId: ConfigField<z.ZodDefault<z.ZodString>>;
|
|
6
|
+
defaultKeyValueStoreId: ConfigField<z.ZodDefault<z.ZodString>>;
|
|
7
|
+
defaultRequestQueueId: ConfigField<z.ZodDefault<z.ZodString>>;
|
|
8
|
+
inputKey: ConfigField<z.ZodDefault<z.ZodString>>;
|
|
9
|
+
memoryMbytes: ConfigField<z.ZodOptional<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodNumber>>>;
|
|
10
|
+
availableMemoryRatio: ConfigField<any>;
|
|
11
|
+
disableBrowserSandbox: ConfigField<any>;
|
|
12
|
+
persistStateIntervalMillis: ConfigField<z.ZodDefault<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodNumber>>>;
|
|
13
|
+
headless: ConfigField<z.ZodDefault<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodBoolean>>>;
|
|
14
|
+
xvfb: ConfigField<z.ZodDefault<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodBoolean>>>;
|
|
15
|
+
chromeExecutablePath: ConfigField<z.ZodOptional<z.ZodString>>;
|
|
16
|
+
defaultBrowserPath: ConfigField<z.ZodOptional<z.ZodString>>;
|
|
17
|
+
purgeOnStart: ConfigField<z.ZodDefault<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodBoolean>>>;
|
|
18
|
+
metamorphAfterSleepMillis: ConfigField<any>;
|
|
19
|
+
actorEventsWsUrl: ConfigField<z.ZodOptional<z.ZodString>>;
|
|
20
|
+
token: ConfigField<z.ZodOptional<z.ZodString>>;
|
|
21
|
+
actorId: ConfigField<z.ZodOptional<z.ZodString>>;
|
|
22
|
+
actorRunId: ConfigField<z.ZodOptional<z.ZodString>>;
|
|
23
|
+
actorTaskId: ConfigField<z.ZodOptional<z.ZodString>>;
|
|
24
|
+
apiBaseUrl: ConfigField<z.ZodDefault<z.ZodString>>;
|
|
25
|
+
apiPublicBaseUrl: ConfigField<z.ZodDefault<z.ZodString>>;
|
|
26
|
+
containerPort: ConfigField<any>;
|
|
27
|
+
containerUrl: ConfigField<z.ZodDefault<z.ZodString>>;
|
|
28
|
+
proxyHostname: ConfigField<z.ZodDefault<z.ZodString>>;
|
|
29
|
+
proxyPassword: ConfigField<z.ZodOptional<z.ZodString>>;
|
|
30
|
+
proxyPort: ConfigField<any>;
|
|
31
|
+
proxyStatusUrl: ConfigField<z.ZodDefault<z.ZodString>>;
|
|
32
|
+
/** @deprecated use `containerPort` instead */
|
|
33
|
+
standbyPort: ConfigField<any>;
|
|
34
|
+
standbyUrl: ConfigField<z.ZodOptional<z.ZodString>>;
|
|
35
|
+
isAtHome: ConfigField<any>;
|
|
36
|
+
userId: ConfigField<z.ZodOptional<z.ZodString>>;
|
|
37
|
+
userIsPaying: ConfigField<z.ZodOptional<z.ZodString>>;
|
|
38
|
+
actorPermissionLevel: ConfigField<z.ZodOptional<z.ZodString>>;
|
|
39
|
+
inputSecretsPrivateKeyPassphrase: ConfigField<z.ZodOptional<z.ZodString>>;
|
|
40
|
+
inputSecretsPrivateKeyFile: ConfigField<z.ZodOptional<z.ZodString>>;
|
|
41
|
+
maxTotalChargeUsd: ConfigField<any>;
|
|
42
|
+
metaOrigin: ConfigField<z.ZodOptional<z.ZodString>>;
|
|
43
|
+
testPayPerEvent: ConfigField<any>;
|
|
44
|
+
useChargingLogDataset: ConfigField<any>;
|
|
45
|
+
actorPricingInfo: ConfigField<z.ZodOptional<z.ZodString>>;
|
|
46
|
+
chargedEventCounts: ConfigField<z.ZodOptional<z.ZodString>>;
|
|
47
|
+
actorStoragesJson: ConfigField<z.ZodOptional<z.ZodString>>;
|
|
48
|
+
storageClientOptions: ConfigField<z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
|
|
49
|
+
maxUsedCpuRatio: ConfigField<z.ZodDefault<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodNumber>>>;
|
|
50
|
+
systemInfoIntervalMillis: ConfigField<z.ZodDefault<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodNumber>>>;
|
|
51
|
+
logLevel: ConfigField<z.ZodOptional<z.ZodPipe<z.ZodTransform<{} | null | undefined, unknown>, z.ZodEnum<typeof import("@apify/log").LogLevel>>>>;
|
|
52
|
+
persistStorage: ConfigField<z.ZodDefault<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodBoolean>>>;
|
|
53
|
+
containerized: ConfigField<z.ZodOptional<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodBoolean>>>;
|
|
54
|
+
};
|
|
55
|
+
export type ApifyConfigurationInput = FieldsInput<typeof apifyConfigFields>;
|
|
56
|
+
export type ApifyResolvedConfigValues = FieldsOutput<typeof apifyConfigFields>;
|
|
57
|
+
/** @deprecated Use {@link ApifyConfigurationInput} instead. */
|
|
58
|
+
export type ConfigurationOptions = ApifyConfigurationInput;
|
|
59
|
+
export interface Configuration extends ApifyResolvedConfigValues {
|
|
32
60
|
}
|
|
33
61
|
/**
|
|
34
62
|
* `Configuration` is a value object holding the SDK configuration. We can use it in two ways:
|
|
@@ -37,38 +65,34 @@ export interface ConfigurationOptions extends CoreConfigurationOptions {
|
|
|
37
65
|
*
|
|
38
66
|
* ```javascript
|
|
39
67
|
* import { Actor } from 'apify';
|
|
40
|
-
* import { BasicCrawler } from 'crawlee';
|
|
41
68
|
*
|
|
42
69
|
* const sdk = new Actor({ token: '123' });
|
|
43
|
-
* console.log(sdk.config.
|
|
44
|
-
*
|
|
45
|
-
* const crawler = new BasicCrawler({
|
|
46
|
-
* // ... crawler options
|
|
47
|
-
* }, sdk.config);
|
|
70
|
+
* console.log(sdk.config.token); // '123'
|
|
48
71
|
* ```
|
|
49
72
|
*
|
|
50
73
|
* 2. To get the global configuration (singleton instance). It will respect the environment variables.
|
|
51
74
|
*
|
|
52
75
|
* ```javascript
|
|
53
|
-
* import {
|
|
76
|
+
* import { Configuration } from 'apify';
|
|
54
77
|
*
|
|
55
|
-
* // Get the global configuration
|
|
56
78
|
* const config = Configuration.getGlobalConfig();
|
|
57
|
-
*
|
|
58
|
-
*
|
|
59
|
-
* config.set('persistStateIntervalMillis', 30_000);
|
|
60
|
-
*
|
|
61
|
-
* // No need to pass the configuration to the crawler,
|
|
62
|
-
* // as it's using the global configuration by default
|
|
63
|
-
* const crawler = new BasicCrawler();
|
|
79
|
+
* console.log(config.headless);
|
|
80
|
+
* console.log(config.persistStateIntervalMillis);
|
|
64
81
|
* ```
|
|
65
82
|
*
|
|
83
|
+
* Configuration is immutable — values are set via the constructor and cannot be changed afterwards.
|
|
84
|
+
* The priority order for resolving values is (highest to lowest):
|
|
85
|
+
*
|
|
86
|
+
* ```text
|
|
87
|
+
* constructor options > environment variables > crawlee.json > schema defaults
|
|
88
|
+
* ```
|
|
89
|
+
*
|
|
66
90
|
* ## Supported Configuration Options
|
|
67
91
|
*
|
|
68
92
|
* Key | Environment Variable | Default Value
|
|
69
93
|
* ---|---|---
|
|
70
94
|
* `memoryMbytes` | `ACTOR_MEMORY_MBYTES` | -
|
|
71
|
-
* `headless` | `APIFY_HEADLESS` |
|
|
95
|
+
* `headless` | `APIFY_HEADLESS` | `true`
|
|
72
96
|
* `persistStateIntervalMillis` | `APIFY_PERSIST_STATE_INTERVAL_MILLIS` | `60e3`
|
|
73
97
|
* `token` | `APIFY_TOKEN` | -
|
|
74
98
|
* `isAtHome` | `APIFY_IS_AT_HOME` | -
|
|
@@ -102,95 +126,18 @@ export interface ConfigurationOptions extends CoreConfigurationOptions {
|
|
|
102
126
|
* `defaultBrowserPath` | `APIFY_DEFAULT_BROWSER_PATH` | -
|
|
103
127
|
*/
|
|
104
128
|
export declare class Configuration extends CoreConfiguration {
|
|
105
|
-
/** @
|
|
129
|
+
/** @internal */
|
|
106
130
|
static globalConfig?: Configuration;
|
|
107
|
-
protected static
|
|
108
|
-
|
|
109
|
-
APIFY_PURGE_ON_START: string;
|
|
110
|
-
APIFY_MEMORY_MBYTES: string;
|
|
111
|
-
APIFY_DEFAULT_DATASET_ID: string;
|
|
112
|
-
APIFY_DEFAULT_KEY_VALUE_STORE_ID: string;
|
|
113
|
-
APIFY_DEFAULT_REQUEST_QUEUE_ID: string;
|
|
114
|
-
APIFY_INPUT_KEY: string;
|
|
115
|
-
APIFY_PERSIST_STATE_INTERVAL_MILLIS: string;
|
|
116
|
-
APIFY_HEADLESS: string;
|
|
117
|
-
APIFY_XVFB: string;
|
|
118
|
-
APIFY_CHROME_EXECUTABLE_PATH: string;
|
|
119
|
-
APIFY_DEFAULT_BROWSER_PATH: string;
|
|
120
|
-
APIFY_DISABLE_BROWSER_SANDBOX: string;
|
|
121
|
-
APIFY_TOKEN: string;
|
|
122
|
-
APIFY_METAMORPH_AFTER_SLEEP_MILLIS: string;
|
|
123
|
-
APIFY_TEST_PERSIST_INTERVAL_MILLIS: string;
|
|
124
|
-
APIFY_ACTOR_EVENTS_WS_URL: string;
|
|
125
|
-
APIFY_ACTOR_ID: string;
|
|
126
|
-
APIFY_API_BASE_URL: string;
|
|
127
|
-
APIFY_API_PUBLIC_BASE_URL: string;
|
|
128
|
-
APIFY_IS_AT_HOME: string;
|
|
129
|
-
APIFY_ACTOR_RUN_ID: string;
|
|
130
|
-
APIFY_ACTOR_TASK_ID: string;
|
|
131
|
-
APIFY_CONTAINER_PORT: string;
|
|
132
|
-
APIFY_CONTAINER_URL: string;
|
|
133
|
-
APIFY_USER_ID: string;
|
|
134
|
-
APIFY_PROXY_HOSTNAME: string;
|
|
135
|
-
APIFY_PROXY_PASSWORD: string;
|
|
136
|
-
APIFY_PROXY_STATUS_URL: string;
|
|
137
|
-
APIFY_PROXY_PORT: string;
|
|
138
|
-
APIFY_INPUT_SECRETS_PRIVATE_KEY_FILE: string;
|
|
139
|
-
APIFY_INPUT_SECRETS_PRIVATE_KEY_PASSPHRASE: string;
|
|
140
|
-
APIFY_META_ORIGIN: string;
|
|
141
|
-
ACTOR_DEFAULT_DATASET_ID: string;
|
|
142
|
-
ACTOR_DEFAULT_KEY_VALUE_STORE_ID: string;
|
|
143
|
-
ACTOR_DEFAULT_REQUEST_QUEUE_ID: string;
|
|
144
|
-
ACTOR_EVENTS_WEBSOCKET_URL: string;
|
|
145
|
-
ACTOR_ID: string;
|
|
146
|
-
ACTOR_INPUT_KEY: string;
|
|
147
|
-
ACTOR_MEMORY_MBYTES: string;
|
|
148
|
-
ACTOR_RUN_ID: string;
|
|
149
|
-
ACTOR_STANDBY_PORT: string;
|
|
150
|
-
ACTOR_STANDBY_URL: string;
|
|
151
|
-
ACTOR_TASK_ID: string;
|
|
152
|
-
ACTOR_WEB_SERVER_PORT: string;
|
|
153
|
-
ACTOR_WEB_SERVER_URL: string;
|
|
154
|
-
ACTOR_MAX_TOTAL_CHARGE_USD: string;
|
|
155
|
-
ACTOR_TEST_PAY_PER_EVENT: string;
|
|
156
|
-
ACTOR_USE_CHARGING_LOG_DATASET: string;
|
|
157
|
-
};
|
|
158
|
-
protected static INTEGER_VARS: string[];
|
|
159
|
-
protected static BOOLEAN_VARS: string[];
|
|
160
|
-
protected static DEFAULTS: {
|
|
161
|
-
defaultKeyValueStoreId: string;
|
|
162
|
-
defaultDatasetId: string;
|
|
163
|
-
defaultRequestQueueId: string;
|
|
164
|
-
inputKey: string;
|
|
165
|
-
apiBaseUrl: string;
|
|
166
|
-
apiPublicBaseUrl: string;
|
|
167
|
-
proxyStatusUrl: string;
|
|
168
|
-
proxyHostname: string;
|
|
169
|
-
proxyPort: number;
|
|
170
|
-
containerPort: number;
|
|
171
|
-
containerUrl: string;
|
|
172
|
-
standbyPort: number;
|
|
173
|
-
metamorphAfterSleepMillis: number;
|
|
174
|
-
persistStateIntervalMillis: number;
|
|
175
|
-
testPayPerEvent: boolean;
|
|
176
|
-
useChargingLogDataset: boolean;
|
|
177
|
-
};
|
|
178
|
-
/**
|
|
179
|
-
* @inheritDoc
|
|
180
|
-
*/
|
|
181
|
-
get<T extends keyof ConfigurationOptions, U extends ConfigurationOptions[T]>(key: T, defaultValue?: U): U;
|
|
182
|
-
/**
|
|
183
|
-
* @inheritDoc
|
|
184
|
-
*/
|
|
185
|
-
set(key: keyof ConfigurationOptions, value?: any): void;
|
|
131
|
+
protected static fields: Record<string, ConfigField>;
|
|
132
|
+
constructor(options?: ApifyConfigurationInput);
|
|
186
133
|
/**
|
|
187
134
|
* @inheritDoc
|
|
135
|
+
*
|
|
136
|
+
* Returns the SDK's global {@link Configuration} singleton (an
|
|
137
|
+
* Apify-typed default that parses `APIFY_*` env vars). During an Actor run
|
|
138
|
+
* the active configuration is held by crawlee's `serviceLocator`, which is
|
|
139
|
+
* what crawlee internals resolve against; this singleton is only the
|
|
140
|
+
* fallback for code reaching for a configuration without an explicit one.
|
|
188
141
|
*/
|
|
189
142
|
static getGlobalConfig(): Configuration;
|
|
190
|
-
/**
|
|
191
|
-
* Resets global configuration instance. The default instance holds configuration based on env vars,
|
|
192
|
-
* if we want to change them, we need to first reset the global state. Used mainly for testing purposes.
|
|
193
|
-
*/
|
|
194
|
-
static resetGlobalState(): void;
|
|
195
143
|
}
|
|
196
|
-
//# sourceMappingURL=configuration.d.ts.map
|
package/dist/configuration.js
CHANGED
|
@@ -1,5 +1,91 @@
|
|
|
1
|
-
|
|
2
|
-
import {
|
|
1
|
+
/* eslint-disable no-use-before-define */
|
|
2
|
+
import { coerceBoolean, coerceNumber, Configuration as CoreConfiguration, crawleeConfigFields, field, } from '@crawlee/core';
|
|
3
|
+
import { z } from 'zod';
|
|
4
|
+
import { ACTOR_ENV_VARS, APIFY_ENV_VARS, LOCAL_ACTOR_ENV_VARS, LOCAL_APIFY_ENV_VARS } from '@apify/consts';
|
|
5
|
+
// Evaluated lazily (per Configuration construction, not at import) so the
|
|
6
|
+
// at-home defaults for `availableMemoryRatio` / `disableBrowserSandbox` below
|
|
7
|
+
// react to `APIFY_IS_AT_HOME` even if it changes after import (tests, embedding).
|
|
8
|
+
// Reads the env var, not the resolved `isAtHome` field, so an explicit
|
|
9
|
+
// `new Configuration({ isAtHome: true })` won't flip these — env is the source.
|
|
10
|
+
const isAtHome = () => !!process.env[APIFY_ENV_VARS.IS_AT_HOME];
|
|
11
|
+
function withApifyEnv(base, apifyEnvVars, schema) {
|
|
12
|
+
const crawleeVars = base.envVar == null ? [] : [base.envVar].flat();
|
|
13
|
+
return field((schema ?? base.schema), [...[apifyEnvVars].flat(), ...crawleeVars]);
|
|
14
|
+
}
|
|
15
|
+
// --- Apify config field definitions ---
|
|
16
|
+
export const apifyConfigFields = {
|
|
17
|
+
// Inherit all crawlee fields as-is.
|
|
18
|
+
...crawleeConfigFields,
|
|
19
|
+
// Crawlee fields the SDK extends with ACTOR_/APIFY_ env-var aliases (which
|
|
20
|
+
// take precedence; crawlee's own CRAWLEE_* var is reused as the fallback,
|
|
21
|
+
// never re-typed). A schema is passed only where the SDK needs a different
|
|
22
|
+
// default than crawlee's.
|
|
23
|
+
defaultDatasetId: withApifyEnv(crawleeConfigFields.defaultDatasetId, [ACTOR_ENV_VARS.DEFAULT_DATASET_ID, APIFY_ENV_VARS.DEFAULT_DATASET_ID], z.string().default(LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.DEFAULT_DATASET_ID])),
|
|
24
|
+
defaultKeyValueStoreId: withApifyEnv(crawleeConfigFields.defaultKeyValueStoreId, [ACTOR_ENV_VARS.DEFAULT_KEY_VALUE_STORE_ID, APIFY_ENV_VARS.DEFAULT_KEY_VALUE_STORE_ID], z.string().default(LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.DEFAULT_KEY_VALUE_STORE_ID])),
|
|
25
|
+
defaultRequestQueueId: withApifyEnv(crawleeConfigFields.defaultRequestQueueId, [ACTOR_ENV_VARS.DEFAULT_REQUEST_QUEUE_ID, APIFY_ENV_VARS.DEFAULT_REQUEST_QUEUE_ID], z.string().default(LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.DEFAULT_REQUEST_QUEUE_ID])),
|
|
26
|
+
inputKey: withApifyEnv(crawleeConfigFields.inputKey, [ACTOR_ENV_VARS.INPUT_KEY, APIFY_ENV_VARS.INPUT_KEY]),
|
|
27
|
+
memoryMbytes: withApifyEnv(crawleeConfigFields.memoryMbytes, [
|
|
28
|
+
ACTOR_ENV_VARS.MEMORY_MBYTES,
|
|
29
|
+
APIFY_ENV_VARS.MEMORY_MBYTES,
|
|
30
|
+
]),
|
|
31
|
+
availableMemoryRatio: withApifyEnv(crawleeConfigFields.availableMemoryRatio, 'APIFY_AVAILABLE_MEMORY_RATIO', coerceNumber.default(() => (isAtHome() ? 1 : 0.25))),
|
|
32
|
+
disableBrowserSandbox: withApifyEnv(crawleeConfigFields.disableBrowserSandbox, 'APIFY_DISABLE_BROWSER_SANDBOX', coerceBoolean.optional().default(() => (isAtHome() ? true : undefined))),
|
|
33
|
+
persistStateIntervalMillis: withApifyEnv(crawleeConfigFields.persistStateIntervalMillis, [
|
|
34
|
+
APIFY_ENV_VARS.PERSIST_STATE_INTERVAL_MILLIS,
|
|
35
|
+
'APIFY_TEST_PERSIST_INTERVAL_MILLIS',
|
|
36
|
+
]),
|
|
37
|
+
headless: withApifyEnv(crawleeConfigFields.headless, APIFY_ENV_VARS.HEADLESS),
|
|
38
|
+
xvfb: withApifyEnv(crawleeConfigFields.xvfb, APIFY_ENV_VARS.XVFB),
|
|
39
|
+
chromeExecutablePath: withApifyEnv(crawleeConfigFields.chromeExecutablePath, APIFY_ENV_VARS.CHROME_EXECUTABLE_PATH),
|
|
40
|
+
defaultBrowserPath: withApifyEnv(crawleeConfigFields.defaultBrowserPath, 'APIFY_DEFAULT_BROWSER_PATH'),
|
|
41
|
+
purgeOnStart: withApifyEnv(crawleeConfigFields.purgeOnStart, APIFY_ENV_VARS.PURGE_ON_START),
|
|
42
|
+
// Apify-specific fields
|
|
43
|
+
metamorphAfterSleepMillis: field(coerceNumber.default(300_000), APIFY_ENV_VARS.METAMORPH_AFTER_SLEEP_MILLIS),
|
|
44
|
+
actorEventsWsUrl: field(z.string().optional(), [
|
|
45
|
+
ACTOR_ENV_VARS.EVENTS_WEBSOCKET_URL,
|
|
46
|
+
APIFY_ENV_VARS.ACTOR_EVENTS_WS_URL,
|
|
47
|
+
]),
|
|
48
|
+
token: field(z.string().optional(), APIFY_ENV_VARS.TOKEN),
|
|
49
|
+
actorId: field(z.string().optional(), [ACTOR_ENV_VARS.ID, APIFY_ENV_VARS.ACTOR_ID]),
|
|
50
|
+
actorRunId: field(z.string().optional(), [ACTOR_ENV_VARS.RUN_ID, APIFY_ENV_VARS.ACTOR_RUN_ID]),
|
|
51
|
+
actorTaskId: field(z.string().optional(), [ACTOR_ENV_VARS.TASK_ID, APIFY_ENV_VARS.ACTOR_TASK_ID]),
|
|
52
|
+
apiBaseUrl: field(z.string().default('https://api.apify.com'), APIFY_ENV_VARS.API_BASE_URL),
|
|
53
|
+
apiPublicBaseUrl: field(z.string().default('https://api.apify.com'), APIFY_ENV_VARS.API_PUBLIC_BASE_URL),
|
|
54
|
+
containerPort: field(coerceNumber.default(+LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.WEB_SERVER_PORT]), [
|
|
55
|
+
ACTOR_ENV_VARS.WEB_SERVER_PORT,
|
|
56
|
+
APIFY_ENV_VARS.CONTAINER_PORT,
|
|
57
|
+
]),
|
|
58
|
+
containerUrl: field(z.string().default(LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.WEB_SERVER_URL]), [
|
|
59
|
+
ACTOR_ENV_VARS.WEB_SERVER_URL,
|
|
60
|
+
APIFY_ENV_VARS.CONTAINER_URL,
|
|
61
|
+
]),
|
|
62
|
+
proxyHostname: field(z.string().default(LOCAL_APIFY_ENV_VARS[APIFY_ENV_VARS.PROXY_HOSTNAME]), APIFY_ENV_VARS.PROXY_HOSTNAME),
|
|
63
|
+
proxyPassword: field(z.string().optional(), APIFY_ENV_VARS.PROXY_PASSWORD),
|
|
64
|
+
proxyPort: field(coerceNumber.default(+LOCAL_APIFY_ENV_VARS[APIFY_ENV_VARS.PROXY_PORT]), APIFY_ENV_VARS.PROXY_PORT),
|
|
65
|
+
proxyStatusUrl: field(z.string().default('http://proxy.apify.com'), APIFY_ENV_VARS.PROXY_STATUS_URL),
|
|
66
|
+
/** @deprecated use `containerPort` instead */
|
|
67
|
+
standbyPort: field(coerceNumber.default(+LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.STANDBY_PORT]), ACTOR_ENV_VARS.STANDBY_PORT),
|
|
68
|
+
standbyUrl: field(z.string().optional(), ACTOR_ENV_VARS.STANDBY_URL),
|
|
69
|
+
isAtHome: field(coerceBoolean.default(false), APIFY_ENV_VARS.IS_AT_HOME),
|
|
70
|
+
userId: field(z.string().optional(), APIFY_ENV_VARS.USER_ID),
|
|
71
|
+
userIsPaying: field(z.string().optional(), APIFY_ENV_VARS.USER_IS_PAYING),
|
|
72
|
+
actorPermissionLevel: field(z.string().optional(), ACTOR_ENV_VARS.PERMISSION_LEVEL),
|
|
73
|
+
inputSecretsPrivateKeyPassphrase: field(z.string().optional(), APIFY_ENV_VARS.INPUT_SECRETS_PRIVATE_KEY_PASSPHRASE),
|
|
74
|
+
inputSecretsPrivateKeyFile: field(z.string().optional(), APIFY_ENV_VARS.INPUT_SECRETS_PRIVATE_KEY_FILE),
|
|
75
|
+
// `0` is treated as "no limit" (mirrors the Apify platform contract).
|
|
76
|
+
maxTotalChargeUsd: field(coerceNumber.transform((val) => (val === 0 ? Infinity : val)).default(Infinity), ACTOR_ENV_VARS.MAX_TOTAL_CHARGE_USD),
|
|
77
|
+
metaOrigin: field(z.string().optional(), APIFY_ENV_VARS.META_ORIGIN),
|
|
78
|
+
testPayPerEvent: field(coerceBoolean.default(false), 'ACTOR_TEST_PAY_PER_EVENT'),
|
|
79
|
+
useChargingLogDataset: field(coerceBoolean.default(false), 'ACTOR_USE_CHARGING_LOG_DATASET'),
|
|
80
|
+
// Pay-per-event charging metadata injected by the platform (JSON strings).
|
|
81
|
+
actorPricingInfo: field(z.string().optional(), 'APIFY_ACTOR_PRICING_INFO'),
|
|
82
|
+
chargedEventCounts: field(z.string().optional(), 'APIFY_CHARGED_ACTOR_EVENT_COUNTS'),
|
|
83
|
+
actorStoragesJson: field(z.string().optional(), 'ACTOR_STORAGES_JSON'),
|
|
84
|
+
// Grab-bag of ApifyClient constructor options; the `storageDir` key is
|
|
85
|
+
// pulled out separately for local storage emulation, the rest is spread
|
|
86
|
+
// into `new ApifyClient({...})` in `Actor.newClient()`. No env var alias.
|
|
87
|
+
storageClientOptions: field(z.record(z.string(), z.unknown()).optional()),
|
|
88
|
+
};
|
|
3
89
|
/**
|
|
4
90
|
* `Configuration` is a value object holding the SDK configuration. We can use it in two ways:
|
|
5
91
|
*
|
|
@@ -7,38 +93,34 @@ import { ACTOR_ENV_VARS, APIFY_ENV_VARS, LOCAL_ACTOR_ENV_VARS, LOCAL_APIFY_ENV_V
|
|
|
7
93
|
*
|
|
8
94
|
* ```javascript
|
|
9
95
|
* import { Actor } from 'apify';
|
|
10
|
-
* import { BasicCrawler } from 'crawlee';
|
|
11
96
|
*
|
|
12
97
|
* const sdk = new Actor({ token: '123' });
|
|
13
|
-
* console.log(sdk.config.
|
|
14
|
-
*
|
|
15
|
-
* const crawler = new BasicCrawler({
|
|
16
|
-
* // ... crawler options
|
|
17
|
-
* }, sdk.config);
|
|
98
|
+
* console.log(sdk.config.token); // '123'
|
|
18
99
|
* ```
|
|
19
100
|
*
|
|
20
101
|
* 2. To get the global configuration (singleton instance). It will respect the environment variables.
|
|
21
102
|
*
|
|
22
103
|
* ```javascript
|
|
23
|
-
* import {
|
|
104
|
+
* import { Configuration } from 'apify';
|
|
24
105
|
*
|
|
25
|
-
* // Get the global configuration
|
|
26
106
|
* const config = Configuration.getGlobalConfig();
|
|
27
|
-
*
|
|
28
|
-
*
|
|
29
|
-
* config.set('persistStateIntervalMillis', 30_000);
|
|
30
|
-
*
|
|
31
|
-
* // No need to pass the configuration to the crawler,
|
|
32
|
-
* // as it's using the global configuration by default
|
|
33
|
-
* const crawler = new BasicCrawler();
|
|
107
|
+
* console.log(config.headless);
|
|
108
|
+
* console.log(config.persistStateIntervalMillis);
|
|
34
109
|
* ```
|
|
35
110
|
*
|
|
111
|
+
* Configuration is immutable — values are set via the constructor and cannot be changed afterwards.
|
|
112
|
+
* The priority order for resolving values is (highest to lowest):
|
|
113
|
+
*
|
|
114
|
+
* ```text
|
|
115
|
+
* constructor options > environment variables > crawlee.json > schema defaults
|
|
116
|
+
* ```
|
|
117
|
+
*
|
|
36
118
|
* ## Supported Configuration Options
|
|
37
119
|
*
|
|
38
120
|
* Key | Environment Variable | Default Value
|
|
39
121
|
* ---|---|---
|
|
40
122
|
* `memoryMbytes` | `ACTOR_MEMORY_MBYTES` | -
|
|
41
|
-
* `headless` | `APIFY_HEADLESS` |
|
|
123
|
+
* `headless` | `APIFY_HEADLESS` | `true`
|
|
42
124
|
* `persistStateIntervalMillis` | `APIFY_PERSIST_STATE_INTERVAL_MILLIS` | `60e3`
|
|
43
125
|
* `token` | `APIFY_TOKEN` | -
|
|
44
126
|
* `isAtHome` | `APIFY_IS_AT_HOME` | -
|
|
@@ -71,137 +153,28 @@ import { ACTOR_ENV_VARS, APIFY_ENV_VARS, LOCAL_ACTOR_ENV_VARS, LOCAL_APIFY_ENV_V
|
|
|
71
153
|
* `chromeExecutablePath` | `APIFY_CHROME_EXECUTABLE_PATH` | -
|
|
72
154
|
* `defaultBrowserPath` | `APIFY_DEFAULT_BROWSER_PATH` | -
|
|
73
155
|
*/
|
|
156
|
+
// eslint-disable-next-line @typescript-eslint/no-unsafe-declaration-merging
|
|
74
157
|
export class Configuration extends CoreConfiguration {
|
|
75
|
-
/** @
|
|
76
|
-
// eslint-disable-next-line no-use-before-define -- Self-reference
|
|
158
|
+
/** @internal */
|
|
77
159
|
static globalConfig;
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
//
|
|
81
|
-
|
|
82
|
-
//
|
|
83
|
-
|
|
84
|
-
APIFY_PURGE_ON_START: 'purgeOnStart',
|
|
85
|
-
APIFY_MEMORY_MBYTES: 'memoryMbytes',
|
|
86
|
-
APIFY_DEFAULT_DATASET_ID: 'defaultDatasetId',
|
|
87
|
-
APIFY_DEFAULT_KEY_VALUE_STORE_ID: 'defaultKeyValueStoreId',
|
|
88
|
-
APIFY_DEFAULT_REQUEST_QUEUE_ID: 'defaultRequestQueueId',
|
|
89
|
-
APIFY_INPUT_KEY: 'inputKey',
|
|
90
|
-
APIFY_PERSIST_STATE_INTERVAL_MILLIS: 'persistStateIntervalMillis',
|
|
91
|
-
APIFY_HEADLESS: 'headless',
|
|
92
|
-
APIFY_XVFB: 'xvfb',
|
|
93
|
-
APIFY_CHROME_EXECUTABLE_PATH: 'chromeExecutablePath',
|
|
94
|
-
APIFY_DEFAULT_BROWSER_PATH: 'defaultBrowserPath',
|
|
95
|
-
APIFY_DISABLE_BROWSER_SANDBOX: 'disableBrowserSandbox',
|
|
96
|
-
// as well as apify specific ones
|
|
97
|
-
APIFY_TOKEN: 'token',
|
|
98
|
-
APIFY_METAMORPH_AFTER_SLEEP_MILLIS: 'metamorphAfterSleepMillis',
|
|
99
|
-
APIFY_TEST_PERSIST_INTERVAL_MILLIS: 'persistStateIntervalMillis', // for BC, seems to be unused
|
|
100
|
-
APIFY_ACTOR_EVENTS_WS_URL: 'actorEventsWsUrl',
|
|
101
|
-
APIFY_ACTOR_ID: 'actorId',
|
|
102
|
-
APIFY_API_BASE_URL: 'apiBaseUrl',
|
|
103
|
-
APIFY_API_PUBLIC_BASE_URL: 'apiPublicBaseUrl',
|
|
104
|
-
APIFY_IS_AT_HOME: 'isAtHome',
|
|
105
|
-
APIFY_ACTOR_RUN_ID: 'actorRunId',
|
|
106
|
-
APIFY_ACTOR_TASK_ID: 'actorTaskId',
|
|
107
|
-
APIFY_CONTAINER_PORT: 'containerPort',
|
|
108
|
-
APIFY_CONTAINER_URL: 'containerUrl',
|
|
109
|
-
APIFY_USER_ID: 'userId',
|
|
110
|
-
APIFY_PROXY_HOSTNAME: 'proxyHostname',
|
|
111
|
-
APIFY_PROXY_PASSWORD: 'proxyPassword',
|
|
112
|
-
APIFY_PROXY_STATUS_URL: 'proxyStatusUrl',
|
|
113
|
-
APIFY_PROXY_PORT: 'proxyPort',
|
|
114
|
-
APIFY_INPUT_SECRETS_PRIVATE_KEY_FILE: 'inputSecretsPrivateKeyFile',
|
|
115
|
-
APIFY_INPUT_SECRETS_PRIVATE_KEY_PASSPHRASE: 'inputSecretsPrivateKeyPassphrase',
|
|
116
|
-
APIFY_META_ORIGIN: 'metaOrigin',
|
|
117
|
-
// Actor env vars
|
|
118
|
-
ACTOR_DEFAULT_DATASET_ID: 'defaultDatasetId',
|
|
119
|
-
ACTOR_DEFAULT_KEY_VALUE_STORE_ID: 'defaultKeyValueStoreId',
|
|
120
|
-
ACTOR_DEFAULT_REQUEST_QUEUE_ID: 'defaultRequestQueueId',
|
|
121
|
-
ACTOR_EVENTS_WEBSOCKET_URL: 'actorEventsWsUrl',
|
|
122
|
-
ACTOR_ID: 'actorId',
|
|
123
|
-
ACTOR_INPUT_KEY: 'inputKey',
|
|
124
|
-
ACTOR_MEMORY_MBYTES: 'memoryMbytes',
|
|
125
|
-
ACTOR_RUN_ID: 'actorRunId',
|
|
126
|
-
ACTOR_STANDBY_PORT: 'standbyPort',
|
|
127
|
-
ACTOR_STANDBY_URL: 'standbyUrl',
|
|
128
|
-
ACTOR_TASK_ID: 'actorTaskId',
|
|
129
|
-
ACTOR_WEB_SERVER_PORT: 'containerPort',
|
|
130
|
-
ACTOR_WEB_SERVER_URL: 'containerUrl',
|
|
131
|
-
ACTOR_MAX_TOTAL_CHARGE_USD: 'maxTotalChargeUsd',
|
|
132
|
-
ACTOR_TEST_PAY_PER_EVENT: 'testPayPerEvent',
|
|
133
|
-
ACTOR_USE_CHARGING_LOG_DATASET: 'useChargingLogDataset',
|
|
134
|
-
};
|
|
135
|
-
static INTEGER_VARS = [
|
|
136
|
-
...CoreConfiguration.INTEGER_VARS,
|
|
137
|
-
'proxyPort',
|
|
138
|
-
'containerPort',
|
|
139
|
-
'metamorphAfterSleepMillis',
|
|
140
|
-
'maxTotalChargeUsd',
|
|
141
|
-
];
|
|
142
|
-
static BOOLEAN_VARS = [
|
|
143
|
-
...CoreConfiguration.BOOLEAN_VARS,
|
|
144
|
-
'isAtHome',
|
|
145
|
-
'testPayPerEvent',
|
|
146
|
-
'useChargingLogDataset',
|
|
147
|
-
];
|
|
148
|
-
static DEFAULTS = {
|
|
149
|
-
...CoreConfiguration.DEFAULTS,
|
|
150
|
-
defaultKeyValueStoreId: LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.DEFAULT_KEY_VALUE_STORE_ID],
|
|
151
|
-
defaultDatasetId: LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.DEFAULT_DATASET_ID],
|
|
152
|
-
defaultRequestQueueId: LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.DEFAULT_REQUEST_QUEUE_ID],
|
|
153
|
-
inputKey: 'INPUT',
|
|
154
|
-
apiBaseUrl: 'https://api.apify.com',
|
|
155
|
-
apiPublicBaseUrl: 'https://api.apify.com',
|
|
156
|
-
proxyStatusUrl: 'http://proxy.apify.com',
|
|
157
|
-
proxyHostname: LOCAL_APIFY_ENV_VARS[APIFY_ENV_VARS.PROXY_HOSTNAME],
|
|
158
|
-
proxyPort: +LOCAL_APIFY_ENV_VARS[APIFY_ENV_VARS.PROXY_PORT],
|
|
159
|
-
containerPort: +LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.WEB_SERVER_PORT],
|
|
160
|
-
containerUrl: LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.WEB_SERVER_URL],
|
|
161
|
-
standbyPort: +LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.STANDBY_PORT],
|
|
162
|
-
metamorphAfterSleepMillis: 300e3,
|
|
163
|
-
persistStateIntervalMillis: 60e3, // This value is mentioned in jsdoc in `events.js`, if you update it here, update it there too.
|
|
164
|
-
testPayPerEvent: false,
|
|
165
|
-
useChargingLogDataset: false,
|
|
166
|
-
};
|
|
167
|
-
/**
|
|
168
|
-
* @inheritDoc
|
|
169
|
-
*/
|
|
170
|
-
get(key, defaultValue) {
|
|
171
|
-
return super.get(key, defaultValue);
|
|
172
|
-
}
|
|
173
|
-
/**
|
|
174
|
-
* @inheritDoc
|
|
175
|
-
*/
|
|
176
|
-
set(key, value) {
|
|
177
|
-
super.set(key, value);
|
|
160
|
+
static fields = apifyConfigFields;
|
|
161
|
+
constructor(options = {}) {
|
|
162
|
+
// `super` types its options against crawlee's field set; ours is a
|
|
163
|
+
// superset (apifyConfigFields spreads crawleeConfigFields), so the
|
|
164
|
+
// shapes are runtime-compatible but not TS-assignable — hence the cast.
|
|
165
|
+
super(options);
|
|
178
166
|
}
|
|
179
167
|
/**
|
|
180
168
|
* @inheritDoc
|
|
169
|
+
*
|
|
170
|
+
* Returns the SDK's global {@link Configuration} singleton (an
|
|
171
|
+
* Apify-typed default that parses `APIFY_*` env vars). During an Actor run
|
|
172
|
+
* the active configuration is held by crawlee's `serviceLocator`, which is
|
|
173
|
+
* what crawlee internals resolve against; this singleton is only the
|
|
174
|
+
* fallback for code reaching for a configuration without an explicit one.
|
|
181
175
|
*/
|
|
182
176
|
static getGlobalConfig() {
|
|
183
|
-
if (Configuration.storage.getStore()) {
|
|
184
|
-
return Configuration.storage.getStore();
|
|
185
|
-
}
|
|
186
177
|
Configuration.globalConfig ??= new Configuration();
|
|
187
178
|
return Configuration.globalConfig;
|
|
188
179
|
}
|
|
189
|
-
/**
|
|
190
|
-
* Resets global configuration instance. The default instance holds configuration based on env vars,
|
|
191
|
-
* if we want to change them, we need to first reset the global state. Used mainly for testing purposes.
|
|
192
|
-
*/
|
|
193
|
-
static resetGlobalState() {
|
|
194
|
-
delete this.globalConfig;
|
|
195
|
-
}
|
|
196
180
|
}
|
|
197
|
-
// monkey patch the core class so it respects the new options too
|
|
198
|
-
CoreConfiguration.getGlobalConfig = Configuration.getGlobalConfig;
|
|
199
|
-
// @ts-expect-error protected property
|
|
200
|
-
CoreConfiguration.ENV_MAP = Configuration.ENV_MAP;
|
|
201
|
-
// @ts-expect-error protected property
|
|
202
|
-
CoreConfiguration.INTEGER_VARS = Configuration.INTEGER_VARS;
|
|
203
|
-
// @ts-expect-error protected property
|
|
204
|
-
CoreConfiguration.BOOLEAN_VARS = Configuration.BOOLEAN_VARS;
|
|
205
|
-
// @ts-expect-error protected property
|
|
206
|
-
CoreConfiguration.DEFAULTS = Configuration.DEFAULTS;
|
|
207
|
-
//# sourceMappingURL=configuration.js.map
|
package/dist/index.d.ts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
export * from './actor.js';
|
|
2
|
-
export
|
|
2
|
+
export type { OpenStorageOptions, StorageAlias, StorageId, StorageName, StorageIdentifier, StorageIdentifierWithoutAlias, } from './storage.js';
|
|
3
|
+
export { ChargeOptions, ChargeResult, ActorPricingInfo, ChargingManager } from './charging.js';
|
|
3
4
|
export * from './configuration.js';
|
|
4
5
|
export * from './proxy_configuration.js';
|
|
5
6
|
export * from './platform_event_manager.js';
|
|
6
7
|
export * from './key_value_store.js';
|
|
7
8
|
export { Dataset, DatasetDataOptions, DatasetIteratorOptions, DatasetConsumer, DatasetMapper, DatasetReducer, DatasetOptions, DatasetContent, RequestQueue, QueueOperationInfo, RequestQueueOperationOptions, RequestQueueOptions, KeyConsumer, KeyValueStoreOptions, RecordOptions, KeyValueStoreIteratorOptions, log, Log, LoggerOptions, LogLevel, Logger, LoggerJson, LoggerText, } from '@crawlee/core';
|
|
8
9
|
export { ApifyClient, ApifyClientOptions } from 'apify-client';
|
|
9
|
-
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.js
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
export * from './actor.js';
|
|
2
|
-
export
|
|
2
|
+
export { ChargingManager } from './charging.js';
|
|
3
3
|
export * from './configuration.js';
|
|
4
4
|
export * from './proxy_configuration.js';
|
|
5
5
|
export * from './platform_event_manager.js';
|
|
6
6
|
export * from './key_value_store.js';
|
|
7
7
|
export { Dataset, RequestQueue, log, Log, LogLevel, Logger, LoggerJson, LoggerText, } from '@crawlee/core';
|
|
8
8
|
export { ApifyClient } from 'apify-client';
|
|
9
|
-
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { Dictionary } from '@crawlee/utils';
|
|
2
|
+
/**
|
|
3
|
+
* @ignore
|
|
4
|
+
*/
|
|
5
|
+
export declare const noActorInputSchemaDefinedMarker: unique symbol;
|
|
6
|
+
/**
 * Looks up the Actor input schema relative to the current working directory: first through the
 * `input` field of `.actor/actor.json` (an inline schema object or a path string), then in the
 * default `INPUT_SCHEMA.json` / `input_schema.json` locations.
 *
 * @returns The schema when one is found, {@link noActorInputSchemaDefinedMarker} when the Actor
 * config does not declare an `input` and no default schema file exists, and `null` otherwise.
 */
export declare const readInputSchema: () => Dictionary | null | typeof noActorInputSchemaDefinedMarker;
|
|
7
|
+
/**
 * Collects the `default` value of every entry of `inputSchema.properties` that declares one.
 *
 * @param inputSchema Input schema whose `properties` map is inspected.
 * @returns Map of property name to its declared default; properties without a `default` are omitted.
 */
export declare const getDefaultsFromInputSchema: (inputSchema: any) => Record<string, unknown>;
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
// TODO: https://github.com/apify/apify-shared-js/issues/547
|
|
2
|
+
import { existsSync, readFileSync } from 'node:fs';
|
|
3
|
+
import { join } from 'node:path';
|
|
4
|
+
import process from 'node:process';
|
|
5
|
+
// Candidate locations of the input schema file, each given as path segments that are joined onto
// the current working directory. They are tried in the order listed, and only when the `input`
// field of the actor.json configuration file is neither an inline schema object nor a path string.
|
|
6
|
+
const DEFAULT_INPUT_SCHEMA_PATHS = [
|
|
7
|
+
['.actor', 'INPUT_SCHEMA.json'],
|
|
8
|
+
['INPUT_SCHEMA.json'],
|
|
9
|
+
['.actor', 'input_schema.json'],
|
|
10
|
+
['input_schema.json'],
|
|
11
|
+
];
|
|
12
|
+
// Folder (relative to the current working directory) that holds the Actor configuration file.
// A string `input` path from that configuration is resolved against this folder as well.
const ACTOR_SPECIFICATION_FOLDER = '.actor';
|
|
13
|
+
// File name of the Actor configuration inside ACTOR_SPECIFICATION_FOLDER.
const LOCAL_CONFIG_NAME = 'actor.json';
|
|
14
|
+
/**
 * Reads and parses a JSON file, treating a missing file as "no value".
 *
 * The file is read directly rather than being probed with `existsSync` first, so a file that
 * disappears between the check and the read can no longer surface as an unexpected `ENOENT` error.
 *
 * @param {string} path Path of the JSON file to read.
 * @returns {any} The parsed JSON content, or `null` when the file does not exist.
 * @throws When the file exists but cannot be read, or when its content is not valid JSON.
 */
const readJSONIfExists = (path) => {
    let content;
    try {
        content = readFileSync(path, 'utf8');
    } catch (error) {
        // A missing file, or a path whose parent is not a directory, is an expected outcome.
        if (error?.code === 'ENOENT' || error?.code === 'ENOTDIR') {
            return null;
        }
        throw error;
    }
    return JSON.parse(content);
};
|
|
21
|
+
/**
 * Sentinel returned by `readInputSchema` when the Actor config declares no input schema and no
 * schema file was found in the default locations, so that callers can tell this situation apart
 * from a `null` result.
 *
 * @ignore
 */
export const noActorInputSchemaDefinedMarker = Symbol.for('apify.noActorInputSchemaDefined');
/**
 * Locates and loads the Actor input schema relative to the current working directory.
 *
 * Lookup order:
 * 1. an inline schema object in the `input` field of `.actor/actor.json`,
 * 2. a schema file referenced by a string `input` field (resolved against the `.actor` folder),
 * 3. the default schema file locations.
 *
 * @returns The schema when one is found; `noActorInputSchemaDefinedMarker` when the config has no
 * `input` and no default schema file exists; `null` when a configured `input` could not be
 * resolved to a schema.
 * @throws When a file that is read exists but is unreadable or does not contain valid JSON.
 */
export const readInputSchema = () => {
    const cwd = process.cwd();
    const localConfig = readJSONIfExists(join(cwd, ACTOR_SPECIFICATION_FOLDER, LOCAL_CONFIG_NAME));
    const configuredInput = localConfig?.input;
    // Input schema nested in the actor config. `typeof null` is 'object' too, so `null` has to be
    // excluded explicitly; otherwise `"input": null` would be returned as if it were the schema.
    if (configuredInput !== null && typeof configuredInput === 'object') {
        return configuredInput;
    }
    // Input schema path from the actor config
    if (typeof configuredInput === 'string') {
        return readJSONIfExists(join(cwd, ACTOR_SPECIFICATION_FOLDER, configuredInput));
    }
    // Try to find it from possible default paths
    for (const segments of DEFAULT_INPUT_SCHEMA_PATHS) {
        const result = readJSONIfExists(join(cwd, ...segments));
        if (result) {
            return result;
        }
    }
    // If we are in an Actor context, BUT we do not have an input schema defined, we want to skip the warning
    if (!configuredInput) {
        return noActorInputSchemaDefinedMarker;
    }
    return null;
};
|
|
50
|
+
/**
 * Collects the `default` values declared by the properties of an input schema.
 *
 * @param {any} inputSchema Input schema object; fields are read from its `properties` map.
 * @returns {Record<string, unknown>} Map of property name to its declared default. Properties
 * without a `default` are omitted, and a schema without a usable `properties` map yields `{}`.
 */
export const getDefaultsFromInputSchema = (inputSchema) => {
    const defaults = {};
    // The parameter is untyped, so `inputSchema` may be nullish or lack `properties`;
    // treat both as "no defaults" instead of letting `Object.entries` throw.
    const properties = inputSchema?.properties;
    if (properties === null || typeof properties !== 'object') {
        return defaults;
    }
    for (const [key, fieldSchema] of Object.entries(properties)) {
        // Optional chaining guards against malformed entries such as `"field": null`.
        if (fieldSchema?.default !== undefined) {
            defaults[key] = fieldSchema.default;
        }
    }
    return defaults;
};
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { StorageOpenOptions } from '@crawlee/core';
|
|
2
2
|
import { KeyValueStore as CoreKeyValueStore } from '@crawlee/core';
|
|
3
3
|
/**
|
|
4
4
|
* @inheritDoc
|
|
@@ -7,11 +7,15 @@ export declare class KeyValueStore extends CoreKeyValueStore {
|
|
|
7
7
|
/**
|
|
8
8
|
* Returns a URL for the given key that may be used to publicly
|
|
9
9
|
* access the value in the remote key-value store.
|
|
10
|
+
*
|
|
11
|
+
* On the Apify platform the URL is signed with the store's
|
|
12
|
+
* `urlSigningSecretKey` so that anyone with the URL can read the record
|
|
13
|
+
* without authentication. Locally we delegate to crawlee's default
|
|
14
|
+
* implementation (which produces a `file://` URL or returns `undefined`).
|
|
10
15
|
*/
|
|
11
|
-
getPublicUrl(key: string): string
|
|
16
|
+
getPublicUrl(key: string): Promise<string | undefined>;
|
|
12
17
|
/**
|
|
13
18
|
* @inheritDoc
|
|
14
19
|
*/
|
|
15
|
-
static open(storeIdOrName?: string | null, options?:
|
|
20
|
+
static open(storeIdOrName?: string | null, options?: StorageOpenOptions): Promise<KeyValueStore>;
|
|
16
21
|
}
|
|
17
|
-
//# sourceMappingURL=key_value_store.d.ts.map
|