apify 3.1.1 → 3.1.2-beta.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +20 -0
- package/.turbo/turbo-copy.log +4 -0
- package/dist/LICENSE.md +201 -0
- package/dist/README.md +98 -0
- package/{actor.d.ts → dist/actor.d.ts} +1 -1
- package/{actor.d.ts.map → dist/actor.d.ts.map} +1 -1
- package/{actor.js → dist/actor.js} +0 -0
- package/{actor.js.map → dist/actor.js.map} +0 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +9 -0
- package/dist/cli.js.map +1 -0
- package/{configuration.d.ts → dist/configuration.d.ts} +0 -0
- package/{configuration.d.ts.map → dist/configuration.d.ts.map} +0 -0
- package/{configuration.js → dist/configuration.js} +0 -0
- package/{configuration.js.map → dist/configuration.js.map} +0 -0
- package/{index.d.ts → dist/index.d.ts} +0 -0
- package/{index.d.ts.map → dist/index.d.ts.map} +0 -0
- package/{index.js → dist/index.js} +0 -0
- package/{index.js.map → dist/index.js.map} +0 -0
- package/{index.mjs → dist/index.mjs} +0 -0
- package/{key_value_store.d.ts → dist/key_value_store.d.ts} +0 -0
- package/{key_value_store.d.ts.map → dist/key_value_store.d.ts.map} +0 -0
- package/{key_value_store.js → dist/key_value_store.js} +0 -0
- package/{key_value_store.js.map → dist/key_value_store.js.map} +0 -0
- package/dist/package.json +76 -0
- package/{platform_event_manager.d.ts → dist/platform_event_manager.d.ts} +0 -0
- package/{platform_event_manager.d.ts.map → dist/platform_event_manager.d.ts.map} +0 -0
- package/{platform_event_manager.js → dist/platform_event_manager.js} +0 -0
- package/{platform_event_manager.js.map → dist/platform_event_manager.js.map} +0 -0
- package/{proxy_configuration.d.ts → dist/proxy_configuration.d.ts} +0 -0
- package/{proxy_configuration.d.ts.map → dist/proxy_configuration.d.ts.map} +0 -0
- package/{proxy_configuration.js → dist/proxy_configuration.js} +0 -0
- package/{proxy_configuration.js.map → dist/proxy_configuration.js.map} +0 -0
- package/{utils.d.ts → dist/utils.d.ts} +0 -0
- package/{utils.d.ts.map → dist/utils.d.ts.map} +0 -0
- package/{utils.js → dist/utils.js} +0 -0
- package/{utils.js.map → dist/utils.js.map} +0 -0
- package/package.json +14 -9
- package/src/actor.ts +1614 -0
- package/src/cli.ts +9 -0
- package/src/configuration.ts +202 -0
- package/src/index.ts +11 -0
- package/src/key_value_store.ts +25 -0
- package/src/platform_event_manager.ts +118 -0
- package/src/proxy_configuration.ts +395 -0
- package/src/utils.ts +38 -0
- package/tsconfig.build.tsbuildinfo +0 -1
package/src/configuration.ts
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
import { ENV_VARS, LOCAL_ENV_VARS } from '@apify/consts';
|
|
2
|
+
import { Configuration as CoreConfiguration } from '@crawlee/core';
|
|
3
|
+
import type { ConfigurationOptions as CoreConfigurationOptions } from '@crawlee/core';
|
|
4
|
+
|
|
5
|
+
/**
 * Options understood by the Apify SDK `Configuration`, in addition to everything
 * already declared by the crawlee `ConfigurationOptions`.
 *
 * Every option can be supplied through the environment variable noted next to it
 * (the mapping lives in `Configuration.ENV_MAP`). Defaults, where one exists,
 * come from `Configuration.DEFAULTS`.
 */
export interface ConfigurationOptions extends CoreConfigurationOptions {
    // ----- platform API and account -----

    /** Populated from `APIFY_TOKEN`. */
    token?: string;
    /** Populated from `APIFY_API_BASE_URL`; defaults to `'https://api.apify.com'`. */
    apiBaseUrl?: string;
    /** Populated from `APIFY_USER_ID`. */
    userId?: string;
    /** Populated from `APIFY_IS_AT_HOME`; listed among the boolean variables. */
    isAtHome?: boolean;

    // ----- current actor / run -----

    /** Populated from `APIFY_ACTOR_ID`. */
    actorId?: string;
    /** Populated from `APIFY_ACTOR_RUN_ID`. */
    actorRunId?: string;
    /** Populated from `APIFY_ACTOR_TASK_ID`. */
    actorTaskId?: string;
    /** Populated from `APIFY_ACTOR_EVENTS_WS_URL`. */
    actorEventsWsUrl?: string;
    /** Populated from `APIFY_METAMORPH_AFTER_SLEEP_MILLIS` (integer variable); defaults to `300e3`. */
    metamorphAfterSleepMillis?: number;

    // ----- container -----

    /** Populated from `APIFY_CONTAINER_PORT` (integer variable); default taken from `LOCAL_ENV_VARS`. */
    containerPort?: number;
    /** Populated from `APIFY_CONTAINER_URL`; default taken from `LOCAL_ENV_VARS`. */
    containerUrl?: string;

    // ----- proxy -----

    /** Populated from `APIFY_PROXY_HOSTNAME`; default taken from `LOCAL_ENV_VARS`. */
    proxyHostname?: string;
    /** Populated from `APIFY_PROXY_PORT` (integer variable); default taken from `LOCAL_ENV_VARS`. */
    proxyPort?: number;
    /** Populated from `APIFY_PROXY_PASSWORD`. */
    proxyPassword?: string;
    /** Populated from `APIFY_PROXY_STATUS_URL`; defaults to `'http://proxy.apify.com'`. */
    proxyStatusUrl?: string;

    // ----- input secrets -----

    /** Populated from `APIFY_INPUT_SECRETS_PRIVATE_KEY_FILE`. */
    inputSecretsPrivateKeyFile?: string;
    /** Populated from `APIFY_INPUT_SECRETS_PRIVATE_KEY_PASSPHRASE`. */
    inputSecretsPrivateKeyPassphrase?: string;
}
|
|
24
|
+
|
|
25
|
+
/**
 * `Configuration` is a value object that carries the SDK settings. There are two ways to obtain one:
 *
 * 1. Through an `Actor` instance, whose own configuration is exposed as `sdk.config`
 *
 * ```javascript
 * import { Actor } from 'apify';
 * import { BasicCrawler } from 'crawlee';
 *
 * const sdk = new Actor({ token: '123' });
 * console.log(sdk.config.get('token')); // '123'
 *
 * const crawler = new BasicCrawler({
 *     // ... crawler options
 * }, sdk.config);
 * ```
 *
 * 2. Through the global (singleton) configuration, which respects the environment variables.
 *
 * ```javascript
 * import { BasicCrawler, Configuration } from 'crawlee';
 *
 * // Get the global configuration
 * const config = Configuration.getGlobalConfig();
 * // Set the 'persistStateIntervalMillis' option
 * // of global configuration to 30 seconds
 * config.set('persistStateIntervalMillis', 30_000);
 *
 * // No need to pass the configuration to the crawler,
 * // as it's using the global configuration by default
 * const crawler = new BasicCrawler();
 * ```
 *
 * ## Supported Configuration Options
 *
 * Key | Environment Variable | Default Value
 * ---|---|---
 * `memoryMbytes` | `APIFY_MEMORY_MBYTES` | -
 * `headless` | `APIFY_HEADLESS` | -
 * `persistStateIntervalMillis` | `APIFY_PERSIST_STATE_INTERVAL_MILLIS` | `60e3`
 * `token` | `APIFY_TOKEN` | -
 * `isAtHome` | `APIFY_IS_AT_HOME` | -
 * `defaultDatasetId` | `APIFY_DEFAULT_DATASET_ID` | `'default'`
 * `defaultKeyValueStoreId` | `APIFY_DEFAULT_KEY_VALUE_STORE_ID` | `'default'`
 * `defaultRequestQueueId` | `APIFY_DEFAULT_REQUEST_QUEUE_ID` | `'default'`
 *
 * ## Advanced Configuration Options
 *
 * Key | Environment Variable | Default Value
 * ---|---|---
 * `actorEventsWsUrl` | `APIFY_ACTOR_EVENTS_WS_URL` | -
 * `actorId` | `APIFY_ACTOR_ID` | -
 * `actorRunId` | `APIFY_ACTOR_RUN_ID` | -
 * `actorTaskId` | `APIFY_ACTOR_TASK_ID` | -
 * `apiBaseUrl` | `APIFY_API_BASE_URL` | `'https://api.apify.com'`
 * `containerPort` | `APIFY_CONTAINER_PORT` | `4321`
 * `containerUrl` | `APIFY_CONTAINER_URL` | `'http://localhost:4321'`
 * `inputKey` | `APIFY_INPUT_KEY` | `'INPUT'`
 * `metamorphAfterSleepMillis` | `APIFY_METAMORPH_AFTER_SLEEP_MILLIS` | `300e3`
 * `proxyHostname` | `APIFY_PROXY_HOSTNAME` | `'proxy.apify.com'`
 * `proxyPassword` | `APIFY_PROXY_PASSWORD` | -
 * `proxyPort` | `APIFY_PROXY_PORT` | `8000`
 * `proxyStatusUrl` | `APIFY_PROXY_STATUS_URL` | `'http://proxy.apify.com'`
 * `userId` | `APIFY_USER_ID` | -
 * `xvfb` | `APIFY_XVFB` | -
 * `chromeExecutablePath` | `APIFY_CHROME_EXECUTABLE_PATH` | -
 * `defaultBrowserPath` | `APIFY_DEFAULT_BROWSER_PATH` | -
 */
export class Configuration extends CoreConfiguration {
    /** @inheritDoc */
    static override globalConfig?: Configuration;

    /**
     * Lookup table from environment variable name to configuration key
     * (for instance `APIFY_MEMORY_MBYTES` resolves to `memoryMbytes`).
     * The entry order is kept exactly as declared.
     */
    protected static override ENV_MAP = {
        // everything the core class already maps stays available
        ...super.ENV_MAP,

        // `APIFY_`-prefixed aliases for the crawlee options
        APIFY_AVAILABLE_MEMORY_RATIO: 'availableMemoryRatio',
        APIFY_PURGE_ON_START: 'purgeOnStart',
        APIFY_MEMORY_MBYTES: 'memoryMbytes',
        APIFY_DEFAULT_DATASET_ID: 'defaultDatasetId',
        APIFY_DEFAULT_KEY_VALUE_STORE_ID: 'defaultKeyValueStoreId',
        APIFY_DEFAULT_REQUEST_QUEUE_ID: 'defaultRequestQueueId',
        APIFY_INPUT_KEY: 'inputKey',
        APIFY_PERSIST_STATE_INTERVAL_MILLIS: 'persistStateIntervalMillis',
        APIFY_HEADLESS: 'headless',
        APIFY_XVFB: 'xvfb',
        APIFY_CHROME_EXECUTABLE_PATH: 'chromeExecutablePath',
        APIFY_DEFAULT_BROWSER_PATH: 'defaultBrowserPath',
        APIFY_DISABLE_BROWSER_SANDBOX: 'disableBrowserSandbox',

        // options that exist only in the Apify SDK
        APIFY_TOKEN: 'token',
        APIFY_METAMORPH_AFTER_SLEEP_MILLIS: 'metamorphAfterSleepMillis',
        APIFY_TEST_PERSIST_INTERVAL_MILLIS: 'persistStateIntervalMillis', // for BC, seems to be unused
        APIFY_ACTOR_EVENTS_WS_URL: 'actorEventsWsUrl',
        APIFY_ACTOR_ID: 'actorId',
        APIFY_API_BASE_URL: 'apiBaseUrl',
        APIFY_IS_AT_HOME: 'isAtHome',
        APIFY_ACTOR_RUN_ID: 'actorRunId',
        APIFY_ACTOR_TASK_ID: 'actorTaskId',
        APIFY_CONTAINER_PORT: 'containerPort',
        APIFY_CONTAINER_URL: 'containerUrl',
        APIFY_USER_ID: 'userId',
        APIFY_PROXY_HOSTNAME: 'proxyHostname',
        APIFY_PROXY_PASSWORD: 'proxyPassword',
        APIFY_PROXY_STATUS_URL: 'proxyStatusUrl',
        APIFY_PROXY_PORT: 'proxyPort',
        APIFY_INPUT_SECRETS_PRIVATE_KEY_FILE: 'inputSecretsPrivateKeyFile',
        APIFY_INPUT_SECRETS_PRIVATE_KEY_PASSPHRASE: 'inputSecretsPrivateKeyPassphrase',
    };

    /** Core integer options extended with the Apify-specific integer keys. */
    protected static override INTEGER_VARS = [
        ...super.INTEGER_VARS,
        'proxyPort',
        'containerPort',
        'metamorphAfterSleepMillis',
    ];

    /** Core boolean options extended with the Apify-specific boolean key. */
    protected static override BOOLEAN_VARS = [
        ...super.BOOLEAN_VARS,
        'isAtHome',
    ];

    /** Core defaults, overlaid with the values the Apify SDK falls back to. */
    protected static override DEFAULTS = {
        ...super.DEFAULTS,
        defaultKeyValueStoreId: LOCAL_ENV_VARS[ENV_VARS.DEFAULT_KEY_VALUE_STORE_ID],
        defaultDatasetId: LOCAL_ENV_VARS[ENV_VARS.DEFAULT_DATASET_ID],
        defaultRequestQueueId: LOCAL_ENV_VARS[ENV_VARS.DEFAULT_REQUEST_QUEUE_ID],
        inputKey: 'INPUT',
        apiBaseUrl: 'https://api.apify.com',
        proxyStatusUrl: 'http://proxy.apify.com',
        proxyHostname: LOCAL_ENV_VARS[ENV_VARS.PROXY_HOSTNAME],
        // unary plus coerces the `LOCAL_ENV_VARS` entries to numbers
        proxyPort: +LOCAL_ENV_VARS[ENV_VARS.PROXY_PORT],
        containerPort: +LOCAL_ENV_VARS[ENV_VARS.CONTAINER_PORT],
        containerUrl: LOCAL_ENV_VARS[ENV_VARS.CONTAINER_URL],
        metamorphAfterSleepMillis: 300e3,
        persistStateIntervalMillis: 60e3, // This value is mentioned in jsdoc in `events.js`, if you update it here, update it there too.
    };

    /**
     * @inheritDoc
     */
    override get<T extends keyof ConfigurationOptions, U extends ConfigurationOptions[T]>(key: T, defaultValue?: U): U {
        // The parent signature only knows the core keys, so the wider Apify key is cast down.
        const coreKey = key as keyof CoreConfigurationOptions;
        return super.get(coreKey, defaultValue);
    }

    /**
     * @inheritDoc
     */
    override set(key: keyof ConfigurationOptions, value?: any) {
        // Same cast as in `get`: the parent signature only accepts the core keys.
        const coreKey = key as keyof CoreConfigurationOptions;
        super.set(coreKey, value);
    }

    /**
     * @inheritDoc
     */
    static override getGlobalConfig(): Configuration {
        // A configuration held in `Configuration.storage` takes precedence over the singleton.
        const scoped = Configuration.storage.getStore();
        if (scoped) {
            return scoped as Configuration;
        }

        // Create the singleton on first use.
        if (Configuration.globalConfig == null) {
            Configuration.globalConfig = new Configuration();
        }

        return Configuration.globalConfig as Configuration;
    }

    /**
     * Drops the global configuration instance. The default instance holds configuration based on env vars,
     * so the global state has to be reset before changed variables can take effect.
     * Used mainly for testing purposes.
     */
    static override resetGlobalState(): void {
        delete this.globalConfig;
    }
}
|
|
192
|
+
|
|
193
|
+
// Monkey patch the core class so it respects the new options too: the core
// `getGlobalConfig` and the static lookup tables are swapped for the Apify ones.
// Each `@ts-expect-error` covers the protected static read on the line below it.
Object.assign(CoreConfiguration, {
    getGlobalConfig: Configuration.getGlobalConfig,
    // @ts-expect-error protected property
    ENV_MAP: Configuration.ENV_MAP,
    // @ts-expect-error protected property
    INTEGER_VARS: Configuration.INTEGER_VARS,
    // @ts-expect-error protected property
    BOOLEAN_VARS: Configuration.BOOLEAN_VARS,
    // @ts-expect-error protected property
    DEFAULTS: Configuration.DEFAULTS,
});
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
// Public entry point of the package: re-exports the local modules plus a
// selection of names from `@crawlee/core` and `apify-client`.

// Local modules.
export * from './actor';
export * from './configuration';
export * from './proxy_configuration';
export * from './platform_event_manager';
export * from './key_value_store';
// Storage and logging names forwarded from `@crawlee/core`.
export {
    Dataset, DatasetDataOptions, DatasetIteratorOptions, DatasetConsumer, DatasetMapper, DatasetReducer, DatasetOptions, DatasetContent,
    RequestQueue, QueueOperationInfo, RequestQueueOperationOptions, RequestQueueOptions, QueueOperationInfoOptions,
    KeyConsumer, KeyValueStoreOptions, RecordOptions, KeyValueStoreIteratorOptions, log, Log, LoggerOptions, LogLevel, Logger, LoggerJson, LoggerText,
} from '@crawlee/core';
// API client names forwarded from `apify-client`.
export { ApifyClient, ApifyClientOptions } from 'apify-client';
|
|
package/src/key_value_store.ts
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { StorageManagerOptions } from '@crawlee/core';
|
|
2
|
+
import { KeyValueStore as CoreKeyValueStore } from '@crawlee/core';
|
|
3
|
+
|
|
4
|
+
/**
 * @inheritDoc
 */
export class KeyValueStore extends CoreKeyValueStore {
    /**
     * Builds the URL under which the record stored at `key` can be publicly
     * reached in the remote key-value store.
     *
     * @param key Key of the record; it is placed into the URL as given.
     * @returns The record URL on `https://api.apify.com`, built from this store's `id`.
     */
    getPublicUrl(key: string): string {
        const { id: storeId } = this;
        const recordPath = `key-value-stores/${storeId}/records/${key}`;

        return `https://api.apify.com/v2/${recordPath}`;
    }

    /**
     * @inheritDoc
     */
    static override async open(storeIdOrName?: string | null, options: StorageManagerOptions = {}): Promise<KeyValueStore> {
        // The parent is typed to return the core class, so the result is re-typed as this subclass.
        const store = await super.open(storeIdOrName, options);

        return store as unknown as KeyValueStore;
    }
}

// @ts-expect-error extension of the core class to make this only a type-issue
CoreKeyValueStore.prototype.getPublicUrl = KeyValueStore.prototype.getPublicUrl;
|
|
package/src/platform_event_manager.ts
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import { ACTOR_EVENT_NAMES, ENV_VARS } from '@apify/consts';
|
|
2
|
+
import WebSocket from 'ws';
|
|
3
|
+
import { EventType, EventManager } from '@crawlee/core';
|
|
4
|
+
import { betterClearInterval } from '@apify/utilities';
|
|
5
|
+
import { Configuration } from './configuration';
|
|
6
|
+
|
|
7
|
+
/**
 * Event manager backed by a Node.js
 * [EventEmitter](https://nodejs.org/api/events.html#events_class_eventemitter)
 * that emits events coming from the SDK and from the Apify platform.
 * It is initialized by calling the {@apilink Actor.main} function.
 *
 * **Example usage:**
 *
 * ```javascript
 * Actor.on('cpuInfo', (data) => {
 *   if (data.isCpuOverloaded) console.log('Oh no, the CPU is overloaded!');
 * });
 * ```
 *
 * The following events are emitted:
 *
 * - `cpuInfo`: `{ "isCpuOverloaded": Boolean }`
 *   Emitted approximately every second; tells whether the actor is using
 *   the maximum of available CPU resources. If it is, the actor should not add more workload.
 *   For example, this event is used by the {@apilink AutoscaledPool} class.
 * - `migrating`: `void`
 *   Emitted when the actor running on the Apify platform is going to be migrated to another worker server soon.
 *   You can use it to persist the state of the actor and abort the run, to speed up migration.
 *   For example, this is used by the {@apilink RequestList} class.
 * - `aborting`: `void`
 *   When a user aborts an actor run on the Apify platform, they can choose to abort gracefully to allow
 *   the actor some time before getting killed. This graceful abort emits the `aborting` event which the SDK
 *   uses to gracefully stop running crawls and you can use it to do your own cleanup as well.
 * - `persistState`: `{ "isMigrating": Boolean }`
 *   Emitted in regular intervals (by default 60 seconds) to notify all components of Apify SDK that it is time to persist
 *   their state, in order to avoid repeating all work when the actor restarts.
 *   This event is automatically emitted together with the `migrating` event,
 *   in which case the `isMigrating` flag is set to `true`. Otherwise the flag is `false`.
 *   Note that the `persistState` event is provided merely for user convenience,
 *   you can achieve the same effect using `setInterval()` and listening for the `migrating` event.
 */
export class PlatformEventManager extends EventManager {
    /** Websocket connection to actor events; unset while no connection is open. */
    private eventsWs?: WebSocket;

    /**
     * @param config Configuration to read `actorEventsWsUrl` from; the global configuration when omitted.
     */
    constructor(override readonly config = Configuration.getGlobalConfig()) {
        super();
    }

    /**
     * Initializes the `Actor.events` event emitter by connecting to the websocket that provides the events.
     * This is an internal function that is automatically called by `Actor.main()`.
     * It does nothing when the manager is already initialized.
     */
    override async init() {
        if (this.initialized) return;

        await super.init();

        const wsUrl = this.config.get('actorEventsWsUrl');
        if (wsUrl) {
            this.createWebSocketConnection(wsUrl);
            return;
        }

        // Locally there is no web socket to connect, so just print a log message.
        this.log.debug(`Environment variable ${ENV_VARS.ACTOR_EVENTS_WS_URL} is not set, no events from Apify platform will be emitted.`);
    }

    /**
     * Opens the websocket and registers its `message`, `error` and `close` handlers.
     *
     * @param eventsWsUrl Address of the websocket to connect to.
     */
    private createWebSocketConnection(eventsWsUrl: string) {
        const socket = new WebSocket(eventsWsUrl);
        this.eventsWs = socket;

        socket.on('message', (message) => {
            if (!message) return;

            try {
                const event = JSON.parse(String(message));
                const { name, data } = event;
                this.events.emit(name, data);

                if (name !== ACTOR_EVENT_NAMES.MIGRATING) return;

                // Don't send any other persist state event.
                betterClearInterval(this.intervals.persistState!);
                this.events.emit(EventType.PERSIST_STATE, { isMigrating: true });
            } catch (err) {
                // Covers failures raised while emitting as well as while parsing.
                this.log.exception(err as Error, 'Cannot parse actor event');
            }
        });

        socket.on('error', (err) => {
            // Don't print this error as this happens in the case of very short Actor.main().
            const closedBeforeOpen = err.message === 'WebSocket was closed before the connection was established';
            if (!closedBeforeOpen) {
                this.log.exception(err, 'web socket connection failed');
            }
        });

        socket.on('close', () => {
            this.log.debug('web socket has been closed');
            this.eventsWs = undefined;
        });
    }

    /**
     * Closes the websocket providing events from Actor infrastructure and also stops sending internal events
     * of Apify package such as `persistState`.
     * This is automatically called at the end of `Actor.main()`.
     * It does nothing when the manager was never initialized.
     */
    override async close() {
        if (this.initialized) {
            await super.close();
            this.eventsWs?.close();
        }
    }
}
|