apify 3.7.3-beta.9 → 4.0.0-beta.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/actor.d.ts +19 -4
- package/dist/actor.js +207 -239
- package/dist/apify_storage_client.d.ts +54 -0
- package/dist/apify_storage_client.js +152 -0
- package/dist/charging.js +45 -122
- package/dist/configuration.d.ts +79 -141
- package/dist/configuration.js +117 -171
- package/dist/index.js +8 -22
- package/dist/input-schemas.js +12 -18
- package/dist/key_value_store.d.ts +8 -3
- package/dist/key_value_store.js +22 -21
- package/dist/platform_event_manager.d.ts +0 -5
- package/dist/platform_event_manager.js +18 -34
- package/dist/proxy_configuration.d.ts +26 -55
- package/dist/proxy_configuration.js +80 -174
- package/dist/storage.d.ts +6 -4
- package/dist/storage.js +17 -17
- package/dist/utils.d.ts +5 -0
- package/dist/utils.js +39 -23
- package/package.json +16 -15
- package/dist/index.mjs +0 -19
- package/dist/patched_apify_client.d.ts +0 -25
- package/dist/patched_apify_client.js +0 -70
package/dist/actor.js
CHANGED
|
@@ -1,31 +1,28 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
const proxy_configuration_js_1 = require("./proxy_configuration.js");
|
|
20
|
-
const storage_js_1 = require("./storage.js");
|
|
21
|
-
const utils_js_1 = require("./utils.js");
|
|
1
|
+
import { createPrivateKey } from 'node:crypto';
|
|
2
|
+
import { Dataset, purgeDefaultStorages, RequestQueue, serviceLocator } from '@crawlee/core';
|
|
3
|
+
import { sleep, snakeCaseToCamelCase } from '@crawlee/utils';
|
|
4
|
+
import { ApifyClient } from 'apify-client';
|
|
5
|
+
import ow from 'ow';
|
|
6
|
+
import { ACTOR_ENV_VARS, ACTOR_EVENT_NAMES, APIFY_ENV_VARS, INTEGER_ENV_VARS, } from '@apify/consts';
|
|
7
|
+
import { decryptInputSecrets } from '@apify/input_secrets';
|
|
8
|
+
import log from '@apify/log';
|
|
9
|
+
import { addTimeoutToPromise } from '@apify/timeout';
|
|
10
|
+
import { ApifyStorageClient, pushDataChargingContext, USES_PUSH_DATA_INTERCEPTION, } from './apify_storage_client.js';
|
|
11
|
+
import { ChargingManager, pushDataAndCharge } from './charging.js';
|
|
12
|
+
import { Configuration } from './configuration.js';
|
|
13
|
+
import { getDefaultsFromInputSchema, noActorInputSchemaDefinedMarker, readInputSchema } from './input-schemas.js';
|
|
14
|
+
import { KeyValueStore } from './key_value_store.js';
|
|
15
|
+
import { PlatformEventManager } from './platform_event_manager.js';
|
|
16
|
+
import { ProxyConfiguration } from './proxy_configuration.js';
|
|
17
|
+
import { openStorage } from './storage.js';
|
|
18
|
+
import { checkCrawleeVersion, getSystemInfo, printOutdatedSdkWarning } from './utils.js';
|
|
22
19
|
/**
|
|
23
20
|
* Exit codes for the Actor process.
|
|
24
21
|
* The error codes must be in the range 1-128, to avoid collision with signal exits
|
|
25
22
|
* and to ensure Docker will handle them correctly!
|
|
26
23
|
* @internal should be removed if we decide to remove `Actor.main()`
|
|
27
24
|
*/
|
|
28
|
-
|
|
25
|
+
export const EXIT_CODES = {
|
|
29
26
|
SUCCESS: 0,
|
|
30
27
|
ERROR_USER_FUNCTION_THREW: 91,
|
|
31
28
|
ERROR_UNKNOWN: 92,
|
|
@@ -35,106 +32,77 @@ exports.EXIT_CODES = {
|
|
|
35
32
|
* that will be used on the instance methods. Environment variables will have precedence over this configuration.
|
|
36
33
|
* See {@link Configuration} for details about what can be configured and what are the default values.
|
|
37
34
|
*/
|
|
38
|
-
class Actor {
|
|
35
|
+
export class Actor {
|
|
36
|
+
/** @internal */
|
|
37
|
+
static _instance;
|
|
38
|
+
/**
|
|
39
|
+
* Configuration of this SDK instance (provided to its constructor). See {@link Configuration} for details.
|
|
40
|
+
* @internal
|
|
41
|
+
*/
|
|
42
|
+
config;
|
|
43
|
+
/**
|
|
44
|
+
* Default {@link ApifyClient} instance.
|
|
45
|
+
* @internal
|
|
46
|
+
*/
|
|
47
|
+
apifyClient;
|
|
48
|
+
/**
|
|
49
|
+
* Default {@link EventManager} instance.
|
|
50
|
+
* @internal
|
|
51
|
+
*/
|
|
52
|
+
eventManager;
|
|
53
|
+
/**
|
|
54
|
+
* Whether the Actor instance was initialized. This is set by calling {@link Actor.init}.
|
|
55
|
+
*/
|
|
56
|
+
initialized = false;
|
|
57
|
+
/**
|
|
58
|
+
* Set if the Actor called a method that requires the instance to be initialized, but did not do so.
|
|
59
|
+
* A call to `init` after this warning is emitted is considered an invalid state and will throw an error.
|
|
60
|
+
*/
|
|
61
|
+
warnedAboutMissingInitCall = false;
|
|
62
|
+
/**
|
|
63
|
+
* Set if the Actor is currently rebooting.
|
|
64
|
+
*/
|
|
65
|
+
isRebooting = false;
|
|
66
|
+
/**
|
|
67
|
+
* Set if the Actor is currently exiting. Prevents double-exit from graceful shutdown handlers.
|
|
68
|
+
*/
|
|
69
|
+
isExiting = false;
|
|
70
|
+
/**
|
|
71
|
+
* References to graceful shutdown handlers so they can be removed during cleanup.
|
|
72
|
+
*/
|
|
73
|
+
gracefulShutdownHandlers = {};
|
|
74
|
+
chargingManager;
|
|
75
|
+
/**
|
|
76
|
+
* Tracks which aliased storages have been purged during this session,
|
|
77
|
+
* so we only purge them once (on first open) when running locally.
|
|
78
|
+
* @internal
|
|
79
|
+
*/
|
|
80
|
+
purgedStorageAliases = new Set();
|
|
39
81
|
constructor(options = {}) {
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
Object.
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
* Default {@link EventManager} instance.
|
|
62
|
-
* @internal
|
|
63
|
-
*/
|
|
64
|
-
Object.defineProperty(this, "eventManager", {
|
|
65
|
-
enumerable: true,
|
|
66
|
-
configurable: true,
|
|
67
|
-
writable: true,
|
|
68
|
-
value: void 0
|
|
69
|
-
});
|
|
70
|
-
/**
|
|
71
|
-
* Whether the Actor instance was initialized. This is set by calling {@link Actor.init}.
|
|
72
|
-
*/
|
|
73
|
-
Object.defineProperty(this, "initialized", {
|
|
74
|
-
enumerable: true,
|
|
75
|
-
configurable: true,
|
|
76
|
-
writable: true,
|
|
77
|
-
value: false
|
|
78
|
-
});
|
|
79
|
-
/**
|
|
80
|
-
* Set if the Actor called a method that requires the instance to be initialized, but did not do so.
|
|
81
|
-
* A call to `init` after this warning is emitted is considered an invalid state and will throw an error.
|
|
82
|
-
*/
|
|
83
|
-
Object.defineProperty(this, "warnedAboutMissingInitCall", {
|
|
84
|
-
enumerable: true,
|
|
85
|
-
configurable: true,
|
|
86
|
-
writable: true,
|
|
87
|
-
value: false
|
|
88
|
-
});
|
|
89
|
-
/**
|
|
90
|
-
* Set if the Actor is currently rebooting.
|
|
91
|
-
*/
|
|
92
|
-
Object.defineProperty(this, "isRebooting", {
|
|
93
|
-
enumerable: true,
|
|
94
|
-
configurable: true,
|
|
95
|
-
writable: true,
|
|
96
|
-
value: false
|
|
97
|
-
});
|
|
98
|
-
/**
|
|
99
|
-
* Set if the Actor is currently exiting. Prevents double-exit from graceful shutdown handlers.
|
|
100
|
-
*/
|
|
101
|
-
Object.defineProperty(this, "isExiting", {
|
|
102
|
-
enumerable: true,
|
|
103
|
-
configurable: true,
|
|
104
|
-
writable: true,
|
|
105
|
-
value: false
|
|
106
|
-
});
|
|
107
|
-
/**
|
|
108
|
-
* References to graceful shutdown handlers so they can be removed during cleanup.
|
|
109
|
-
*/
|
|
110
|
-
Object.defineProperty(this, "gracefulShutdownHandlers", {
|
|
111
|
-
enumerable: true,
|
|
112
|
-
configurable: true,
|
|
113
|
-
writable: true,
|
|
114
|
-
value: {}
|
|
115
|
-
});
|
|
116
|
-
Object.defineProperty(this, "chargingManager", {
|
|
117
|
-
enumerable: true,
|
|
118
|
-
configurable: true,
|
|
119
|
-
writable: true,
|
|
120
|
-
value: void 0
|
|
121
|
-
});
|
|
122
|
-
/**
|
|
123
|
-
* Tracks which aliased storages have been purged during this session,
|
|
124
|
-
* so we only purge them once (on first open) when running locally.
|
|
125
|
-
* @internal
|
|
126
|
-
*/
|
|
127
|
-
Object.defineProperty(this, "purgedStorageAliases", {
|
|
128
|
-
enumerable: true,
|
|
129
|
-
configurable: true,
|
|
130
|
-
writable: true,
|
|
131
|
-
value: new Set()
|
|
132
|
-
});
|
|
133
|
-
// use default configuration object if nothing overridden (it fallbacks to env vars)
|
|
134
|
-
this.config = Object.keys(options).length === 0 ? configuration_js_1.Configuration.getGlobalConfig() : new configuration_js_1.Configuration(options);
|
|
82
|
+
const { configuration, ...configOptions } = options;
|
|
83
|
+
if (configuration) {
|
|
84
|
+
// BYO Configuration takes precedence; field-level overrides are
|
|
85
|
+
// ignored to keep the contract unambiguous. It must be the SDK's
|
|
86
|
+
// Configuration subclass, not a bare crawlee one — env-var
|
|
87
|
+
// resolution is driven by the subclass's `static fields`
|
|
88
|
+
// (`apifyConfigFields`) at construction, so a crawlee instance
|
|
89
|
+
// would silently expose none of the `APIFY_*`/`ACTOR_*` values.
|
|
90
|
+
if (!(configuration instanceof Configuration)) {
|
|
91
|
+
throw new Error('Actor `configuration` must be an Apify SDK Configuration (imported from `apify`), ' +
|
|
92
|
+
'not a crawlee Configuration, otherwise APIFY_*/ACTOR_* environment variables are not resolved.');
|
|
93
|
+
}
|
|
94
|
+
this.config = configuration;
|
|
95
|
+
}
|
|
96
|
+
else if (Object.keys(configOptions).length === 0) {
|
|
97
|
+
// use default configuration object if nothing overridden (it fallbacks to env vars)
|
|
98
|
+
this.config = Configuration.getGlobalConfig();
|
|
99
|
+
}
|
|
100
|
+
else {
|
|
101
|
+
this.config = new Configuration(configOptions);
|
|
102
|
+
}
|
|
135
103
|
this.apifyClient = this.newClient();
|
|
136
|
-
this.eventManager = new
|
|
137
|
-
this.chargingManager = new
|
|
104
|
+
this.eventManager = new PlatformEventManager(this.config);
|
|
105
|
+
this.chargingManager = new ChargingManager(this.config, this.apifyClient);
|
|
138
106
|
}
|
|
139
107
|
/**
|
|
140
108
|
* Runs the main user function that performs the job of the Actor
|
|
@@ -210,9 +178,9 @@ class Actor {
|
|
|
210
178
|
await this.exit(options);
|
|
211
179
|
}
|
|
212
180
|
catch (err) {
|
|
213
|
-
|
|
181
|
+
log.exception(err, err.message);
|
|
214
182
|
await this.exit({
|
|
215
|
-
exitCode:
|
|
183
|
+
exitCode: EXIT_CODES.ERROR_USER_FUNCTION_THREW,
|
|
216
184
|
});
|
|
217
185
|
}
|
|
218
186
|
return ret;
|
|
@@ -223,7 +191,7 @@ class Actor {
|
|
|
223
191
|
*/
|
|
224
192
|
async init(options = {}) {
|
|
225
193
|
if (this.initialized) {
|
|
226
|
-
|
|
194
|
+
log.debug(`Actor SDK was already initialized`);
|
|
227
195
|
return;
|
|
228
196
|
}
|
|
229
197
|
// If the warning about forgotten init call was emitted, we will not continue the init procedure.
|
|
@@ -234,22 +202,23 @@ class Actor {
|
|
|
234
202
|
].join('\n'));
|
|
235
203
|
}
|
|
236
204
|
this.initialized = true;
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
205
|
+
checkCrawleeVersion();
|
|
206
|
+
log.info('System info', getSystemInfo());
|
|
207
|
+
printOutdatedSdkWarning();
|
|
208
|
+
// Register this Actor's config as the global one so crawlee storages and
|
|
209
|
+
// the event manager resolve the same instance (`availableMemoryRatio` /
|
|
210
|
+
// `disableBrowserSandbox` at-home defaults now live in `Configuration`).
|
|
211
|
+
serviceLocator.setConfiguration(this.config);
|
|
241
212
|
if (this.isAtHome()) {
|
|
242
|
-
this.config
|
|
243
|
-
this.
|
|
244
|
-
this.config.useStorageClient(this.apifyClient);
|
|
245
|
-
this.config.useEventManager(this.eventManager);
|
|
213
|
+
serviceLocator.setStorageClient(new ApifyStorageClient(this.apifyClient, this.config, () => this.chargingManager));
|
|
214
|
+
serviceLocator.setEventManager(this.eventManager);
|
|
246
215
|
}
|
|
247
216
|
else if (options.storage) {
|
|
248
|
-
|
|
217
|
+
serviceLocator.setStorageClient(options.storage);
|
|
249
218
|
}
|
|
250
219
|
// Init the event manager the config uses
|
|
251
|
-
await
|
|
252
|
-
|
|
220
|
+
await serviceLocator.getEventManager().init();
|
|
221
|
+
log.debug(`Events initialized`);
|
|
253
222
|
// Register handlers for aborting and migrating events for automatic graceful shutdown.
|
|
254
223
|
// - aborting: calls Actor.exit() to terminate the run gracefully
|
|
255
224
|
// - migrating: calls Actor.reboot() to speed up migration (the run continues on a new worker)
|
|
@@ -259,28 +228,27 @@ class Actor {
|
|
|
259
228
|
this.gracefulShutdownHandlers.aborting = () => {
|
|
260
229
|
setTimeout(() => {
|
|
261
230
|
this.exit().catch((err) => {
|
|
262
|
-
|
|
231
|
+
log.exception(err, 'Failed to exit gracefully');
|
|
263
232
|
});
|
|
264
233
|
}, delay);
|
|
265
234
|
};
|
|
266
|
-
this.on(
|
|
235
|
+
this.on(ACTOR_EVENT_NAMES.ABORTING, this.gracefulShutdownHandlers.aborting);
|
|
267
236
|
this.gracefulShutdownHandlers.migrating = () => {
|
|
268
237
|
setTimeout(() => {
|
|
269
238
|
this.reboot().catch((err) => {
|
|
270
|
-
|
|
239
|
+
log.exception(err, 'Failed to reboot on migration');
|
|
271
240
|
});
|
|
272
241
|
}, delay);
|
|
273
242
|
};
|
|
274
|
-
this.on(
|
|
243
|
+
this.on(ACTOR_EVENT_NAMES.MIGRATING, this.gracefulShutdownHandlers.migrating);
|
|
275
244
|
}
|
|
276
|
-
await
|
|
245
|
+
await purgeDefaultStorages({
|
|
277
246
|
config: this.config,
|
|
278
247
|
onlyPurgeOnce: true,
|
|
279
248
|
});
|
|
280
|
-
|
|
281
|
-
configuration_js_1.Configuration.storage.enterWith(this.config);
|
|
249
|
+
log.debug(`Default storages purged`);
|
|
282
250
|
await this.chargingManager.init();
|
|
283
|
-
|
|
251
|
+
log.debug(`ChargingManager initialized`, this.chargingManager.getPricingInfo());
|
|
284
252
|
}
|
|
285
253
|
/**
|
|
286
254
|
* @ignore
|
|
@@ -288,7 +256,7 @@ class Actor {
|
|
|
288
256
|
async exit(messageOrOptions, options = {}) {
|
|
289
257
|
// Prevent double-exit from graceful shutdown handlers
|
|
290
258
|
if (this.isExiting) {
|
|
291
|
-
|
|
259
|
+
log.debug('Actor.exit() called while already exiting, skipping');
|
|
292
260
|
return;
|
|
293
261
|
}
|
|
294
262
|
this.isExiting = true;
|
|
@@ -296,26 +264,26 @@ class Actor {
|
|
|
296
264
|
typeof messageOrOptions === 'string'
|
|
297
265
|
? { ...options, statusMessage: messageOrOptions }
|
|
298
266
|
: { ...messageOrOptions, ...options };
|
|
299
|
-
options.exit
|
|
300
|
-
options.exitCode
|
|
301
|
-
options.timeoutSecs
|
|
267
|
+
options.exit ??= true;
|
|
268
|
+
options.exitCode ??= EXIT_CODES.SUCCESS;
|
|
269
|
+
options.timeoutSecs ??= 30;
|
|
302
270
|
this._ensureActorInit('exit');
|
|
303
|
-
const client =
|
|
304
|
-
const events =
|
|
271
|
+
const client = serviceLocator.getStorageClient();
|
|
272
|
+
const events = serviceLocator.getEventManager();
|
|
305
273
|
// Remove graceful shutdown handlers to prevent them from interfering with exit
|
|
306
274
|
if (this.gracefulShutdownHandlers.aborting) {
|
|
307
|
-
this.off(
|
|
275
|
+
this.off(ACTOR_EVENT_NAMES.ABORTING, this.gracefulShutdownHandlers.aborting);
|
|
308
276
|
}
|
|
309
277
|
if (this.gracefulShutdownHandlers.migrating) {
|
|
310
|
-
this.off(
|
|
278
|
+
this.off(ACTOR_EVENT_NAMES.MIGRATING, this.gracefulShutdownHandlers.migrating);
|
|
311
279
|
}
|
|
312
280
|
// Close the event manager and emit the final PERSIST_STATE event
|
|
313
281
|
await events.close();
|
|
314
|
-
|
|
282
|
+
log.debug(`Events closed`);
|
|
315
283
|
// Emit the exit event
|
|
316
284
|
events.emit("exit" /* EventType.EXIT */, options);
|
|
317
285
|
// Wait for all event listeners to be processed
|
|
318
|
-
|
|
286
|
+
log.debug(`Waiting for all event listeners to complete their execution (with ${options.timeoutSecs} seconds timeout)`);
|
|
319
287
|
if (options.exit) {
|
|
320
288
|
// `addTimeoutToPromise` is a cooperative timeout. This ensures that the process exits
|
|
321
289
|
// after the timeout, even if the event listeners don't trigger the timeout.
|
|
@@ -323,13 +291,13 @@ class Actor {
|
|
|
323
291
|
process.exit(options.exitCode);
|
|
324
292
|
}, options.timeoutSecs * 1000);
|
|
325
293
|
}
|
|
326
|
-
await
|
|
294
|
+
await addTimeoutToPromise(async () => {
|
|
327
295
|
await events.waitForAllListenersToComplete();
|
|
328
296
|
if (client.teardown) {
|
|
329
297
|
let finished = false;
|
|
330
298
|
setTimeout(() => {
|
|
331
299
|
if (!finished) {
|
|
332
|
-
|
|
300
|
+
log.info('Waiting for the storage to write its state to file system.');
|
|
333
301
|
}
|
|
334
302
|
}, 1000);
|
|
335
303
|
await client.teardown();
|
|
@@ -341,7 +309,7 @@ class Actor {
|
|
|
341
309
|
level: options.exitCode > 0 ? 'ERROR' : 'INFO',
|
|
342
310
|
});
|
|
343
311
|
// Waiting 1ms is enough for the network request to be sent. We don't need to wait for the response.
|
|
344
|
-
await Promise.race([statusMessagePromise,
|
|
312
|
+
await Promise.race([statusMessagePromise, sleep(1)]);
|
|
345
313
|
}
|
|
346
314
|
}, options.timeoutSecs * 1000, `Waiting for all event listeners to complete their execution timed out after ${options.timeoutSecs} seconds`).catch(() => {
|
|
347
315
|
if (options.exit) {
|
|
@@ -366,13 +334,13 @@ class Actor {
|
|
|
366
334
|
* @ignore
|
|
367
335
|
*/
|
|
368
336
|
on(event, listener) {
|
|
369
|
-
|
|
337
|
+
serviceLocator.getEventManager().on(event, listener);
|
|
370
338
|
}
|
|
371
339
|
/**
|
|
372
340
|
* @ignore
|
|
373
341
|
*/
|
|
374
342
|
off(event, listener) {
|
|
375
|
-
|
|
343
|
+
serviceLocator.getEventManager().off(event, listener);
|
|
376
344
|
}
|
|
377
345
|
/**
|
|
378
346
|
* Runs an Actor on the Apify platform using the current user account (determined by the `APIFY_TOKEN` environment variable).
|
|
@@ -507,14 +475,14 @@ class Actor {
|
|
|
507
475
|
*/
|
|
508
476
|
async metamorph(targetActorId, input, options = {}) {
|
|
509
477
|
if (!this.isAtHome()) {
|
|
510
|
-
|
|
478
|
+
log.warning('Actor.metamorph() is only supported when running on the Apify platform.');
|
|
511
479
|
return;
|
|
512
480
|
}
|
|
513
|
-
const { customAfterSleepMillis = this.config.
|
|
514
|
-
const runId = this.config.
|
|
481
|
+
const { customAfterSleepMillis = this.config.metamorphAfterSleepMillis, ...metamorphOpts } = options;
|
|
482
|
+
const runId = this.config.actorRunId;
|
|
515
483
|
await this.apifyClient.run(runId).metamorph(targetActorId, input, metamorphOpts);
|
|
516
484
|
// Wait some time for container to be stopped.
|
|
517
|
-
await
|
|
485
|
+
await sleep(customAfterSleepMillis);
|
|
518
486
|
}
|
|
519
487
|
/**
|
|
520
488
|
* Internally reboots this Actor. The system stops the current container and starts
|
|
@@ -526,32 +494,32 @@ class Actor {
|
|
|
526
494
|
async reboot(options = {}) {
|
|
527
495
|
this._ensureActorInit('reboot');
|
|
528
496
|
if (!this.isAtHome()) {
|
|
529
|
-
|
|
497
|
+
log.warning('Actor.reboot() is only supported when running on the Apify platform.');
|
|
530
498
|
return;
|
|
531
499
|
}
|
|
532
500
|
if (this.isRebooting) {
|
|
533
|
-
|
|
501
|
+
log.debug('Actor is already rebooting, skipping the additional reboot call.');
|
|
534
502
|
return;
|
|
535
503
|
}
|
|
536
504
|
this.isRebooting = true;
|
|
537
505
|
// Waiting for all the listeners to finish, as `.reboot()` kills the container.
|
|
538
506
|
await Promise.all([
|
|
539
507
|
// `persistState` for individual RequestLists, RequestQueue... instances to be persisted
|
|
540
|
-
...
|
|
508
|
+
...serviceLocator
|
|
541
509
|
.getEventManager()
|
|
542
510
|
.listeners("persistState" /* EventType.PERSIST_STATE */)
|
|
543
511
|
.map(async (x) => x({})),
|
|
544
512
|
// `migrating` to pause Apify crawlers
|
|
545
|
-
...
|
|
513
|
+
...serviceLocator
|
|
546
514
|
.getEventManager()
|
|
547
515
|
.listeners("migrating" /* EventType.MIGRATING */)
|
|
548
516
|
.map(async (x) => x({})),
|
|
549
517
|
]);
|
|
550
|
-
const runId = this.config.
|
|
518
|
+
const runId = this.config.actorRunId;
|
|
551
519
|
await this.apifyClient.run(runId).reboot();
|
|
552
520
|
// Wait some time for container to be stopped.
|
|
553
|
-
const { customAfterSleepMillis = this.config.
|
|
554
|
-
await
|
|
521
|
+
const { customAfterSleepMillis = this.config.metamorphAfterSleepMillis } = options;
|
|
522
|
+
await sleep(customAfterSleepMillis);
|
|
555
523
|
}
|
|
556
524
|
/**
|
|
557
525
|
* Creates an ad-hoc webhook for the current Actor run, which lets you receive a notification when the Actor run finished or failed.
|
|
@@ -566,25 +534,25 @@ class Actor {
|
|
|
566
534
|
* @ignore
|
|
567
535
|
*/
|
|
568
536
|
async addWebhook(options) {
|
|
569
|
-
(
|
|
570
|
-
eventTypes:
|
|
571
|
-
requestUrl:
|
|
572
|
-
payloadTemplate:
|
|
573
|
-
idempotencyKey:
|
|
574
|
-
headersTemplate:
|
|
575
|
-
description:
|
|
576
|
-
ignoreSslErrors:
|
|
577
|
-
doNotRetry:
|
|
578
|
-
shouldInterpolateStrings:
|
|
579
|
-
isApifyIntegration:
|
|
537
|
+
ow(options, ow.object.exactShape({
|
|
538
|
+
eventTypes: ow.array.ofType(ow.string),
|
|
539
|
+
requestUrl: ow.string,
|
|
540
|
+
payloadTemplate: ow.optional.string,
|
|
541
|
+
idempotencyKey: ow.optional.string,
|
|
542
|
+
headersTemplate: ow.optional.string,
|
|
543
|
+
description: ow.optional.string,
|
|
544
|
+
ignoreSslErrors: ow.optional.boolean,
|
|
545
|
+
doNotRetry: ow.optional.boolean,
|
|
546
|
+
shouldInterpolateStrings: ow.optional.boolean,
|
|
547
|
+
isApifyIntegration: ow.optional.boolean,
|
|
580
548
|
}));
|
|
581
549
|
if (!this.isAtHome()) {
|
|
582
|
-
|
|
550
|
+
log.warning('Actor.addWebhook() is only supported when running on the Apify platform. The webhook will not be invoked.');
|
|
583
551
|
return undefined;
|
|
584
552
|
}
|
|
585
|
-
const runId = this.config.
|
|
553
|
+
const runId = this.config.actorRunId;
|
|
586
554
|
if (!runId) {
|
|
587
|
-
throw new Error(`Environment variable ${
|
|
555
|
+
throw new Error(`Environment variable ${ACTOR_ENV_VARS.RUN_ID} is not set!`);
|
|
588
556
|
}
|
|
589
557
|
return this.apifyClient.webhooks().create({
|
|
590
558
|
...options,
|
|
@@ -603,34 +571,34 @@ class Actor {
|
|
|
603
571
|
*/
|
|
604
572
|
async setStatusMessage(statusMessage, options) {
|
|
605
573
|
const { isStatusMessageTerminal, level } = options || {};
|
|
606
|
-
(
|
|
607
|
-
(
|
|
574
|
+
ow(statusMessage, ow.string);
|
|
575
|
+
ow(isStatusMessageTerminal, ow.optional.boolean);
|
|
608
576
|
this._ensureActorInit('setStatusMessage');
|
|
609
577
|
const loggedStatusMessage = `[Status message]: ${statusMessage}`;
|
|
610
578
|
switch (level) {
|
|
611
579
|
case 'DEBUG':
|
|
612
|
-
|
|
580
|
+
log.debug(loggedStatusMessage);
|
|
613
581
|
break;
|
|
614
582
|
case 'WARNING':
|
|
615
|
-
|
|
583
|
+
log.warning(loggedStatusMessage);
|
|
616
584
|
break;
|
|
617
585
|
case 'ERROR':
|
|
618
|
-
|
|
586
|
+
log.error(loggedStatusMessage);
|
|
619
587
|
break;
|
|
620
588
|
default:
|
|
621
|
-
|
|
589
|
+
log.info(loggedStatusMessage);
|
|
622
590
|
break;
|
|
623
591
|
}
|
|
624
|
-
const client =
|
|
592
|
+
const client = serviceLocator.getStorageClient();
|
|
625
593
|
// just to be sure, this should be fast
|
|
626
|
-
await
|
|
594
|
+
await addTimeoutToPromise(async () => client.setStatusMessage(statusMessage, {
|
|
627
595
|
isStatusMessageTerminal,
|
|
628
596
|
level,
|
|
629
|
-
}), 1000, 'Setting status message timed out after 1s').catch((e) =>
|
|
630
|
-
const runId = this.config.
|
|
597
|
+
}), 1000, 'Setting status message timed out after 1s').catch((e) => log.warning(e.message));
|
|
598
|
+
const runId = this.config.actorRunId;
|
|
631
599
|
if (runId) {
|
|
632
600
|
// just to be sure, this should be fast
|
|
633
|
-
const run = await
|
|
601
|
+
const run = await addTimeoutToPromise(async () => this.apifyClient.run(runId).get(), 1000, 'Getting the current run timed out after 1s').catch((e) => log.warning(e.message));
|
|
634
602
|
if (run) {
|
|
635
603
|
return run;
|
|
636
604
|
}
|
|
@@ -697,11 +665,11 @@ class Actor {
|
|
|
697
665
|
* @ignore
|
|
698
666
|
*/
|
|
699
667
|
async openDataset(datasetIdOrName, options = {}) {
|
|
700
|
-
(
|
|
701
|
-
forceCloud:
|
|
668
|
+
ow(options, ow.object.exactShape({
|
|
669
|
+
forceCloud: ow.optional.boolean,
|
|
702
670
|
}));
|
|
703
671
|
this._ensureActorInit('openDataset');
|
|
704
|
-
return this._openStorage(
|
|
672
|
+
return this._openStorage(Dataset, datasetIdOrName, options);
|
|
705
673
|
}
|
|
706
674
|
/**
|
|
707
675
|
* Gets a value from the default {@link KeyValueStore} associated with the current Actor run.
|
|
@@ -803,20 +771,19 @@ class Actor {
|
|
|
803
771
|
*/
|
|
804
772
|
async getInput() {
|
|
805
773
|
this._ensureActorInit('getInput');
|
|
806
|
-
const inputSecretsPrivateKeyFile = this.config
|
|
807
|
-
const
|
|
808
|
-
const rawInput = await this.getValue(this.config.get('inputKey'));
|
|
774
|
+
const { inputSecretsPrivateKeyFile, inputSecretsPrivateKeyPassphrase } = this.config;
|
|
775
|
+
const rawInput = await this.getValue(this.config.inputKey);
|
|
809
776
|
let input = rawInput;
|
|
810
|
-
if (
|
|
777
|
+
if (ow.isValid(rawInput, ow.object.nonEmpty) &&
|
|
811
778
|
inputSecretsPrivateKeyFile &&
|
|
812
779
|
inputSecretsPrivateKeyPassphrase) {
|
|
813
|
-
const privateKey =
|
|
780
|
+
const privateKey = createPrivateKey({
|
|
814
781
|
key: Buffer.from(inputSecretsPrivateKeyFile, 'base64'),
|
|
815
782
|
passphrase: inputSecretsPrivateKeyPassphrase,
|
|
816
783
|
});
|
|
817
|
-
input =
|
|
784
|
+
input = decryptInputSecrets({ input: rawInput, privateKey });
|
|
818
785
|
}
|
|
819
|
-
if (
|
|
786
|
+
if (ow.isValid(input, ow.object.nonEmpty) && !Buffer.isBuffer(input)) {
|
|
820
787
|
input = await this.inferDefaultsFromInputSchema(input);
|
|
821
788
|
}
|
|
822
789
|
return input;
|
|
@@ -849,11 +816,11 @@ class Actor {
|
|
|
849
816
|
* @ignore
|
|
850
817
|
*/
|
|
851
818
|
async openKeyValueStore(storeIdOrName, options = {}) {
|
|
852
|
-
(
|
|
853
|
-
forceCloud:
|
|
819
|
+
ow(options, ow.object.exactShape({
|
|
820
|
+
forceCloud: ow.optional.boolean,
|
|
854
821
|
}));
|
|
855
822
|
this._ensureActorInit('openKeyValueStore');
|
|
856
|
-
return this._openStorage(
|
|
823
|
+
return this._openStorage(KeyValueStore, storeIdOrName, options);
|
|
857
824
|
}
|
|
858
825
|
/**
|
|
859
826
|
* Opens a request queue and returns a promise resolving to an instance
|
|
@@ -874,13 +841,13 @@ class Actor {
|
|
|
874
841
|
* @ignore
|
|
875
842
|
*/
|
|
876
843
|
async openRequestQueue(queueIdOrName, options = {}) {
|
|
877
|
-
(
|
|
878
|
-
forceCloud:
|
|
844
|
+
ow(options, ow.object.exactShape({
|
|
845
|
+
forceCloud: ow.optional.boolean,
|
|
879
846
|
}));
|
|
880
847
|
this._ensureActorInit('openRequestQueue');
|
|
881
|
-
const queue = await this._openStorage(
|
|
848
|
+
const queue = await this._openStorage(RequestQueue, queueIdOrName, options);
|
|
882
849
|
// eslint-disable-next-line dot-notation
|
|
883
|
-
queue['initialCount'] = (await queue.client.
|
|
850
|
+
queue['initialCount'] = (await queue.client.getMetadata())?.totalRequestCount ?? 0;
|
|
884
851
|
return queue;
|
|
885
852
|
}
|
|
886
853
|
/**
|
|
@@ -932,7 +899,7 @@ class Actor {
|
|
|
932
899
|
if (dontUseApifyProxy && dontUseCustomProxies) {
|
|
933
900
|
return undefined;
|
|
934
901
|
}
|
|
935
|
-
const proxyConfiguration = new
|
|
902
|
+
const proxyConfiguration = new ProxyConfiguration(options, this.config);
|
|
936
903
|
if (await proxyConfiguration.initialize({ checkAccess })) {
|
|
937
904
|
return proxyConfiguration;
|
|
938
905
|
}
|
|
@@ -972,7 +939,7 @@ class Actor {
|
|
|
972
939
|
*/
|
|
973
940
|
getModifiedActorEnvVars() {
|
|
974
941
|
const modifiedActorEnvVars = {};
|
|
975
|
-
Object.entries(
|
|
942
|
+
Object.entries(ACTOR_ENV_VARS).forEach(([k, v]) => {
|
|
976
943
|
// Prepend `ACTOR_` to env vars so ApifyEnv structure is preserved
|
|
977
944
|
if (['ID', 'RUN_ID', 'TASK_ID'].includes(k)) {
|
|
978
945
|
modifiedActorEnvVars[`ACTOR_${k}`] = v;
|
|
@@ -996,17 +963,17 @@ class Actor {
|
|
|
996
963
|
const env = process.env || {};
|
|
997
964
|
const envVars = {};
|
|
998
965
|
for (const [shortName, fullName] of Object.entries({
|
|
999
|
-
...
|
|
966
|
+
...APIFY_ENV_VARS,
|
|
1000
967
|
...this.getModifiedActorEnvVars(),
|
|
1001
968
|
})) {
|
|
1002
|
-
const camelCaseName =
|
|
969
|
+
const camelCaseName = snakeCaseToCamelCase(shortName);
|
|
1003
970
|
let value = env[fullName];
|
|
1004
971
|
// Parse dates and integers.
|
|
1005
972
|
if (value && fullName.endsWith('_AT')) {
|
|
1006
973
|
const unix = Date.parse(value);
|
|
1007
974
|
value = unix > 0 ? new Date(unix) : undefined;
|
|
1008
975
|
}
|
|
1009
|
-
else if (
|
|
976
|
+
else if (INTEGER_ENV_VARS.includes(fullName)) {
|
|
1010
977
|
value = parseInt(value, 10);
|
|
1011
978
|
}
|
|
1012
979
|
Reflect.set(envVars, camelCaseName, value || value === 0 ? value : null);
|
|
@@ -1022,23 +989,23 @@ class Actor {
|
|
|
1022
989
|
* @ignore
|
|
1023
990
|
*/
|
|
1024
991
|
newClient(options = {}) {
|
|
1025
|
-
const { storageDir, ...storageClientOptions } = this.config.
|
|
1026
|
-
const { apifyVersion, crawleeVersion } =
|
|
1027
|
-
return
|
|
1028
|
-
baseUrl: this.config.
|
|
1029
|
-
publicBaseUrl: this.config.
|
|
1030
|
-
token: this.config.
|
|
992
|
+
const { storageDir, ...storageClientOptions } = (this.config.storageClientOptions ?? {});
|
|
993
|
+
const { apifyVersion, crawleeVersion } = getSystemInfo();
|
|
994
|
+
return new ApifyClient({
|
|
995
|
+
baseUrl: this.config.apiBaseUrl,
|
|
996
|
+
publicBaseUrl: this.config.apiPublicBaseUrl,
|
|
997
|
+
token: this.config.token,
|
|
1031
998
|
userAgentSuffix: [`SDK/${apifyVersion}`, `Crawlee/${crawleeVersion}`],
|
|
1032
999
|
...storageClientOptions,
|
|
1033
1000
|
...options, // allow overriding the instance configuration
|
|
1034
|
-
}
|
|
1001
|
+
});
|
|
1035
1002
|
}
|
|
1036
1003
|
/**
|
|
1037
1004
|
* Returns `true` when code is running on Apify platform and `false` otherwise (for example locally).
|
|
1038
1005
|
* @ignore
|
|
1039
1006
|
*/
|
|
1040
1007
|
isAtHome() {
|
|
1041
|
-
return !!process.env[
|
|
1008
|
+
return !!process.env[APIFY_ENV_VARS.IS_AT_HOME];
|
|
1042
1009
|
}
|
|
1043
1010
|
/**
|
|
1044
1011
|
* Easily create and manage state values. All state values are automatically persisted.
|
|
@@ -1051,8 +1018,8 @@ class Actor {
|
|
|
1051
1018
|
*/
|
|
1052
1019
|
async useState(name, defaultValue = {}, options) {
|
|
1053
1020
|
this._ensureActorInit('useState');
|
|
1054
|
-
const kvStore = await
|
|
1055
|
-
config: options?.config ||
|
|
1021
|
+
const kvStore = await KeyValueStore.open(options?.keyValueStoreName, {
|
|
1022
|
+
config: options?.config || Configuration.getGlobalConfig(),
|
|
1056
1023
|
});
|
|
1057
1024
|
return kvStore.getAutoSavedValue(name || 'APIFY_GLOBAL_STATE', defaultValue);
|
|
1058
1025
|
}
|
|
@@ -1622,11 +1589,11 @@ class Actor {
|
|
|
1622
1589
|
}
|
|
1623
1590
|
/** @internal */
|
|
1624
1591
|
static getDefaultInstance() {
|
|
1625
|
-
this._instance
|
|
1592
|
+
this._instance ??= new Actor();
|
|
1626
1593
|
return this._instance;
|
|
1627
1594
|
}
|
|
1628
1595
|
usesPushDataInterception(dataset) {
|
|
1629
|
-
return Boolean(dataset.client[
|
|
1596
|
+
return Boolean(dataset.client[USES_PUSH_DATA_INTERCEPTION]);
|
|
1630
1597
|
}
|
|
1631
1598
|
async pushDataViaInterceptedClient(dataset, item, eventName) {
|
|
1632
1599
|
// PatchedDatasetClient will handle charging and item limiting.
|
|
@@ -1634,7 +1601,7 @@ class Actor {
|
|
|
1634
1601
|
const context = {
|
|
1635
1602
|
eventName,
|
|
1636
1603
|
};
|
|
1637
|
-
await
|
|
1604
|
+
await pushDataChargingContext.run(context, async () => {
|
|
1638
1605
|
await dataset.pushData(item);
|
|
1639
1606
|
});
|
|
1640
1607
|
return (context.chargeResult ?? {
|
|
@@ -1654,8 +1621,8 @@ class Actor {
|
|
|
1654
1621
|
chargeableWithinLimit: {},
|
|
1655
1622
|
};
|
|
1656
1623
|
}
|
|
1657
|
-
const isDefaultDataset = dataset.id === this.config.
|
|
1658
|
-
return
|
|
1624
|
+
const isDefaultDataset = dataset.id === this.config.defaultDatasetId;
|
|
1625
|
+
return pushDataAndCharge({
|
|
1659
1626
|
chargingManager: this.chargingManager,
|
|
1660
1627
|
items,
|
|
1661
1628
|
eventName: explicitEventName,
|
|
@@ -1664,9 +1631,11 @@ class Actor {
|
|
|
1664
1631
|
});
|
|
1665
1632
|
}
|
|
1666
1633
|
async _openStorage(storageClass, identifier, options = {}) {
|
|
1667
|
-
return
|
|
1634
|
+
return openStorage(storageClass, identifier, {
|
|
1668
1635
|
config: this.config,
|
|
1669
|
-
client: options.forceCloud
|
|
1636
|
+
client: options.forceCloud
|
|
1637
|
+
? new ApifyStorageClient(this.apifyClient, this.config, () => this.chargingManager)
|
|
1638
|
+
: undefined,
|
|
1670
1639
|
purgedStorageAliases: this.purgedStorageAliases,
|
|
1671
1640
|
});
|
|
1672
1641
|
}
|
|
@@ -1679,7 +1648,7 @@ class Actor {
|
|
|
1679
1648
|
return;
|
|
1680
1649
|
}
|
|
1681
1650
|
this.warnedAboutMissingInitCall = true;
|
|
1682
|
-
|
|
1651
|
+
log.warning([
|
|
1683
1652
|
`Actor.${methodCalled}() was called but the Actor instance was not initialized.`,
|
|
1684
1653
|
'Did you forget to call Actor.init()?',
|
|
1685
1654
|
].join('\n'));
|
|
@@ -1693,7 +1662,7 @@ class Actor {
|
|
|
1693
1662
|
if (this.isAtHome() && env.timeoutAt !== null) {
|
|
1694
1663
|
return env.timeoutAt.getTime() - Date.now();
|
|
1695
1664
|
}
|
|
1696
|
-
|
|
1665
|
+
log.warning('Using `inherit` argument is only possible when the Actor is running on the Apify platform and when the ' +
|
|
1697
1666
|
'timeout for the Actor run is set.');
|
|
1698
1667
|
return undefined;
|
|
1699
1668
|
}
|
|
@@ -1704,17 +1673,16 @@ class Actor {
|
|
|
1704
1673
|
return input;
|
|
1705
1674
|
}
|
|
1706
1675
|
// On local, we can get the input schema from the local config
|
|
1707
|
-
const inputSchema =
|
|
1676
|
+
const inputSchema = readInputSchema();
|
|
1708
1677
|
// Don't emit warning if there is no input schema defined
|
|
1709
|
-
if (inputSchema ===
|
|
1678
|
+
if (inputSchema === noActorInputSchemaDefinedMarker) {
|
|
1710
1679
|
return input;
|
|
1711
1680
|
}
|
|
1712
1681
|
if (!inputSchema) {
|
|
1713
|
-
|
|
1682
|
+
log.warning('Failed to find the input schema for the local run of this Actor. Your input will be missing fields that have default values set if they are missing from the input you are using.');
|
|
1714
1683
|
return input;
|
|
1715
1684
|
}
|
|
1716
|
-
const defaults =
|
|
1685
|
+
const defaults = getDefaultsFromInputSchema(inputSchema);
|
|
1717
1686
|
return { ...defaults, ...input };
|
|
1718
1687
|
}
|
|
1719
1688
|
}
|
|
1720
|
-
exports.Actor = Actor;
|