apify 4.0.0-beta.12 → 4.0.0-beta.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -48
- package/dist/actor.d.ts +157 -61
- package/dist/actor.js +278 -91
- package/dist/apify_storage_client.d.ts +54 -0
- package/dist/apify_storage_client.js +152 -0
- package/dist/charging.d.ts +43 -2
- package/dist/charging.js +196 -54
- package/dist/configuration.d.ts +79 -132
- package/dist/configuration.js +114 -141
- package/dist/index.d.ts +2 -2
- package/dist/index.js +1 -2
- package/dist/input-schemas.d.ts +7 -0
- package/dist/input-schemas.js +58 -0
- package/dist/key_value_store.d.ts +8 -4
- package/dist/key_value_store.js +19 -11
- package/dist/platform_event_manager.d.ts +0 -1
- package/dist/platform_event_manager.js +5 -5
- package/dist/proxy_configuration.d.ts +41 -44
- package/dist/proxy_configuration.js +65 -103
- package/dist/storage.d.ts +58 -0
- package/dist/storage.js +79 -0
- package/dist/utils.d.ts +0 -1
- package/dist/utils.js +2 -4
- package/package.json +123 -73
- package/.turbo/turbo-build.log +0 -26
- package/.turbo/turbo-copy.log +0 -4
- package/dist/LICENSE.md +0 -201
- package/dist/README.md +0 -98
- package/dist/actor.d.ts.map +0 -1
- package/dist/actor.js.map +0 -1
- package/dist/charging.d.ts.map +0 -1
- package/dist/charging.js.map +0 -1
- package/dist/configuration.d.ts.map +0 -1
- package/dist/configuration.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/key_value_store.d.ts.map +0 -1
- package/dist/key_value_store.js.map +0 -1
- package/dist/package.json +0 -75
- package/dist/platform_event_manager.d.ts.map +0 -1
- package/dist/platform_event_manager.js.map +0 -1
- package/dist/proxy_configuration.d.ts.map +0 -1
- package/dist/proxy_configuration.js.map +0 -1
- package/dist/utils.d.ts.map +0 -1
- package/dist/utils.js.map +0 -1
package/dist/actor.js
CHANGED
|
@@ -1,18 +1,21 @@
|
|
|
1
1
|
import { createPrivateKey } from 'node:crypto';
|
|
2
|
-
import {
|
|
2
|
+
import { Dataset, purgeDefaultStorages, RequestQueue, serviceLocator } from '@crawlee/core';
|
|
3
3
|
import { sleep, snakeCaseToCamelCase } from '@crawlee/utils';
|
|
4
4
|
import { ApifyClient } from 'apify-client';
|
|
5
5
|
import ow from 'ow';
|
|
6
|
-
import { ACTOR_ENV_VARS, APIFY_ENV_VARS, INTEGER_ENV_VARS, } from '@apify/consts';
|
|
6
|
+
import { ACTOR_ENV_VARS, ACTOR_EVENT_NAMES, APIFY_ENV_VARS, INTEGER_ENV_VARS, } from '@apify/consts';
|
|
7
7
|
import { decryptInputSecrets } from '@apify/input_secrets';
|
|
8
8
|
import log from '@apify/log';
|
|
9
9
|
import { addTimeoutToPromise } from '@apify/timeout';
|
|
10
|
-
import {
|
|
10
|
+
import { ApifyStorageClient, pushDataChargingContext, USES_PUSH_DATA_INTERCEPTION, } from './apify_storage_client.js';
|
|
11
|
+
import { ChargingManager, pushDataAndCharge } from './charging.js';
|
|
11
12
|
import { Configuration } from './configuration.js';
|
|
13
|
+
import { getDefaultsFromInputSchema, noActorInputSchemaDefinedMarker, readInputSchema } from './input-schemas.js';
|
|
12
14
|
import { KeyValueStore } from './key_value_store.js';
|
|
13
15
|
import { PlatformEventManager } from './platform_event_manager.js';
|
|
14
16
|
import { ProxyConfiguration } from './proxy_configuration.js';
|
|
15
|
-
import {
|
|
17
|
+
import { openStorage } from './storage.js';
|
|
18
|
+
import { checkCrawleeVersion, getSystemInfo, printOutdatedSdkWarning } from './utils.js';
|
|
16
19
|
/**
|
|
17
20
|
* Exit codes for the Actor process.
|
|
18
21
|
* The error codes must be in the range 1-128, to avoid collision with signal exits
|
|
@@ -31,7 +34,6 @@ export const EXIT_CODES = {
|
|
|
31
34
|
*/
|
|
32
35
|
export class Actor {
|
|
33
36
|
/** @internal */
|
|
34
|
-
// eslint-disable-next-line no-use-before-define -- self-reference
|
|
35
37
|
static _instance;
|
|
36
38
|
/**
|
|
37
39
|
* Configuration of this SDK instance (provided to its constructor). See {@link Configuration} for details.
|
|
@@ -61,13 +63,43 @@ export class Actor {
|
|
|
61
63
|
* Set if the Actor is currently rebooting.
|
|
62
64
|
*/
|
|
63
65
|
isRebooting = false;
|
|
66
|
+
/**
|
|
67
|
+
* Set if the Actor is currently exiting. Prevents double-exit from graceful shutdown handlers.
|
|
68
|
+
*/
|
|
69
|
+
isExiting = false;
|
|
70
|
+
/**
|
|
71
|
+
* References to graceful shutdown handlers so they can be removed during cleanup.
|
|
72
|
+
*/
|
|
73
|
+
gracefulShutdownHandlers = {};
|
|
64
74
|
chargingManager;
|
|
75
|
+
/**
|
|
76
|
+
* Tracks which aliased storages have been purged during this session,
|
|
77
|
+
* so we only purge them once (on first open) when running locally.
|
|
78
|
+
* @internal
|
|
79
|
+
*/
|
|
80
|
+
purgedStorageAliases = new Set();
|
|
65
81
|
constructor(options = {}) {
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
82
|
+
const { configuration, ...configOptions } = options;
|
|
83
|
+
if (configuration) {
|
|
84
|
+
// BYO Configuration takes precedence; field-level overrides are
|
|
85
|
+
// ignored to keep the contract unambiguous. It must be the SDK's
|
|
86
|
+
// Configuration subclass, not a bare crawlee one — env-var
|
|
87
|
+
// resolution is driven by the subclass's `static fields`
|
|
88
|
+
// (`apifyConfigFields`) at construction, so a crawlee instance
|
|
89
|
+
// would silently expose none of the `APIFY_*`/`ACTOR_*` values.
|
|
90
|
+
if (!(configuration instanceof Configuration)) {
|
|
91
|
+
throw new Error('Actor `configuration` must be an Apify SDK Configuration (imported from `apify`), ' +
|
|
92
|
+
'not a crawlee Configuration, otherwise APIFY_*/ACTOR_* environment variables are not resolved.');
|
|
93
|
+
}
|
|
94
|
+
this.config = configuration;
|
|
95
|
+
}
|
|
96
|
+
else if (Object.keys(configOptions).length === 0) {
|
|
97
|
+
// use default configuration object if nothing overridden (it fallbacks to env vars)
|
|
98
|
+
this.config = Configuration.getGlobalConfig();
|
|
99
|
+
}
|
|
100
|
+
else {
|
|
101
|
+
this.config = new Configuration(configOptions);
|
|
102
|
+
}
|
|
71
103
|
this.apifyClient = this.newClient();
|
|
72
104
|
this.eventManager = new PlatformEventManager(this.config);
|
|
73
105
|
this.chargingManager = new ChargingManager(this.config, this.apifyClient);
|
|
@@ -173,26 +205,48 @@ export class Actor {
|
|
|
173
205
|
checkCrawleeVersion();
|
|
174
206
|
log.info('System info', getSystemInfo());
|
|
175
207
|
printOutdatedSdkWarning();
|
|
176
|
-
//
|
|
177
|
-
|
|
208
|
+
// Register this Actor's config as the global one so crawlee storages and
|
|
209
|
+
// the event manager resolve the same instance (`availableMemoryRatio` /
|
|
210
|
+
// `disableBrowserSandbox` at-home defaults now live in `Configuration`).
|
|
211
|
+
serviceLocator.setConfiguration(this.config);
|
|
178
212
|
if (this.isAtHome()) {
|
|
179
|
-
this.config
|
|
180
|
-
this.
|
|
181
|
-
this.config.useStorageClient(this.apifyClient);
|
|
182
|
-
this.config.useEventManager(this.eventManager);
|
|
213
|
+
serviceLocator.setStorageClient(new ApifyStorageClient(this.apifyClient, this.config, () => this.chargingManager));
|
|
214
|
+
serviceLocator.setEventManager(this.eventManager);
|
|
183
215
|
}
|
|
184
216
|
else if (options.storage) {
|
|
185
|
-
|
|
217
|
+
serviceLocator.setStorageClient(options.storage);
|
|
186
218
|
}
|
|
187
219
|
// Init the event manager the config uses
|
|
188
|
-
await
|
|
220
|
+
await serviceLocator.getEventManager().init();
|
|
189
221
|
log.debug(`Events initialized`);
|
|
222
|
+
// Register handlers for aborting and migrating events for automatic graceful shutdown.
|
|
223
|
+
// - aborting: calls Actor.exit() to terminate the run gracefully
|
|
224
|
+
// - migrating: calls Actor.reboot() to speed up migration (the run continues on a new worker)
|
|
225
|
+
// Using setTimeout to avoid deadlock with waitForAllListenersToComplete() in exit()/reboot()
|
|
226
|
+
if (options.gracefulShutdown !== false) {
|
|
227
|
+
const delay = options.gracefulShutdownDelayMillis ?? 0;
|
|
228
|
+
this.gracefulShutdownHandlers.aborting = () => {
|
|
229
|
+
setTimeout(() => {
|
|
230
|
+
this.exit().catch((err) => {
|
|
231
|
+
log.exception(err, 'Failed to exit gracefully');
|
|
232
|
+
});
|
|
233
|
+
}, delay);
|
|
234
|
+
};
|
|
235
|
+
this.on(ACTOR_EVENT_NAMES.ABORTING, this.gracefulShutdownHandlers.aborting);
|
|
236
|
+
this.gracefulShutdownHandlers.migrating = () => {
|
|
237
|
+
setTimeout(() => {
|
|
238
|
+
this.reboot().catch((err) => {
|
|
239
|
+
log.exception(err, 'Failed to reboot on migration');
|
|
240
|
+
});
|
|
241
|
+
}, delay);
|
|
242
|
+
};
|
|
243
|
+
this.on(ACTOR_EVENT_NAMES.MIGRATING, this.gracefulShutdownHandlers.migrating);
|
|
244
|
+
}
|
|
190
245
|
await purgeDefaultStorages({
|
|
191
246
|
config: this.config,
|
|
192
247
|
onlyPurgeOnce: true,
|
|
193
248
|
});
|
|
194
249
|
log.debug(`Default storages purged`);
|
|
195
|
-
Configuration.storage.enterWith(this.config);
|
|
196
250
|
await this.chargingManager.init();
|
|
197
251
|
log.debug(`ChargingManager initialized`, this.chargingManager.getPricingInfo());
|
|
198
252
|
}
|
|
@@ -200,6 +254,12 @@ export class Actor {
|
|
|
200
254
|
* @ignore
|
|
201
255
|
*/
|
|
202
256
|
async exit(messageOrOptions, options = {}) {
|
|
257
|
+
// Prevent double-exit from graceful shutdown handlers
|
|
258
|
+
if (this.isExiting) {
|
|
259
|
+
log.debug('Actor.exit() called while already exiting, skipping');
|
|
260
|
+
return;
|
|
261
|
+
}
|
|
262
|
+
this.isExiting = true;
|
|
203
263
|
options =
|
|
204
264
|
typeof messageOrOptions === 'string'
|
|
205
265
|
? { ...options, statusMessage: messageOrOptions }
|
|
@@ -207,8 +267,16 @@ export class Actor {
|
|
|
207
267
|
options.exit ??= true;
|
|
208
268
|
options.exitCode ??= EXIT_CODES.SUCCESS;
|
|
209
269
|
options.timeoutSecs ??= 30;
|
|
210
|
-
|
|
211
|
-
const
|
|
270
|
+
this._ensureActorInit('exit');
|
|
271
|
+
const client = serviceLocator.getStorageClient();
|
|
272
|
+
const events = serviceLocator.getEventManager();
|
|
273
|
+
// Remove graceful shutdown handlers to prevent them from interfering with exit
|
|
274
|
+
if (this.gracefulShutdownHandlers.aborting) {
|
|
275
|
+
this.off(ACTOR_EVENT_NAMES.ABORTING, this.gracefulShutdownHandlers.aborting);
|
|
276
|
+
}
|
|
277
|
+
if (this.gracefulShutdownHandlers.migrating) {
|
|
278
|
+
this.off(ACTOR_EVENT_NAMES.MIGRATING, this.gracefulShutdownHandlers.migrating);
|
|
279
|
+
}
|
|
212
280
|
// Close the event manager and emit the final PERSIST_STATE event
|
|
213
281
|
await events.close();
|
|
214
282
|
log.debug(`Events closed`);
|
|
@@ -216,6 +284,13 @@ export class Actor {
|
|
|
216
284
|
events.emit("exit" /* EventType.EXIT */, options);
|
|
217
285
|
// Wait for all event listeners to be processed
|
|
218
286
|
log.debug(`Waiting for all event listeners to complete their execution (with ${options.timeoutSecs} seconds timeout)`);
|
|
287
|
+
if (options.exit) {
|
|
288
|
+
// `addTimeoutToPromise` is a cooperative timeout. This ensures that the process exits
|
|
289
|
+
// after the timeout, even if the event listeners don't trigger the timeout.
|
|
290
|
+
setTimeout(() => {
|
|
291
|
+
process.exit(options.exitCode);
|
|
292
|
+
}, options.timeoutSecs * 1000);
|
|
293
|
+
}
|
|
219
294
|
await addTimeoutToPromise(async () => {
|
|
220
295
|
await events.waitForAllListenersToComplete();
|
|
221
296
|
if (client.teardown) {
|
|
@@ -229,16 +304,21 @@ export class Actor {
|
|
|
229
304
|
finished = true;
|
|
230
305
|
}
|
|
231
306
|
if (options.statusMessage != null) {
|
|
232
|
-
|
|
307
|
+
const statusMessagePromise = this.setStatusMessage(options.statusMessage, {
|
|
233
308
|
isStatusMessageTerminal: true,
|
|
234
309
|
level: options.exitCode > 0 ? 'ERROR' : 'INFO',
|
|
235
310
|
});
|
|
311
|
+
// Waiting 1ms is enough for the network request to be sent. We don't need to wait for the response.
|
|
312
|
+
await Promise.race([statusMessagePromise, sleep(1)]);
|
|
236
313
|
}
|
|
237
314
|
}, options.timeoutSecs * 1000, `Waiting for all event listeners to complete their execution timed out after ${options.timeoutSecs} seconds`).catch(() => {
|
|
238
315
|
if (options.exit) {
|
|
239
316
|
process.exit(options.exitCode);
|
|
240
317
|
}
|
|
241
318
|
});
|
|
319
|
+
// Reset the flag so the instance can be reused (e.g., in tests or when exit is false).
|
|
320
|
+
// When process.exit() actually terminates the process, this line is never reached - which is fine.
|
|
321
|
+
this.isExiting = false;
|
|
242
322
|
if (!options.exit) {
|
|
243
323
|
return;
|
|
244
324
|
}
|
|
@@ -254,13 +334,13 @@ export class Actor {
|
|
|
254
334
|
* @ignore
|
|
255
335
|
*/
|
|
256
336
|
on(event, listener) {
|
|
257
|
-
|
|
337
|
+
serviceLocator.getEventManager().on(event, listener);
|
|
258
338
|
}
|
|
259
339
|
/**
|
|
260
340
|
* @ignore
|
|
261
341
|
*/
|
|
262
342
|
off(event, listener) {
|
|
263
|
-
|
|
343
|
+
serviceLocator.getEventManager().off(event, listener);
|
|
264
344
|
}
|
|
265
345
|
/**
|
|
266
346
|
* Runs an Actor on the Apify platform using the current user account (determined by the `APIFY_TOKEN` environment variable).
|
|
@@ -287,9 +367,10 @@ export class Actor {
|
|
|
287
367
|
* @ignore
|
|
288
368
|
*/
|
|
289
369
|
async call(actorId, input, options = {}) {
|
|
370
|
+
const timeout = options.timeout === 'inherit' ? this.getRemainingTime() : options.timeout;
|
|
290
371
|
const { token, ...rest } = options;
|
|
291
372
|
const client = token ? this.newClient({ token }) : this.apifyClient;
|
|
292
|
-
return client.actor(actorId).call(input, rest);
|
|
373
|
+
return client.actor(actorId).call(input, { ...rest, timeout });
|
|
293
374
|
}
|
|
294
375
|
/**
|
|
295
376
|
* Runs an Actor on the Apify platform using the current user account (determined by the `APIFY_TOKEN` environment variable),
|
|
@@ -316,9 +397,10 @@ export class Actor {
|
|
|
316
397
|
* @ignore
|
|
317
398
|
*/
|
|
318
399
|
async start(actorId, input, options = {}) {
|
|
400
|
+
const timeout = options.timeout === 'inherit' ? this.getRemainingTime() : options.timeout;
|
|
319
401
|
const { token, ...rest } = options;
|
|
320
402
|
const client = token ? this.newClient({ token }) : this.apifyClient;
|
|
321
|
-
return client.actor(actorId).start(input, rest);
|
|
403
|
+
return client.actor(actorId).start(input, { ...rest, timeout });
|
|
322
404
|
}
|
|
323
405
|
/**
|
|
324
406
|
* Aborts given Actor run on the Apify platform using the current user account (determined by the `APIFY_TOKEN` environment variable).
|
|
@@ -372,9 +454,10 @@ export class Actor {
|
|
|
372
454
|
* @ignore
|
|
373
455
|
*/
|
|
374
456
|
async callTask(taskId, input, options = {}) {
|
|
457
|
+
const timeout = options.timeout === 'inherit' ? this.getRemainingTime() : options.timeout;
|
|
375
458
|
const { token, ...rest } = options;
|
|
376
459
|
const client = token ? this.newClient({ token }) : this.apifyClient;
|
|
377
|
-
return client.task(taskId).call(input, rest);
|
|
460
|
+
return client.task(taskId).call(input, { ...rest, timeout });
|
|
378
461
|
}
|
|
379
462
|
/**
|
|
380
463
|
* Transforms this Actor run to an Actor run of a given Actor. The system stops the current container and starts
|
|
@@ -395,11 +478,9 @@ export class Actor {
|
|
|
395
478
|
log.warning('Actor.metamorph() is only supported when running on the Apify platform.');
|
|
396
479
|
return;
|
|
397
480
|
}
|
|
398
|
-
const { customAfterSleepMillis = this.config.
|
|
399
|
-
const runId = this.config.
|
|
400
|
-
await this.apifyClient
|
|
401
|
-
.run(runId)
|
|
402
|
-
.metamorph(targetActorId, input, metamorphOpts);
|
|
481
|
+
const { customAfterSleepMillis = this.config.metamorphAfterSleepMillis, ...metamorphOpts } = options;
|
|
482
|
+
const runId = this.config.actorRunId;
|
|
483
|
+
await this.apifyClient.run(runId).metamorph(targetActorId, input, metamorphOpts);
|
|
403
484
|
// Wait some time for container to be stopped.
|
|
404
485
|
await sleep(customAfterSleepMillis);
|
|
405
486
|
}
|
|
@@ -411,6 +492,7 @@ export class Actor {
|
|
|
411
492
|
* @ignore
|
|
412
493
|
*/
|
|
413
494
|
async reboot(options = {}) {
|
|
495
|
+
this._ensureActorInit('reboot');
|
|
414
496
|
if (!this.isAtHome()) {
|
|
415
497
|
log.warning('Actor.reboot() is only supported when running on the Apify platform.');
|
|
416
498
|
return;
|
|
@@ -423,20 +505,20 @@ export class Actor {
|
|
|
423
505
|
// Waiting for all the listeners to finish, as `.reboot()` kills the container.
|
|
424
506
|
await Promise.all([
|
|
425
507
|
// `persistState` for individual RequestLists, RequestQueue... instances to be persisted
|
|
426
|
-
...
|
|
508
|
+
...serviceLocator
|
|
427
509
|
.getEventManager()
|
|
428
510
|
.listeners("persistState" /* EventType.PERSIST_STATE */)
|
|
429
|
-
.map(async (x) => x()),
|
|
511
|
+
.map(async (x) => x({})),
|
|
430
512
|
// `migrating` to pause Apify crawlers
|
|
431
|
-
...
|
|
513
|
+
...serviceLocator
|
|
432
514
|
.getEventManager()
|
|
433
515
|
.listeners("migrating" /* EventType.MIGRATING */)
|
|
434
|
-
.map(async (x) => x()),
|
|
516
|
+
.map(async (x) => x({})),
|
|
435
517
|
]);
|
|
436
|
-
const runId = this.config.
|
|
518
|
+
const runId = this.config.actorRunId;
|
|
437
519
|
await this.apifyClient.run(runId).reboot();
|
|
438
520
|
// Wait some time for container to be stopped.
|
|
439
|
-
const { customAfterSleepMillis = this.config.
|
|
521
|
+
const { customAfterSleepMillis = this.config.metamorphAfterSleepMillis } = options;
|
|
440
522
|
await sleep(customAfterSleepMillis);
|
|
441
523
|
}
|
|
442
524
|
/**
|
|
@@ -457,25 +539,27 @@ export class Actor {
|
|
|
457
539
|
requestUrl: ow.string,
|
|
458
540
|
payloadTemplate: ow.optional.string,
|
|
459
541
|
idempotencyKey: ow.optional.string,
|
|
542
|
+
headersTemplate: ow.optional.string,
|
|
543
|
+
description: ow.optional.string,
|
|
544
|
+
ignoreSslErrors: ow.optional.boolean,
|
|
545
|
+
doNotRetry: ow.optional.boolean,
|
|
546
|
+
shouldInterpolateStrings: ow.optional.boolean,
|
|
547
|
+
isApifyIntegration: ow.optional.boolean,
|
|
460
548
|
}));
|
|
461
|
-
const { eventTypes, requestUrl, payloadTemplate, idempotencyKey } = options;
|
|
462
549
|
if (!this.isAtHome()) {
|
|
463
550
|
log.warning('Actor.addWebhook() is only supported when running on the Apify platform. The webhook will not be invoked.');
|
|
464
551
|
return undefined;
|
|
465
552
|
}
|
|
466
|
-
const runId = this.config.
|
|
553
|
+
const runId = this.config.actorRunId;
|
|
467
554
|
if (!runId) {
|
|
468
555
|
throw new Error(`Environment variable ${ACTOR_ENV_VARS.RUN_ID} is not set!`);
|
|
469
556
|
}
|
|
470
557
|
return this.apifyClient.webhooks().create({
|
|
558
|
+
...options,
|
|
471
559
|
isAdHoc: true,
|
|
472
|
-
eventTypes,
|
|
473
560
|
condition: {
|
|
474
561
|
actorRunId: runId,
|
|
475
562
|
},
|
|
476
|
-
requestUrl,
|
|
477
|
-
payloadTemplate,
|
|
478
|
-
idempotencyKey,
|
|
479
563
|
});
|
|
480
564
|
}
|
|
481
565
|
/**
|
|
@@ -489,6 +573,7 @@ export class Actor {
|
|
|
489
573
|
const { isStatusMessageTerminal, level } = options || {};
|
|
490
574
|
ow(statusMessage, ow.string);
|
|
491
575
|
ow(isStatusMessageTerminal, ow.optional.boolean);
|
|
576
|
+
this._ensureActorInit('setStatusMessage');
|
|
492
577
|
const loggedStatusMessage = `[Status message]: ${statusMessage}`;
|
|
493
578
|
switch (level) {
|
|
494
579
|
case 'DEBUG':
|
|
@@ -504,13 +589,13 @@ export class Actor {
|
|
|
504
589
|
log.info(loggedStatusMessage);
|
|
505
590
|
break;
|
|
506
591
|
}
|
|
507
|
-
const client =
|
|
592
|
+
const client = serviceLocator.getStorageClient();
|
|
508
593
|
// just to be sure, this should be fast
|
|
509
594
|
await addTimeoutToPromise(async () => client.setStatusMessage(statusMessage, {
|
|
510
595
|
isStatusMessageTerminal,
|
|
511
596
|
level,
|
|
512
597
|
}), 1000, 'Setting status message timed out after 1s').catch((e) => log.warning(e.message));
|
|
513
|
-
const runId = this.config.
|
|
598
|
+
const runId = this.config.actorRunId;
|
|
514
599
|
if (runId) {
|
|
515
600
|
// just to be sure, this should be fast
|
|
516
601
|
const run = await addTimeoutToPromise(async () => this.apifyClient.run(runId).get(), 1000, 'Getting the current run timed out after 1s').catch((e) => log.warning(e.message));
|
|
@@ -545,28 +630,22 @@ export class Actor {
|
|
|
545
630
|
* @param eventName If provided, the method will attempt to charge for the event for each pushed item.
|
|
546
631
|
* @ignore
|
|
547
632
|
*/
|
|
548
|
-
// eslint-disable-next-line consistent-return -- The `return` is inconsistent by design here (`ChargeResult` with `eventName` parameter)
|
|
549
633
|
async pushData(item, eventName) {
|
|
550
634
|
this._ensureActorInit('pushData');
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
? this.chargingManager.calculateMaxEventChargeCountWithinLimit(eventName)
|
|
554
|
-
: Infinity;
|
|
555
|
-
const toCharge = Array.isArray(item) ? item.length : 1;
|
|
556
|
-
if (toCharge > maxChargedCount) {
|
|
557
|
-
// Push as many items as we can charge for
|
|
558
|
-
const items = Array.isArray(item) ? item : [item];
|
|
559
|
-
await dataset.pushData(items.slice(0, maxChargedCount));
|
|
560
|
-
}
|
|
561
|
-
else {
|
|
562
|
-
await dataset.pushData(item);
|
|
635
|
+
if (eventName?.startsWith('apify-')) {
|
|
636
|
+
throw new Error(`Cannot charge for synthetic event '${eventName}' manually`);
|
|
563
637
|
}
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
638
|
+
const dataset = await this.openDataset();
|
|
639
|
+
// Two code paths for charging:
|
|
640
|
+
// 1. Intercepted client: PatchedDatasetClient intercepts pushItems() calls, handling charging
|
|
641
|
+
// internally. This is needed because Crawlee's Dataset may call pushItems() directly,
|
|
642
|
+
// bypassing Actor.pushData(). We propagate eventName via AsyncLocalStorage context.
|
|
643
|
+
// 2. Direct charging: When using a non-patched client (e.g., forceCloud option or custom client),
|
|
644
|
+
// we handle charging here before delegating to the dataset.
|
|
645
|
+
if (this.usesPushDataInterception(dataset)) {
|
|
646
|
+
return await this.pushDataViaInterceptedClient(dataset, item, eventName);
|
|
569
647
|
}
|
|
648
|
+
return await this.pushDataWithExplicitCharging(dataset, item, eventName);
|
|
570
649
|
}
|
|
571
650
|
/**
|
|
572
651
|
* Opens a dataset and returns a promise resolving to an instance of the {@link Dataset} class.
|
|
@@ -578,13 +657,14 @@ export class Actor {
|
|
|
578
657
|
* For more details and code examples, see the {@link Dataset} class.
|
|
579
658
|
*
|
|
580
659
|
* @param [datasetIdOrName]
|
|
581
|
-
* ID or
|
|
660
|
+
* ID, name, or alias of the dataset to be opened. If `null` or `undefined`,
|
|
582
661
|
* the function returns the default dataset associated with the Actor run.
|
|
662
|
+
* You can also pass `{ alias: 'name' }` to open a dataset defined in the Actor's schema storages,
|
|
663
|
+
* `{ id: 'abc' }` to open by explicit ID, or `{ name: 'abc' }` to open by explicit name.
|
|
583
664
|
* @param [options]
|
|
584
665
|
* @ignore
|
|
585
666
|
*/
|
|
586
667
|
async openDataset(datasetIdOrName, options = {}) {
|
|
587
|
-
ow(datasetIdOrName, ow.optional.string);
|
|
588
668
|
ow(options, ow.object.exactShape({
|
|
589
669
|
forceCloud: ow.optional.boolean,
|
|
590
670
|
}));
|
|
@@ -691,17 +771,20 @@ export class Actor {
|
|
|
691
771
|
*/
|
|
692
772
|
async getInput() {
|
|
693
773
|
this._ensureActorInit('getInput');
|
|
694
|
-
const inputSecretsPrivateKeyFile = this.config
|
|
695
|
-
const
|
|
696
|
-
|
|
697
|
-
if (ow.isValid(
|
|
774
|
+
const { inputSecretsPrivateKeyFile, inputSecretsPrivateKeyPassphrase } = this.config;
|
|
775
|
+
const rawInput = await this.getValue(this.config.inputKey);
|
|
776
|
+
let input = rawInput;
|
|
777
|
+
if (ow.isValid(rawInput, ow.object.nonEmpty) &&
|
|
698
778
|
inputSecretsPrivateKeyFile &&
|
|
699
779
|
inputSecretsPrivateKeyPassphrase) {
|
|
700
780
|
const privateKey = createPrivateKey({
|
|
701
781
|
key: Buffer.from(inputSecretsPrivateKeyFile, 'base64'),
|
|
702
782
|
passphrase: inputSecretsPrivateKeyPassphrase,
|
|
703
783
|
});
|
|
704
|
-
|
|
784
|
+
input = decryptInputSecrets({ input: rawInput, privateKey });
|
|
785
|
+
}
|
|
786
|
+
if (ow.isValid(input, ow.object.nonEmpty) && !Buffer.isBuffer(input)) {
|
|
787
|
+
input = await this.inferDefaultsFromInputSchema(input);
|
|
705
788
|
}
|
|
706
789
|
return input;
|
|
707
790
|
}
|
|
@@ -728,11 +811,11 @@ export class Actor {
|
|
|
728
811
|
* @param [storeIdOrName]
|
|
729
812
|
* ID or name of the key-value store to be opened. If `null` or `undefined`,
|
|
730
813
|
* the function returns the default key-value store associated with the Actor run.
|
|
814
|
+
* You can also pass `{ id: 'abc' }` to open by explicit ID, or `{ name: 'abc' }` to open by explicit name.
|
|
731
815
|
* @param [options]
|
|
732
816
|
* @ignore
|
|
733
817
|
*/
|
|
734
818
|
async openKeyValueStore(storeIdOrName, options = {}) {
|
|
735
|
-
ow(storeIdOrName, ow.optional.string);
|
|
736
819
|
ow(options, ow.object.exactShape({
|
|
737
820
|
forceCloud: ow.optional.boolean,
|
|
738
821
|
}));
|
|
@@ -753,19 +836,18 @@ export class Actor {
|
|
|
753
836
|
* @param [queueIdOrName]
|
|
754
837
|
* ID or name of the request queue to be opened. If `null` or `undefined`,
|
|
755
838
|
* the function returns the default request queue associated with the Actor run.
|
|
839
|
+
* You can also pass `{ id: 'abc' }` to open by explicit ID, or `{ name: 'abc' }` to open by explicit name.
|
|
756
840
|
* @param [options]
|
|
757
841
|
* @ignore
|
|
758
842
|
*/
|
|
759
843
|
async openRequestQueue(queueIdOrName, options = {}) {
|
|
760
|
-
ow(queueIdOrName, ow.optional.string);
|
|
761
844
|
ow(options, ow.object.exactShape({
|
|
762
845
|
forceCloud: ow.optional.boolean,
|
|
763
846
|
}));
|
|
764
847
|
this._ensureActorInit('openRequestQueue');
|
|
765
848
|
const queue = await this._openStorage(RequestQueue, queueIdOrName, options);
|
|
766
849
|
// eslint-disable-next-line dot-notation
|
|
767
|
-
queue['initialCount'] =
|
|
768
|
-
(await queue.client.get())?.totalRequestCount ?? 0;
|
|
850
|
+
queue['initialCount'] = (await queue.client.getMetadata())?.totalRequestCount ?? 0;
|
|
769
851
|
return queue;
|
|
770
852
|
}
|
|
771
853
|
/**
|
|
@@ -802,19 +884,23 @@ export class Actor {
|
|
|
802
884
|
* ```
|
|
803
885
|
* { useApifyProxy: false }
|
|
804
886
|
* ```
|
|
887
|
+
*
|
|
888
|
+
* As part of the init process, we verify the configuration by checking the proxy status endpoint.
|
|
889
|
+
* This can make the init slower, to opt-out of this, use `checkAccess: false` (defaults to `true`).
|
|
890
|
+
*
|
|
805
891
|
* @ignore
|
|
806
892
|
*/
|
|
807
893
|
async createProxyConfiguration(proxyConfigurationOptions = {}) {
|
|
808
894
|
// Compatibility fix for Input UI where proxy: None returns { useApifyProxy: false }
|
|
809
895
|
// Without this, it would cause proxy to use the zero config / auto mode.
|
|
810
|
-
const { useApifyProxy, ...options } = proxyConfigurationOptions;
|
|
896
|
+
const { useApifyProxy, checkAccess, ...options } = proxyConfigurationOptions;
|
|
811
897
|
const dontUseApifyProxy = useApifyProxy === false;
|
|
812
898
|
const dontUseCustomProxies = !proxyConfigurationOptions.proxyUrls;
|
|
813
899
|
if (dontUseApifyProxy && dontUseCustomProxies) {
|
|
814
900
|
return undefined;
|
|
815
901
|
}
|
|
816
902
|
const proxyConfiguration = new ProxyConfiguration(options, this.config);
|
|
817
|
-
if (await proxyConfiguration.initialize()) {
|
|
903
|
+
if (await proxyConfiguration.initialize({ checkAccess })) {
|
|
818
904
|
return proxyConfiguration;
|
|
819
905
|
}
|
|
820
906
|
return undefined;
|
|
@@ -825,6 +911,14 @@ export class Actor {
|
|
|
825
911
|
* This method attempts to charge for the specified number of events, but may charge fewer
|
|
826
912
|
* if doing so would exceed the total budget limit (`maxTotalChargeUsd`).
|
|
827
913
|
*
|
|
914
|
+
* **Important:** When using the `count` parameter to charge for multiple events at once,
|
|
915
|
+
* be aware that the charge may be partially fulfilled — i.e. `chargedCount` can be less
|
|
916
|
+
* than the requested `count`. Always check the returned `chargedCount` to know how many
|
|
917
|
+
* events were actually charged, and only perform that much work. If your work is
|
|
918
|
+
* meaningfully divisible into individual units, prefer calling `charge()` once per unit
|
|
919
|
+
* rather than batching via `count` — this gives finer control over budget consumption
|
|
920
|
+
* and avoids situations where more work is requested than the budget allows.
|
|
921
|
+
*
|
|
828
922
|
* @param options The name of the event to charge for and the number of events to be charged.
|
|
829
923
|
* @ignore
|
|
830
924
|
*/
|
|
@@ -860,7 +954,7 @@ export class Actor {
|
|
|
860
954
|
* Returns a new {@link ApifyEnv} object which contains information parsed from all the Apify environment variables.
|
|
861
955
|
*
|
|
862
956
|
* For the list of the Apify environment variables, see
|
|
863
|
-
* [Actor documentation](https://docs.apify.com/
|
|
957
|
+
* [Actor documentation](https://docs.apify.com/platform/actors/development/programming-interface/environment-variables).
|
|
864
958
|
* If some variables are not defined or are invalid, the corresponding value in the resulting object will be null.
|
|
865
959
|
* @ignore
|
|
866
960
|
*/
|
|
@@ -895,15 +989,13 @@ export class Actor {
|
|
|
895
989
|
* @ignore
|
|
896
990
|
*/
|
|
897
991
|
newClient(options = {}) {
|
|
898
|
-
const { storageDir, ...storageClientOptions } = this.config.
|
|
992
|
+
const { storageDir, ...storageClientOptions } = (this.config.storageClientOptions ?? {});
|
|
899
993
|
const { apifyVersion, crawleeVersion } = getSystemInfo();
|
|
900
994
|
return new ApifyClient({
|
|
901
|
-
baseUrl: this.config.
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
`Crawlee/${crawleeVersion}`,
|
|
906
|
-
],
|
|
995
|
+
baseUrl: this.config.apiBaseUrl,
|
|
996
|
+
publicBaseUrl: this.config.apiPublicBaseUrl,
|
|
997
|
+
token: this.config.token,
|
|
998
|
+
userAgentSuffix: [`SDK/${apifyVersion}`, `Crawlee/${crawleeVersion}`],
|
|
907
999
|
...storageClientOptions,
|
|
908
1000
|
...options, // allow overriding the instance configuration
|
|
909
1001
|
});
|
|
@@ -925,6 +1017,7 @@ export class Actor {
|
|
|
925
1017
|
* @param options An optional object parameter where a custom `keyValueStoreName` and `config` can be passed in.
|
|
926
1018
|
*/
|
|
927
1019
|
async useState(name, defaultValue = {}, options) {
|
|
1020
|
+
this._ensureActorInit('useState');
|
|
928
1021
|
const kvStore = await KeyValueStore.open(options?.keyValueStoreName, {
|
|
929
1022
|
config: options?.config || Configuration.getGlobalConfig(),
|
|
930
1023
|
});
|
|
@@ -1016,6 +1109,11 @@ export class Actor {
|
|
|
1016
1109
|
* Calling `Actor.exit()` is required if you use the `Actor.init()` method, since it opens websocket connection
|
|
1017
1110
|
* (see {@link Actor.events} for details), which needs to be terminated for the code to finish.
|
|
1018
1111
|
*
|
|
1112
|
+
* **Graceful shutdown:** When running on the Apify platform, the Actor may receive `aborting` or `migrating`
|
|
1113
|
+
* events. By default, the SDK will automatically call `Actor.exit()` on `aborting` events and `Actor.reboot()`
|
|
1114
|
+
* on `migrating` events (to speed up the migration and continue the run on a new worker). You can disable this
|
|
1115
|
+
* behavior by setting `options.gracefulShutdown` to `false`.
|
|
1116
|
+
*
|
|
1019
1117
|
* ```js
|
|
1020
1118
|
* import { gotScraping } from 'got-scraping';
|
|
1021
1119
|
*
|
|
@@ -1242,8 +1340,10 @@ export class Actor {
|
|
|
1242
1340
|
* For more details and code examples, see the {@link Dataset} class.
|
|
1243
1341
|
*
|
|
1244
1342
|
* @param [datasetIdOrName]
|
|
1245
|
-
* ID or
|
|
1343
|
+
* ID, name, or alias of the dataset to be opened. If `null` or `undefined`,
|
|
1246
1344
|
* the function returns the default dataset associated with the Actor run.
|
|
1345
|
+
* You can also pass `{ alias: 'name' }` to open a dataset defined in the Actor's schema storages,
|
|
1346
|
+
* `{ id: 'abc' }` to open by explicit ID, or `{ name: 'abc' }` to open by explicit name.
|
|
1247
1347
|
* @param [options]
|
|
1248
1348
|
*/
|
|
1249
1349
|
static async openDataset(datasetIdOrName, options = {}) {
|
|
@@ -1361,6 +1461,7 @@ export class Actor {
|
|
|
1361
1461
|
* @param [storeIdOrName]
|
|
1362
1462
|
* ID or name of the key-value store to be opened. If `null` or `undefined`,
|
|
1363
1463
|
* the function returns the default key-value store associated with the Actor run.
|
|
1464
|
+
* You can also pass `{ id: 'abc' }` to open by explicit ID, or `{ name: 'abc' }` to open by explicit name.
|
|
1364
1465
|
* @param [options]
|
|
1365
1466
|
*/
|
|
1366
1467
|
static async openKeyValueStore(storeIdOrName, options = {}) {
|
|
@@ -1380,6 +1481,7 @@ export class Actor {
|
|
|
1380
1481
|
* @param [queueIdOrName]
|
|
1381
1482
|
* ID or name of the request queue to be opened. If `null` or `undefined`,
|
|
1382
1483
|
* the function returns the default request queue associated with the Actor run.
|
|
1484
|
+
* You can also pass `{ id: 'abc' }` to open by explicit ID, or `{ name: 'abc' }` to open by explicit name.
|
|
1383
1485
|
* @param [options]
|
|
1384
1486
|
*/
|
|
1385
1487
|
static async openRequestQueue(queueIdOrName, options = {}) {
|
|
@@ -1419,6 +1521,9 @@ export class Actor {
|
|
|
1419
1521
|
* ```
|
|
1420
1522
|
* { useApifyProxy: false }
|
|
1421
1523
|
* ```
|
|
1524
|
+
*
|
|
1525
|
+
* As part of the init process, we verify the configuration by checking the proxy status endpoint.
|
|
1526
|
+
* This can make the init slower, to opt-out of this, use `checkAccess: false` (defaults to `true`).
|
|
1422
1527
|
*/
|
|
1423
1528
|
static async createProxyConfiguration(proxyConfigurationOptions = {}) {
|
|
1424
1529
|
return Actor.getDefaultInstance().createProxyConfiguration(proxyConfigurationOptions);
|
|
@@ -1429,6 +1534,14 @@ export class Actor {
|
|
|
1429
1534
|
* This method attempts to charge for the specified number of events, but may charge fewer
|
|
1430
1535
|
* if doing so would exceed the total budget limit (`maxTotalChargeUsd`).
|
|
1431
1536
|
*
|
|
1537
|
+
* **Important:** When using the `count` parameter to charge for multiple events at once,
|
|
1538
|
+
* be aware that the charge may be partially fulfilled — i.e. `chargedCount` can be less
|
|
1539
|
+
* than the requested `count`. Always check the returned `chargedCount` to know how many
|
|
1540
|
+
* events were actually charged, and only perform that much work. If your work is
|
|
1541
|
+
* meaningfully divisible into individual units, prefer calling `charge()` once per unit
|
|
1542
|
+
* rather than batching via `count` — this gives finer control over budget consumption
|
|
1543
|
+
* and avoids situations where more work is requested than the budget allows.
|
|
1544
|
+
*
|
|
1432
1545
|
* @param options The name of the event to charge for and the number of events to be charged.
|
|
1433
1546
|
*/
|
|
1434
1547
|
static async charge(options) {
|
|
@@ -1444,7 +1557,7 @@ export class Actor {
|
|
|
1444
1557
|
* Returns a new {@link ApifyEnv} object which contains information parsed from all the Apify environment variables.
|
|
1445
1558
|
*
|
|
1446
1559
|
* For the list of the Apify environment variables, see
|
|
1447
|
-
* [Actor documentation](https://docs.apify.com/
|
|
1560
|
+
* [Actor documentation](https://docs.apify.com/platform/actors/development/programming-interface/environment-variables).
|
|
1448
1561
|
* If some of the variables are not defined or are invalid, the corresponding value in the resulting object will be null.
|
|
1449
1562
|
*/
|
|
1450
1563
|
static getEnv() {
|
|
@@ -1479,9 +1592,52 @@ export class Actor {
|
|
|
1479
1592
|
this._instance ??= new Actor();
|
|
1480
1593
|
return this._instance;
|
|
1481
1594
|
}
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
1595
|
+
usesPushDataInterception(dataset) {
|
|
1596
|
+
return Boolean(dataset.client[USES_PUSH_DATA_INTERCEPTION]);
|
|
1597
|
+
}
|
|
1598
|
+
async pushDataViaInterceptedClient(dataset, item, eventName) {
|
|
1599
|
+
// PatchedDatasetClient will handle charging and item limiting.
|
|
1600
|
+
// We only need to propagate `eventName` and (optionally) return aggregated charge info.
|
|
1601
|
+
const context = {
|
|
1602
|
+
eventName,
|
|
1603
|
+
};
|
|
1604
|
+
await pushDataChargingContext.run(context, async () => {
|
|
1605
|
+
await dataset.pushData(item);
|
|
1606
|
+
});
|
|
1607
|
+
return (context.chargeResult ?? {
|
|
1608
|
+
eventChargeLimitReached: false,
|
|
1609
|
+
chargedCount: 0,
|
|
1610
|
+
chargeableWithinLimit: {},
|
|
1611
|
+
});
|
|
1612
|
+
}
|
|
1613
|
+
async pushDataWithExplicitCharging(dataset, items, explicitEventName) {
|
|
1614
|
+
// `Actor.pushData()` historically worked even without calling `Actor.init()`.
|
|
1615
|
+
// In that case, charging isn't configured, so just push the data through.
|
|
1616
|
+
if (!this.initialized && explicitEventName === undefined) {
|
|
1617
|
+
await dataset.pushData(items);
|
|
1618
|
+
return {
|
|
1619
|
+
eventChargeLimitReached: false,
|
|
1620
|
+
chargedCount: 0,
|
|
1621
|
+
chargeableWithinLimit: {},
|
|
1622
|
+
};
|
|
1623
|
+
}
|
|
1624
|
+
const isDefaultDataset = dataset.id === this.config.defaultDatasetId;
|
|
1625
|
+
return pushDataAndCharge({
|
|
1626
|
+
chargingManager: this.chargingManager,
|
|
1627
|
+
items,
|
|
1628
|
+
eventName: explicitEventName,
|
|
1629
|
+
isDefaultDataset,
|
|
1630
|
+
pushFn: async (limitedItems) => dataset.pushData(limitedItems),
|
|
1631
|
+
});
|
|
1632
|
+
}
|
|
1633
|
+
async _openStorage(storageClass, identifier, options = {}) {
|
|
1634
|
+
return openStorage(storageClass, identifier, {
|
|
1635
|
+
config: this.config,
|
|
1636
|
+
client: options.forceCloud
|
|
1637
|
+
? new ApifyStorageClient(this.apifyClient, this.config, () => this.chargingManager)
|
|
1638
|
+
: undefined,
|
|
1639
|
+
purgedStorageAliases: this.purgedStorageAliases,
|
|
1640
|
+
});
|
|
1485
1641
|
}
|
|
1486
1642
|
_ensureActorInit(methodCalled) {
|
|
1487
1643
|
// If we already warned the user once, don't do it again to prevent spam
|
|
@@ -1497,5 +1653,36 @@ export class Actor {
|
|
|
1497
1653
|
'Did you forget to call Actor.init()?',
|
|
1498
1654
|
].join('\n'));
|
|
1499
1655
|
}
|
|
1656
|
+
/**
|
|
1657
|
+
* Get time remaining from the Actor run timeout. Returns `undefined` if not on an Apify platform or the current
|
|
1658
|
+
* run was started without a timeout.
|
|
1659
|
+
*/
|
|
1660
|
+
getRemainingTime() {
|
|
1661
|
+
const env = this.getEnv();
|
|
1662
|
+
if (this.isAtHome() && env.timeoutAt !== null) {
|
|
1663
|
+
return env.timeoutAt.getTime() - Date.now();
|
|
1664
|
+
}
|
|
1665
|
+
log.warning('Using `inherit` argument is only possible when the Actor is running on the Apify platform and when the ' +
|
|
1666
|
+
'timeout for the Actor run is set.');
|
|
1667
|
+
return undefined;
|
|
1668
|
+
}
|
|
1669
|
+
async inferDefaultsFromInputSchema(input) {
|
|
1670
|
+
// TODO: https://github.com/apify/apify-shared-js/issues/547
|
|
1671
|
+
// On platform, this is already handled
|
|
1672
|
+
if (this.isAtHome()) {
|
|
1673
|
+
return input;
|
|
1674
|
+
}
|
|
1675
|
+
// On local, we can get the input schema from the local config
|
|
1676
|
+
const inputSchema = readInputSchema();
|
|
1677
|
+
// Don't emit warning if there is no input schema defined
|
|
1678
|
+
if (inputSchema === noActorInputSchemaDefinedMarker) {
|
|
1679
|
+
return input;
|
|
1680
|
+
}
|
|
1681
|
+
if (!inputSchema) {
|
|
1682
|
+
log.warning('Failed to find the input schema for the local run of this Actor. Your input will be missing fields that have default values set if they are missing from the input you are using.');
|
|
1683
|
+
return input;
|
|
1684
|
+
}
|
|
1685
|
+
const defaults = getDefaultsFromInputSchema(inputSchema);
|
|
1686
|
+
return { ...defaults, ...input };
|
|
1687
|
+
}
|
|
1500
1688
|
}
|
|
1501
|
-
//# sourceMappingURL=actor.js.map
|