apify 4.0.0-beta.12 → 4.0.0-beta.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/README.md +12 -48
  2. package/dist/actor.d.ts +157 -61
  3. package/dist/actor.js +278 -91
  4. package/dist/apify_storage_client.d.ts +54 -0
  5. package/dist/apify_storage_client.js +152 -0
  6. package/dist/charging.d.ts +43 -2
  7. package/dist/charging.js +196 -54
  8. package/dist/configuration.d.ts +79 -132
  9. package/dist/configuration.js +114 -141
  10. package/dist/index.d.ts +2 -2
  11. package/dist/index.js +1 -2
  12. package/dist/input-schemas.d.ts +7 -0
  13. package/dist/input-schemas.js +58 -0
  14. package/dist/key_value_store.d.ts +8 -4
  15. package/dist/key_value_store.js +19 -11
  16. package/dist/platform_event_manager.d.ts +0 -1
  17. package/dist/platform_event_manager.js +5 -5
  18. package/dist/proxy_configuration.d.ts +41 -44
  19. package/dist/proxy_configuration.js +65 -103
  20. package/dist/storage.d.ts +58 -0
  21. package/dist/storage.js +79 -0
  22. package/dist/utils.d.ts +0 -1
  23. package/dist/utils.js +2 -4
  24. package/package.json +123 -73
  25. package/.turbo/turbo-build.log +0 -26
  26. package/.turbo/turbo-copy.log +0 -4
  27. package/dist/LICENSE.md +0 -201
  28. package/dist/README.md +0 -98
  29. package/dist/actor.d.ts.map +0 -1
  30. package/dist/actor.js.map +0 -1
  31. package/dist/charging.d.ts.map +0 -1
  32. package/dist/charging.js.map +0 -1
  33. package/dist/configuration.d.ts.map +0 -1
  34. package/dist/configuration.js.map +0 -1
  35. package/dist/index.d.ts.map +0 -1
  36. package/dist/index.js.map +0 -1
  37. package/dist/key_value_store.d.ts.map +0 -1
  38. package/dist/key_value_store.js.map +0 -1
  39. package/dist/package.json +0 -75
  40. package/dist/platform_event_manager.d.ts.map +0 -1
  41. package/dist/platform_event_manager.js.map +0 -1
  42. package/dist/proxy_configuration.d.ts.map +0 -1
  43. package/dist/proxy_configuration.js.map +0 -1
  44. package/dist/utils.d.ts.map +0 -1
  45. package/dist/utils.js.map +0 -1
package/dist/actor.js CHANGED
@@ -1,18 +1,21 @@
1
1
  import { createPrivateKey } from 'node:crypto';
2
- import { Configuration as CoreConfiguration, Dataset, purgeDefaultStorages, RequestQueue, StorageManager, } from '@crawlee/core';
2
+ import { Dataset, purgeDefaultStorages, RequestQueue, serviceLocator } from '@crawlee/core';
3
3
  import { sleep, snakeCaseToCamelCase } from '@crawlee/utils';
4
4
  import { ApifyClient } from 'apify-client';
5
5
  import ow from 'ow';
6
- import { ACTOR_ENV_VARS, APIFY_ENV_VARS, INTEGER_ENV_VARS, } from '@apify/consts';
6
+ import { ACTOR_ENV_VARS, ACTOR_EVENT_NAMES, APIFY_ENV_VARS, INTEGER_ENV_VARS, } from '@apify/consts';
7
7
  import { decryptInputSecrets } from '@apify/input_secrets';
8
8
  import log from '@apify/log';
9
9
  import { addTimeoutToPromise } from '@apify/timeout';
10
- import { ChargingManager } from './charging.js';
10
+ import { ApifyStorageClient, pushDataChargingContext, USES_PUSH_DATA_INTERCEPTION, } from './apify_storage_client.js';
11
+ import { ChargingManager, pushDataAndCharge } from './charging.js';
11
12
  import { Configuration } from './configuration.js';
13
+ import { getDefaultsFromInputSchema, noActorInputSchemaDefinedMarker, readInputSchema } from './input-schemas.js';
12
14
  import { KeyValueStore } from './key_value_store.js';
13
15
  import { PlatformEventManager } from './platform_event_manager.js';
14
16
  import { ProxyConfiguration } from './proxy_configuration.js';
15
- import { checkCrawleeVersion, getSystemInfo, printOutdatedSdkWarning, } from './utils.js';
17
+ import { openStorage } from './storage.js';
18
+ import { checkCrawleeVersion, getSystemInfo, printOutdatedSdkWarning } from './utils.js';
16
19
  /**
17
20
  * Exit codes for the Actor process.
18
21
  * The error codes must be in the range 1-128, to avoid collision with signal exits
@@ -31,7 +34,6 @@ export const EXIT_CODES = {
31
34
  */
32
35
  export class Actor {
33
36
  /** @internal */
34
- // eslint-disable-next-line no-use-before-define -- self-reference
35
37
  static _instance;
36
38
  /**
37
39
  * Configuration of this SDK instance (provided to its constructor). See {@link Configuration} for details.
@@ -61,13 +63,43 @@ export class Actor {
61
63
  * Set if the Actor is currently rebooting.
62
64
  */
63
65
  isRebooting = false;
66
+ /**
67
+ * Set if the Actor is currently exiting. Prevents double-exit from graceful shutdown handlers.
68
+ */
69
+ isExiting = false;
70
+ /**
71
+ * References to graceful shutdown handlers so they can be removed during cleanup.
72
+ */
73
+ gracefulShutdownHandlers = {};
64
74
  chargingManager;
75
+ /**
76
+ * Tracks which aliased storages have been purged during this session,
77
+ * so we only purge them once (on first open) when running locally.
78
+ * @internal
79
+ */
80
+ purgedStorageAliases = new Set();
65
81
  constructor(options = {}) {
66
- // use default configuration object if nothing overridden (it fallbacks to env vars)
67
- this.config =
68
- Object.keys(options).length === 0
69
- ? Configuration.getGlobalConfig()
70
- : new Configuration(options);
82
+ const { configuration, ...configOptions } = options;
83
+ if (configuration) {
84
+ // BYO Configuration takes precedence; field-level overrides are
85
+ // ignored to keep the contract unambiguous. It must be the SDK's
86
+ // Configuration subclass, not a bare crawlee one — env-var
87
+ // resolution is driven by the subclass's `static fields`
88
+ // (`apifyConfigFields`) at construction, so a crawlee instance
89
+ // would silently expose none of the `APIFY_*`/`ACTOR_*` values.
90
+ if (!(configuration instanceof Configuration)) {
91
+ throw new Error('Actor `configuration` must be an Apify SDK Configuration (imported from `apify`), ' +
92
+ 'not a crawlee Configuration, otherwise APIFY_*/ACTOR_* environment variables are not resolved.');
93
+ }
94
+ this.config = configuration;
95
+ }
96
+ else if (Object.keys(configOptions).length === 0) {
97
+ // use default configuration object if nothing overridden (it fallbacks to env vars)
98
+ this.config = Configuration.getGlobalConfig();
99
+ }
100
+ else {
101
+ this.config = new Configuration(configOptions);
102
+ }
71
103
  this.apifyClient = this.newClient();
72
104
  this.eventManager = new PlatformEventManager(this.config);
73
105
  this.chargingManager = new ChargingManager(this.config, this.apifyClient);
@@ -173,26 +205,48 @@ export class Actor {
173
205
  checkCrawleeVersion();
174
206
  log.info('System info', getSystemInfo());
175
207
  printOutdatedSdkWarning();
176
- // reset global config instance to respect APIFY_ prefixed env vars
177
- CoreConfiguration.globalConfig = Configuration.getGlobalConfig();
208
+ // Register this Actor's config as the global one so crawlee storages and
209
+ // the event manager resolve the same instance (`availableMemoryRatio` /
210
+ // `disableBrowserSandbox` at-home defaults now live in `Configuration`).
211
+ serviceLocator.setConfiguration(this.config);
178
212
  if (this.isAtHome()) {
179
- this.config.set('availableMemoryRatio', 1);
180
- this.config.set('disableBrowserSandbox', true); // for browser launcher, adds `--no-sandbox` to args
181
- this.config.useStorageClient(this.apifyClient);
182
- this.config.useEventManager(this.eventManager);
213
+ serviceLocator.setStorageClient(new ApifyStorageClient(this.apifyClient, this.config, () => this.chargingManager));
214
+ serviceLocator.setEventManager(this.eventManager);
183
215
  }
184
216
  else if (options.storage) {
185
- this.config.useStorageClient(options.storage);
217
+ serviceLocator.setStorageClient(options.storage);
186
218
  }
187
219
  // Init the event manager the config uses
188
- await this.config.getEventManager().init();
220
+ await serviceLocator.getEventManager().init();
189
221
  log.debug(`Events initialized`);
222
+ // Register handlers for aborting and migrating events for automatic graceful shutdown.
223
+ // - aborting: calls Actor.exit() to terminate the run gracefully
224
+ // - migrating: calls Actor.reboot() to speed up migration (the run continues on a new worker)
225
+ // Using setTimeout to avoid deadlock with waitForAllListenersToComplete() in exit()/reboot()
226
+ if (options.gracefulShutdown !== false) {
227
+ const delay = options.gracefulShutdownDelayMillis ?? 0;
228
+ this.gracefulShutdownHandlers.aborting = () => {
229
+ setTimeout(() => {
230
+ this.exit().catch((err) => {
231
+ log.exception(err, 'Failed to exit gracefully');
232
+ });
233
+ }, delay);
234
+ };
235
+ this.on(ACTOR_EVENT_NAMES.ABORTING, this.gracefulShutdownHandlers.aborting);
236
+ this.gracefulShutdownHandlers.migrating = () => {
237
+ setTimeout(() => {
238
+ this.reboot().catch((err) => {
239
+ log.exception(err, 'Failed to reboot on migration');
240
+ });
241
+ }, delay);
242
+ };
243
+ this.on(ACTOR_EVENT_NAMES.MIGRATING, this.gracefulShutdownHandlers.migrating);
244
+ }
190
245
  await purgeDefaultStorages({
191
246
  config: this.config,
192
247
  onlyPurgeOnce: true,
193
248
  });
194
249
  log.debug(`Default storages purged`);
195
- Configuration.storage.enterWith(this.config);
196
250
  await this.chargingManager.init();
197
251
  log.debug(`ChargingManager initialized`, this.chargingManager.getPricingInfo());
198
252
  }
@@ -200,6 +254,12 @@ export class Actor {
200
254
  * @ignore
201
255
  */
202
256
  async exit(messageOrOptions, options = {}) {
257
+ // Prevent double-exit from graceful shutdown handlers
258
+ if (this.isExiting) {
259
+ log.debug('Actor.exit() called while already exiting, skipping');
260
+ return;
261
+ }
262
+ this.isExiting = true;
203
263
  options =
204
264
  typeof messageOrOptions === 'string'
205
265
  ? { ...options, statusMessage: messageOrOptions }
@@ -207,8 +267,16 @@ export class Actor {
207
267
  options.exit ??= true;
208
268
  options.exitCode ??= EXIT_CODES.SUCCESS;
209
269
  options.timeoutSecs ??= 30;
210
- const client = this.config.getStorageClient();
211
- const events = this.config.getEventManager();
270
+ this._ensureActorInit('exit');
271
+ const client = serviceLocator.getStorageClient();
272
+ const events = serviceLocator.getEventManager();
273
+ // Remove graceful shutdown handlers to prevent them from interfering with exit
274
+ if (this.gracefulShutdownHandlers.aborting) {
275
+ this.off(ACTOR_EVENT_NAMES.ABORTING, this.gracefulShutdownHandlers.aborting);
276
+ }
277
+ if (this.gracefulShutdownHandlers.migrating) {
278
+ this.off(ACTOR_EVENT_NAMES.MIGRATING, this.gracefulShutdownHandlers.migrating);
279
+ }
212
280
  // Close the event manager and emit the final PERSIST_STATE event
213
281
  await events.close();
214
282
  log.debug(`Events closed`);
@@ -216,6 +284,13 @@ export class Actor {
216
284
  events.emit("exit" /* EventType.EXIT */, options);
217
285
  // Wait for all event listeners to be processed
218
286
  log.debug(`Waiting for all event listeners to complete their execution (with ${options.timeoutSecs} seconds timeout)`);
287
+ if (options.exit) {
288
+ // `addTimeoutToPromise` is a cooperative timeout. This ensures that the process exits
289
+ // after the timeout, even if the event listeners don't trigger the timeout.
290
+ setTimeout(() => {
291
+ process.exit(options.exitCode);
292
+ }, options.timeoutSecs * 1000);
293
+ }
219
294
  await addTimeoutToPromise(async () => {
220
295
  await events.waitForAllListenersToComplete();
221
296
  if (client.teardown) {
@@ -229,16 +304,21 @@ export class Actor {
229
304
  finished = true;
230
305
  }
231
306
  if (options.statusMessage != null) {
232
- await this.setStatusMessage(options.statusMessage, {
307
+ const statusMessagePromise = this.setStatusMessage(options.statusMessage, {
233
308
  isStatusMessageTerminal: true,
234
309
  level: options.exitCode > 0 ? 'ERROR' : 'INFO',
235
310
  });
311
+ // Waiting 1ms is enough for the network request to be sent. We don't need to wait for the response.
312
+ await Promise.race([statusMessagePromise, sleep(1)]);
236
313
  }
237
314
  }, options.timeoutSecs * 1000, `Waiting for all event listeners to complete their execution timed out after ${options.timeoutSecs} seconds`).catch(() => {
238
315
  if (options.exit) {
239
316
  process.exit(options.exitCode);
240
317
  }
241
318
  });
319
+ // Reset the flag so the instance can be reused (e.g., in tests or when exit is false).
320
+ // When process.exit() actually terminates the process, this line is never reached - which is fine.
321
+ this.isExiting = false;
242
322
  if (!options.exit) {
243
323
  return;
244
324
  }
@@ -254,13 +334,13 @@ export class Actor {
254
334
  * @ignore
255
335
  */
256
336
  on(event, listener) {
257
- this.config.getEventManager().on(event, listener);
337
+ serviceLocator.getEventManager().on(event, listener);
258
338
  }
259
339
  /**
260
340
  * @ignore
261
341
  */
262
342
  off(event, listener) {
263
- this.config.getEventManager().off(event, listener);
343
+ serviceLocator.getEventManager().off(event, listener);
264
344
  }
265
345
  /**
266
346
  * Runs an Actor on the Apify platform using the current user account (determined by the `APIFY_TOKEN` environment variable).
@@ -287,9 +367,10 @@ export class Actor {
287
367
  * @ignore
288
368
  */
289
369
  async call(actorId, input, options = {}) {
370
+ const timeout = options.timeout === 'inherit' ? this.getRemainingTime() : options.timeout;
290
371
  const { token, ...rest } = options;
291
372
  const client = token ? this.newClient({ token }) : this.apifyClient;
292
- return client.actor(actorId).call(input, rest);
373
+ return client.actor(actorId).call(input, { ...rest, timeout });
293
374
  }
294
375
  /**
295
376
  * Runs an Actor on the Apify platform using the current user account (determined by the `APIFY_TOKEN` environment variable),
@@ -316,9 +397,10 @@ export class Actor {
316
397
  * @ignore
317
398
  */
318
399
  async start(actorId, input, options = {}) {
400
+ const timeout = options.timeout === 'inherit' ? this.getRemainingTime() : options.timeout;
319
401
  const { token, ...rest } = options;
320
402
  const client = token ? this.newClient({ token }) : this.apifyClient;
321
- return client.actor(actorId).start(input, rest);
403
+ return client.actor(actorId).start(input, { ...rest, timeout });
322
404
  }
323
405
  /**
324
406
  * Aborts given Actor run on the Apify platform using the current user account (determined by the `APIFY_TOKEN` environment variable).
@@ -372,9 +454,10 @@ export class Actor {
372
454
  * @ignore
373
455
  */
374
456
  async callTask(taskId, input, options = {}) {
457
+ const timeout = options.timeout === 'inherit' ? this.getRemainingTime() : options.timeout;
375
458
  const { token, ...rest } = options;
376
459
  const client = token ? this.newClient({ token }) : this.apifyClient;
377
- return client.task(taskId).call(input, rest);
460
+ return client.task(taskId).call(input, { ...rest, timeout });
378
461
  }
379
462
  /**
380
463
  * Transforms this Actor run to an Actor run of a given Actor. The system stops the current container and starts
@@ -395,11 +478,9 @@ export class Actor {
395
478
  log.warning('Actor.metamorph() is only supported when running on the Apify platform.');
396
479
  return;
397
480
  }
398
- const { customAfterSleepMillis = this.config.get('metamorphAfterSleepMillis'), ...metamorphOpts } = options;
399
- const runId = this.config.get('actorRunId');
400
- await this.apifyClient
401
- .run(runId)
402
- .metamorph(targetActorId, input, metamorphOpts);
481
+ const { customAfterSleepMillis = this.config.metamorphAfterSleepMillis, ...metamorphOpts } = options;
482
+ const runId = this.config.actorRunId;
483
+ await this.apifyClient.run(runId).metamorph(targetActorId, input, metamorphOpts);
403
484
  // Wait some time for container to be stopped.
404
485
  await sleep(customAfterSleepMillis);
405
486
  }
@@ -411,6 +492,7 @@ export class Actor {
411
492
  * @ignore
412
493
  */
413
494
  async reboot(options = {}) {
495
+ this._ensureActorInit('reboot');
414
496
  if (!this.isAtHome()) {
415
497
  log.warning('Actor.reboot() is only supported when running on the Apify platform.');
416
498
  return;
@@ -423,20 +505,20 @@ export class Actor {
423
505
  // Waiting for all the listeners to finish, as `.reboot()` kills the container.
424
506
  await Promise.all([
425
507
  // `persistState` for individual RequestLists, RequestQueue... instances to be persisted
426
- ...this.config
508
+ ...serviceLocator
427
509
  .getEventManager()
428
510
  .listeners("persistState" /* EventType.PERSIST_STATE */)
429
- .map(async (x) => x()),
511
+ .map(async (x) => x({})),
430
512
  // `migrating` to pause Apify crawlers
431
- ...this.config
513
+ ...serviceLocator
432
514
  .getEventManager()
433
515
  .listeners("migrating" /* EventType.MIGRATING */)
434
- .map(async (x) => x()),
516
+ .map(async (x) => x({})),
435
517
  ]);
436
- const runId = this.config.get('actorRunId');
518
+ const runId = this.config.actorRunId;
437
519
  await this.apifyClient.run(runId).reboot();
438
520
  // Wait some time for container to be stopped.
439
- const { customAfterSleepMillis = this.config.get('metamorphAfterSleepMillis'), } = options;
521
+ const { customAfterSleepMillis = this.config.metamorphAfterSleepMillis } = options;
440
522
  await sleep(customAfterSleepMillis);
441
523
  }
442
524
  /**
@@ -457,25 +539,27 @@ export class Actor {
457
539
  requestUrl: ow.string,
458
540
  payloadTemplate: ow.optional.string,
459
541
  idempotencyKey: ow.optional.string,
542
+ headersTemplate: ow.optional.string,
543
+ description: ow.optional.string,
544
+ ignoreSslErrors: ow.optional.boolean,
545
+ doNotRetry: ow.optional.boolean,
546
+ shouldInterpolateStrings: ow.optional.boolean,
547
+ isApifyIntegration: ow.optional.boolean,
460
548
  }));
461
- const { eventTypes, requestUrl, payloadTemplate, idempotencyKey } = options;
462
549
  if (!this.isAtHome()) {
463
550
  log.warning('Actor.addWebhook() is only supported when running on the Apify platform. The webhook will not be invoked.');
464
551
  return undefined;
465
552
  }
466
- const runId = this.config.get('actorRunId');
553
+ const runId = this.config.actorRunId;
467
554
  if (!runId) {
468
555
  throw new Error(`Environment variable ${ACTOR_ENV_VARS.RUN_ID} is not set!`);
469
556
  }
470
557
  return this.apifyClient.webhooks().create({
558
+ ...options,
471
559
  isAdHoc: true,
472
- eventTypes,
473
560
  condition: {
474
561
  actorRunId: runId,
475
562
  },
476
- requestUrl,
477
- payloadTemplate,
478
- idempotencyKey,
479
563
  });
480
564
  }
481
565
  /**
@@ -489,6 +573,7 @@ export class Actor {
489
573
  const { isStatusMessageTerminal, level } = options || {};
490
574
  ow(statusMessage, ow.string);
491
575
  ow(isStatusMessageTerminal, ow.optional.boolean);
576
+ this._ensureActorInit('setStatusMessage');
492
577
  const loggedStatusMessage = `[Status message]: ${statusMessage}`;
493
578
  switch (level) {
494
579
  case 'DEBUG':
@@ -504,13 +589,13 @@ export class Actor {
504
589
  log.info(loggedStatusMessage);
505
590
  break;
506
591
  }
507
- const client = this.config.getStorageClient();
592
+ const client = serviceLocator.getStorageClient();
508
593
  // just to be sure, this should be fast
509
594
  await addTimeoutToPromise(async () => client.setStatusMessage(statusMessage, {
510
595
  isStatusMessageTerminal,
511
596
  level,
512
597
  }), 1000, 'Setting status message timed out after 1s').catch((e) => log.warning(e.message));
513
- const runId = this.config.get('actorRunId');
598
+ const runId = this.config.actorRunId;
514
599
  if (runId) {
515
600
  // just to be sure, this should be fast
516
601
  const run = await addTimeoutToPromise(async () => this.apifyClient.run(runId).get(), 1000, 'Getting the current run timed out after 1s').catch((e) => log.warning(e.message));
@@ -545,28 +630,22 @@ export class Actor {
545
630
  * @param eventName If provided, the method will attempt to charge for the event for each pushed item.
546
631
  * @ignore
547
632
  */
548
- // eslint-disable-next-line consistent-return -- The `return` is inconsistent by design here (`ChargeResult` with `eventName` parameter)
549
633
  async pushData(item, eventName) {
550
634
  this._ensureActorInit('pushData');
551
- const dataset = await this.openDataset();
552
- const maxChargedCount = eventName !== undefined
553
- ? this.chargingManager.calculateMaxEventChargeCountWithinLimit(eventName)
554
- : Infinity;
555
- const toCharge = Array.isArray(item) ? item.length : 1;
556
- if (toCharge > maxChargedCount) {
557
- // Push as many items as we can charge for
558
- const items = Array.isArray(item) ? item : [item];
559
- await dataset.pushData(items.slice(0, maxChargedCount));
560
- }
561
- else {
562
- await dataset.pushData(item);
635
+ if (eventName?.startsWith('apify-')) {
636
+ throw new Error(`Cannot charge for synthetic event '${eventName}' manually`);
563
637
  }
564
- if (eventName) {
565
- return await this.chargingManager.charge({
566
- eventName,
567
- count: Math.min(toCharge, maxChargedCount),
568
- });
638
+ const dataset = await this.openDataset();
639
+ // Two code paths for charging:
640
+ // 1. Intercepted client: PatchedDatasetClient intercepts pushItems() calls, handling charging
641
+ // internally. This is needed because Crawlee's Dataset may call pushItems() directly,
642
+ // bypassing Actor.pushData(). We propagate eventName via AsyncLocalStorage context.
643
+ // 2. Direct charging: When using a non-patched client (e.g., forceCloud option or custom client),
644
+ // we handle charging here before delegating to the dataset.
645
+ if (this.usesPushDataInterception(dataset)) {
646
+ return await this.pushDataViaInterceptedClient(dataset, item, eventName);
569
647
  }
648
+ return await this.pushDataWithExplicitCharging(dataset, item, eventName);
570
649
  }
571
650
  /**
572
651
  * Opens a dataset and returns a promise resolving to an instance of the {@link Dataset} class.
@@ -578,13 +657,14 @@ export class Actor {
578
657
  * For more details and code examples, see the {@link Dataset} class.
579
658
  *
580
659
  * @param [datasetIdOrName]
581
- * ID or name of the dataset to be opened. If `null` or `undefined`,
660
+ * ID, name, or alias of the dataset to be opened. If `null` or `undefined`,
582
661
  * the function returns the default dataset associated with the Actor run.
662
+ * You can also pass `{ alias: 'name' }` to open a dataset defined in the Actor's schema storages,
663
+ * `{ id: 'abc' }` to open by explicit ID, or `{ name: 'abc' }` to open by explicit name.
583
664
  * @param [options]
584
665
  * @ignore
585
666
  */
586
667
  async openDataset(datasetIdOrName, options = {}) {
587
- ow(datasetIdOrName, ow.optional.string);
588
668
  ow(options, ow.object.exactShape({
589
669
  forceCloud: ow.optional.boolean,
590
670
  }));
@@ -691,17 +771,20 @@ export class Actor {
691
771
  */
692
772
  async getInput() {
693
773
  this._ensureActorInit('getInput');
694
- const inputSecretsPrivateKeyFile = this.config.get('inputSecretsPrivateKeyFile');
695
- const inputSecretsPrivateKeyPassphrase = this.config.get('inputSecretsPrivateKeyPassphrase');
696
- const input = await this.getValue(this.config.get('inputKey'));
697
- if (ow.isValid(input, ow.object.nonEmpty) &&
774
+ const { inputSecretsPrivateKeyFile, inputSecretsPrivateKeyPassphrase } = this.config;
775
+ const rawInput = await this.getValue(this.config.inputKey);
776
+ let input = rawInput;
777
+ if (ow.isValid(rawInput, ow.object.nonEmpty) &&
698
778
  inputSecretsPrivateKeyFile &&
699
779
  inputSecretsPrivateKeyPassphrase) {
700
780
  const privateKey = createPrivateKey({
701
781
  key: Buffer.from(inputSecretsPrivateKeyFile, 'base64'),
702
782
  passphrase: inputSecretsPrivateKeyPassphrase,
703
783
  });
704
- return decryptInputSecrets({ input, privateKey });
784
+ input = decryptInputSecrets({ input: rawInput, privateKey });
785
+ }
786
+ if (ow.isValid(input, ow.object.nonEmpty) && !Buffer.isBuffer(input)) {
787
+ input = await this.inferDefaultsFromInputSchema(input);
705
788
  }
706
789
  return input;
707
790
  }
@@ -728,11 +811,11 @@ export class Actor {
728
811
  * @param [storeIdOrName]
729
812
  * ID or name of the key-value store to be opened. If `null` or `undefined`,
730
813
  * the function returns the default key-value store associated with the Actor run.
814
+ * You can also pass `{ id: 'abc' }` to open by explicit ID, or `{ name: 'abc' }` to open by explicit name.
731
815
  * @param [options]
732
816
  * @ignore
733
817
  */
734
818
  async openKeyValueStore(storeIdOrName, options = {}) {
735
- ow(storeIdOrName, ow.optional.string);
736
819
  ow(options, ow.object.exactShape({
737
820
  forceCloud: ow.optional.boolean,
738
821
  }));
@@ -753,19 +836,18 @@ export class Actor {
753
836
  * @param [queueIdOrName]
754
837
  * ID or name of the request queue to be opened. If `null` or `undefined`,
755
838
  * the function returns the default request queue associated with the Actor run.
839
+ * You can also pass `{ id: 'abc' }` to open by explicit ID, or `{ name: 'abc' }` to open by explicit name.
756
840
  * @param [options]
757
841
  * @ignore
758
842
  */
759
843
  async openRequestQueue(queueIdOrName, options = {}) {
760
- ow(queueIdOrName, ow.optional.string);
761
844
  ow(options, ow.object.exactShape({
762
845
  forceCloud: ow.optional.boolean,
763
846
  }));
764
847
  this._ensureActorInit('openRequestQueue');
765
848
  const queue = await this._openStorage(RequestQueue, queueIdOrName, options);
766
849
  // eslint-disable-next-line dot-notation
767
- queue['initialCount'] =
768
- (await queue.client.get())?.totalRequestCount ?? 0;
850
+ queue['initialCount'] = (await queue.client.getMetadata())?.totalRequestCount ?? 0;
769
851
  return queue;
770
852
  }
771
853
  /**
@@ -802,19 +884,23 @@ export class Actor {
802
884
  * ```
803
885
  * { useApifyProxy: false }
804
886
  * ```
887
+ *
888
+ * As part of the init process, we verify the configuration by checking the proxy status endpoint.
889
+ * This can make the init slower, to opt-out of this, use `checkAccess: false` (defaults to `true`).
890
+ *
805
891
  * @ignore
806
892
  */
807
893
  async createProxyConfiguration(proxyConfigurationOptions = {}) {
808
894
  // Compatibility fix for Input UI where proxy: None returns { useApifyProxy: false }
809
895
  // Without this, it would cause proxy to use the zero config / auto mode.
810
- const { useApifyProxy, ...options } = proxyConfigurationOptions;
896
+ const { useApifyProxy, checkAccess, ...options } = proxyConfigurationOptions;
811
897
  const dontUseApifyProxy = useApifyProxy === false;
812
898
  const dontUseCustomProxies = !proxyConfigurationOptions.proxyUrls;
813
899
  if (dontUseApifyProxy && dontUseCustomProxies) {
814
900
  return undefined;
815
901
  }
816
902
  const proxyConfiguration = new ProxyConfiguration(options, this.config);
817
- if (await proxyConfiguration.initialize()) {
903
+ if (await proxyConfiguration.initialize({ checkAccess })) {
818
904
  return proxyConfiguration;
819
905
  }
820
906
  return undefined;
@@ -825,6 +911,14 @@ export class Actor {
825
911
  * This method attempts to charge for the specified number of events, but may charge fewer
826
912
  * if doing so would exceed the total budget limit (`maxTotalChargeUsd`).
827
913
  *
914
+ * **Important:** When using the `count` parameter to charge for multiple events at once,
915
+ * be aware that the charge may be partially fulfilled — i.e. `chargedCount` can be less
916
+ * than the requested `count`. Always check the returned `chargedCount` to know how many
917
+ * events were actually charged, and only perform that much work. If your work is
918
+ * meaningfully divisible into individual units, prefer calling `charge()` once per unit
919
+ * rather than batching via `count` — this gives finer control over budget consumption
920
+ * and avoids situations where more work is requested than the budget allows.
921
+ *
828
922
  * @param options The name of the event to charge for and the number of events to be charged.
829
923
  * @ignore
830
924
  */
@@ -860,7 +954,7 @@ export class Actor {
860
954
  * Returns a new {@link ApifyEnv} object which contains information parsed from all the Apify environment variables.
861
955
  *
862
956
  * For the list of the Apify environment variables, see
863
- * [Actor documentation](https://docs.apify.com/actor/run#environment-variables).
957
+ * [Actor documentation](https://docs.apify.com/platform/actors/development/programming-interface/environment-variables).
864
958
  * If some variables are not defined or are invalid, the corresponding value in the resulting object will be null.
865
959
  * @ignore
866
960
  */
@@ -895,15 +989,13 @@ export class Actor {
895
989
  * @ignore
896
990
  */
897
991
  newClient(options = {}) {
898
- const { storageDir, ...storageClientOptions } = this.config.get('storageClientOptions');
992
+ const { storageDir, ...storageClientOptions } = (this.config.storageClientOptions ?? {});
899
993
  const { apifyVersion, crawleeVersion } = getSystemInfo();
900
994
  return new ApifyClient({
901
- baseUrl: this.config.get('apiBaseUrl'),
902
- token: this.config.get('token'),
903
- userAgentSuffix: [
904
- `SDK/${apifyVersion}`,
905
- `Crawlee/${crawleeVersion}`,
906
- ],
995
+ baseUrl: this.config.apiBaseUrl,
996
+ publicBaseUrl: this.config.apiPublicBaseUrl,
997
+ token: this.config.token,
998
+ userAgentSuffix: [`SDK/${apifyVersion}`, `Crawlee/${crawleeVersion}`],
907
999
  ...storageClientOptions,
908
1000
  ...options, // allow overriding the instance configuration
909
1001
  });
@@ -925,6 +1017,7 @@ export class Actor {
925
1017
  * @param options An optional object parameter where a custom `keyValueStoreName` and `config` can be passed in.
926
1018
  */
927
1019
  async useState(name, defaultValue = {}, options) {
1020
+ this._ensureActorInit('useState');
928
1021
  const kvStore = await KeyValueStore.open(options?.keyValueStoreName, {
929
1022
  config: options?.config || Configuration.getGlobalConfig(),
930
1023
  });
@@ -1016,6 +1109,11 @@ export class Actor {
1016
1109
  * Calling `Actor.exit()` is required if you use the `Actor.init()` method, since it opens websocket connection
1017
1110
  * (see {@link Actor.events} for details), which needs to be terminated for the code to finish.
1018
1111
  *
1112
+ * **Graceful shutdown:** When running on the Apify platform, the Actor may receive `aborting` or `migrating`
1113
+ * events. By default, the SDK will automatically call `Actor.exit()` on `aborting` events and `Actor.reboot()`
1114
+ * on `migrating` events (to speed up the migration and continue the run on a new worker). You can disable this
1115
+ * behavior by setting `options.gracefulShutdown` to `false`.
1116
+ *
1019
1117
  * ```js
1020
1118
  * import { gotScraping } from 'got-scraping';
1021
1119
  *
@@ -1242,8 +1340,10 @@ export class Actor {
1242
1340
  * For more details and code examples, see the {@link Dataset} class.
1243
1341
  *
1244
1342
  * @param [datasetIdOrName]
1245
- * ID or name of the dataset to be opened. If `null` or `undefined`,
1343
+ * ID, name, or alias of the dataset to be opened. If `null` or `undefined`,
1246
1344
  * the function returns the default dataset associated with the Actor run.
1345
+ * You can also pass `{ alias: 'name' }` to open a dataset defined in the Actor's schema storages,
1346
+ * `{ id: 'abc' }` to open by explicit ID, or `{ name: 'abc' }` to open by explicit name.
1247
1347
  * @param [options]
1248
1348
  */
1249
1349
  static async openDataset(datasetIdOrName, options = {}) {
@@ -1361,6 +1461,7 @@ export class Actor {
1361
1461
  * @param [storeIdOrName]
1362
1462
  * ID or name of the key-value store to be opened. If `null` or `undefined`,
1363
1463
  * the function returns the default key-value store associated with the Actor run.
1464
+ * You can also pass `{ id: 'abc' }` to open by explicit ID, or `{ name: 'abc' }` to open by explicit name.
1364
1465
  * @param [options]
1365
1466
  */
1366
1467
  static async openKeyValueStore(storeIdOrName, options = {}) {
@@ -1380,6 +1481,7 @@ export class Actor {
1380
1481
  * @param [queueIdOrName]
1381
1482
  * ID or name of the request queue to be opened. If `null` or `undefined`,
1382
1483
  * the function returns the default request queue associated with the Actor run.
1484
+ * You can also pass `{ id: 'abc' }` to open by explicit ID, or `{ name: 'abc' }` to open by explicit name.
1383
1485
  * @param [options]
1384
1486
  */
1385
1487
  static async openRequestQueue(queueIdOrName, options = {}) {
@@ -1419,6 +1521,9 @@ export class Actor {
1419
1521
  * ```
1420
1522
  * { useApifyProxy: false }
1421
1523
  * ```
1524
+ *
1525
+ * As part of the init process, we verify the configuration by checking the proxy status endpoint.
1526
+ * This can make the init slower; to opt out, set `checkAccess: false` (it defaults to `true`).
1422
1527
  */
1423
1528
  static async createProxyConfiguration(proxyConfigurationOptions = {}) {
1424
1529
  return Actor.getDefaultInstance().createProxyConfiguration(proxyConfigurationOptions);
@@ -1429,6 +1534,14 @@ export class Actor {
1429
1534
  * This method attempts to charge for the specified number of events, but may charge fewer
1430
1535
  * if doing so would exceed the total budget limit (`maxTotalChargeUsd`).
1431
1536
  *
1537
+ * **Important:** When using the `count` parameter to charge for multiple events at once,
1538
+ * be aware that the charge may be partially fulfilled — i.e. `chargedCount` can be less
1539
+ * than the requested `count`. Always check the returned `chargedCount` to know how many
1540
+ * events were actually charged, and only perform that much work. If your work is
1541
+ * meaningfully divisible into individual units, prefer calling `charge()` once per unit
1542
+ * rather than batching via `count` — this gives finer control over budget consumption
1543
+ * and avoids situations where more work is requested than the budget allows.
1544
+ *
1432
1545
  * @param options The name of the event to charge for and the number of events to be charged.
1433
1546
  */
1434
1547
  static async charge(options) {
@@ -1444,7 +1557,7 @@ export class Actor {
1444
1557
  * Returns a new {@link ApifyEnv} object which contains information parsed from all the Apify environment variables.
1445
1558
  *
1446
1559
  * For the list of the Apify environment variables, see
1447
- * [Actor documentation](https://docs.apify.com/actor/run#environment-variables).
1560
+ * [Actor documentation](https://docs.apify.com/platform/actors/development/programming-interface/environment-variables).
1448
1561
  * If some of the variables are not defined or are invalid, the corresponding value in the resulting object will be null.
1449
1562
  */
1450
1563
  static getEnv() {
@@ -1479,9 +1592,52 @@ export class Actor {
1479
1592
  this._instance ??= new Actor();
1480
1593
  return this._instance;
1481
1594
  }
1482
- async _openStorage(storageClass, id, options = {}) {
1483
- const client = options.forceCloud ? this.apifyClient : undefined;
1484
- return StorageManager.openStorage(storageClass, id, client, this.config);
1595
+ usesPushDataInterception(dataset) {
1596
+ return Boolean(dataset.client[USES_PUSH_DATA_INTERCEPTION]);
1597
+ }
1598
+ async pushDataViaInterceptedClient(dataset, item, eventName) {
1599
+ // PatchedDatasetClient will handle charging and item limiting.
1600
+ // We only need to propagate `eventName` and (optionally) return aggregated charge info.
1601
+ const context = {
1602
+ eventName,
1603
+ };
1604
+ await pushDataChargingContext.run(context, async () => {
1605
+ await dataset.pushData(item);
1606
+ });
1607
+ return (context.chargeResult ?? {
1608
+ eventChargeLimitReached: false,
1609
+ chargedCount: 0,
1610
+ chargeableWithinLimit: {},
1611
+ });
1612
+ }
1613
+ async pushDataWithExplicitCharging(dataset, items, explicitEventName) {
1614
+ // `Actor.pushData()` historically worked even without calling `Actor.init()`.
1615
+ // In that case, charging isn't configured, so just push the data through.
1616
+ if (!this.initialized && explicitEventName === undefined) {
1617
+ await dataset.pushData(items);
1618
+ return {
1619
+ eventChargeLimitReached: false,
1620
+ chargedCount: 0,
1621
+ chargeableWithinLimit: {},
1622
+ };
1623
+ }
1624
+ const isDefaultDataset = dataset.id === this.config.defaultDatasetId;
1625
+ return pushDataAndCharge({
1626
+ chargingManager: this.chargingManager,
1627
+ items,
1628
+ eventName: explicitEventName,
1629
+ isDefaultDataset,
1630
+ pushFn: async (limitedItems) => dataset.pushData(limitedItems),
1631
+ });
1632
+ }
1633
+ async _openStorage(storageClass, identifier, options = {}) {
1634
+ return openStorage(storageClass, identifier, {
1635
+ config: this.config,
1636
+ client: options.forceCloud
1637
+ ? new ApifyStorageClient(this.apifyClient, this.config, () => this.chargingManager)
1638
+ : undefined,
1639
+ purgedStorageAliases: this.purgedStorageAliases,
1640
+ });
1485
1641
  }
1486
1642
  _ensureActorInit(methodCalled) {
1487
1643
  // If we already warned the user once, don't do it again to prevent spam
@@ -1497,5 +1653,36 @@ export class Actor {
1497
1653
  'Did you forget to call Actor.init()?',
1498
1654
  ].join('\n'));
1499
1655
  }
1656
+ /**
1657
+ * Get time remaining from the Actor run timeout. Returns `undefined` if not on an Apify platform or the current
1658
+ * run was started without a timeout.
1659
+ */
1660
+ getRemainingTime() {
1661
+ const env = this.getEnv();
1662
+ if (this.isAtHome() && env.timeoutAt !== null) {
1663
+ return env.timeoutAt.getTime() - Date.now();
1664
+ }
1665
+ log.warning('Using `inherit` argument is only possible when the Actor is running on the Apify platform and when the ' +
1666
+ 'timeout for the Actor run is set.');
1667
+ return undefined;
1668
+ }
1669
+ async inferDefaultsFromInputSchema(input) {
1670
+ // TODO: https://github.com/apify/apify-shared-js/issues/547
1671
+ // On platform, this is already handled
1672
+ if (this.isAtHome()) {
1673
+ return input;
1674
+ }
1675
+ // On local, we can get the input schema from the local config
1676
+ const inputSchema = readInputSchema();
1677
+ // Don't emit warning if there is no input schema defined
1678
+ if (inputSchema === noActorInputSchemaDefinedMarker) {
1679
+ return input;
1680
+ }
1681
+ if (!inputSchema) {
1682
+ log.warning('Failed to find the input schema for the local run of this Actor. Your input will be missing fields that have default values set if they are missing from the input you are using.');
1683
+ return input;
1684
+ }
1685
+ const defaults = getDefaultsFromInputSchema(inputSchema);
1686
+ return { ...defaults, ...input };
1687
+ }
1500
1688
  }
1501
- //# sourceMappingURL=actor.js.map