apify 3.0.0-alpha.2 → 3.0.0-alpha.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/actor.js ADDED
@@ -0,0 +1,1221 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.EXIT_CODES = exports.Actor = void 0;
4
+ const tslib_1 = require("tslib");
5
+ const ow_1 = tslib_1.__importDefault(require("ow"));
6
+ const consts_1 = require("@apify/consts");
7
+ const log_1 = tslib_1.__importDefault(require("@apify/log"));
8
+ const apify_client_1 = require("apify-client");
9
+ const core_1 = require("@crawlee/core");
10
+ const utils_1 = require("@crawlee/utils");
11
+ const utils_2 = require("./utils");
12
+ const platform_event_manager_1 = require("./platform_event_manager");
13
+ const proxy_configuration_1 = require("./proxy_configuration");
14
+ /**
15
+ * `Actor` class serves as an alternative approach to the static helpers exported from the package. It allows passing configuration
16
+ * that will be used on the instance methods. Environment variables will have precedence over this configuration.
17
+ * See {@link Configuration} for details about what can be configured and what are the default values.
18
+ */
19
+ class Actor {
20
+ constructor(options = {}) {
21
+ /**
22
+ * Configuration of this SDK instance (provided to its constructor). See {@link Configuration} for details.
23
+ * @internal
24
+ */
25
+ Object.defineProperty(this, "config", {
26
+ enumerable: true,
27
+ configurable: true,
28
+ writable: true,
29
+ value: void 0
30
+ });
31
+ /**
32
+ * Default {@link ApifyClient} instance.
33
+ * @internal
34
+ */
35
+ Object.defineProperty(this, "apifyClient", {
36
+ enumerable: true,
37
+ configurable: true,
38
+ writable: true,
39
+ value: void 0
40
+ });
41
+ /**
42
+ * Default {@link EventManager} instance.
43
+ * @internal
44
+ */
45
+ Object.defineProperty(this, "eventManager", {
46
+ enumerable: true,
47
+ configurable: true,
48
+ writable: true,
49
+ value: void 0
50
+ });
51
+ Object.defineProperty(this, "storageManagers", {
52
+ enumerable: true,
53
+ configurable: true,
54
+ writable: true,
55
+ value: new Map()
56
+ });
57
+ // use default configuration object if nothing overridden (it fallbacks to env vars)
58
+ this.config = Object.keys(options).length === 0 ? core_1.Configuration.getGlobalConfig() : new core_1.Configuration(options);
59
+ this.apifyClient = this.newClient();
60
+ this.eventManager = new platform_event_manager_1.PlatformEventManager(this.config);
61
+ }
62
+ /**
63
+ * Runs the main user function that performs the job of the actor
64
+ * and terminates the process when the user function finishes.
65
+ *
66
+ * **The `Actor.main()` function is optional** and is provided merely for your convenience.
67
+ * It is mainly useful when you're running your code as an actor on the [Apify platform](https://apify.com/actors).
68
+ * However, if you want to use Apify SDK tools directly inside your existing projects, e.g.
69
+ * running in an [Express](https://expressjs.com/) server, on
70
+ * [Google Cloud functions](https://cloud.google.com/functions)
71
+ * or [AWS Lambda](https://aws.amazon.com/lambda/), it's better to avoid
72
+ * it since the function terminates the main process when it finishes!
73
+ *
74
+ * The `Actor.main()` function performs the following actions:
75
+ *
76
+ * - When running on the Apify platform (i.e. `APIFY_IS_AT_HOME` environment variable is set),
77
+ * it sets up a connection to listen for platform events.
78
+ * For example, to get a notification about an imminent migration to another server.
79
+ * See {@link Actor.events} for details.
80
+ * - It checks that either `APIFY_TOKEN` or `APIFY_LOCAL_STORAGE_DIR` environment variable
81
+ * is defined. If not, the function sets `APIFY_LOCAL_STORAGE_DIR` to `./apify_storage`
82
+ * inside the current working directory. This is to simplify running code examples.
83
+ * - It invokes the user function passed as the `userFunc` parameter.
84
+ * - If the user function returned a promise, waits for it to resolve.
85
+ * - If the user function throws an exception or some other error is encountered,
86
+ * prints error details to console so that they are stored to the log.
87
+ * - Exits the Node.js process, with zero exit code on success and non-zero on errors.
88
+ *
89
+ * The user function can be synchronous:
90
+ *
91
+ * ```javascript
92
+ * Actor.main(() => {
93
+ * // My synchronous function that returns immediately
94
+ * console.log('Hello world from actor!');
95
+ * });
96
+ * ```
97
+ *
98
+ * If the user function returns a promise, it is considered asynchronous:
99
+ * ```javascript
100
+ * const { gotScraping } = require('got-scraping');
101
+ *
102
+ * Actor.main(() => {
103
+ * // My asynchronous function that returns a promise
104
+ * return gotScraping('http://www.example.com').then((html) => {
105
+ * console.log(html);
106
+ * });
107
+ * });
108
+ * ```
109
+ *
110
+ * To simplify your code, you can take advantage of the `async`/`await` keywords:
111
+ *
112
+ * ```javascript
113
+ * const { gotScraping } = require('got-scraping');
114
+ *
115
+ * Actor.main(async () => {
116
+ * // My asynchronous function
117
+ * const html = await gotScraping('http://www.example.com');
118
+ * console.log(html);
119
+ * });
120
+ * ```
121
+ *
122
+ * @param userFunc User function to be executed. If it returns a promise,
123
+ * the promise will be awaited. The user function is called with no arguments.
124
+ * @param options
125
+ * @ignore
126
+ */
127
+ main(userFunc, options) {
128
+ if (!userFunc || typeof userFunc !== 'function') {
129
+ throw new Error(`First parameter for Actor.main() must be a function (was '${userFunc === null ? 'null' : typeof userFunc}').`);
130
+ }
131
+ return (async () => {
132
+ if (options?.purge) {
133
+ await (0, utils_1.purgeLocalStorage)();
134
+ }
135
+ await this.init();
136
+ let ret;
137
+ try {
138
+ ret = await core_1.Configuration.storage.run(this.config, userFunc);
139
+ await this.exit(options);
140
+ }
141
+ catch (err) {
142
+ log_1.default.exception(err, err.message);
143
+ await this.exit({ exitCode: exports.EXIT_CODES.ERROR_USER_FUNCTION_THREW });
144
+ }
145
+ return ret;
146
+ })();
147
+ }
148
+ /**
149
+ * @ignore
150
+ */
151
+ async init() {
152
+ (0, utils_2.logSystemInfo)();
153
+ (0, utils_2.printOutdatedSdkWarning)();
154
+ await this.eventManager.init();
155
+ if (this.isAtHome()) {
156
+ this.config.set('availableMemoryRatio', 1);
157
+ this.config.useStorageClient(this.apifyClient);
158
+ this.config.useEventManager(this.eventManager);
159
+ }
160
+ }
161
+ /**
162
+ * @ignore
163
+ */
164
+ async exit(options = {}) {
165
+ await this.eventManager.close();
166
+ if (options.exit ?? true) {
167
+ process.exit(options.exitCode ?? exports.EXIT_CODES.SUCCESS);
168
+ }
169
+ }
170
+ /**
171
+ * @ignore
172
+ */
173
+ on(event, listener) {
174
+ this.eventManager.on(event, listener);
175
+ }
176
+ /**
177
+ * @ignore
178
+ */
179
+ off(event, listener) {
180
+ this.eventManager.off(event, listener);
181
+ }
182
+ /**
183
+ * Runs an actor on the Apify platform using the current user account (determined by the `APIFY_TOKEN` environment variable),
184
+ * waits for the actor to finish and fetches its output.
185
+ *
186
+ * By passing the `waitSecs` option you can reduce the maximum amount of time to wait for the run to finish.
187
+ * If the value is less than or equal to zero, the function returns immediately after the run is started.
188
+ *
189
+ * The result of the function is an {@link ActorRun} object
190
+ * that contains details about the actor run and its output (if any).
191
+ *
192
+ * If you want to run an actor task rather than an actor, please use the
193
+ * {@link Actor.callTask} function instead.
194
+ *
195
+ * For more information about actors, read the
196
+ * [documentation](https://docs.apify.com/actor).
197
+ *
198
+ * **Example usage:**
199
+ *
200
+ * ```javascript
201
+ * const run = await Actor.call('apify/hello-world', { myInput: 123 });
202
+ * console.log(`Received message: ${run.output.body.message}`);
203
+ * ```
204
+ *
205
+ * Internally, the `call()` function invokes the
206
+ * [Run actor](https://apify.com/docs/api/v2#/reference/actors/run-collection/run-actor)
207
+ * and several other API endpoints to obtain the output.
208
+ *
209
+ * @param actId
210
+ * Allowed formats are `username/actor-name`, `userId/actor-name` or actor ID.
211
+ * @param [input]
212
+ * Input for the actor. If it is an object, it will be stringified to
213
+ * JSON and its content type set to `application/json; charset=utf-8`.
214
+ * Otherwise the `options.contentType` parameter must be provided.
215
+ * @param [options]
216
+ * @ignore
217
+ */
218
+ async call(actId, input, options = {}) {
219
+ const { token, ...rest } = options;
220
+ const client = token ? this.newClient({ token }) : this.apifyClient;
221
+ return client.actor(actId).call(input, rest);
222
+ }
223
+ /**
224
+ * Runs an actor task on the Apify platform using the current user account (determined by the `APIFY_TOKEN` environment variable),
225
+ * waits for the task to finish and fetches its output.
226
+ *
227
+ * By passing the `waitSecs` option you can reduce the maximum amount of time to wait for the run to finish.
228
+ * If the value is less than or equal to zero, the function returns immediately after the run is started.
229
+ *
230
+ * The result of the function is an {@link ActorRun} object
231
+ * that contains details about the actor run and its output (if any).
232
+ *
233
+ * Note that an actor task is a saved input configuration and options for an actor.
234
+ * If you want to run an actor directly rather than an actor task, please use the
235
+ * {@link Actor.call} function instead.
236
+ *
237
+ * For more information about actor tasks, read the [documentation](https://docs.apify.com/tasks).
238
+ *
239
+ * **Example usage:**
240
+ *
241
+ * ```javascript
242
+ * const run = await Actor.callTask('bob/some-task');
243
+ * console.log(`Received message: ${run.output.body.message}`);
244
+ * ```
245
+ *
246
+ * Internally, the `callTask()` function calls the
247
+ * [Run task](https://apify.com/docs/api/v2#/reference/actor-tasks/run-collection/run-task)
248
+ * and several other API endpoints to obtain the output.
249
+ *
250
+ * @param taskId
251
+ * Allowed formats are `username/task-name`, `userId/task-name` or task ID.
252
+ * @param [input]
253
+ * Input overrides for the actor task. If it is an object, it will be stringified to
254
+ * JSON and its content type set to `application/json; charset=utf-8`.
255
+ * Provided input will be merged with actor task input.
256
+ * @param [options]
257
+ * @ignore
258
+ */
259
+ async callTask(taskId, input, options = {}) {
260
+ const { token, ...rest } = options;
261
+ const client = token ? this.newClient({ token }) : this.apifyClient;
262
+ return client.task(taskId).call(input, rest);
263
+ }
264
+ /**
265
+ * Transforms this actor run to an actor run of a given actor. The system stops the current container and starts
266
+ * the new container instead. All the default storages are preserved and the new input is stored under the `INPUT-METAMORPH-1` key
267
+ * in the same default key-value store.
268
+ *
269
+ * @param targetActorId
270
+ * Either `username/actor-name` or actor ID of an actor to which we want to metamorph.
271
+ * @param [input]
272
+ * Input for the actor. If it is an object, it will be stringified to
273
+ * JSON and its content type set to `application/json; charset=utf-8`.
274
+ * Otherwise, the `options.contentType` parameter must be provided.
275
+ * @param [options]
276
+ * @ignore
277
+ */
278
+ async metamorph(targetActorId, input, options = {}) {
279
+ if (!this.isAtHome()) {
280
+ log_1.default.warning('Actor.metamorph() is only supported when running on the Apify platform.');
281
+ return;
282
+ }
283
+ const { customAfterSleepMillis = this.config.get('metamorphAfterSleepMillis'), ...metamorphOpts } = options;
284
+ const runId = this.config.get('actorRunId');
285
+ await this.apifyClient.run(runId).metamorph(targetActorId, input, metamorphOpts);
286
+ // Wait some time for container to be stopped.
287
+ await (0, utils_1.sleep)(customAfterSleepMillis);
288
+ }
289
+ /**
290
+ * Internally reboots this actor. The system stops the current container and starts
291
+ * a new container with the same run ID.
292
+ *
293
+ * @ignore
294
+ */
295
+ async reboot() {
296
+ if (!this.isAtHome()) {
297
+ log_1.default.warning('Actor.reboot() is only supported when running on the Apify platform.');
298
+ return;
299
+ }
300
+ // Waiting for all the listeners to finish, as `.metamorph()` kills the container.
301
+ await Promise.all([
302
+ // `persistState` for individual RequestLists, RequestQueue... instances to be persisted
303
+ ...this.config.getEventManager().listeners("persistState" /* PERSIST_STATE */).map((x) => x()),
304
+ // `migrating` to pause Apify crawlers
305
+ ...this.config.getEventManager().listeners("migrating" /* MIGRATING */).map((x) => x()),
306
+ ]);
307
+ const actorId = this.config.get('actorId');
308
+ await this.metamorph(actorId);
309
+ }
310
+ /**
311
+ * Creates an ad-hoc webhook for the current actor run, which lets you receive a notification when the actor run finished or failed.
312
+ * For more information about Apify actor webhooks, please see the [documentation](https://docs.apify.com/webhooks).
313
+ *
314
+ * Note that webhooks are only supported for actors running on the Apify platform.
315
+ * In local environment, the function will print a warning and have no effect.
316
+ *
317
+ * @param options
318
+ * @returns The return value is the Webhook object.
319
+ * For more information, see the [Get webhook](https://apify.com/docs/api/v2#/reference/webhooks/webhook-object/get-webhook) API endpoint.
320
+ * @ignore
321
+ */
322
+ async addWebhook(options) {
323
+ (0, ow_1.default)(options, ow_1.default.object.exactShape({
324
+ eventTypes: ow_1.default.array.ofType(ow_1.default.string),
325
+ requestUrl: ow_1.default.string,
326
+ payloadTemplate: ow_1.default.optional.string,
327
+ idempotencyKey: ow_1.default.optional.string,
328
+ }));
329
+ const { eventTypes, requestUrl, payloadTemplate, idempotencyKey } = options;
330
+ if (!this.isAtHome()) {
331
+ log_1.default.warning('Actor.addWebhook() is only supported when running on the Apify platform. The webhook will not be invoked.');
332
+ return undefined;
333
+ }
334
+ const runId = this.config.get('actorRunId');
335
+ if (!runId) {
336
+ throw new Error(`Environment variable ${consts_1.ENV_VARS.ACTOR_RUN_ID} is not set!`);
337
+ }
338
+ return this.apifyClient.webhooks().create({
339
+ isAdHoc: true,
340
+ eventTypes,
341
+ condition: {
342
+ actorRunId: runId,
343
+ },
344
+ requestUrl,
345
+ payloadTemplate,
346
+ idempotencyKey,
347
+ });
348
+ }
349
+ /**
350
+ * Stores an object or an array of objects to the default {@link Dataset} of the current actor run.
351
+ *
352
+ * This is just a convenient shortcut for {@link Dataset.pushData}.
353
+ * For example, calling the following code:
354
+ * ```javascript
355
+ * await Actor.pushData({ myValue: 123 });
356
+ * ```
357
+ *
358
+ * is equivalent to:
359
+ * ```javascript
360
+ * const dataset = await Actor.openDataset();
361
+ * await dataset.pushData({ myValue: 123 });
362
+ * ```
363
+ *
364
+ * For more information, see {@link Actor.openDataset} and {@link Dataset.pushData}
365
+ *
366
+ * **IMPORTANT**: Make sure to use the `await` keyword when calling `pushData()`,
367
+ * otherwise the actor process might finish before the data are stored!
368
+ *
369
+ * @param item Object or array of objects containing data to be stored in the default dataset.
370
+ * The objects must be serializable to JSON and the JSON representation of each object must be smaller than 9MB.
371
+ * @ignore
372
+ */
373
+ async pushData(item) {
374
+ const dataset = await this.openDataset();
375
+ return dataset.pushData(item);
376
+ }
377
+ /**
378
+ * Opens a dataset and returns a promise resolving to an instance of the {@link Dataset} class.
379
+ *
380
+ * Datasets are used to store structured data where each object stored has the same attributes,
381
+ * such as online store products or real estate offers.
382
+ * The actual data is stored either on the local filesystem or in the cloud.
383
+ *
384
+ * For more details and code examples, see the {@link Dataset} class.
385
+ *
386
+ * @param [datasetIdOrName]
387
+ * ID or name of the dataset to be opened. If `null` or `undefined`,
388
+ * the function returns the default dataset associated with the actor run.
389
+ * @param [options]
390
+ * @ignore
391
+ */
392
+ async openDataset(datasetIdOrName, options = {}) {
393
+ (0, ow_1.default)(datasetIdOrName, ow_1.default.optional.string);
394
+ (0, ow_1.default)(options, ow_1.default.object.exactShape({
395
+ forceCloud: ow_1.default.optional.boolean,
396
+ }));
397
+ return this._openStorage(core_1.Dataset, datasetIdOrName, options);
398
+ }
399
+ /**
400
+ * Gets a value from the default {@link KeyValueStore} associated with the current actor run.
401
+ *
402
+ * This is just a convenient shortcut for {@link KeyValueStore.getValue}.
403
+ * For example, calling the following code:
404
+ * ```javascript
405
+ * const value = await Actor.getValue('my-key');
406
+ * ```
407
+ *
408
+ * is equivalent to:
409
+ * ```javascript
410
+ * const store = await Actor.openKeyValueStore();
411
+ * const value = await store.getValue('my-key');
412
+ * ```
413
+ *
414
+ * To store the value to the default key-value store, you can use the {@link Actor.setValue} function.
415
+ *
416
+ * For more information, see {@link Actor.openKeyValueStore}
417
+ * and {@link KeyValueStore.getValue}.
418
+ *
419
+ * @param key Unique record key.
420
+ * @returns
421
+ * Returns a promise that resolves to an object, string
422
+ * or [`Buffer`](https://nodejs.org/api/buffer.html), depending
423
+ * on the MIME content type of the record, or `null`
424
+ * if the record is missing.
425
+ * @ignore
426
+ */
427
+ async getValue(key) {
428
+ const store = await this.openKeyValueStore();
429
+ return store.getValue(key);
430
+ }
431
+ /**
432
+ * Stores or deletes a value in the default {@link KeyValueStore} associated with the current actor run.
433
+ *
434
+ * This is just a convenient shortcut for {@link KeyValueStore.setValue}.
435
+ * For example, calling the following code:
436
+ * ```javascript
437
+ * await Actor.setValue('OUTPUT', { foo: "bar" });
438
+ * ```
439
+ *
440
+ * is equivalent to:
441
+ * ```javascript
442
+ * const store = await Actor.openKeyValueStore();
443
+ * await store.setValue('OUTPUT', { foo: "bar" });
444
+ * ```
445
+ *
446
+ * To get a value from the default key-value store, you can use the {@link Actor.getValue} function.
447
+ *
448
+ * For more information, see {@link Actor.openKeyValueStore}
449
+ * and {@link KeyValueStore.setValue}.
450
+ *
451
+ * @param key
452
+ * Unique record key.
453
+ * @param value
454
+ * Record data, which can be one of the following values:
455
+ * - If `null`, the record in the key-value store is deleted.
456
+ * - If no `options.contentType` is specified, `value` can be any JavaScript object, and it will be stringified to JSON.
457
+ * - If `options.contentType` is set, `value` is taken as is, and it must be a `String` or [`Buffer`](https://nodejs.org/api/buffer.html).
458
+ * For any other value an error will be thrown.
459
+ * @param [options]
460
+ * @ignore
461
+ */
462
+ async setValue(key, value, options = {}) {
463
+ const store = await this.openKeyValueStore();
464
+ return store.setValue(key, value, options);
465
+ }
466
+ /**
467
+ * Gets the actor input value from the default {@link KeyValueStore} associated with the current actor run.
468
+ *
469
+ * This is just a convenient shortcut for [`keyValueStore.getValue('INPUT')`](core/class/KeyValueStore#getValue).
470
+ * For example, calling the following code:
471
+ * ```javascript
472
+ * const input = await Actor.getInput();
473
+ * ```
474
+ *
475
+ * is equivalent to:
476
+ * ```javascript
477
+ * const store = await Actor.openKeyValueStore();
478
+ * await store.getValue('INPUT');
479
+ * ```
480
+ *
481
+ * Note that the `getInput()` function does not cache the value read from the key-value store.
482
+ * If you need to use the input multiple times in your actor,
483
+ * it is far more efficient to read it once and store it locally.
484
+ *
485
+ * For more information, see {@link Actor.openKeyValueStore}
486
+ * and {@link KeyValueStore.getValue}.
487
+ *
488
+ * @returns
489
+ * Returns a promise that resolves to an object, string
490
+ * or [`Buffer`](https://nodejs.org/api/buffer.html), depending
491
+ * on the MIME content type of the record, or `null`
492
+ * if the record is missing.
493
+ * @ignore
494
+ */
495
+ async getInput() {
496
+ return this.getValue(this.config.get('inputKey'));
497
+ }
498
+ /**
499
+ * Opens a key-value store and returns a promise resolving to an instance of the {@link KeyValueStore} class.
500
+ *
501
+ * Key-value stores are used to store records or files, along with their MIME content type.
502
+ * The records are stored and retrieved using a unique key.
503
+ * The actual data is stored either on a local filesystem or in the Apify cloud.
504
+ *
505
+ * For more details and code examples, see the {@link KeyValueStore} class.
506
+ *
507
+ * @param [storeIdOrName]
508
+ * ID or name of the key-value store to be opened. If `null` or `undefined`,
509
+ * the function returns the default key-value store associated with the actor run.
510
+ * @param [options]
511
+ * @ignore
512
+ */
513
+ async openKeyValueStore(storeIdOrName, options = {}) {
514
+ (0, ow_1.default)(storeIdOrName, ow_1.default.optional.string);
515
+ (0, ow_1.default)(options, ow_1.default.object.exactShape({
516
+ forceCloud: ow_1.default.optional.boolean,
517
+ }));
518
+ return this._openStorage(core_1.KeyValueStore, storeIdOrName, options);
519
+ }
520
+ /**
521
+ * Opens a request list and returns a promise resolving to an instance
522
+ * of the {@link RequestList} class that is already initialized.
523
+ *
524
+ * {@link RequestList} represents a list of URLs to crawl, which is always stored in memory.
525
+ * To enable picking up where left off after a process restart, the request list sources
526
+ * are persisted to the key-value store at initialization of the list. Then, while crawling,
527
+ * a small state object is regularly persisted to keep track of the crawling status.
528
+ *
529
+ * For more details and code examples, see the {@link RequestList} class.
530
+ *
531
+ * **Example usage:**
532
+ *
533
+ * ```javascript
534
+ * const sources = [
535
+ * 'https://www.example.com',
536
+ * 'https://www.google.com',
537
+ * 'https://www.bing.com'
538
+ * ];
539
+ *
540
+ * const requestList = await Actor.openRequestList('my-name', sources);
541
+ * ```
542
+ *
543
+ * @param listName
544
+ * Name of the request list to be opened. Setting a name enables the `RequestList`'s state to be persisted
545
+ * in the key-value store. This is useful in case of a restart or migration. Since `RequestList` is only
546
+ * stored in memory, a restart or migration wipes it clean. Setting a name will enable the `RequestList`'s
547
+ * state to survive those situations and continue where it left off.
548
+ *
549
+ * The name will be used as a prefix in key-value store, producing keys such as `NAME-REQUEST_LIST_STATE`
550
+ * and `NAME-REQUEST_LIST_SOURCES`.
551
+ *
552
+ * If `null`, the list will not be persisted and will only be stored in memory. Process restart
553
+ * will then cause the list to be crawled again from the beginning. We suggest always using a name.
554
+ * @param sources
555
+ * An array of sources of URLs for the {@link RequestList}. It can be either an array of strings,
556
+ * plain objects that define at least the `url` property, or an array of {@link Request} instances.
557
+ *
558
+ * **IMPORTANT:** The `sources` array will be consumed (left empty) after {@link RequestList} initializes.
559
+ * This is a measure to prevent memory leaks in situations when millions of sources are
560
+ * added.
561
+ *
562
+ * Additionally, the `requestsFromUrl` property may be used instead of `url`,
563
+ * which will instruct {@link RequestList} to download the source URLs from a given remote location.
564
+ * The URLs will be parsed from the received response. In this case you can limit the URLs
565
+ * using `regex` parameter containing regular expression pattern for URLs to be included.
566
+ *
567
+ * For details, see the {@link RequestListOptions.sources}
568
+ * @param [options]
569
+ * The {@link RequestList} options. Note that the `listName` parameter supersedes
570
+ * the {@link RequestListOptions.persistStateKey} and {@link RequestListOptions.persistRequestsKey}
571
+ * options and the `sources` parameter supersedes the {@link RequestListOptions.sources} option.
572
+ * @ignore
573
+ */
574
+ async openRequestList(listName, sources, options = {}) {
575
+ return core_1.RequestList.open(listName, sources, options);
576
+ }
577
+ /**
578
+ * Opens a request queue and returns a promise resolving to an instance
579
+ * of the {@link RequestQueue} class.
580
+ *
581
+ * {@link RequestQueue} represents a queue of URLs to crawl, which is stored either on local filesystem or in the cloud.
582
+ * The queue is used for deep crawling of websites, where you start with several URLs and then
583
+ * recursively follow links to other pages. The data structure supports both breadth-first
584
+ * and depth-first crawling orders.
585
+ *
586
+ * For more details and code examples, see the {@link RequestQueue} class.
587
+ *
588
+ * @param [queueIdOrName]
589
+ * ID or name of the request queue to be opened. If `null` or `undefined`,
590
+ * the function returns the default request queue associated with the actor run.
591
+ * @param [options]
592
+ * @ignore
593
+ */
594
+ async openRequestQueue(queueIdOrName, options = {}) {
595
+ (0, ow_1.default)(queueIdOrName, ow_1.default.optional.string);
596
+ (0, ow_1.default)(options, ow_1.default.object.exactShape({
597
+ forceCloud: ow_1.default.optional.boolean,
598
+ }));
599
+ return this._openStorage(core_1.RequestQueue, queueIdOrName, options);
600
+ }
601
+ /**
602
+ * Creates a proxy configuration and returns a promise resolving to an instance
603
+ * of the {@link ProxyConfiguration} class that is already initialized.
604
+ *
605
+ * Configures connection to a proxy server with the provided options. Proxy servers are used to prevent target websites from blocking
606
+ * your crawlers based on IP address rate limits or blacklists. Setting proxy configuration in your crawlers automatically configures
607
+ * them to use the selected proxies for all connections.
608
+ *
609
+ * For more details and code examples, see the {@link ProxyConfiguration} class.
610
+ *
611
+ * ```javascript
612
+ *
613
+ * // Returns initialized proxy configuration class
614
+ * const proxyConfiguration = await Actor.createProxyConfiguration({
615
+ * groups: ['GROUP1', 'GROUP2'], // List of Apify proxy groups
616
+ * countryCode: 'US'
617
+ * });
618
+ *
619
+ * const crawler = new CheerioCrawler({
620
+ * // ...
621
+ * proxyConfiguration,
622
+ * handlePageFunction: ({ proxyInfo }) => {
623
+ * const usedProxyUrl = proxyInfo.url; // Getting the proxy URL
624
+ * }
625
+ * })
626
+ *
627
+ * ```
628
+ *
629
+ * For compatibility with existing Actor Input UI (Input Schema), this function
630
+ * returns `undefined` when the following object is passed as `proxyConfigurationOptions`.
631
+ *
632
+ * ```
633
+ * { useApifyProxy: false }
634
+ * ```
635
+ * @ignore
636
+ */
637
+ async createProxyConfiguration(proxyConfigurationOptions = {}) {
638
+ // Compatibility fix for Input UI where proxy: None returns { useApifyProxy: false }
639
+ // Without this, it would cause proxy to use the zero config / auto mode.
640
+ const dontUseApifyProxy = proxyConfigurationOptions.useApifyProxy === false;
641
+ const dontUseCustomProxies = !proxyConfigurationOptions.proxyUrls;
642
+ if (dontUseApifyProxy && dontUseCustomProxies) {
643
+ return undefined;
644
+ }
645
+ const proxyConfiguration = new proxy_configuration_1.ProxyConfiguration(proxyConfigurationOptions, this.config);
646
+ await proxyConfiguration.initialize();
647
+ return proxyConfiguration;
648
+ }
649
+ /**
650
+ * Returns a new {@link ApifyEnv} object which contains information parsed from all the `APIFY_XXX` environment variables.
651
+ *
652
+ * For the list of the `APIFY_XXX` environment variables, see
653
+ * [Actor documentation](https://docs.apify.com/actor/run#environment-variables).
654
+ * If some variables are not defined or are invalid, the corresponding value in the resulting object will be null.
655
+ * @ignore
656
+ */
657
+ getEnv() {
658
+ // NOTE: Don't throw if env vars are invalid to simplify local development and debugging of actors
659
+ const env = process.env || {};
660
+ const envVars = {};
661
+ for (const [shortName, fullName] of Object.entries(consts_1.ENV_VARS)) {
662
+ const camelCaseName = (0, utils_1.snakeCaseToCamelCase)(shortName);
663
+ let value = env[fullName];
664
+ // Parse dates and integers.
665
+ if (value && fullName.endsWith('_AT')) {
666
+ const unix = Date.parse(value);
667
+ value = unix > 0 ? new Date(unix) : undefined;
668
+ }
669
+ else if (consts_1.INTEGER_ENV_VARS.includes(fullName)) {
670
+ value = parseInt(value, 10);
671
+ }
672
+ Reflect.set(envVars, camelCaseName, value || value === 0 ? value : null);
673
+ }
674
+ return envVars;
675
+ }
676
+ /**
677
+ * Returns a new instance of the Apify API client. The `ApifyClient` class is provided
678
+ * by the [apify-client](https://www.npmjs.com/package/apify-client)
679
+ * NPM package, and it is automatically configured using the `APIFY_API_BASE_URL`, and `APIFY_TOKEN`
680
+ * environment variables. You can override the token via the available options. That's useful
681
+ * if you want to use the client as a different Apify user than the SDK internals are using.
682
+ * @ignore
683
+ */
684
+ newClient(options = {}) {
685
+ const { storageDir, ...storageClientOptions } = this.config.get('storageClientOptions');
686
+ return new apify_client_1.ApifyClient({
687
+ baseUrl: process.env[consts_1.ENV_VARS.API_BASE_URL] ?? 'https://api.apify.com',
688
+ token: process.env[consts_1.ENV_VARS.TOKEN],
689
+ ...storageClientOptions,
690
+ ...options, // allow overriding the instance configuration
691
+ });
692
+ }
693
+ /**
694
+ * Returns `true` when code is running on Apify platform and `false` otherwise (for example locally).
695
+ * @ignore
696
+ */
697
+ isAtHome() {
698
+ return !!process.env[consts_1.ENV_VARS.IS_AT_HOME];
699
+ }
700
+ /**
701
+ * Runs the main user function that performs the job of the actor
702
+ * and terminates the process when the user function finishes.
703
+ *
704
+ * **The `Actor.main()` function is optional** and is provided merely for your convenience.
705
+ * It is mainly useful when you're running your code as an actor on the [Apify platform](https://apify.com/actors).
706
+ * However, if you want to use Apify SDK tools directly inside your existing projects, e.g.
707
+ * running in an [Express](https://expressjs.com/) server, on
708
+ * [Google Cloud functions](https://cloud.google.com/functions)
709
+ * or [AWS Lambda](https://aws.amazon.com/lambda/), it's better to avoid
710
+ * it since the function terminates the main process when it finishes!
711
+ *
712
+ * The `Actor.main()` function performs the following actions:
713
+ *
714
+ * - When running on the Apify platform (i.e. `APIFY_IS_AT_HOME` environment variable is set),
715
+ * it sets up a connection to listen for platform events.
716
+ * For example, to get a notification about an imminent migration to another server.
717
+ * See {@link Actor.events} for details.
718
+ * - It checks that either `APIFY_TOKEN` or `APIFY_LOCAL_STORAGE_DIR` environment variable
719
+ * is defined. If not, the function sets `APIFY_LOCAL_STORAGE_DIR` to `./apify_storage`
720
+ * inside the current working directory. This is to simplify running code examples.
721
+ * - It invokes the user function passed as the `userFunc` parameter.
722
+ * - If the user function returned a promise, waits for it to resolve.
723
+ * - If the user function throws an exception or some other error is encountered,
724
+ * prints error details to console so that they are stored to the log.
725
+ * - Exits the Node.js process, with zero exit code on success and non-zero on errors.
726
+ *
727
+ * The user function can be synchronous:
728
+ *
729
+ * ```javascript
730
+ * Actor.main(() => {
731
+ * // My synchronous function that returns immediately
732
+ * console.log('Hello world from actor!');
733
+ * });
734
+ * ```
735
+ *
736
+ * If the user function returns a promise, it is considered asynchronous:
737
+ * ```javascript
738
+ * const { gotScraping } = require('got-scraping');
739
+ *
740
+ * Actor.main(() => {
741
+ * // My asynchronous function that returns a promise
742
+ * return gotScraping('http://www.example.com').then((html) => {
743
+ * console.log(html);
744
+ * });
745
+ * });
746
+ * ```
747
+ *
748
+ * To simplify your code, you can take advantage of the `async`/`await` keywords:
749
+ *
750
+ * ```javascript
751
+ * const { gotScraping } = require('got-scraping');
752
+ *
753
+ * Actor.main(async () => {
754
+ * // My asynchronous function
755
+ * const html = await gotScraping('http://www.example.com');
756
+ * console.log(html);
757
+ * });
758
+ * ```
759
+ *
760
+ * @param userFunc User function to be executed. If it returns a promise,
761
+ * the promise will be awaited. The user function is called with no arguments.
762
+ * @param options
763
+ */
764
+ static main(userFunc, options) {
765
+ return Actor.getDefaultInstance().main(userFunc, options); // forwarded to the shared default instance; its result is returned as is
766
+ }
767
+ static async init() {
768
+ return Actor.getDefaultInstance().init(); // static shortcut: calls `init()` on the shared default instance
769
+ }
770
+ static async exit(options = {}) {
771
+ return Actor.getDefaultInstance().exit(options); // static shortcut; `options` falls back to `{}` only when omitted or `undefined`
772
+ }
773
+ static on(event, listener) {
774
+ Actor.getDefaultInstance().on(event, listener); // registers on the default instance; the call's result is not returned
775
+ }
776
+ static off(event, listener) {
777
+ Actor.getDefaultInstance().off(event, listener); // counterpart of `Actor.on()`; the call's result is not returned
778
+ }
779
+ /**
780
+ * Runs an actor on the Apify platform using the current user account (determined by the `APIFY_TOKEN` environment variable),
781
+ * waits for the actor to finish and fetches its output.
782
+ *
783
+ * By passing the `waitSecs` option you can reduce the maximum amount of time to wait for the run to finish.
784
+ * If the value is less than or equal to zero, the function returns immediately after the run is started.
785
+ *
786
+ * The result of the function is an {@link ActorRun} object
787
+ * that contains details about the actor run and its output (if any).
788
+ *
789
+ * If you want to run an actor task rather than an actor, please use the
790
+ * {@link Actor.callTask} function instead.
791
+ *
792
+ * For more information about actors, read the
793
+ * [documentation](https://docs.apify.com/actor).
794
+ *
795
+ * **Example usage:**
796
+ *
797
+ * ```javascript
798
+ * const run = await Actor.call('apify/hello-world', { myInput: 123 });
799
+ * console.log(`Received message: ${run.output.body.message}`);
800
+ * ```
801
+ *
802
+ * Internally, the `call()` function invokes the
803
+ * [Run actor](https://apify.com/docs/api/v2#/reference/actors/run-collection/run-actor)
804
+ * and several other API endpoints to obtain the output.
805
+ *
806
+ * @param actId
807
+ * Allowed formats are `username/actor-name`, `userId/actor-name` or actor ID.
808
+ * @param [input]
809
+ * Input for the actor. If it is an object, it will be stringified to
810
+ * JSON and its content type set to `application/json; charset=utf-8`.
811
+ * Otherwise the `options.contentType` parameter must be provided.
812
+ * @param [options]
813
+ */
814
+ static async call(actId, input, options = {}) {
815
+ return Actor.getDefaultInstance().call(actId, input, options); // same arguments, handled by the shared default instance
816
+ }
817
+ /**
818
+ * Runs an actor task on the Apify platform using the current user account (determined by the `APIFY_TOKEN` environment variable),
819
+ * waits for the task to finish and fetches its output.
820
+ *
821
+ * By passing the `waitSecs` option you can reduce the maximum amount of time to wait for the run to finish.
822
+ * If the value is less than or equal to zero, the function returns immediately after the run is started.
823
+ *
824
+ * The result of the function is an {@link ActorRun} object
825
+ * that contains details about the actor run and its output (if any).
826
+ *
827
+ * Note that an actor task is a saved input configuration and options for an actor.
828
+ * If you want to run an actor directly rather than an actor task, please use the
829
+ * {@link Actor.call} function instead.
830
+ *
831
+ * For more information about actor tasks, read the [documentation](https://docs.apify.com/tasks).
832
+ *
833
+ * **Example usage:**
834
+ *
835
+ * ```javascript
836
+ * const run = await Actor.callTask('bob/some-task');
837
+ * console.log(`Received message: ${run.output.body.message}`);
838
+ * ```
839
+ *
840
+ * Internally, the `callTask()` function calls the
841
+ * [Run task](https://apify.com/docs/api/v2#/reference/actor-tasks/run-collection/run-task)
842
+ * and several other API endpoints to obtain the output.
843
+ *
844
+ * @param taskId
845
+ * Allowed formats are `username/task-name`, `userId/task-name` or task ID.
846
+ * @param [input]
847
+ * Input overrides for the actor task. If it is an object, it will be stringified to
848
+ * JSON and its content type set to `application/json; charset=utf-8`.
849
+ * Provided input will be merged with actor task input.
850
+ * @param [options]
851
+ */
852
+ static async callTask(taskId, input, options = {}) {
853
+ return Actor.getDefaultInstance().callTask(taskId, input, options); // same arguments, handled by the shared default instance
854
+ }
855
+ /**
856
+ * Transforms this actor run to an actor run of a given actor. The system stops the current container and starts
857
+ * the new container instead. All the default storages are preserved and the new input is stored under the `INPUT-METAMORPH-1` key
858
+ * in the same default key-value store.
859
+ *
860
+ * @param targetActorId
861
+ * Either `username/actor-name` or actor ID of an actor to which we want to metamorph.
862
+ * @param [input]
863
+ * Input for the actor. If it is an object, it will be stringified to
864
+ * JSON and its content type set to `application/json; charset=utf-8`.
865
+ * Otherwise, the `options.contentType` parameter must be provided.
866
+ * @param [options]
867
+ */
868
+ static async metamorph(targetActorId, input, options = {}) {
869
+ return Actor.getDefaultInstance().metamorph(targetActorId, input, options); // same arguments, handled by the shared default instance
870
+ }
871
+ /**
872
+ * Internally reboots this actor run. The system stops the current container and starts
873
+ * a new container with the same run id.
874
+ */
875
+ static async reboot() {
876
+ return Actor.getDefaultInstance().reboot(); // static shortcut: calls `reboot()` on the shared default instance
877
+ }
878
+ /**
879
+ * Creates an ad-hoc webhook for the current actor run, which lets you receive a notification when the actor run finishes or fails.
880
+ * For more information about Apify actor webhooks, please see the [documentation](https://docs.apify.com/webhooks).
881
+ *
882
+ * Note that webhooks are only supported for actors running on the Apify platform.
883
+ * In local environment, the function will print a warning and have no effect.
884
+ *
885
+ * @param options
886
+ * @returns The return value is the Webhook object.
887
+ * For more information, see the [Get webhook](https://apify.com/docs/api/v2#/reference/webhooks/webhook-object/get-webhook) API endpoint.
888
+ */
889
+ static async addWebhook(options) {
890
+ return Actor.getDefaultInstance().addWebhook(options); // `options` has no default here; it is passed through unchanged
891
+ }
892
+ /**
893
+ * Stores an object or an array of objects to the default {@link Dataset} of the current actor run.
894
+ *
895
+ * This is just a convenient shortcut for {@link Dataset.pushData}.
896
+ * For example, calling the following code:
897
+ * ```javascript
898
+ * await Actor.pushData({ myValue: 123 });
899
+ * ```
900
+ *
901
+ * is equivalent to:
902
+ * ```javascript
903
+ * const dataset = await Actor.openDataset();
904
+ * await dataset.pushData({ myValue: 123 });
905
+ * ```
906
+ *
907
+ * For more information, see {@link Actor.openDataset} and {@link Dataset.pushData}
908
+ *
909
+ * **IMPORTANT**: Make sure to use the `await` keyword when calling `pushData()`,
910
+ * otherwise the actor process might finish before the data are stored!
911
+ *
912
+ * @param item Object or array of objects containing data to be stored in the default dataset.
913
+ * The objects must be serializable to JSON and the JSON representation of each object must be smaller than 9MB.
914
+ */
915
+ static async pushData(item) {
916
+ return Actor.getDefaultInstance().pushData(item); // `item` is passed through unchanged to the default instance
917
+ }
918
+ /**
919
+ * Opens a dataset and returns a promise resolving to an instance of the {@link Dataset} class.
920
+ *
921
+ * Datasets are used to store structured data where each object stored has the same attributes,
922
+ * such as online store products or real estate offers.
923
+ * The actual data is stored either on the local filesystem or in the cloud.
924
+ *
925
+ * For more details and code examples, see the {@link Dataset} class.
926
+ *
927
+ * @param [datasetIdOrName]
928
+ * ID or name of the dataset to be opened. If `null` or `undefined`,
929
+ * the function returns the default dataset associated with the actor run.
930
+ * @param [options]
931
+ */
932
+ static async openDataset(datasetIdOrName, options = {}) {
933
+ return Actor.getDefaultInstance().openDataset(datasetIdOrName, options); // same arguments, handled by the shared default instance
934
+ }
935
+ /**
936
+ * Gets a value from the default {@link KeyValueStore} associated with the current actor run.
937
+ *
938
+ * This is just a convenient shortcut for {@link KeyValueStore.getValue}.
939
+ * For example, calling the following code:
940
+ * ```javascript
941
+ * const value = await Actor.getValue('my-key');
942
+ * ```
943
+ *
944
+ * is equivalent to:
945
+ * ```javascript
946
+ * const store = await Actor.openKeyValueStore();
947
+ * const value = await store.getValue('my-key');
948
+ * ```
949
+ *
950
+ * To store the value to the default key-value store, you can use the {@link Actor.setValue} function.
951
+ *
952
+ * For more information, see {@link Actor.openKeyValueStore}
953
+ * and {@link KeyValueStore.getValue}.
954
+ *
955
+ * @param key Unique record key.
956
+ * @returns
957
+ * Returns a promise that resolves to an object, string
958
+ * or [`Buffer`](https://nodejs.org/api/buffer.html), depending
959
+ * on the MIME content type of the record, or `null`
960
+ * if the record is missing.
961
+ */
962
+ static async getValue(key) {
963
+ return Actor.getDefaultInstance().getValue(key); // static shortcut: calls `getValue()` on the shared default instance
964
+ }
965
+ /**
966
+ * Stores or deletes a value in the default {@link KeyValueStore} associated with the current actor run.
967
+ *
968
+ * This is just a convenient shortcut for {@link KeyValueStore.setValue}.
969
+ * For example, calling the following code:
970
+ * ```javascript
971
+ * await Actor.setValue('OUTPUT', { foo: "bar" });
972
+ * ```
973
+ *
974
+ * is equivalent to:
975
+ * ```javascript
976
+ * const store = await Actor.openKeyValueStore();
977
+ * await store.setValue('OUTPUT', { foo: "bar" });
978
+ * ```
979
+ *
980
+ * To get a value from the default key-value store, you can use the {@link Actor.getValue} function.
981
+ *
982
+ * For more information, see {@link Actor.openKeyValueStore}
983
+ * and {@link KeyValueStore.setValue}.
984
+ *
985
+ * @param key
986
+ * Unique record key.
987
+ * @param value
988
+ * Record data, which can be one of the following values:
989
+ * - If `null`, the record in the key-value store is deleted.
990
+ * - If no `options.contentType` is specified, `value` can be any JavaScript object, and it will be stringified to JSON.
991
+ * - If `options.contentType` is set, `value` is taken as is, and it must be a `String` or [`Buffer`](https://nodejs.org/api/buffer.html).
992
+ * For any other value an error will be thrown.
993
+ * @param [options]
994
+ */
995
+ static async setValue(key, value, options = {}) {
996
+ return Actor.getDefaultInstance().setValue(key, value, options); // same arguments, handled by the shared default instance
997
+ }
998
+ /**
999
+ * Gets the actor input value from the default {@link KeyValueStore} associated with the current actor run.
1000
+ *
1001
+ * This is just a convenient shortcut for {@link KeyValueStore.getValue | `keyValueStore.getValue('INPUT')`}.
1002
+ * For example, calling the following code:
1003
+ * ```javascript
1004
+ * const input = await Actor.getInput();
1005
+ * ```
1006
+ *
1007
+ * is equivalent to:
1008
+ * ```javascript
1009
+ * const store = await Actor.openKeyValueStore();
1010
+ * await store.getValue('INPUT');
1011
+ * ```
1012
+ *
1013
+ * Note that the `getInput()` function does not cache the value read from the key-value store.
1014
+ * If you need to use the input multiple times in your actor,
1015
+ * it is far more efficient to read it once and store it locally.
1016
+ *
1017
+ * For more information, see {@link Actor.openKeyValueStore} and {@link KeyValueStore.getValue}.
1018
+ *
1019
+ * @returns
1020
+ * Returns a promise that resolves to an object, string
1021
+ * or [`Buffer`](https://nodejs.org/api/buffer.html), depending
1022
+ * on the MIME content type of the record, or `null`
1023
+ * if the record is missing.
1024
+ */
1025
+ static async getInput() {
1026
+ return Actor.getDefaultInstance().getInput(); // static shortcut: calls `getInput()` on the shared default instance
1027
+ }
1028
+ /**
1029
+ * Opens a key-value store and returns a promise resolving to an instance of the {@link KeyValueStore} class.
1030
+ *
1031
+ * Key-value stores are used to store records or files, along with their MIME content type.
1032
+ * The records are stored and retrieved using a unique key.
1033
+ * The actual data is stored either on a local filesystem or in the Apify cloud.
1034
+ *
1035
+ * For more details and code examples, see the {@link KeyValueStore} class.
1036
+ *
1037
+ * @param [storeIdOrName]
1038
+ * ID or name of the key-value store to be opened. If `null` or `undefined`,
1039
+ * the function returns the default key-value store associated with the actor run.
1040
+ * @param [options]
1041
+ */
1042
+ static async openKeyValueStore(storeIdOrName, options = {}) {
1043
+ return Actor.getDefaultInstance().openKeyValueStore(storeIdOrName, options); // same arguments, handled by the shared default instance
1044
+ }
1045
+ /**
1046
+ * Opens a request list and returns a promise resolving to an instance
1047
+ * of the {@link RequestList} class that is already initialized.
1048
+ *
1049
+ * {@link RequestList} represents a list of URLs to crawl, which is always stored in memory.
1050
+ * To enable picking up where it left off after a process restart, the request list sources
1051
+ * are persisted to the key-value store at initialization of the list. Then, while crawling,
1052
+ * a small state object is regularly persisted to keep track of the crawling status.
1053
+ *
1054
+ * For more details and code examples, see the {@link RequestList} class.
1055
+ *
1056
+ * **Example usage:**
1057
+ *
1058
+ * ```javascript
1059
+ * const sources = [
1060
+ * 'https://www.example.com',
1061
+ * 'https://www.google.com',
1062
+ * 'https://www.bing.com'
1063
+ * ];
1064
+ *
1065
+ * const requestList = await Actor.openRequestList('my-name', sources);
1066
+ * ```
1067
+ *
1068
+ * @param listName
1069
+ * Name of the request list to be opened. Setting a name enables the `RequestList`'s state to be persisted
1070
+ * in the key-value store. This is useful in case of a restart or migration. Since `RequestList` is only
1071
+ * stored in memory, a restart or migration wipes it clean. Setting a name will enable the `RequestList`'s
1072
+ * state to survive those situations and continue where it left off.
1073
+ *
1074
+ * The name will be used as a prefix in key-value store, producing keys such as `NAME-REQUEST_LIST_STATE`
1075
+ * and `NAME-REQUEST_LIST_SOURCES`.
1076
+ *
1077
+ * If `null`, the list will not be persisted and will only be stored in memory. Process restart
1078
+ * will then cause the list to be crawled again from the beginning. We suggest always using a name.
1079
+ * @param sources
1080
+ * An array of sources of URLs for the {@link RequestList}. It can be either an array of strings,
1081
+ * plain objects that define at least the `url` property, or an array of {@link Request} instances.
1082
+ *
1083
+ * **IMPORTANT:** The `sources` array will be consumed (left empty) after {@link RequestList} initializes.
1084
+ * This is a measure to prevent memory leaks in situations when millions of sources are
1085
+ * added.
1086
+ *
1087
+ * Additionally, the `requestsFromUrl` property may be used instead of `url`,
1088
+ * which will instruct {@link RequestList} to download the source URLs from a given remote location.
1089
+ * The URLs will be parsed from the received response. In this case you can limit the URLs
1090
+ * using `regex` parameter containing regular expression pattern for URLs to be included.
1091
+ *
1092
+ * For details, see the {@link RequestListOptions.sources}
1093
+ * @param [options]
1094
+ * The {@link RequestList} options. Note that the `listName` parameter supersedes
1095
+ * the {@link RequestListOptions.persistStateKey} and {@link RequestListOptions.persistRequestsKey}
1096
+ * options and the `sources` parameter supersedes the {@link RequestListOptions.sources} option.
1097
+ */
1098
+ static async openRequestList(listName, sources, options = {}) {
1099
+ return Actor.getDefaultInstance().openRequestList(listName, sources, options); // same arguments, handled by the shared default instance
1100
+ }
1101
+ /**
1102
+ * Opens a request queue and returns a promise resolving to an instance
1103
+ * of the {@link RequestQueue} class.
1104
+ *
1105
+ * {@link RequestQueue} represents a queue of URLs to crawl, which is stored either on local filesystem or in the cloud.
1106
+ * The queue is used for deep crawling of websites, where you start with several URLs and then
1107
+ * recursively follow links to other pages. The data structure supports both breadth-first
1108
+ * and depth-first crawling orders.
1109
+ *
1110
+ * For more details and code examples, see the {@link RequestQueue} class.
1111
+ *
1112
+ * @param [queueIdOrName]
1113
+ * ID or name of the request queue to be opened. If `null` or `undefined`,
1114
+ * the function returns the default request queue associated with the actor run.
1115
+ * @param [options]
1116
+ */
1117
+ static async openRequestQueue(queueIdOrName, options = {}) {
1118
+ return Actor.getDefaultInstance().openRequestQueue(queueIdOrName, options); // same arguments, handled by the shared default instance
1119
+ }
1120
+ /**
1121
+ * Creates a proxy configuration and returns a promise resolving to an instance
1122
+ * of the {@link ProxyConfiguration} class that is already initialized.
1123
+ *
1124
+ * Configures connection to a proxy server with the provided options. Proxy servers are used to prevent target websites from blocking
1125
+ * your crawlers based on IP address rate limits or blacklists. Setting proxy configuration in your crawlers automatically configures
1126
+ * them to use the selected proxies for all connections.
1127
+ *
1128
+ * For more details and code examples, see the {@link ProxyConfiguration} class.
1129
+ *
1130
+ * ```javascript
1131
+ *
1132
+ * // Returns initialized proxy configuration class
1133
+ * const proxyConfiguration = await Actor.createProxyConfiguration({
1134
+ * groups: ['GROUP1', 'GROUP2'], // List of Apify proxy groups
1135
+ * countryCode: 'US'
1136
+ * });
1137
+ *
1138
+ * const crawler = new CheerioCrawler({
1139
+ * // ...
1140
+ * proxyConfiguration,
1141
+ * handlePageFunction: ({ proxyInfo }) => {
1142
+ * const usedProxyUrl = proxyInfo.url; // Getting the proxy URL
1143
+ * }
1144
+ * })
1145
+ *
1146
+ * ```
1147
+ *
1148
+ * For compatibility with existing Actor Input UI (Input Schema), this function
1149
+ * returns `undefined` when the following object is passed as `proxyConfigurationOptions`.
1150
+ *
1151
+ * ```
1152
+ * { useApifyProxy: false }
1153
+ * ```
1154
+ */
1155
+ static async createProxyConfiguration(proxyConfigurationOptions = {}) {
1156
+ return Actor.getDefaultInstance().createProxyConfiguration(proxyConfigurationOptions); // options are passed through unchanged to the default instance
1157
+ }
1158
+ /**
1159
+ * Returns a new {@link ApifyEnv} object which contains information parsed from all the `APIFY_XXX` environment variables.
1160
+ *
1161
+ * For the list of the `APIFY_XXX` environment variables, see
1162
+ * [Actor documentation](https://docs.apify.com/actor/run#environment-variables).
1163
+ * If some of the variables are not defined or are invalid, the corresponding value in the resulting object will be null.
1164
+ */
1165
+ static getEnv() {
1166
+ return Actor.getDefaultInstance().getEnv(); // synchronous static shortcut for `getEnv()` on the default instance
1167
+ }
1168
+ /**
1169
+ * Returns a new instance of the Apify API client. The `ApifyClient` class is provided
1170
+ * by the [apify-client](https://www.npmjs.com/package/apify-client)
1171
+ * NPM package, and it is automatically configured using the `APIFY_API_BASE_URL` and `APIFY_TOKEN`
1172
+ * environment variables. You can override the token via the available options. That's useful
1173
+ * if you want to use the client as a different Apify user than the SDK internals are using.
1174
+ */
1175
+ static newClient(options = {}) {
1176
+ return Actor.getDefaultInstance().newClient(options); // the client is built by the default instance's `newClient()`
1177
+ }
1178
+ /**
1179
+ * Returns `true` when code is running on Apify platform and `false` otherwise (for example locally).
1180
+ */
1181
+ static isAtHome() {
1182
+ return Actor.getDefaultInstance().isAtHome(); // synchronous static shortcut for `isAtHome()` on the default instance
1183
+ }
1184
+ /** Default {@link ApifyClient} instance. */
1185
+ static get apifyClient() {
1186
+ return Actor.getDefaultInstance().apifyClient; // read from the default instance, which is created on first access
1187
+ }
1188
+ /** Default {@link Configuration} instance. */
1189
+ static get config() {
1190
+ return Actor.getDefaultInstance().config; // read from the default instance, which is created on first access
1191
+ }
1192
+ /** @internal */
1193
+ static getDefaultInstance() {
1194
+ this._instance ?? (this._instance = new Actor());
1195
+ return this._instance;
1196
+ }
1197
+ _openStorage(storageClass, id, options = {}) {
1198
+ const client = options.forceCloud ? this.apifyClient : undefined;
1199
+ return this._getStorageManager(storageClass).openStorage(id, client);
1200
+ }
1201
+ _getStorageManager(storageClass) {
1202
+ if (!this.storageManagers.has(storageClass)) {
1203
+ const manager = new core_1.StorageManager(storageClass, this.config);
1204
+ this.storageManagers.set(storageClass, manager);
1205
+ }
1206
+ return this.storageManagers.get(storageClass);
1207
+ }
1208
+ }
1209
+ exports.Actor = Actor; // fill in the CommonJS export that was pre-declared as `void 0` at the top of the module
1210
+ /**
1211
+ * Exit codes for the actor process.
1212
+ * The error codes must be in the range 1-128, to avoid collision with signal exits
1213
+ * and to ensure Docker will handle them correctly!
1214
+ * @internal should be removed if we decide to remove `Actor.main()`
1215
+ */
1216
+ exports.EXIT_CODES = {
1217
+ SUCCESS: 0, // zero exit code signals success
1218
+ ERROR_USER_FUNCTION_THREW: 91, // presumably used when the user function passed to `Actor.main()` throws; confirm at the call site
1219
+ ERROR_UNKNOWN: 92, // presumably the fallback for any other failure; confirm at the call site
1220
+ };
1221
+ //# sourceMappingURL=actor.js.map