apify 3.1.2-beta.44 → 3.1.2-beta.69
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-copy.log +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/package.json +3 -7
- package/package.json +3 -7
- package/dist/cli.d.ts +0 -3
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js +0 -9
- package/dist/cli.js.map +0 -1
- package/src/actor.ts +0 -1614
- package/src/cli.ts +0 -9
- package/src/configuration.ts +0 -202
- package/src/index.ts +0 -11
- package/src/key_value_store.ts +0 -25
- package/src/platform_event_manager.ts +0 -118
- package/src/proxy_configuration.ts +0 -395
- package/src/utils.ts +0 -38
package/src/actor.ts
DELETED
|
@@ -1,1614 +0,0 @@
|
|
|
1
|
-
import ow from 'ow';
|
|
2
|
-
import { createPrivateKey } from 'node:crypto';
|
|
3
|
-
import { decryptInputSecrets } from '@apify/input_secrets';
|
|
4
|
-
import { ENV_VARS, INTEGER_ENV_VARS } from '@apify/consts';
|
|
5
|
-
import { addTimeoutToPromise } from '@apify/timeout';
|
|
6
|
-
import log from '@apify/log';
|
|
7
|
-
import type {
|
|
8
|
-
ActorStartOptions,
|
|
9
|
-
ApifyClientOptions,
|
|
10
|
-
RunAbortOptions,
|
|
11
|
-
TaskStartOptions,
|
|
12
|
-
Webhook,
|
|
13
|
-
WebhookEventType,
|
|
14
|
-
} from 'apify-client';
|
|
15
|
-
import {
|
|
16
|
-
ActorRun as ClientActorRun,
|
|
17
|
-
ApifyClient,
|
|
18
|
-
} from 'apify-client';
|
|
19
|
-
import type {
|
|
20
|
-
ConfigurationOptions,
|
|
21
|
-
EventManager,
|
|
22
|
-
EventTypeName,
|
|
23
|
-
IStorage,
|
|
24
|
-
RecordOptions,
|
|
25
|
-
UseStateOptions,
|
|
26
|
-
} from '@crawlee/core';
|
|
27
|
-
import {
|
|
28
|
-
Configuration as CoreConfiguration,
|
|
29
|
-
Dataset,
|
|
30
|
-
EventType,
|
|
31
|
-
RequestQueue,
|
|
32
|
-
StorageManager,
|
|
33
|
-
purgeDefaultStorages,
|
|
34
|
-
} from '@crawlee/core';
|
|
35
|
-
import type { Awaitable, Constructor, Dictionary, StorageClient } from '@crawlee/types';
|
|
36
|
-
import { sleep, snakeCaseToCamelCase } from '@crawlee/utils';
|
|
37
|
-
import { logSystemInfo, printOutdatedSdkWarning } from './utils';
|
|
38
|
-
import { PlatformEventManager } from './platform_event_manager';
|
|
39
|
-
import type { ProxyConfigurationOptions } from './proxy_configuration';
|
|
40
|
-
import { ProxyConfiguration } from './proxy_configuration';
|
|
41
|
-
import { KeyValueStore } from './key_value_store';
|
|
42
|
-
import { Configuration } from './configuration';
|
|
43
|
-
|
|
44
|
-
/**
|
|
45
|
-
* `Actor` class serves as an alternative approach to the static helpers exported from the package. It allows to pass configuration
|
|
46
|
-
* that will be used on the instance methods. Environment variables will have precedence over this configuration.
|
|
47
|
-
* See {@apilink Configuration} for details about what can be configured and what are the default values.
|
|
48
|
-
*/
|
|
49
|
-
export class Actor<Data extends Dictionary = Dictionary> {
|
|
50
|
-
/** @internal */
|
|
51
|
-
static _instance: Actor;
|
|
52
|
-
|
|
53
|
-
/**
|
|
54
|
-
* Configuration of this SDK instance (provided to its constructor). See {@apilink Configuration} for details.
|
|
55
|
-
* @internal
|
|
56
|
-
*/
|
|
57
|
-
readonly config: Configuration;
|
|
58
|
-
|
|
59
|
-
/**
|
|
60
|
-
* Default {@apilink ApifyClient} instance.
|
|
61
|
-
* @internal
|
|
62
|
-
*/
|
|
63
|
-
readonly apifyClient: ApifyClient;
|
|
64
|
-
|
|
65
|
-
/**
|
|
66
|
-
* Default {@apilink EventManager} instance.
|
|
67
|
-
* @internal
|
|
68
|
-
*/
|
|
69
|
-
readonly eventManager: EventManager;
|
|
70
|
-
|
|
71
|
-
/**
|
|
72
|
-
* Whether the actor instance was initialized. This is set by calling {@apilink Actor.init}.
|
|
73
|
-
*/
|
|
74
|
-
initialized = false;
|
|
75
|
-
|
|
76
|
-
/**
|
|
77
|
-
* Set if the actor called a method that requires the instance to be initialized, but did not do so.
|
|
78
|
-
* A call to `init` after this warning is emitted is considered an invalid state and will throw an error.
|
|
79
|
-
*/
|
|
80
|
-
private warnedAboutMissingInitCall = false;
|
|
81
|
-
|
|
82
|
-
constructor(options: ConfigurationOptions = {}) {
    // Without any overrides, share the global configuration object (which falls back to env vars);
    // otherwise build a dedicated configuration from the supplied options.
    const hasOverrides = Object.keys(options).length > 0;
    this.config = hasOverrides ? new Configuration(options) : Configuration.getGlobalConfig();

    this.apifyClient = this.newClient();
    this.eventManager = new PlatformEventManager(this.config);
}
|
|
88
|
-
|
|
89
|
-
/**
|
|
90
|
-
* Runs the main user function that performs the job of the actor
|
|
91
|
-
* and terminates the process when the user function finishes.
|
|
92
|
-
*
|
|
93
|
-
* **The `Actor.main()` function is optional** and is provided merely for your convenience.
|
|
94
|
-
* It is mainly useful when you're running your code as an actor on the [Apify platform](https://apify.com/actors).
|
|
95
|
-
* However, if you want to use Apify SDK tools directly inside your existing projects, e.g.
|
|
96
|
-
* running in an [Express](https://expressjs.com/) server, on
|
|
97
|
-
* [Google Cloud functions](https://cloud.google.com/functions)
|
|
98
|
-
* or [AWS Lambda](https://aws.amazon.com/lambda/), it's better to avoid
|
|
99
|
-
* it since the function terminates the main process when it finishes!
|
|
100
|
-
*
|
|
101
|
-
* The `Actor.main()` function performs the following actions:
|
|
102
|
-
*
|
|
103
|
-
* - When running on the Apify platform (i.e. `APIFY_IS_AT_HOME` environment variable is set),
|
|
104
|
-
* it sets up a connection to listen for platform events.
|
|
105
|
-
* For example, to get a notification about an imminent migration to another server.
|
|
106
|
-
* See {@apilink Actor.events} for details.
|
|
107
|
-
* - It checks that either `APIFY_TOKEN` or `APIFY_LOCAL_STORAGE_DIR` environment variable
|
|
108
|
-
* is defined. If not, the function sets `APIFY_LOCAL_STORAGE_DIR` to `./apify_storage`
|
|
109
|
-
* inside the current working directory. This is to simplify running code examples.
|
|
110
|
-
* - It invokes the user function passed as the `userFunc` parameter.
|
|
111
|
-
* - If the user function returned a promise, waits for it to resolve.
|
|
112
|
-
* - If the user function throws an exception or some other error is encountered,
|
|
113
|
-
* prints error details to console so that they are stored to the log.
|
|
114
|
-
* - Exits the Node.js process, with zero exit code on success and non-zero on errors.
|
|
115
|
-
*
|
|
116
|
-
* The user function can be synchronous:
|
|
117
|
-
*
|
|
118
|
-
* ```javascript
|
|
119
|
-
* await Actor.main(() => {
|
|
120
|
-
* // My synchronous function that returns immediately
|
|
121
|
-
* console.log('Hello world from actor!');
|
|
122
|
-
* });
|
|
123
|
-
* ```
|
|
124
|
-
*
|
|
125
|
-
* If the user function returns a promise, it is considered asynchronous:
|
|
126
|
-
* ```javascript
|
|
127
|
-
* import { gotScraping } from 'got-scraping';
|
|
128
|
-
*
|
|
129
|
-
* await Actor.main(() => {
|
|
130
|
-
* // My asynchronous function that returns a promise
|
|
131
|
-
* return gotScraping('http://www.example.com').then((html) => {
|
|
132
|
-
* console.log(html);
|
|
133
|
-
* });
|
|
134
|
-
* });
|
|
135
|
-
* ```
|
|
136
|
-
*
|
|
137
|
-
* To simplify your code, you can take advantage of the `async`/`await` keywords:
|
|
138
|
-
*
|
|
139
|
-
* ```javascript
|
|
140
|
-
* import { gotScraping } from 'got-scraping';
|
|
141
|
-
*
|
|
142
|
-
* await Actor.main(async () => {
|
|
143
|
-
* // My asynchronous function
|
|
144
|
-
* const html = await gotScraping('http://www.example.com');
|
|
145
|
-
* console.log(html);
|
|
146
|
-
* });
|
|
147
|
-
* ```
|
|
148
|
-
*
|
|
149
|
-
* @param userFunc User function to be executed. If it returns a promise,
|
|
150
|
-
* the promise will be awaited. The user function is called with no arguments.
|
|
151
|
-
* @param options
|
|
152
|
-
* @ignore
|
|
153
|
-
*/
|
|
154
|
-
main<T>(userFunc: UserFunc, options?: MainOptions): Promise<T> {
    // The argument check runs synchronously, before any promise is created.
    const isCallable = Boolean(userFunc) && typeof userFunc === 'function';
    if (!isCallable) {
        const receivedType = userFunc === null ? 'null' : typeof userFunc;
        throw new Error(`First parameter for Actor.main() must be a function (was '${receivedType}').`);
    }

    // Full lifecycle: init -> user function -> exit (or exit with an error code when something throws).
    const runLifecycle = async (): Promise<T> => {
        await this.init(options);

        let result: T;
        try {
            result = await userFunc() as T;
            await this.exit(options);
        } catch (err: any) {
            // Log the failure and finish with the dedicated "user function threw" exit code.
            log.exception(err, err.message);
            await this.exit({ exitCode: EXIT_CODES.ERROR_USER_FUNCTION_THREW });
        }

        return result!;
    };

    return runLifecycle();
}
|
|
174
|
-
|
|
175
|
-
/**
|
|
176
|
-
* @ignore
|
|
177
|
-
*/
|
|
178
|
-
async init(options: InitOptions = {}): Promise<void> {
    // Repeated calls are no-ops.
    if (this.initialized) return;

    // Once the "missing init" warning has been emitted, a late init() call is rejected.
    if (this.warnedAboutMissingInitCall) {
        const message = [
            'Actor.init() was called after a method that would access a storage client was used.',
            'This in an invalid state. Please make sure to call Actor.init() before such methods are called.',
        ].join('\n');
        throw new Error(message);
    }

    this.initialized = true;

    logSystemInfo();
    printOutdatedSdkWarning();

    // Reset the global config instance so that APIFY_ prefixed env vars are respected.
    CoreConfiguration.globalConfig = Configuration.getGlobalConfig();

    const { config } = this;
    if (this.isAtHome()) {
        config.set('availableMemoryRatio', 1);
        config.set('disableBrowserSandbox', true); // for browser launcher, adds `--no-sandbox` to args
        config.useStorageClient(this.apifyClient);
        config.useEventManager(this.eventManager);
    } else if (options.storage) {
        config.useStorageClient(options.storage);
    }

    // Initialize whichever event manager the config currently uses.
    const events = config.getEventManager();
    await events.init();

    await purgeDefaultStorages(config);
    Configuration.storage.enterWith(config);
}
|
|
214
|
-
|
|
215
|
-
/**
|
|
216
|
-
* @ignore
|
|
217
|
-
*/
|
|
218
|
-
async exit(messageOrOptions?: string | ExitOptions, options: ExitOptions = {}): Promise<void> {
    // Gracefully finishes the run: closes the event manager, emits the EXIT event, waits for listeners
    // (bounded by `timeoutSecs`), tears down the storage client, logs the final status message and,
    // unless `exit` is `false`, terminates the process with `exitCode`.
    //
    // The first argument is either a status message or an options object; values in the second
    // argument take precedence over values in the first one.
    options = typeof messageOrOptions === 'string' ? { ...options, statusMessage: messageOrOptions } : { ...messageOrOptions, ...options };
    options.exit ??= true;
    options.exitCode ??= EXIT_CODES.SUCCESS;
    options.timeoutSecs ??= 30;

    // Close the event manager and emit the final PERSIST_STATE event
    await this.config.getEventManager().close();

    // Emit the exit event
    this.config.getEventManager().emit(EventType.EXIT, options);

    // Wait for all event listeners to be processed
    log.debug(`Waiting for all event listeners to complete their execution (with ${options.timeoutSecs} seconds timeout)`);
    await addTimeoutToPromise(
        () => this.config.getEventManager().waitForAllListenersToComplete(),
        options.timeoutSecs * 1000,
        `Waiting for all event listeners to complete their execution timed out after ${options.timeoutSecs} seconds`,
    );

    const client = this.config.getStorageClient();

    if (client.teardown) {
        // Tell the user what is going on if the teardown takes longer than a second.
        const slowTeardownNotice = setTimeout(() => {
            log.info('Waiting for the storage to write its state to file system.');
        }, 1000);

        try {
            await client.teardown();
        } finally {
            // Always cancel the timer: otherwise it keeps the event loop alive when `exit` is `false`,
            // and it would still fire the notice after a failed teardown.
            clearTimeout(slowTeardownNotice);
        }
    }

    if (options.exitCode > 0) {
        options.statusMessage ??= `Actor finished with an error (exit code ${options.exitCode})`;
        log.error(options.statusMessage);
    } else {
        options.statusMessage ??= `Actor finished successfully (exit code ${options.exitCode})`;
        log.info(options.statusMessage);
    }

    // Callers embedding the SDK can opt out of terminating the process.
    if (!options.exit) {
        return;
    }

    process.exit(options.exitCode);
}
|
|
265
|
-
|
|
266
|
-
/**
|
|
267
|
-
* @ignore
|
|
268
|
-
*/
|
|
269
|
-
async fail(messageOrOptions?: string | ExitOptions, options: ExitOptions = {}): Promise<void> {
    // Same as `exit()`, but the exit code defaults to 1 instead of success.
    //
    // The exit code is resolved explicitly because `exit()` merges its second argument last:
    // passing `{ exitCode: 1, ...options }` would override an `exitCode` supplied in an options
    // object given as the first argument, and an explicit `exitCode: undefined` in `options`
    // would erase the failure default altogether.
    const codeFromFirstArg = typeof messageOrOptions === 'string' ? undefined : messageOrOptions?.exitCode;
    const exitCode = options.exitCode ?? codeFromFirstArg ?? 1;

    return this.exit(messageOrOptions, { ...options, exitCode });
}
|
|
272
|
-
|
|
273
|
-
/**
|
|
274
|
-
* @ignore
|
|
275
|
-
*/
|
|
276
|
-
on(event: EventTypeName, listener: (...args: any[]) => any): void {
    // Register the listener on whichever event manager the configuration currently exposes.
    const events = this.config.getEventManager();
    events.on(event, listener);
}
|
|
279
|
-
|
|
280
|
-
/**
|
|
281
|
-
* @ignore
|
|
282
|
-
*/
|
|
283
|
-
off(event: EventTypeName, listener?: (...args: any[]) => any): void {
    // Forward the removal request to whichever event manager the configuration currently exposes.
    const events = this.config.getEventManager();
    events.off(event, listener);
}
|
|
286
|
-
|
|
287
|
-
/**
|
|
288
|
-
* Runs an actor on the Apify platform using the current user account (determined by the `APIFY_TOKEN` environment variable).
|
|
289
|
-
*
|
|
290
|
-
* The result of the function is an {@apilink ActorRun} object that contains details about the actor run.
|
|
291
|
-
*
|
|
292
|
-
* If you want to run an actor task rather than an actor, please use the {@apilink Actor.callTask} function instead.
|
|
293
|
-
*
|
|
294
|
-
* For more information about actors, read the [documentation](https://docs.apify.com/actor).
|
|
295
|
-
*
|
|
296
|
-
* **Example usage:**
|
|
297
|
-
*
|
|
298
|
-
* ```javascript
|
|
299
|
-
* const run = await Actor.call('apify/hello-world', { myInput: 123 });
|
|
300
|
-
* ```
|
|
301
|
-
*
|
|
302
|
-
* @param actorId
|
|
303
|
-
* Allowed formats are `username/actor-name`, `userId/actor-name` or actor ID.
|
|
304
|
-
* @param [input]
|
|
305
|
-
* Input for the actor. If it is an object, it will be stringified to
|
|
306
|
-
* JSON and its content type set to `application/json; charset=utf-8`.
|
|
307
|
-
* Otherwise the `options.contentType` parameter must be provided.
|
|
308
|
-
* @param [options]
|
|
309
|
-
* @ignore
|
|
310
|
-
*/
|
|
311
|
-
async call(actorId: string, input?: unknown, options: CallOptions = {}): Promise<ClientActorRun> {
    const { token, ...callOptions } = options;

    // A custom token gets its own client; otherwise the default client is reused.
    let client = this.apifyClient;
    if (token) {
        client = this.newClient({ token });
    }

    return client.actor(actorId).call(input, callOptions);
}
|
|
317
|
-
|
|
318
|
-
/**
|
|
319
|
-
* Runs an actor on the Apify platform using the current user account (determined by the `APIFY_TOKEN` environment variable),
|
|
320
|
-
* unlike `Actor.call`, this method just starts the run without waiting for finish.
|
|
321
|
-
*
|
|
322
|
-
* The result of the function is an {@apilink ActorRun} object that contains details about the actor run.
|
|
323
|
-
*
|
|
324
|
-
* For more information about actors, read the
|
|
325
|
-
* [documentation](https://docs.apify.com/actor).
|
|
326
|
-
*
|
|
327
|
-
* **Example usage:**
|
|
328
|
-
*
|
|
329
|
-
* ```javascript
|
|
330
|
-
* const run = await Actor.start('apify/hello-world', { myInput: 123 });
|
|
331
|
-
* ```
|
|
332
|
-
*
|
|
333
|
-
* @param actorId
|
|
334
|
-
* Allowed formats are `username/actor-name`, `userId/actor-name` or actor ID.
|
|
335
|
-
* @param [input]
|
|
336
|
-
* Input for the actor. If it is an object, it will be stringified to
|
|
337
|
-
* JSON and its content type set to `application/json; charset=utf-8`.
|
|
338
|
-
* Otherwise the `options.contentType` parameter must be provided.
|
|
339
|
-
* @param [options]
|
|
340
|
-
* @ignore
|
|
341
|
-
*/
|
|
342
|
-
async start(actorId: string, input?: unknown, options: CallOptions = {}): Promise<ClientActorRun> {
    const { token, ...startOptions } = options;

    // A custom token gets its own client; otherwise the default client is reused.
    let client = this.apifyClient;
    if (token) {
        client = this.newClient({ token });
    }

    return client.actor(actorId).start(input, startOptions);
}
|
|
348
|
-
|
|
349
|
-
/**
|
|
350
|
-
* Aborts given actor run on the Apify platform using the current user account (determined by the `APIFY_TOKEN` environment variable).
|
|
351
|
-
*
|
|
352
|
-
* The result of the function is an {@apilink ActorRun} object that contains details about the actor run.
|
|
353
|
-
*
|
|
354
|
-
* For more information about actors, read the
|
|
355
|
-
* [documentation](https://docs.apify.com/actor).
|
|
356
|
-
*
|
|
357
|
-
* **Example usage:**
|
|
358
|
-
*
|
|
359
|
-
* ```javascript
|
|
360
|
-
* const run = await Actor.abort(runId);
|
|
361
|
-
* ```
|
|
362
|
-
* @ignore
|
|
363
|
-
*/
|
|
364
|
-
async abort(runId: string, options: AbortOptions = {}): Promise<ClientActorRun> {
    const { token, statusMessage, ...abortOptions } = options;

    // A custom token gets its own client; otherwise the default client is reused.
    let client = this.apifyClient;
    if (token) {
        client = this.newClient({ token });
    }

    // NOTE(review): setStatusMessage() updates the run identified by the `actorRunId` config value
    // through the default client, not `runId` - confirm this is the intended target for the message.
    if (statusMessage) {
        await this.setStatusMessage(statusMessage);
    }

    return client.run(runId).abort(abortOptions);
}
|
|
374
|
-
|
|
375
|
-
/**
|
|
376
|
-
* Runs an actor task on the Apify platform using the current user account (determined by the `APIFY_TOKEN` environment variable).
|
|
377
|
-
*
|
|
378
|
-
* The result of the function is an {@apilink ActorRun} object that contains details about the actor run.
|
|
379
|
-
*
|
|
380
|
-
* Note that an actor task is a saved input configuration and options for an actor.
|
|
381
|
-
* If you want to run an actor directly rather than an actor task, please use the
|
|
382
|
-
* {@apilink Actor.call} function instead.
|
|
383
|
-
*
|
|
384
|
-
* For more information about actor tasks, read the [documentation](https://docs.apify.com/tasks).
|
|
385
|
-
*
|
|
386
|
-
* **Example usage:**
|
|
387
|
-
*
|
|
388
|
-
* ```javascript
|
|
389
|
-
* const run = await Actor.callTask('bob/some-task');
|
|
390
|
-
* ```
|
|
391
|
-
*
|
|
392
|
-
* @param taskId
|
|
393
|
-
* Allowed formats are `username/task-name`, `userId/task-name` or task ID.
|
|
394
|
-
* @param [input]
|
|
395
|
-
* Input overrides for the actor task. If it is an object, it will be stringified to
|
|
396
|
-
* JSON and its content type set to `application/json; charset=utf-8`.
|
|
397
|
-
* Provided input will be merged with actor task input.
|
|
398
|
-
* @param [options]
|
|
399
|
-
* @ignore
|
|
400
|
-
*/
|
|
401
|
-
async callTask(taskId: string, input?: Dictionary, options: CallTaskOptions = {}): Promise<ClientActorRun> {
    const { token, ...taskOptions } = options;

    // A custom token gets its own client; otherwise the default client is reused.
    let client = this.apifyClient;
    if (token) {
        client = this.newClient({ token });
    }

    return client.task(taskId).call(input, taskOptions);
}
|
|
407
|
-
|
|
408
|
-
/**
|
|
409
|
-
* Transforms this actor run to an actor run of a given actor. The system stops the current container and starts
|
|
410
|
-
* the new container instead. All the default storages are preserved and the new input is stored under the `INPUT-METAMORPH-1` key
|
|
411
|
-
* in the same default key-value store.
|
|
412
|
-
*
|
|
413
|
-
* @param targetActorId
|
|
414
|
-
* Either `username/actor-name` or actor ID of an actor to which we want to metamorph.
|
|
415
|
-
* @param [input]
|
|
416
|
-
* Input for the actor. If it is an object, it will be stringified to
|
|
417
|
-
* JSON and its content type set to `application/json; charset=utf-8`.
|
|
418
|
-
* Otherwise, the `options.contentType` parameter must be provided.
|
|
419
|
-
* @param [options]
|
|
420
|
-
* @ignore
|
|
421
|
-
*/
|
|
422
|
-
async metamorph(targetActorId: string, input?: unknown, options: MetamorphOptions = {}): Promise<void> {
    const onPlatform = this.isAtHome();
    if (!onPlatform) {
        log.warning('Actor.metamorph() is only supported when running on the Apify platform.');
        return;
    }

    // Separate the post-metamorph sleep setting (defaulting to the configured value)
    // from the options that are forwarded to the API client.
    const {
        customAfterSleepMillis: afterSleepMillis = this.config.get('metamorphAfterSleepMillis'),
        ...metamorphOpts
    } = options;

    const currentRunId = this.config.get('actorRunId')!;
    const currentRun = this.apifyClient.run(currentRunId);
    await currentRun.metamorph(targetActorId, input, metamorphOpts);

    // Wait some time for container to be stopped.
    await sleep(afterSleepMillis);
}
|
|
438
|
-
|
|
439
|
-
/**
|
|
440
|
-
* Internally reboots this actor. The system stops the current container and starts
|
|
441
|
-
* a new container with the same run ID.
|
|
442
|
-
*
|
|
443
|
-
* @ignore
|
|
444
|
-
*/
|
|
445
|
-
async reboot(): Promise<void> {
    if (!this.isAtHome()) {
        log.warning('Actor.reboot() is only supported when running on the Apify platform.');
        return;
    }

    // Run the listeners manually and wait for all of them, as `.metamorph()` kills the container.
    // `persistState` listeners let individual RequestList, RequestQueue... instances persist themselves.
    const persistRuns = this.config.getEventManager()
        .listeners(EventType.PERSIST_STATE)
        .map((listener) => listener());
    // `migrating` listeners pause Apify crawlers.
    const migratingRuns = this.config.getEventManager()
        .listeners(EventType.MIGRATING)
        .map((listener) => listener());

    await Promise.all([...persistRuns, ...migratingRuns]);

    // Metamorph into the same actor.
    const ownActorId = this.config.get('actorId')!;
    await this.metamorph(ownActorId);
}
|
|
462
|
-
|
|
463
|
-
/**
|
|
464
|
-
* Creates an ad-hoc webhook for the current actor run, which lets you receive a notification when the actor run finished or failed.
|
|
465
|
-
* For more information about Apify actor webhooks, please see the [documentation](https://docs.apify.com/webhooks).
|
|
466
|
-
*
|
|
467
|
-
* Note that webhooks are only supported for actors running on the Apify platform.
|
|
468
|
-
* In local environment, the function will print a warning and have no effect.
|
|
469
|
-
*
|
|
470
|
-
* @param options
|
|
471
|
-
* @returns The return value is the Webhook object.
|
|
472
|
-
* For more information, see the [Get webhook](https://apify.com/docs/api/v2#/reference/webhooks/webhook-object/get-webhook) API endpoint.
|
|
473
|
-
* @ignore
|
|
474
|
-
*/
|
|
475
|
-
async addWebhook(options: WebhookOptions): Promise<Webhook | undefined> {
    // Only the listed keys are accepted; anything else fails validation.
    const optionsShape = ow.object.exactShape({
        eventTypes: ow.array.ofType<WebhookEventType>(ow.string),
        requestUrl: ow.string,
        payloadTemplate: ow.optional.string,
        idempotencyKey: ow.optional.string,
    });
    ow(options, optionsShape);

    const { eventTypes, requestUrl, payloadTemplate, idempotencyKey } = options;

    const onPlatform = this.isAtHome();
    if (!onPlatform) {
        log.warning('Actor.addWebhook() is only supported when running on the Apify platform. The webhook will not be invoked.');
        return undefined;
    }

    const runId = this.config.get('actorRunId')!;
    if (!runId) {
        throw new Error(`Environment variable ${ENV_VARS.ACTOR_RUN_ID} is not set!`);
    }

    // The ad-hoc webhook is bound to the current run through its condition.
    const condition = { actorRunId: runId };
    return this.apifyClient.webhooks().create({
        isAdHoc: true,
        eventTypes,
        condition,
        requestUrl,
        payloadTemplate,
        idempotencyKey,
    });
}
|
|
506
|
-
|
|
507
|
-
/**
|
|
508
|
-
* Sets the status message for the current actor run.
|
|
509
|
-
*
|
|
510
|
-
* @param statusMessage The status message to set for the current actor run.
|
|
511
|
-
* @returns The return value is the Run object.
|
|
512
|
-
* For more information, see the [Actor Runs](https://docs.apify.com/api/v2#/reference/actor-runs/) API endpoints.
|
|
513
|
-
* @ignore
|
|
514
|
-
*/
|
|
515
|
-
async setStatusMessage(statusMessage: string): Promise<ClientActorRun> {
    ow(statusMessage, ow.string);

    // The message is written to the run identified by the `actorRunId` config value.
    const currentRunId = this.config.get('actorRunId')!;
    if (currentRunId) {
        return this.apifyClient.run(currentRunId).update({ statusMessage });
    }

    throw new Error(`Environment variable ${ENV_VARS.ACTOR_RUN_ID} is not set!`);
}
|
|
525
|
-
|
|
526
|
-
/**
|
|
527
|
-
* Stores an object or an array of objects to the default {@apilink Dataset} of the current actor run.
|
|
528
|
-
*
|
|
529
|
-
* This is just a convenient shortcut for {@apilink Dataset.pushData}.
|
|
530
|
-
* For example, calling the following code:
|
|
531
|
-
* ```javascript
|
|
532
|
-
* await Actor.pushData({ myValue: 123 });
|
|
533
|
-
* ```
|
|
534
|
-
*
|
|
535
|
-
* is equivalent to:
|
|
536
|
-
* ```javascript
|
|
537
|
-
* const dataset = await Actor.openDataset();
|
|
538
|
-
* await dataset.pushData({ myValue: 123 });
|
|
539
|
-
* ```
|
|
540
|
-
*
|
|
541
|
-
* For more information, see {@apilink Actor.openDataset} and {@apilink Dataset.pushData}
|
|
542
|
-
*
|
|
543
|
-
* **IMPORTANT**: Make sure to use the `await` keyword when calling `pushData()`,
|
|
544
|
-
* otherwise the actor process might finish before the data are stored!
|
|
545
|
-
*
|
|
546
|
-
* @param item Object or array of objects containing data to be stored in the default dataset.
|
|
547
|
-
* The objects must be serializable to JSON and the JSON representation of each object must be smaller than 9MB.
|
|
548
|
-
* @ignore
|
|
549
|
-
*/
|
|
550
|
-
async pushData(item: Data | Data[]): Promise<void> {
    this._ensureActorInit('pushData');

    // Delegate to the default dataset of this run.
    const defaultDataset = await this.openDataset();
    return defaultDataset.pushData(item);
}
|
|
556
|
-
|
|
557
|
-
/**
|
|
558
|
-
* Opens a dataset and returns a promise resolving to an instance of the {@apilink Dataset} class.
|
|
559
|
-
*
|
|
560
|
-
* Datasets are used to store structured data where each object stored has the same attributes,
|
|
561
|
-
* such as online store products or real estate offers.
|
|
562
|
-
* The actual data is stored either on the local filesystem or in the cloud.
|
|
563
|
-
*
|
|
564
|
-
* For more details and code examples, see the {@apilink Dataset} class.
|
|
565
|
-
*
|
|
566
|
-
* @param [datasetIdOrName]
|
|
567
|
-
* ID or name of the dataset to be opened. If `null` or `undefined`,
|
|
568
|
-
* the function returns the default dataset associated with the actor run.
|
|
569
|
-
* @param [options]
|
|
570
|
-
* @ignore
|
|
571
|
-
*/
|
|
572
|
-
async openDataset(
    datasetIdOrName?: string | null,
    options: OpenStorageOptions = {},
): Promise<Dataset<Data>> {
    // Opens the dataset with the given ID or name; the documented contract is that `null`
    // or `undefined` selects the default dataset of the actor run.
    //
    // `ow.optional.string` only tolerates `undefined`, so an explicit `null` (allowed by the
    // signature) used to be rejected by validation. Accept null/undefined/string explicitly.
    ow(datasetIdOrName, ow.any(ow.nullOrUndefined, ow.string));
    // Only the `forceCloud` key is accepted in the options.
    ow(options, ow.object.exactShape({
        forceCloud: ow.optional.boolean,
    }));

    this._ensureActorInit('openDataset');

    return this._openStorage<Dataset<Data>>(Dataset, datasetIdOrName, options);
}
|
|
585
|
-
|
|
586
|
-
/**
|
|
587
|
-
* Gets a value from the default {@apilink KeyValueStore} associated with the current actor run.
|
|
588
|
-
*
|
|
589
|
-
* This is just a convenient shortcut for {@apilink KeyValueStore.getValue}.
|
|
590
|
-
* For example, calling the following code:
|
|
591
|
-
* ```javascript
|
|
592
|
-
* const value = await Actor.getValue('my-key');
|
|
593
|
-
* ```
|
|
594
|
-
*
|
|
595
|
-
* is equivalent to:
|
|
596
|
-
* ```javascript
|
|
597
|
-
* const store = await Actor.openKeyValueStore();
|
|
598
|
-
* const value = await store.getValue('my-key');
|
|
599
|
-
* ```
|
|
600
|
-
*
|
|
601
|
-
* To store the value to the default key-value store, you can use the {@apilink Actor.setValue} function.
|
|
602
|
-
*
|
|
603
|
-
* For more information, see {@apilink Actor.openKeyValueStore}
|
|
604
|
-
* and {@apilink KeyValueStore.getValue}.
|
|
605
|
-
*
|
|
606
|
-
* @param key Unique record key.
|
|
607
|
-
* @returns
|
|
608
|
-
* Returns a promise that resolves to an object, string
|
|
609
|
-
* or [`Buffer`](https://nodejs.org/api/buffer.html), depending
|
|
610
|
-
* on the MIME content type of the record, or `null`
|
|
611
|
-
* if the record is missing.
|
|
612
|
-
* @ignore
|
|
613
|
-
*/
|
|
614
|
-
async getValue<T = unknown>(key: string): Promise<T | null> {
    this._ensureActorInit('getValue');

    // Read the record from the default key-value store of this run.
    const defaultStore = await this.openKeyValueStore();
    return defaultStore.getValue<T>(key);
}
|
|
620
|
-
|
|
621
|
-
/**
|
|
622
|
-
* Stores or deletes a value in the default {@apilink KeyValueStore} associated with the current actor run.
|
|
623
|
-
*
|
|
624
|
-
* This is just a convenient shortcut for {@apilink KeyValueStore.setValue}.
|
|
625
|
-
* For example, calling the following code:
|
|
626
|
-
* ```javascript
|
|
627
|
-
* await Actor.setValue('OUTPUT', { foo: "bar" });
|
|
628
|
-
* ```
|
|
629
|
-
*
|
|
630
|
-
* is equivalent to:
|
|
631
|
-
* ```javascript
|
|
632
|
-
* const store = await Actor.openKeyValueStore();
|
|
633
|
-
* await store.setValue('OUTPUT', { foo: "bar" });
|
|
634
|
-
* ```
|
|
635
|
-
*
|
|
636
|
-
* To get a value from the default key-value store, you can use the {@apilink Actor.getValue} function.
|
|
637
|
-
*
|
|
638
|
-
* For more information, see {@apilink Actor.openKeyValueStore}
|
|
639
|
-
* and {@apilink KeyValueStore.getValue}.
|
|
640
|
-
*
|
|
641
|
-
* @param key
|
|
642
|
-
* Unique record key.
|
|
643
|
-
* @param value
|
|
644
|
-
* Record data, which can be one of the following values:
|
|
645
|
-
* - If `null`, the record in the key-value store is deleted.
|
|
646
|
-
* - If no `options.contentType` is specified, `value` can be any JavaScript object, and it will be stringified to JSON.
|
|
647
|
-
* - If `options.contentType` is set, `value` is taken as is, and it must be a `String` or [`Buffer`](https://nodejs.org/api/buffer.html).
|
|
648
|
-
* For any other value an error will be thrown.
|
|
649
|
-
* @param [options]
|
|
650
|
-
* @ignore
|
|
651
|
-
*/
|
|
652
|
-
async setValue<T>(key: string, value: T | null, options: RecordOptions = {}): Promise<void> {
    this._ensureActorInit('setValue');

    // Write the record to the default key-value store of this run.
    const defaultStore = await this.openKeyValueStore();
    return defaultStore.setValue(key, value, options);
}
|
|
658
|
-
|
|
659
|
-
/**
|
|
660
|
-
* Gets the actor input value from the default {@apilink KeyValueStore} associated with the current actor run.
|
|
661
|
-
*
|
|
662
|
-
* This is just a convenient shortcut for [`keyValueStore.getValue('INPUT')`](core/class/KeyValueStore#getValue).
|
|
663
|
-
* For example, calling the following code:
|
|
664
|
-
* ```javascript
|
|
665
|
-
* const input = await Actor.getInput();
|
|
666
|
-
* ```
|
|
667
|
-
*
|
|
668
|
-
* is equivalent to:
|
|
669
|
-
* ```javascript
|
|
670
|
-
* const store = await Actor.openKeyValueStore();
|
|
671
|
-
* await store.getValue('INPUT');
|
|
672
|
-
* ```
|
|
673
|
-
*
|
|
674
|
-
* Note that the `getInput()` function does not cache the value read from the key-value store.
|
|
675
|
-
* If you need to use the input multiple times in your actor,
|
|
676
|
-
* it is far more efficient to read it once and store it locally.
|
|
677
|
-
*
|
|
678
|
-
* For more information, see {@apilink Actor.openKeyValueStore}
|
|
679
|
-
* and {@apilink KeyValueStore.getValue}.
|
|
680
|
-
*
|
|
681
|
-
* @returns
|
|
682
|
-
* Returns a promise that resolves to an object, string
|
|
683
|
-
* or [`Buffer`](https://nodejs.org/api/buffer.html), depending
|
|
684
|
-
* on the MIME content type of the record, or `null`
|
|
685
|
-
* if the record is missing.
|
|
686
|
-
* @ignore
|
|
687
|
-
*/
|
|
688
|
-
async getInput<T = Dictionary | string | Buffer>(): Promise<T | null> {
    this._ensureActorInit('getInput');

    const encodedPrivateKey = this.config.get('inputSecretsPrivateKeyFile');
    const privateKeyPassphrase = this.config.get('inputSecretsPrivateKeyPassphrase');
    const rawInput = await this.getValue<T>(this.config.get('inputKey'));

    // Decryption is attempted only for a non-empty object input and only when both the
    // private key and its passphrase are configured; otherwise the stored value is returned as is.
    if (!ow.isValid(rawInput, ow.object.nonEmpty) || !encodedPrivateKey || !privateKeyPassphrase) {
        return rawInput;
    }

    // The configured key is base64-encoded, so decode it before handing it to `createPrivateKey`.
    const privateKey = createPrivateKey({
        key: Buffer.from(encodedPrivateKey, 'base64'),
        passphrase: privateKeyPassphrase,
    });

    return decryptInputSecrets<T>({ input: rawInput, privateKey });
}
|
|
703
|
-
|
|
704
|
-
/**
|
|
705
|
-
* Opens a key-value store and returns a promise resolving to an instance of the {@apilink KeyValueStore} class.
|
|
706
|
-
*
|
|
707
|
-
* Key-value stores are used to store records or files, along with their MIME content type.
|
|
708
|
-
* The records are stored and retrieved using a unique key.
|
|
709
|
-
* The actual data is stored either on a local filesystem or in the Apify cloud.
|
|
710
|
-
*
|
|
711
|
-
* For more details and code examples, see the {@apilink KeyValueStore} class.
|
|
712
|
-
*
|
|
713
|
-
* @param [storeIdOrName]
|
|
714
|
-
* ID or name of the key-value store to be opened. If `null` or `undefined`,
|
|
715
|
-
* the function returns the default key-value store associated with the actor run.
|
|
716
|
-
* @param [options]
|
|
717
|
-
* @ignore
|
|
718
|
-
*/
|
|
719
|
-
async openKeyValueStore(storeIdOrName?: string | null, options: OpenStorageOptions = {}): Promise<KeyValueStore> {
    // `ow.optional.*` only lets `undefined` through, so an explicit `null` (allowed by the
    // signature and documented as "open the default store") used to fail validation.
    // Normalize it to `undefined` first; the original label is kept for error messages.
    const idOrName = storeIdOrName ?? undefined;
    ow(idOrName, 'storeIdOrName', ow.optional.string);
    ow(options, ow.object.exactShape({
        forceCloud: ow.optional.boolean,
    }));

    this._ensureActorInit('openKeyValueStore');

    return this._openStorage(KeyValueStore, idOrName, options);
}
|
|
729
|
-
|
|
730
|
-
/**
|
|
731
|
-
* Opens a request queue and returns a promise resolving to an instance
|
|
732
|
-
* of the {@apilink RequestQueue} class.
|
|
733
|
-
*
|
|
734
|
-
* {@apilink RequestQueue} represents a queue of URLs to crawl, which is stored either on local filesystem or in the cloud.
|
|
735
|
-
* The queue is used for deep crawling of websites, where you start with several URLs and then
|
|
736
|
-
* recursively follow links to other pages. The data structure supports both breadth-first
|
|
737
|
-
* and depth-first crawling orders.
|
|
738
|
-
*
|
|
739
|
-
* For more details and code examples, see the {@apilink RequestQueue} class.
|
|
740
|
-
*
|
|
741
|
-
* @param [queueIdOrName]
|
|
742
|
-
* ID or name of the request queue to be opened. If `null` or `undefined`,
|
|
743
|
-
* the function returns the default request queue associated with the actor run.
|
|
744
|
-
* @param [options]
|
|
745
|
-
* @ignore
|
|
746
|
-
*/
|
|
747
|
-
async openRequestQueue(queueIdOrName?: string | null, options: OpenStorageOptions = {}): Promise<RequestQueue> {
    // `ow.optional.*` only lets `undefined` through, so an explicit `null` (allowed by the
    // signature and documented as "open the default queue") used to fail validation.
    // Normalize it to `undefined` first; the original label is kept for error messages.
    const idOrName = queueIdOrName ?? undefined;
    ow(idOrName, 'queueIdOrName', ow.optional.string);
    ow(options, ow.object.exactShape({
        forceCloud: ow.optional.boolean,
    }));

    this._ensureActorInit('openRequestQueue');

    return this._openStorage(RequestQueue, idOrName, options);
}
|
|
757
|
-
|
|
758
|
-
/**
|
|
759
|
-
* Creates a proxy configuration and returns a promise resolving to an instance
|
|
760
|
-
* of the {@apilink ProxyConfiguration} class that is already initialized.
|
|
761
|
-
*
|
|
762
|
-
* Configures connection to a proxy server with the provided options. Proxy servers are used to prevent target websites from blocking
|
|
763
|
-
* your crawlers based on IP address rate limits or blacklists. Setting proxy configuration in your crawlers automatically configures
|
|
764
|
-
* them to use the selected proxies for all connections.
|
|
765
|
-
*
|
|
766
|
-
* For more details and code examples, see the {@apilink ProxyConfiguration} class.
|
|
767
|
-
*
|
|
768
|
-
* ```javascript
|
|
769
|
-
*
|
|
770
|
-
* // Returns initialized proxy configuration class
|
|
771
|
-
* const proxyConfiguration = await Actor.createProxyConfiguration({
|
|
772
|
-
* groups: ['GROUP1', 'GROUP2'], // List of Apify proxy groups
|
|
773
|
-
* countryCode: 'US'
|
|
774
|
-
* });
|
|
775
|
-
*
|
|
776
|
-
* const crawler = new CheerioCrawler({
|
|
777
|
-
* // ...
|
|
778
|
-
* proxyConfiguration,
|
|
779
|
-
* requestHandler({ proxyInfo }) {
|
|
780
|
-
* const usedProxyUrl = proxyInfo.url; // Getting the proxy URL
|
|
781
|
-
* }
|
|
782
|
-
* })
|
|
783
|
-
*
|
|
784
|
-
* ```
|
|
785
|
-
*
|
|
786
|
-
* For compatibility with existing Actor Input UI (Input Schema), this function
|
|
787
|
-
* returns `undefined` when the following object is passed as `proxyConfigurationOptions`.
|
|
788
|
-
*
|
|
789
|
-
* ```
|
|
790
|
-
* { useApifyProxy: false }
|
|
791
|
-
* ```
|
|
792
|
-
* @ignore
|
|
793
|
-
*/
|
|
794
|
-
async createProxyConfiguration(
    proxyConfigurationOptions: ProxyConfigurationOptions & { useApifyProxy?: boolean } = {},
): Promise<ProxyConfiguration | undefined> {
    // `useApifyProxy` exists only for compatibility with the Input UI and is never
    // forwarded to `ProxyConfiguration`; everything else is passed through untouched.
    const { useApifyProxy: apifyProxyFlag, ...proxyOptions } = proxyConfigurationOptions;

    // Input UI sends `{ useApifyProxy: false }` for "proxy: None". Unless custom proxy URLs
    // were supplied, that means "no proxy at all" rather than the zero-config / auto mode.
    const proxyExplicitlyDisabled = apifyProxyFlag === false && !proxyOptions.proxyUrls;
    if (proxyExplicitlyDisabled) return undefined;

    const configuredProxy = new ProxyConfiguration(proxyOptions, this.config);
    await configuredProxy.initialize();

    return configuredProxy;
}
|
|
812
|
-
|
|
813
|
-
/**
|
|
814
|
-
* Returns a new {@apilink ApifyEnv} object which contains information parsed from all the `APIFY_XXX` environment variables.
|
|
815
|
-
*
|
|
816
|
-
* For the list of the `APIFY_XXX` environment variables, see
|
|
817
|
-
* [Actor documentation](https://docs.apify.com/actor/run#environment-variables).
|
|
818
|
-
* If some variables are not defined or are invalid, the corresponding value in the resulting object will be null.
|
|
819
|
-
* @ignore
|
|
820
|
-
*/
|
|
821
|
-
getEnv(): ApifyEnv {
    // NOTE: Invalid env vars never throw here, to keep local development and debugging simple.
    const source = process.env || {};
    const result = {} as ApifyEnv;

    Object.entries(ENV_VARS).forEach(([shortName, fullName]) => {
        const rawValue = source[fullName];
        let parsed: string | number | Date | undefined = rawValue;

        if (rawValue && fullName.endsWith('_AT')) {
            // Variables ending with `_AT` are parsed as dates; unparsable values are dropped.
            const timestamp = Date.parse(rawValue);
            parsed = timestamp > 0 ? new Date(timestamp) : undefined;
        } else if ((INTEGER_ENV_VARS as readonly string[]).includes(fullName)) {
            // A missing or malformed integer parses to `NaN`, which ends up as `null` below.
            parsed = parseInt(rawValue!, 10);
        }

        // Keep a legitimate `0`; every other falsy value (undefined, '', NaN) becomes `null`.
        const hasValue = Boolean(parsed) || parsed === 0;
        Reflect.set(result, snakeCaseToCamelCase(shortName) as keyof ApifyEnv, hasValue ? parsed : null);
    });

    return result;
}
|
|
843
|
-
|
|
844
|
-
/**
|
|
845
|
-
* Returns a new instance of the Apify API client. The `ApifyClient` class is provided
|
|
846
|
-
* by the [apify-client](https://www.npmjs.com/package/apify-client)
|
|
847
|
-
* NPM package, and it is automatically configured using the `APIFY_API_BASE_URL`, and `APIFY_TOKEN`
|
|
848
|
-
* environment variables. You can override the token via the available options. That's useful
|
|
849
|
-
* if you want to use the client as a different Apify user than the SDK internals are using.
|
|
850
|
-
* @ignore
|
|
851
|
-
*/
|
|
852
|
-
newClient(options: ApifyClientOptions = {}): ApifyClient {
    // `storageDir` is split off so it is not forwarded to the API client.
    const { storageDir: _unusedStorageDir, ...configuredClientOptions } = this.config.get('storageClientOptions') as Dictionary;

    // Later entries win: configuration defaults < storage client options < explicit `options`.
    const mergedOptions: ApifyClientOptions = {
        baseUrl: this.config.get('apiBaseUrl'),
        token: this.config.get('token'),
        ...configuredClientOptions,
        ...options, // allow overriding the instance configuration
    };

    return new ApifyClient(mergedOptions);
}
|
|
861
|
-
|
|
862
|
-
/**
|
|
863
|
-
* Returns `true` when code is running on Apify platform and `false` otherwise (for example locally).
|
|
864
|
-
* @ignore
|
|
865
|
-
*/
|
|
866
|
-
isAtHome(): boolean {
    // Any non-empty value of the env var counts as "at home".
    const marker = process.env[ENV_VARS.IS_AT_HOME];
    return Boolean(marker);
}
|
|
869
|
-
|
|
870
|
-
/**
|
|
871
|
-
* Easily create and manage state values. All state values are automatically persisted.
|
|
872
|
-
*
|
|
873
|
-
* Values can be modified by simply using the assignment operator.
|
|
874
|
-
*
|
|
875
|
-
* @param name The name of the store to use.
|
|
876
|
-
* @param defaultValue If the store does not yet have a value in it, the value will be initialized with the `defaultValue` you provide.
|
|
877
|
-
* @param options An optional object parameter where a custom `keyValueStoreName` and `config` can be passed in.
|
|
878
|
-
*/
|
|
879
|
-
async useState<State extends Dictionary = Dictionary>(
    name?: string,
    defaultValue = {} as State,
    options?: UseStateOptions,
) {
    // An explicitly passed config wins; otherwise the global configuration is used.
    const storeConfig = options?.config || Configuration.getGlobalConfig();
    const stateStore = await KeyValueStore.open(options?.keyValueStoreName, { config: storeConfig });

    // A missing (or empty) name falls back to the shared global state key.
    const stateKey = name || 'APIFY_GLOBAL_STATE';
    return stateStore.getAutoSavedValue<State>(stateKey, defaultValue);
}
|
|
887
|
-
|
|
888
|
-
/**
|
|
889
|
-
* Easily create and manage state values. All state values are automatically persisted.
|
|
890
|
-
*
|
|
891
|
-
* Values can be modified by simply using the assignment operator.
|
|
892
|
-
*
|
|
893
|
-
* @param name The name of the store to use.
|
|
894
|
-
* @param defaultValue If the store does not yet have a value in it, the value will be initialized with the `defaultValue` you provide.
|
|
895
|
-
* @param options An optional object parameter where a custom `keyValueStoreName` and `config` can be passed in.
|
|
896
|
-
*/
|
|
897
|
-
static async useState<State extends Dictionary = Dictionary>(
    name?: string,
    defaultValue = {} as State,
    options?: UseStateOptions,
) {
    // Static shortcut: forwards all arguments to the default Actor instance.
    const actor = Actor.getDefaultInstance();
    return actor.useState<State>(name, defaultValue, options);
}
|
|
904
|
-
|
|
905
|
-
/**
|
|
906
|
-
* Runs the main user function that performs the job of the actor
|
|
907
|
-
* and terminates the process when the user function finishes.
|
|
908
|
-
*
|
|
909
|
-
* **The `Actor.main()` function is optional** and is provided merely for your convenience.
|
|
910
|
-
* It is mainly useful when you're running your code as an actor on the [Apify platform](https://apify.com/actors).
|
|
911
|
-
* However, if you want to use Apify SDK tools directly inside your existing projects, e.g.
|
|
912
|
-
* running in an [Express](https://expressjs.com/) server, on
|
|
913
|
-
* [Google Cloud functions](https://cloud.google.com/functions)
|
|
914
|
-
* or [AWS Lambda](https://aws.amazon.com/lambda/), it's better to avoid
|
|
915
|
-
* it since the function terminates the main process when it finishes!
|
|
916
|
-
*
|
|
917
|
-
* The `Actor.main()` function performs the following actions:
|
|
918
|
-
*
|
|
919
|
-
* - When running on the Apify platform (i.e. `APIFY_IS_AT_HOME` environment variable is set),
|
|
920
|
-
* it sets up a connection to listen for platform events.
|
|
921
|
-
* For example, to get a notification about an imminent migration to another server.
|
|
922
|
-
* See {@apilink Actor.events} for details.
|
|
923
|
-
* - It checks that either `APIFY_TOKEN` or `APIFY_LOCAL_STORAGE_DIR` environment variable
|
|
924
|
-
* is defined. If not, the function sets `APIFY_LOCAL_STORAGE_DIR` to `./apify_storage`
|
|
925
|
-
* inside the current working directory. This is to simplify running code examples.
|
|
926
|
-
* - It invokes the user function passed as the `userFunc` parameter.
|
|
927
|
-
* - If the user function returned a promise, waits for it to resolve.
|
|
928
|
-
* - If the user function throws an exception or some other error is encountered,
|
|
929
|
-
* prints error details to console so that they are stored to the log.
|
|
930
|
-
* - Exits the Node.js process, with zero exit code on success and non-zero on errors.
|
|
931
|
-
*
|
|
932
|
-
* The user function can be synchronous:
|
|
933
|
-
*
|
|
934
|
-
* ```javascript
|
|
935
|
-
* await Actor.main(() => {
|
|
936
|
-
* // My synchronous function that returns immediately
|
|
937
|
-
* console.log('Hello world from actor!');
|
|
938
|
-
* });
|
|
939
|
-
* ```
|
|
940
|
-
*
|
|
941
|
-
* If the user function returns a promise, it is considered asynchronous:
|
|
942
|
-
* ```javascript
|
|
943
|
-
* import { gotScraping } from 'got-scraping';
|
|
944
|
-
*
|
|
945
|
-
* await Actor.main(() => {
|
|
946
|
-
* // My asynchronous function that returns a promise
|
|
947
|
-
* return gotScraping('http://www.example.com').then((html) => {
|
|
948
|
-
* console.log(html);
|
|
949
|
-
* });
|
|
950
|
-
* });
|
|
951
|
-
* ```
|
|
952
|
-
*
|
|
953
|
-
* To simplify your code, you can take advantage of the `async`/`await` keywords:
|
|
954
|
-
*
|
|
955
|
-
* ```javascript
|
|
956
|
-
* import { gotScraping } from 'got-scraping';
|
|
957
|
-
*
|
|
958
|
-
* await Actor.main(async () => {
|
|
959
|
-
* // My asynchronous function
|
|
960
|
-
* const html = await gotScraping('http://www.example.com');
|
|
961
|
-
* console.log(html);
|
|
962
|
-
* });
|
|
963
|
-
* ```
|
|
964
|
-
*
|
|
965
|
-
* @param userFunc User function to be executed. If it returns a promise,
|
|
966
|
-
* the promise will be awaited. The user function is called with no arguments.
|
|
967
|
-
* @param options
|
|
968
|
-
*/
|
|
969
|
-
static main<T>(userFunc: UserFunc<T>, options?: MainOptions): Promise<T> {
    // Static shortcut: runs `userFunc` through `main()` of the default Actor instance.
    const actor = Actor.getDefaultInstance();
    return actor.main<T>(userFunc, options);
}
|
|
972
|
-
|
|
973
|
-
/**
 * Static shortcut that calls `init()` on the default Actor instance.
 *
 * @param [options] Forwarded unchanged to the instance method.
 */
static async init(options: InitOptions = {}): Promise<void> {
    const actor = Actor.getDefaultInstance();
    return actor.init(options);
}
|
|
976
|
-
|
|
977
|
-
/**
 * Static shortcut that calls `exit()` on the default Actor instance.
 *
 * @param [messageOrOptions] Either a message string or an options object; forwarded unchanged.
 * @param [options] Forwarded unchanged to the instance method.
 */
static async exit(messageOrOptions?: string | ExitOptions, options: ExitOptions = {}): Promise<void> {
    const actor = Actor.getDefaultInstance();
    return actor.exit(messageOrOptions, options);
}
|
|
980
|
-
|
|
981
|
-
/**
 * Static shortcut that calls `fail()` on the default Actor instance.
 *
 * @param [messageOrOptions] Either a message string or an options object; forwarded unchanged.
 * @param [options] Forwarded unchanged to the instance method.
 */
static async fail(messageOrOptions?: string | ExitOptions, options: ExitOptions = {}): Promise<void> {
    const actor = Actor.getDefaultInstance();
    return actor.fail(messageOrOptions, options);
}
|
|
984
|
-
|
|
985
|
-
/**
 * Static shortcut that calls `on()` on the default Actor instance.
 *
 * @param event Name of the event to listen for.
 * @param listener Callback passed to the instance's `on()` for that event.
 */
static on(event: EventTypeName, listener: (...args: any[]) => any): void {
    const actor = Actor.getDefaultInstance();
    actor.on(event, listener);
}
|
|
988
|
-
|
|
989
|
-
/**
 * Static shortcut that calls `off()` on the default Actor instance.
 *
 * @param event Name of the event.
 * @param [listener] Optional callback; forwarded unchanged to the instance's `off()`.
 */
static off(event: EventTypeName, listener?: (...args: any[]) => any): void {
    const actor = Actor.getDefaultInstance();
    actor.off(event, listener);
}
|
|
992
|
-
|
|
993
|
-
/**
|
|
994
|
-
* Runs an actor on the Apify platform using the current user account (determined by the `APIFY_TOKEN` environment variable).
|
|
995
|
-
*
|
|
996
|
-
* The result of the function is an {@apilink ActorRun} object that contains details about the actor run.
|
|
997
|
-
*
|
|
998
|
-
* If you want to run an actor task rather than an actor, please use the {@apilink Actor.callTask} function instead.
|
|
999
|
-
*
|
|
1000
|
-
* For more information about actors, read the [documentation](https://docs.apify.com/actor).
|
|
1001
|
-
*
|
|
1002
|
-
* **Example usage:**
|
|
1003
|
-
*
|
|
1004
|
-
* ```javascript
|
|
1005
|
-
* const run = await Actor.call('apify/hello-world', { myInput: 123 });
|
|
1006
|
-
* ```
|
|
1007
|
-
*
|
|
1008
|
-
* @param actorId
|
|
1009
|
-
* Allowed formats are `username/actor-name`, `userId/actor-name` or actor ID.
|
|
1010
|
-
* @param [input]
|
|
1011
|
-
* Input for the actor. If it is an object, it will be stringified to
|
|
1012
|
-
* JSON and its content type set to `application/json; charset=utf-8`.
|
|
1013
|
-
* Otherwise the `options.contentType` parameter must be provided.
|
|
1014
|
-
* @param [options]
|
|
1015
|
-
*/
|
|
1016
|
-
static async call(actorId: string, input?: unknown, options: CallOptions = {}): Promise<ClientActorRun> {
    // Static shortcut: forwards all arguments to `call()` on the default Actor instance.
    const actor = Actor.getDefaultInstance();
    return actor.call(actorId, input, options);
}
|
|
1019
|
-
|
|
1020
|
-
/**
|
|
1021
|
-
* Runs an actor task on the Apify platform using the current user account (determined by the `APIFY_TOKEN` environment variable).
|
|
1022
|
-
*
|
|
1023
|
-
* The result of the function is an {@apilink ActorRun} object that contains details about the actor run.
|
|
1024
|
-
*
|
|
1025
|
-
* Note that an actor task is a saved input configuration and options for an actor.
|
|
1026
|
-
* If you want to run an actor directly rather than an actor task, please use the
|
|
1027
|
-
* {@apilink Actor.call} function instead.
|
|
1028
|
-
*
|
|
1029
|
-
* For more information about actor tasks, read the [documentation](https://docs.apify.com/tasks).
|
|
1030
|
-
*
|
|
1031
|
-
* **Example usage:**
|
|
1032
|
-
*
|
|
1033
|
-
* ```javascript
|
|
1034
|
-
* const run = await Actor.callTask('bob/some-task');
|
|
1035
|
-
* ```
|
|
1036
|
-
*
|
|
1037
|
-
* @param taskId
|
|
1038
|
-
* Allowed formats are `username/task-name`, `userId/task-name` or task ID.
|
|
1039
|
-
* @param [input]
|
|
1040
|
-
* Input overrides for the actor task. If it is an object, it will be stringified to
|
|
1041
|
-
* JSON and its content type set to `application/json; charset=utf-8`.
|
|
1042
|
-
* Provided input will be merged with actor task input.
|
|
1043
|
-
* @param [options]
|
|
1044
|
-
*/
|
|
1045
|
-
static async callTask(taskId: string, input?: Dictionary, options: CallTaskOptions = {}): Promise<ClientActorRun> {
    // Static shortcut: forwards all arguments to `callTask()` on the default Actor instance.
    const actor = Actor.getDefaultInstance();
    return actor.callTask(taskId, input, options);
}
|
|
1048
|
-
|
|
1049
|
-
/**
|
|
1050
|
-
* Runs an actor on the Apify platform using the current user account (determined by the `APIFY_TOKEN` environment variable),
|
|
1051
|
-
* unlike `Actor.call`, this method just starts the run without waiting for it to finish.
|
|
1052
|
-
*
|
|
1053
|
-
* The result of the function is an {@apilink ActorRun} object that contains details about the actor run.
|
|
1054
|
-
*
|
|
1055
|
-
* For more information about actors, read the
|
|
1056
|
-
* [documentation](https://docs.apify.com/actor).
|
|
1057
|
-
*
|
|
1058
|
-
* **Example usage:**
|
|
1059
|
-
*
|
|
1060
|
-
* ```javascript
|
|
1061
|
-
* const run = await Actor.start('apify/hello-world', { myInput: 123 });
|
|
1062
|
-
* ```
|
|
1063
|
-
*
|
|
1064
|
-
* @param actorId
|
|
1065
|
-
* Allowed formats are `username/actor-name`, `userId/actor-name` or actor ID.
|
|
1066
|
-
* @param [input]
|
|
1067
|
-
* Input for the actor. If it is an object, it will be stringified to
|
|
1068
|
-
* JSON and its content type set to `application/json; charset=utf-8`.
|
|
1069
|
-
* Otherwise the `options.contentType` parameter must be provided.
|
|
1070
|
-
* @param [options]
|
|
1071
|
-
*/
|
|
1072
|
-
static async start(actorId: string, input?: Dictionary, options: CallOptions = {}): Promise<ClientActorRun> {
    // Static shortcut: forwards all arguments to `start()` on the default Actor instance.
    const actor = Actor.getDefaultInstance();
    return actor.start(actorId, input, options);
}
|
|
1075
|
-
|
|
1076
|
-
/**
|
|
1077
|
-
* Aborts given actor run on the Apify platform using the current user account (determined by the `APIFY_TOKEN` environment variable).
|
|
1078
|
-
*
|
|
1079
|
-
* The result of the function is an {@apilink ActorRun} object that contains details about the actor run.
|
|
1080
|
-
*
|
|
1081
|
-
* For more information about actors, read the
|
|
1082
|
-
* [documentation](https://docs.apify.com/actor).
|
|
1083
|
-
*
|
|
1084
|
-
* **Example usage:**
|
|
1085
|
-
*
|
|
1086
|
-
* ```javascript
|
|
1087
|
-
* const run = await Actor.abort(runId);
|
|
1088
|
-
* ```
|
|
1089
|
-
*/
|
|
1090
|
-
static async abort(runId: string, options: AbortOptions = {}): Promise<ClientActorRun> {
    // Static shortcut: forwards all arguments to `abort()` on the default Actor instance.
    const actor = Actor.getDefaultInstance();
    return actor.abort(runId, options);
}
|
|
1093
|
-
|
|
1094
|
-
/**
|
|
1095
|
-
* Transforms this actor run to an actor run of a given actor. The system stops the current container and starts
|
|
1096
|
-
* the new container instead. All the default storages are preserved and the new input is stored under the `INPUT-METAMORPH-1` key
|
|
1097
|
-
* in the same default key-value store.
|
|
1098
|
-
*
|
|
1099
|
-
* @param targetActorId
|
|
1100
|
-
* Either `username/actor-name` or actor ID of an actor to which we want to metamorph.
|
|
1101
|
-
* @param [input]
|
|
1102
|
-
* Input for the actor. If it is an object, it will be stringified to
|
|
1103
|
-
* JSON and its content type set to `application/json; charset=utf-8`.
|
|
1104
|
-
* Otherwise, the `options.contentType` parameter must be provided.
|
|
1105
|
-
* @param [options]
|
|
1106
|
-
*/
|
|
1107
|
-
static async metamorph(targetActorId: string, input?: unknown, options: MetamorphOptions = {}): Promise<void> {
    // Static shortcut: forwards all arguments to `metamorph()` on the default Actor instance.
    const actor = Actor.getDefaultInstance();
    return actor.metamorph(targetActorId, input, options);
}
|
|
1110
|
-
|
|
1111
|
-
/**
|
|
1112
|
-
* Internally reboots this actor run. The system stops the current container and starts
|
|
1113
|
-
* a new container with the same run id.
|
|
1114
|
-
*/
|
|
1115
|
-
static async reboot(): Promise<void> {
    // Static shortcut: calls `reboot()` on the default Actor instance.
    const actor = Actor.getDefaultInstance();
    return actor.reboot();
}
|
|
1118
|
-
|
|
1119
|
-
/**
|
|
1120
|
-
* Creates an ad-hoc webhook for the current actor run, which lets you receive a notification when the actor run finishes or fails.
|
|
1121
|
-
* For more information about Apify actor webhooks, please see the [documentation](https://docs.apify.com/webhooks).
|
|
1122
|
-
*
|
|
1123
|
-
* Note that webhooks are only supported for actors running on the Apify platform.
|
|
1124
|
-
* In local environment, the function will print a warning and have no effect.
|
|
1125
|
-
*
|
|
1126
|
-
* @param options
|
|
1127
|
-
* @returns The return value is the Webhook object.
|
|
1128
|
-
* For more information, see the [Get webhook](https://apify.com/docs/api/v2#/reference/webhooks/webhook-object/get-webhook) API endpoint.
|
|
1129
|
-
*/
|
|
1130
|
-
static async addWebhook(options: WebhookOptions): Promise<Webhook | undefined> {
    // Static shortcut: forwards `options` to `addWebhook()` on the default Actor instance.
    const actor = Actor.getDefaultInstance();
    return actor.addWebhook(options);
}
|
|
1133
|
-
|
|
1134
|
-
/**
|
|
1135
|
-
* Sets the status message for the current actor run.
|
|
1136
|
-
*
|
|
1137
|
-
* @param statusMessage The status message to set for the current actor run.
|
|
1138
|
-
* @returns The return value is the Run object.
|
|
1139
|
-
* For more information, see the [Actor Runs](https://docs.apify.com/api/v2#/reference/actor-runs/) API endpoints.
|
|
1140
|
-
*/
|
|
1141
|
-
static async setStatusMessage(statusMessage: string): Promise<ClientActorRun> {
    // Static shortcut: forwards the message to `setStatusMessage()` on the default Actor instance.
    const actor = Actor.getDefaultInstance();
    return actor.setStatusMessage(statusMessage);
}
|
|
1144
|
-
|
|
1145
|
-
/**
|
|
1146
|
-
* Stores an object or an array of objects to the default {@apilink Dataset} of the current actor run.
|
|
1147
|
-
*
|
|
1148
|
-
* This is just a convenient shortcut for {@apilink Dataset.pushData}.
|
|
1149
|
-
* For example, calling the following code:
|
|
1150
|
-
* ```javascript
|
|
1151
|
-
* await Actor.pushData({ myValue: 123 });
|
|
1152
|
-
* ```
|
|
1153
|
-
*
|
|
1154
|
-
* is equivalent to:
|
|
1155
|
-
* ```javascript
|
|
1156
|
-
* const dataset = await Actor.openDataset();
|
|
1157
|
-
* await dataset.pushData({ myValue: 123 });
|
|
1158
|
-
* ```
|
|
1159
|
-
*
|
|
1160
|
-
* For more information, see {@apilink Actor.openDataset} and {@apilink Dataset.pushData}
|
|
1161
|
-
*
|
|
1162
|
-
* **IMPORTANT**: Make sure to use the `await` keyword when calling `pushData()`,
|
|
1163
|
-
* otherwise the actor process might finish before the data are stored!
|
|
1164
|
-
*
|
|
1165
|
-
* @param item Object or array of objects containing data to be stored in the default dataset.
|
|
1166
|
-
* The objects must be serializable to JSON and the JSON representation of each object must be smaller than 9MB.
|
|
1167
|
-
*/
|
|
1168
|
-
static async pushData<Data extends Dictionary = Dictionary>(item: Data | Data[]): Promise<void> {
    // Static shortcut: forwards `item` to `pushData()` on the default Actor instance.
    const actor = Actor.getDefaultInstance();
    return actor.pushData(item);
}
|
|
1171
|
-
|
|
1172
|
-
/**
|
|
1173
|
-
* Opens a dataset and returns a promise resolving to an instance of the {@apilink Dataset} class.
|
|
1174
|
-
*
|
|
1175
|
-
* Datasets are used to store structured data where each object stored has the same attributes,
|
|
1176
|
-
* such as online store products or real estate offers.
|
|
1177
|
-
* The actual data is stored either on the local filesystem or in the cloud.
|
|
1178
|
-
*
|
|
1179
|
-
* For more details and code examples, see the {@apilink Dataset} class.
|
|
1180
|
-
*
|
|
1181
|
-
* @param [datasetIdOrName]
|
|
1182
|
-
* ID or name of the dataset to be opened. If `null` or `undefined`,
|
|
1183
|
-
* the function returns the default dataset associated with the actor run.
|
|
1184
|
-
* @param [options]
|
|
1185
|
-
*/
|
|
1186
|
-
static async openDataset<Data extends Dictionary = Dictionary>(
    datasetIdOrName?: string | null,
    options: OpenStorageOptions = {},
): Promise<Dataset<Data>> {
    // Static shortcut: forwards all arguments to `openDataset()` on the default Actor instance.
    const actor = Actor.getDefaultInstance();
    return actor.openDataset(datasetIdOrName, options);
}
|
|
1191
|
-
|
|
1192
|
-
/**
|
|
1193
|
-
* Gets a value from the default {@apilink KeyValueStore} associated with the current actor run.
|
|
1194
|
-
*
|
|
1195
|
-
* This is just a convenient shortcut for {@apilink KeyValueStore.getValue}.
|
|
1196
|
-
* For example, calling the following code:
|
|
1197
|
-
* ```javascript
|
|
1198
|
-
* const value = await Actor.getValue('my-key');
|
|
1199
|
-
* ```
|
|
1200
|
-
*
|
|
1201
|
-
* is equivalent to:
|
|
1202
|
-
* ```javascript
|
|
1203
|
-
* const store = await Actor.openKeyValueStore();
|
|
1204
|
-
* const value = await store.getValue('my-key');
|
|
1205
|
-
* ```
|
|
1206
|
-
*
|
|
1207
|
-
* To store the value to the default key-value store, you can use the {@apilink Actor.setValue} function.
|
|
1208
|
-
*
|
|
1209
|
-
* For more information, see {@apilink Actor.openKeyValueStore}
|
|
1210
|
-
* and {@apilink KeyValueStore.getValue}.
|
|
1211
|
-
*
|
|
1212
|
-
* @param key Unique record key.
|
|
1213
|
-
* @returns
|
|
1214
|
-
* Returns a promise that resolves to an object, string
|
|
1215
|
-
* or [`Buffer`](https://nodejs.org/api/buffer.html), depending
|
|
1216
|
-
* on the MIME content type of the record, or `null`
|
|
1217
|
-
* if the record is missing.
|
|
1218
|
-
*/
|
|
1219
|
-
static async getValue<T = unknown>(key: string): Promise<T | null> {
    // Static shortcut: forwards `key` to `getValue()` on the default Actor instance.
    const actor = Actor.getDefaultInstance();
    return actor.getValue(key);
}
|
|
1222
|
-
|
|
1223
|
-
/**
|
|
1224
|
-
* Stores or deletes a value in the default {@apilink KeyValueStore} associated with the current actor run.
|
|
1225
|
-
*
|
|
1226
|
-
* This is just a convenient shortcut for {@apilink KeyValueStore.setValue}.
|
|
1227
|
-
* For example, calling the following code:
|
|
1228
|
-
* ```javascript
|
|
1229
|
-
* await Actor.setValue('OUTPUT', { foo: "bar" });
|
|
1230
|
-
* ```
|
|
1231
|
-
*
|
|
1232
|
-
* is equivalent to:
|
|
1233
|
-
* ```javascript
|
|
1234
|
-
* const store = await Actor.openKeyValueStore();
|
|
1235
|
-
* await store.setValue('OUTPUT', { foo: "bar" });
|
|
1236
|
-
* ```
|
|
1237
|
-
*
|
|
1238
|
-
* To get a value from the default key-value store, you can use the {@apilink Actor.getValue} function.
|
|
1239
|
-
*
|
|
1240
|
-
* For more information, see {@apilink Actor.openKeyValueStore}
|
|
1241
|
-
* and {@apilink KeyValueStore.setValue}.
|
|
1242
|
-
*
|
|
1243
|
-
* @param key
|
|
1244
|
-
* Unique record key.
|
|
1245
|
-
* @param value
|
|
1246
|
-
* Record data, which can be one of the following values:
|
|
1247
|
-
* - If `null`, the record in the key-value store is deleted.
|
|
1248
|
-
* - If no `options.contentType` is specified, `value` can be any JavaScript object, and it will be stringified to JSON.
|
|
1249
|
-
* - If `options.contentType` is set, `value` is taken as is, and it must be a `String` or [`Buffer`](https://nodejs.org/api/buffer.html).
|
|
1250
|
-
* For any other value an error will be thrown.
|
|
1251
|
-
* @param [options]
|
|
1252
|
-
*/
|
|
1253
|
-
static async setValue<T>(key: string, value: T | null, options: RecordOptions = {}): Promise<void> {
    // Static shortcut: forwards all arguments to `setValue()` on the default Actor instance.
    const actor = Actor.getDefaultInstance();
    return actor.setValue(key, value, options);
}
|
|
1256
|
-
|
|
1257
|
-
/**
|
|
1258
|
-
* Gets the actor input value from the default {@apilink KeyValueStore} associated with the current actor run.
|
|
1259
|
-
*
|
|
1260
|
-
* This is just a convenient shortcut for {@apilink KeyValueStore.getValue | `keyValueStore.getValue('INPUT')`}.
|
|
1261
|
-
* For example, calling the following code:
|
|
1262
|
-
* ```javascript
|
|
1263
|
-
* const input = await Actor.getInput();
|
|
1264
|
-
* ```
|
|
1265
|
-
*
|
|
1266
|
-
* is equivalent to:
|
|
1267
|
-
* ```javascript
|
|
1268
|
-
* const store = await Actor.openKeyValueStore();
|
|
1269
|
-
* await store.getValue('INPUT');
|
|
1270
|
-
* ```
|
|
1271
|
-
*
|
|
1272
|
-
* Note that the `getInput()` function does not cache the value read from the key-value store.
|
|
1273
|
-
* If you need to use the input multiple times in your actor,
|
|
1274
|
-
* it is far more efficient to read it once and store it locally.
|
|
1275
|
-
*
|
|
1276
|
-
* For more information, see {@apilink Actor.openKeyValueStore} and {@apilink KeyValueStore.getValue}.
|
|
1277
|
-
*
|
|
1278
|
-
* @returns
|
|
1279
|
-
* Returns a promise that resolves to an object, string
|
|
1280
|
-
* or [`Buffer`](https://nodejs.org/api/buffer.html), depending
|
|
1281
|
-
* on the MIME content type of the record, or `null`
|
|
1282
|
-
* if the record is missing.
|
|
1283
|
-
*/
|
|
1284
|
-
static async getInput<T = Dictionary | string | Buffer>(): Promise<T | null> {
    // Static shortcut: calls `getInput()` on the default Actor instance.
    const actor = Actor.getDefaultInstance();
    return actor.getInput();
}
|
|
1287
|
-
|
|
1288
|
-
/**
 * Opens a key-value store and resolves to the matching {@apilink KeyValueStore} instance.
 *
 * A key-value store holds records or files together with their MIME content type.
 * Every record is written and read under a unique key, and the data itself lives
 * either on a local filesystem or in the Apify cloud.
 *
 * See the {@apilink KeyValueStore} class for more details and code examples.
 *
 * @param [storeIdOrName]
 *   ID or name of the key-value store to open. When `null` or `undefined`,
 *   the default key-value store associated with the actor run is returned.
 * @param [options]
 */
static async openKeyValueStore(storeIdOrName?: string | null, options: OpenStorageOptions = {}): Promise<KeyValueStore> {
    const actor = Actor.getDefaultInstance();
    return actor.openKeyValueStore(storeIdOrName, options);
}
|
|
1305
|
-
|
|
1306
|
-
/**
 * Opens a request queue and resolves to the matching {@apilink RequestQueue} instance.
 *
 * A {@apilink RequestQueue} is a queue of URLs to crawl, stored either on the local
 * filesystem or in the cloud. It is meant for deep crawling of websites: you start
 * with several URLs and then recursively follow links to other pages. Both
 * breadth-first and depth-first crawling orders are supported.
 *
 * See the {@apilink RequestQueue} class for more details and code examples.
 *
 * @param [queueIdOrName]
 *   ID or name of the request queue to open. When `null` or `undefined`,
 *   the default request queue associated with the actor run is returned.
 * @param [options]
 */
static async openRequestQueue(queueIdOrName?: string | null, options: OpenStorageOptions = {}): Promise<RequestQueue> {
    const actor = Actor.getDefaultInstance();
    return actor.openRequestQueue(queueIdOrName, options);
}
|
|
1325
|
-
|
|
1326
|
-
/**
 * Creates a proxy configuration and returns a promise resolving to an instance
 * of the {@apilink ProxyConfiguration} class that is already initialized.
 *
 * Configures connection to a proxy server with the provided options. Proxy servers are used to prevent target websites from blocking
 * your crawlers based on IP address rate limits or blacklists. Setting proxy configuration in your crawlers automatically configures
 * them to use the selected proxies for all connections.
 *
 * For more details and code examples, see the {@apilink ProxyConfiguration} class.
 *
 * ```javascript
 * // Returns initialized proxy configuration class
 * const proxyConfiguration = await Actor.createProxyConfiguration({
 *     groups: ['GROUP1', 'GROUP2'], // List of Apify proxy groups
 *     countryCode: 'US',
 * });
 *
 * const crawler = new CheerioCrawler({
 *     // ...
 *     proxyConfiguration,
 *     requestHandler({ proxyInfo }) {
 *         const usedProxyUrl = proxyInfo.url; // Getting the proxy URL
 *     },
 * });
 * ```
 *
 * For compatibility with existing Actor Input UI (Input Schema), this function
 * returns `undefined` when the following object is passed as `proxyConfigurationOptions`.
 *
 * ```javascript
 * { useApifyProxy: false }
 * ```
 *
 * @param [proxyConfigurationOptions]
 *   Proxy options, optionally extended with the `useApifyProxy` flag; defaults to `{}`.
 * @returns
 *   A promise resolving to the {@apilink ProxyConfiguration} instance, or to `undefined`
 *   in the compatibility case described above.
 */
static async createProxyConfiguration(
    proxyConfigurationOptions: ProxyConfigurationOptions & { useApifyProxy?: boolean } = {},
): Promise<ProxyConfiguration | undefined> {
    return Actor.getDefaultInstance().createProxyConfiguration(proxyConfigurationOptions);
}
|
|
1366
|
-
|
|
1367
|
-
/**
 * Builds a new {@apilink ApifyEnv} object holding the information parsed from all the `APIFY_XXX` environment variables.
 *
 * The list of the `APIFY_XXX` environment variables is available in the
 * [Actor documentation](https://docs.apify.com/actor/run#environment-variables).
 * A variable that is not defined or is invalid yields `null` in the corresponding field of the result.
 */
static getEnv(): ApifyEnv {
    const actor = Actor.getDefaultInstance();
    return actor.getEnv();
}
|
|
1377
|
-
|
|
1378
|
-
/**
 * Creates a new instance of the Apify API client. The `ApifyClient` class comes from the
 * [apify-client](https://www.npmjs.com/package/apify-client) NPM package and is configured
 * automatically from the `APIFY_API_BASE_URL` and `APIFY_TOKEN` environment variables.
 * The token can be overridden through the available options, which is useful when the client
 * should act as a different Apify user than the one the SDK internals are using.
 */
static newClient(options: ApifyClientOptions = {}): ApifyClient {
    const actor = Actor.getDefaultInstance();
    return actor.newClient(options);
}
|
|
1388
|
-
|
|
1389
|
-
/**
 * Tells whether the code is running on the Apify platform (`true`) or elsewhere, for example locally (`false`).
 */
static isAtHome(): boolean {
    const actor = Actor.getDefaultInstance();
    return actor.isAtHome();
}
|
|
1395
|
-
|
|
1396
|
-
/** The {@apilink ApifyClient} instance held by the default `Actor` instance. */
static get apifyClient(): ApifyClient {
    const actor = Actor.getDefaultInstance();
    return actor.apifyClient;
}
|
|
1400
|
-
|
|
1401
|
-
/** The {@apilink Configuration} instance held by the default `Actor` instance. */
static get config(): Configuration {
    const actor = Actor.getDefaultInstance();
    return actor.config;
}
|
|
1405
|
-
|
|
1406
|
-
/** @internal */
static getDefaultInstance(): Actor {
    // Create the shared instance lazily, on first access only.
    if (this._instance === null || this._instance === undefined) {
        this._instance = new Actor();
    }

    return this._instance;
}
|
|
1411
|
-
|
|
1412
|
-
private _openStorage<T extends IStorage>(storageClass: Constructor<T>, id?: string, options: OpenStorageOptions = {}) {
    // The Apify API client is handed over only when cloud storage is explicitly forced;
    // otherwise `undefined` is passed to the storage manager.
    let client: ApifyClient | undefined;
    if (options.forceCloud) {
        client = this.apifyClient;
    }

    return StorageManager.openStorage<T>(storageClass, id, client, this.config);
}
|
|
1416
|
-
|
|
1417
|
-
private _ensureActorInit(methodCalled: string) {
    // Stay silent when the warning has already been printed once (to prevent spam)
    // or when the actor has been initialized.
    if (this.warnedAboutMissingInitCall || this.initialized) {
        return;
    }

    this.warnedAboutMissingInitCall = true;

    const problem = `Actor.${methodCalled}() was called but the actor instance was not initialized.`;
    const hint = 'Did you forget to call Actor.init()?';
    log.warning(`${problem}\n${hint}`);
}
|
|
1434
|
-
}
|
|
1435
|
-
|
|
1436
|
-
/**
 * Options of the actor initialization.
 */
export interface InitOptions {
    /**
     * Storage client instance.
     * NOTE(review): presumably replaces the default storage backend during `Actor.init()` — confirm against the caller.
     */
    storage?: StorageClient;
}
|
|
1439
|
-
|
|
1440
|
-
/** Union of the {@apilink ExitOptions} and {@apilink InitOptions} fields; it declares no members of its own. */
export interface MainOptions extends ExitOptions, InitOptions {}
|
|
1441
|
-
|
|
1442
|
-
/**
 * Parsed representation of the `APIFY_XXX` environment variables.
 * This object is returned by the {@apilink Actor.getEnv} function.
 */
export interface ApifyEnv {
    /**
     * ID of the actor (APIFY_ACTOR_ID)
     */
    actorId: string | null;

    /**
     * ID of the actor run (APIFY_ACTOR_RUN_ID)
     */
    actorRunId: string | null;

    /**
     * ID of the actor task (APIFY_ACTOR_TASK_ID)
     */
    actorTaskId: string | null;

    /**
     * ID of the user who started the actor - note that it might be
     * different from the owner of the actor (APIFY_USER_ID)
     */
    userId: string | null;

    /**
     * Authentication token representing privileges given to the actor run,
     * it can be passed to various Apify APIs (APIFY_TOKEN)
     */
    token: string | null;

    /**
     * Date when the actor was started (APIFY_STARTED_AT)
     */
    startedAt: Date | null;

    /**
     * Date when the actor will time out (APIFY_TIMEOUT_AT)
     */
    timeoutAt: Date | null;

    /**
     * ID of the key-value store where input and output data of this
     * actor is stored (APIFY_DEFAULT_KEY_VALUE_STORE_ID)
     */
    defaultKeyValueStoreId: string | null;

    /**
     * ID of the dataset where input and output data of this
     * actor is stored (APIFY_DEFAULT_DATASET_ID)
     */
    defaultDatasetId: string | null;

    /**
     * Amount of memory allocated for the actor,
     * in megabytes (APIFY_MEMORY_MBYTES)
     */
    memoryMbytes: number | null;
}
|
|
1502
|
-
|
|
1503
|
-
/** A function that takes no arguments and returns `Awaitable<T>`. */
export type UserFunc<T = unknown> = () => Awaitable<T>;
|
|
1504
|
-
|
|
1505
|
-
/** `ActorStartOptions` extended with an optional API token. */
export interface CallOptions extends ActorStartOptions {
    /**
     * User API token used to run the actor.
     * When omitted, the value of the `APIFY_TOKEN` environment variable is used.
     */
    token?: string;
}
|
|
1511
|
-
|
|
1512
|
-
/** `TaskStartOptions` extended with an optional API token. */
export interface CallTaskOptions extends TaskStartOptions {
    /**
     * User API token used to run the actor.
     * When omitted, the value of the `APIFY_TOKEN` environment variable is used.
     */
    token?: string;
}
|
|
1518
|
-
|
|
1519
|
-
/** `RunAbortOptions` extended with an optional API token and status message. */
export interface AbortOptions extends RunAbortOptions {
    /**
     * User API token used to run the actor.
     * When omitted, the value of the `APIFY_TOKEN` environment variable is used.
     */
    token?: string;

    /** Status message to exit with. */
    statusMessage?: string;
}
|
|
1528
|
-
|
|
1529
|
-
/** Describes a webhook to be attached to an actor run. */
export interface WebhookOptions {
    /**
     * Event types that can be set for the actor run; see
     * the [actor run events](https://docs.apify.com/webhooks/events#actor-run) in the Apify docs.
     */
    eventTypes: readonly WebhookEventType[];

    /**
     * URL that is requested with an HTTP POST request once the actor run reaches one of the set event types.
     */
    requestUrl: string;

    /**
     * A JSON-like string describing the structure of the webhook POST request payload.
     * It uses JSON syntax extended with double curly braces for injecting variables: `{{variable}}`.
     * The variables are resolved when the webhook is dispatched; the list of available variables
     * with their descriptions is in the [Apify webhook documentation](https://docs.apify.com/webhooks).
     * When `payloadTemplate` is omitted, the default payload template is used
     * ([view docs](https://docs.apify.com/webhooks/actions#payload-template)).
     */
    payloadTemplate?: string;

    /**
     * An idempotency key ensures that the webhook is not added multiple times when an actor restart
     * or another situation causes the `addWebhook()` function to be called again.
     * Using the actor run ID as the idempotency key is suggested; the run ID is available
     * through the {@apilink Actor.getEnv} function.
     */
    idempotencyKey?: string;
}
|
|
1559
|
-
|
|
1560
|
-
/** Options of the metamorph operation. */
export interface MetamorphOptions {
    /**
     * Content type of the `input`. When it is not specified, `input` is expected to be an object;
     * it is stringified to JSON and the content type is set to `application/json; charset=utf-8`.
     * When `options.contentType` is specified, `input` must be a `String` or `Buffer`.
     */
    contentType?: string;

    /**
     * Tag or number of the target actor build to metamorph into (e.g. `beta` or `1.2.345`).
     * When omitted, the run uses the build tag or number from the default actor run configuration (typically `latest`).
     */
    build?: string;

    /** @internal */
    customAfterSleepMillis?: number;
}
|
|
1578
|
-
|
|
1579
|
-
/** Options controlling how the actor exits. */
export interface ExitOptions {
    /** Status message to exit with. */
    statusMessage?: string;

    /**
     * How long, in seconds, to wait for all event handlers to finish before exiting the process.
     * @default 30
     */
    timeoutSecs?: number;

    /** Exit code; defaults to 0. */
    exitCode?: number;

    /** Whether to call `process.exit()`; defaults to true. */
    exit?: boolean;
}
|
|
1592
|
-
|
|
1593
|
-
/** Options accepted when opening a storage. */
export interface OpenStorageOptions {
    /**
     * When `true`, the cloud storage is used even if the `APIFY_LOCAL_STORAGE_DIR`
     * environment variable is set, which makes it possible to combine local and cloud storage.
     * @default false
     */
    forceCloud?: boolean;
}
|
|
1601
|
-
|
|
1602
|
-
// Re-export `ActorRun` of `apify-client` (imported in this file as `ClientActorRun`) under the name `ActorRun`.
export { ClientActorRun as ActorRun };
|
|
1603
|
-
|
|
1604
|
-
/**
 * Exit codes of the actor process.
 * Error codes have to stay within the 1-128 range, both to avoid collisions with signal exits
 * and to make sure Docker handles them correctly!
 * @internal should be removed if we decide to remove `Actor.main()`
 */
export const EXIT_CODES = {
    SUCCESS: 0,
    ERROR_USER_FUNCTION_THREW: 91,
    ERROR_UNKNOWN: 92,
};
|