apify 3.7.3-beta.9 → 4.0.0-beta.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,7 @@
1
- "use strict";
2
1
  // TODO: https://github.com/apify/apify-shared-js/issues/547
3
- Object.defineProperty(exports, "__esModule", { value: true });
4
- exports.getDefaultsFromInputSchema = exports.readInputSchema = exports.noActorInputSchemaDefinedMarker = void 0;
5
- const tslib_1 = require("tslib");
6
- const node_fs_1 = require("node:fs");
7
- const node_path_1 = require("node:path");
8
- const node_process_1 = tslib_1.__importDefault(require("node:process"));
2
+ import { existsSync, readFileSync } from 'node:fs';
3
+ import { join } from 'node:path';
4
+ import process from 'node:process';
9
5
  // These paths are used *if* there is no `input` field in the actor.json configuration file!
10
6
  const DEFAULT_INPUT_SCHEMA_PATHS = [
11
7
  ['.actor', 'INPUT_SCHEMA.json'],
@@ -16,8 +12,8 @@ const DEFAULT_INPUT_SCHEMA_PATHS = [
16
12
  const ACTOR_SPECIFICATION_FOLDER = '.actor';
17
13
  const LOCAL_CONFIG_NAME = 'actor.json';
18
14
  const readJSONIfExists = (path) => {
19
- if ((0, node_fs_1.existsSync)(path)) {
20
- const content = (0, node_fs_1.readFileSync)(path, 'utf8');
15
+ if (existsSync(path)) {
16
+ const content = readFileSync(path, 'utf8');
21
17
  return JSON.parse(content);
22
18
  }
23
19
  return null;
@@ -25,21 +21,21 @@ const readJSONIfExists = (path) => {
25
21
  /**
26
22
  * @ignore
27
23
  */
28
- exports.noActorInputSchemaDefinedMarker = Symbol.for('apify.noActorInputSchemaDefined');
29
- const readInputSchema = () => {
30
- const localConfig = readJSONIfExists((0, node_path_1.join)(node_process_1.default.cwd(), ACTOR_SPECIFICATION_FOLDER, LOCAL_CONFIG_NAME));
24
+ export const noActorInputSchemaDefinedMarker = Symbol.for('apify.noActorInputSchemaDefined');
25
+ export const readInputSchema = () => {
26
+ const localConfig = readJSONIfExists(join(process.cwd(), ACTOR_SPECIFICATION_FOLDER, LOCAL_CONFIG_NAME));
31
27
  // Input schema nested in the actor config
32
28
  if (typeof localConfig?.input === 'object') {
33
29
  return localConfig.input;
34
30
  }
35
31
  // Input schema path from the actor config
36
32
  if (typeof localConfig?.input === 'string') {
37
- const fullPath = (0, node_path_1.join)(node_process_1.default.cwd(), ACTOR_SPECIFICATION_FOLDER, localConfig.input);
33
+ const fullPath = join(process.cwd(), ACTOR_SPECIFICATION_FOLDER, localConfig.input);
38
34
  return readJSONIfExists(fullPath);
39
35
  }
40
36
  // Try to find it from possible default paths
41
37
  for (const path of DEFAULT_INPUT_SCHEMA_PATHS) {
42
- const fullPath = (0, node_path_1.join)(node_process_1.default.cwd(), ...path);
38
+ const fullPath = join(process.cwd(), ...path);
43
39
  const result = readJSONIfExists(fullPath);
44
40
  if (result) {
45
41
  return result;
@@ -47,12 +43,11 @@ const readInputSchema = () => {
47
43
  }
48
44
  // If we are in an Actor context, BUT we do not have an input schema defined, we want to skip the warning
49
45
  if (!localConfig?.input) {
50
- return exports.noActorInputSchemaDefinedMarker;
46
+ return noActorInputSchemaDefinedMarker;
51
47
  }
52
48
  return null;
53
49
  };
54
- exports.readInputSchema = readInputSchema;
55
- const getDefaultsFromInputSchema = (inputSchema) => {
50
+ export const getDefaultsFromInputSchema = (inputSchema) => {
56
51
  const defaults = {};
57
52
  for (const [key, fieldSchema] of Object.entries(inputSchema.properties)) {
58
53
  if (fieldSchema.default !== undefined) {
@@ -61,4 +56,3 @@ const getDefaultsFromInputSchema = (inputSchema) => {
61
56
  }
62
57
  return defaults;
63
58
  };
64
- exports.getDefaultsFromInputSchema = getDefaultsFromInputSchema;
@@ -1,4 +1,4 @@
1
- import type { StorageManagerOptions } from '@crawlee/core';
1
+ import type { StorageOpenOptions } from '@crawlee/core';
2
2
  import { KeyValueStore as CoreKeyValueStore } from '@crawlee/core';
3
3
  /**
4
4
  * @inheritDoc
@@ -7,10 +7,15 @@ export declare class KeyValueStore extends CoreKeyValueStore {
7
7
  /**
8
8
  * Returns a URL for the given key that may be used to publicly
9
9
  * access the value in the remote key-value store.
10
+ *
11
+ * On the Apify platform the URL is signed with the store's
12
+ * `urlSigningSecretKey` so that anyone with the URL can read the record
13
+ * without authentication. Locally we delegate to crawlee's default
14
+ * implementation (which produces a `file://` URL or returns `undefined`).
10
15
  */
11
- getPublicUrl(key: string): string;
16
+ getPublicUrl(key: string): Promise<string | undefined>;
12
17
  /**
13
18
  * @inheritDoc
14
19
  */
15
- static open(storeIdOrName?: string | null, options?: StorageManagerOptions): Promise<KeyValueStore>;
20
+ static open(storeIdOrName?: string | null, options?: StorageOpenOptions): Promise<KeyValueStore>;
16
21
  }
@@ -1,30 +1,34 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.KeyValueStore = void 0;
4
- const core_1 = require("@crawlee/core");
5
- const apify_client_1 = require("apify-client");
6
- const utilities_1 = require("@apify/utilities");
7
- // @ts-ignore newer crawlee versions already declare this method in core
8
- const { getPublicUrl } = core_1.KeyValueStore.prototype;
1
+ import { KeyValueStore as CoreKeyValueStore } from '@crawlee/core';
2
+ import { KeyValueStoreClient as RemoteKeyValueStoreClient } from 'apify-client';
3
+ import { createHmacSignature } from '@apify/utilities';
9
4
  /**
10
5
  * @inheritDoc
11
6
  */
12
- class KeyValueStore extends core_1.KeyValueStore {
7
+ export class KeyValueStore extends CoreKeyValueStore {
13
8
  /**
14
9
  * Returns a URL for the given key that may be used to publicly
15
10
  * access the value in the remote key-value store.
11
+ *
12
+ * On the Apify platform the URL is signed with the store's
13
+ * `urlSigningSecretKey` so that anyone with the URL can read the record
14
+ * without authentication. Locally we delegate to crawlee's default
15
+ * implementation (which produces a `file://` URL or returns `undefined`).
16
16
  */
17
- getPublicUrl(key) {
17
+ async getPublicUrl(key) {
18
18
  const config = this.config;
19
- const isLocalStore = !(
20
- // eslint-disable-next-line dot-notation
21
- (this['client'] instanceof apify_client_1.KeyValueStoreClient));
22
- if (isLocalStore && getPublicUrl) {
23
- return getPublicUrl.call(this, key);
19
+ // Detect a remote (Apify) store by its client type rather than by
20
+ // `isAtHome`, so that a `forceCloud` store opened locally still gets a
21
+ // signed Apify URL (matching the platform behaviour). `client` is
22
+ // `private` on `CoreKeyValueStore`, so bypass the visibility check.
23
+ const { client } = this;
24
+ const isLocalStore = !(client instanceof RemoteKeyValueStoreClient);
25
+ if (isLocalStore) {
26
+ return super.getPublicUrl(key);
24
27
  }
25
- const publicUrl = new URL(`${config.get('apiPublicBaseUrl')}/v2/key-value-stores/${this.id}/records/${key}`);
26
- if (this.storageObject?.urlSigningSecretKey) {
27
- publicUrl.searchParams.append('signature', (0, utilities_1.createHmacSignature)(this.storageObject.urlSigningSecretKey, key));
28
+ const publicUrl = new URL(`${config.apiPublicBaseUrl}/v2/key-value-stores/${this.id}/records/${key}`);
29
+ const metadata = (await client.getMetadata());
30
+ if (metadata?.urlSigningSecretKey) {
31
+ publicUrl.searchParams.append('signature', createHmacSignature(metadata.urlSigningSecretKey, key));
28
32
  }
29
33
  return publicUrl.toString();
30
34
  }
@@ -35,6 +39,3 @@ class KeyValueStore extends core_1.KeyValueStore {
35
39
  return super.open(storeIdOrName, options);
36
40
  }
37
41
  }
38
- exports.KeyValueStore = KeyValueStore;
39
- // @ts-ignore newer crawlee versions already declare this method in core
40
- core_1.KeyValueStore.prototype.getPublicUrl = KeyValueStore.prototype.getPublicUrl;
@@ -26,15 +26,10 @@ import { Configuration } from './configuration.js';
26
26
  * You can use it to persist the state of the Actor and gracefully stop your in-progress tasks,
27
27
  * so that they are not interrupted by the migration.
28
28
  * For example, this is used by the {@link RequestList} class.
29
- * If you pass `gracefulShutdown: true` to {@link Actor.init}, the SDK will automatically call
30
- * {@link Actor.reboot} when this event is received, which speeds up the migration and lets the
31
- * run continue on a new worker.
32
29
  * - `aborting`: `void`
33
30
  * When a user aborts an Actor run on the Apify platform, they can choose to abort gracefully to allow
34
31
  * the Actor some time before getting killed. This graceful abort emits the `aborting` event which the SDK
35
32
  * uses to gracefully stop running crawls and you can use it to do your own cleanup as well.
36
- * If you pass `gracefulShutdown: true` to {@link Actor.init}, the SDK will automatically call
37
- * {@link Actor.exit} when this event is received.
38
33
  * - `persistState`: `{ "isMigrating": Boolean }`
39
34
  * Emitted in regular intervals (by default 60 seconds) to notify all components of Apify SDK that it is time to persist
40
35
  * their state, in order to avoid repeating all work when the Actor restarts.
@@ -1,11 +1,8 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.PlatformEventManager = void 0;
4
- const core_1 = require("@crawlee/core");
5
- const ws_1 = require("ws");
6
- const consts_1 = require("@apify/consts");
7
- const utilities_1 = require("@apify/utilities");
8
- const configuration_js_1 = require("./configuration.js");
1
+ import { EventManager } from '@crawlee/core';
2
+ import { WebSocket } from 'ws';
3
+ import { ACTOR_ENV_VARS, ACTOR_EVENT_NAMES } from '@apify/consts';
4
+ import { betterClearInterval } from '@apify/utilities';
5
+ import { Configuration } from './configuration.js';
9
6
  /**
10
7
  * Gets an instance of a Node.js'
11
8
  * [EventEmitter](https://nodejs.org/api/events.html#events_class_eventemitter)
@@ -32,15 +29,10 @@ const configuration_js_1 = require("./configuration.js");
32
29
  * You can use it to persist the state of the Actor and gracefully stop your in-progress tasks,
33
30
  * so that they are not interrupted by the migration.
34
31
  * For example, this is used by the {@link RequestList} class.
35
- * If you pass `gracefulShutdown: true` to {@link Actor.init}, the SDK will automatically call
36
- * {@link Actor.reboot} when this event is received, which speeds up the migration and lets the
37
- * run continue on a new worker.
38
32
  * - `aborting`: `void`
39
33
  * When a user aborts an Actor run on the Apify platform, they can choose to abort gracefully to allow
40
34
  * the Actor some time before getting killed. This graceful abort emits the `aborting` event which the SDK
41
35
  * uses to gracefully stop running crawls and you can use it to do your own cleanup as well.
42
- * If you pass `gracefulShutdown: true` to {@link Actor.init}, the SDK will automatically call
43
- * {@link Actor.exit} when this event is received.
44
36
  * - `persistState`: `{ "isMigrating": Boolean }`
45
37
  * Emitted in regular intervals (by default 60 seconds) to notify all components of Apify SDK that it is time to persist
46
38
  * their state, in order to avoid repeating all work when the Actor restarts.
@@ -49,22 +41,15 @@ const configuration_js_1 = require("./configuration.js");
49
41
  * Note that the `persistState` event is provided merely for user convenience,
50
42
  * you can achieve the same effect using `setInterval()` and listening for the `migrating` event.
51
43
  */
52
- class PlatformEventManager extends core_1.EventManager {
53
- constructor(config = configuration_js_1.Configuration.getGlobalConfig()) {
54
- super();
55
- Object.defineProperty(this, "config", {
56
- enumerable: true,
57
- configurable: true,
58
- writable: true,
59
- value: config
60
- });
61
- /** Websocket connection to Actor events. */
62
- Object.defineProperty(this, "eventsWs", {
63
- enumerable: true,
64
- configurable: true,
65
- writable: true,
66
- value: void 0
44
+ export class PlatformEventManager extends EventManager {
45
+ config;
46
+ /** Websocket connection to Actor events. */
47
+ eventsWs;
48
+ constructor(config = Configuration.getGlobalConfig()) {
49
+ super({
50
+ persistStateIntervalMillis: config.persistStateIntervalMillis,
67
51
  });
52
+ this.config = config;
68
53
  }
69
54
  /**
70
55
  * Initializes `Actor.events` event emitter by creating a connection to a websocket that provides them.
@@ -75,24 +60,24 @@ class PlatformEventManager extends core_1.EventManager {
75
60
  return;
76
61
  }
77
62
  await super.init();
78
- const eventsWsUrl = this.config.get('actorEventsWsUrl');
63
+ const eventsWsUrl = this.config.actorEventsWsUrl;
79
64
  // Locally there is no web socket to connect, so just print a log message.
80
65
  if (!eventsWsUrl) {
81
- this.log.debug(`Environment variable ${consts_1.ACTOR_ENV_VARS.EVENTS_WEBSOCKET_URL} is not set, no events from Apify platform will be emitted.`);
66
+ this.log.debug(`Environment variable ${ACTOR_ENV_VARS.EVENTS_WEBSOCKET_URL} is not set, no events from Apify platform will be emitted.`);
82
67
  return;
83
68
  }
84
69
  this.createWebSocketConnection(eventsWsUrl);
85
70
  }
86
71
  createWebSocketConnection(eventsWsUrl) {
87
- this.eventsWs = new ws_1.WebSocket(eventsWsUrl);
72
+ this.eventsWs = new WebSocket(eventsWsUrl);
88
73
  this.eventsWs.on('message', (message) => {
89
74
  if (!message)
90
75
  return;
91
76
  try {
92
77
  const { name, data } = JSON.parse(String(message));
93
78
  this.events.emit(name, data);
94
- if (name === consts_1.ACTOR_EVENT_NAMES.MIGRATING) {
95
- (0, utilities_1.betterClearInterval)(this.intervals.persistState); // Don't send any other persist state event.
79
+ if (name === ACTOR_EVENT_NAMES.MIGRATING) {
80
+ betterClearInterval(this.intervals.persistState); // Don't send any other persist state event.
96
81
  this.events.emit("persistState" /* EventType.PERSIST_STATE */, {
97
82
  isMigrating: true,
98
83
  });
@@ -126,4 +111,3 @@ class PlatformEventManager extends core_1.EventManager {
126
111
  this.eventsWs?.close();
127
112
  }
128
113
  }
129
- exports.PlatformEventManager = PlatformEventManager;
@@ -1,6 +1,8 @@
1
- import type { ProxyConfigurationOptions as CoreProxyConfigurationOptions, ProxyInfo as CoreProxyInfo } from '@crawlee/core';
1
+ import type { ProxyConfigurationOptions as CoreProxyConfigurationOptions } from '@crawlee/core';
2
2
  import { ProxyConfiguration as CoreProxyConfiguration } from '@crawlee/core';
3
+ import type { ProxyInfo as CoreProxyInfo } from '@crawlee/types';
3
4
  import { Configuration } from './configuration.js';
5
+ type NewUrlOptions = Parameters<CoreProxyConfiguration['newProxyInfo']>[0];
4
6
  export interface ProxyConfigurationOptions extends CoreProxyConfigurationOptions {
5
7
  /**
6
8
  * User's password for the proxy. By default, it is taken from the `APIFY_PROXY_PASSWORD`
@@ -24,32 +26,27 @@ export interface ProxyConfigurationOptions extends CoreProxyConfigurationOptions
24
26
  * on the Apify cloud, or when using the [Apify CLI](https://github.com/apify/apify-cli).
25
27
  */
26
28
  countryCode?: string;
29
+ /**
30
+ * If set, all proxied requests will use IP addresses geolocated to the specified subdivision (e.g. US state).
31
+ * Requires `countryCode` to be set. The value must follow the ISO 3166-2 subdivision code format,
32
+ * e.g. `'CA'` for California when `countryCode` is `'US'`.
33
+ */
34
+ subdivisionCode?: string;
27
35
  /**
28
36
  * Same option as `groups` which can be used to
29
- * configure the proxy by UI input schema. You should use the `groups` option in your crawler code.
37
+ * configure the proxy by UI input schema. You should use the `groups` option in your crawler code.
30
38
  */
31
39
  apifyProxyGroups?: string[];
32
40
  /**
33
41
  * Same option as `countryCode` which can be used to
34
- * configure the proxy by UI input schema. You should use the `countryCode` option in your crawler code.
42
+ * configure the proxy by UI input schema. You should use the `countryCode` option in your crawler code.
35
43
  */
36
44
  apifyProxyCountry?: string;
37
- /**
38
- * If set, all proxied requests will use IP addresses geolocated to the specified subdivision (e.g. US state).
39
- * Requires `countryCode` to be set. The value must follow the ISO 3166-2 subdivision code format,
40
- * e.g. `'CA'` for California when `countryCode` is `'US'`.
41
- */
42
- subdivisionCode?: string;
43
45
  /**
44
46
  * Same option as `subdivisionCode` which can be used to
45
- * configure the proxy by UI input schema. You should use the `subdivisionCode` option in your crawler code.
47
+ * configure the proxy by UI input schema. You should use the `subdivisionCode` option in your crawler code.
46
48
  */
47
49
  apifyProxySubdivision?: string;
48
- /**
49
- * Multiple different ProxyConfigurationOptions stratified into tiers. Crawlee crawlers will switch between those tiers
50
- * based on the blocked request statistics.
51
- */
52
- tieredProxyConfig?: Omit<ProxyConfigurationOptions, keyof CoreProxyConfigurationOptions | 'tieredProxyConfig'>[];
53
50
  /**
54
51
  * As part of the init process, we verify the configuration by checking the proxy status endpoint.
55
52
  * This can make the init slower, to opt-out of this, use `checkAccess: false` (defaults to `true`).
@@ -80,9 +77,6 @@ export interface ProxyConfigurationOptions extends CoreProxyConfigurationOptions
80
77
  * requestHandler({ proxyInfo }) {
81
78
  * // Getting used proxy URL
82
79
  * const proxyUrl = proxyInfo.url;
83
- *
84
- * // Getting ID of used Session
85
- * const sessionIdentifier = proxyInfo.sessionId;
86
80
  * }
87
81
  * })
88
82
  *
@@ -93,7 +87,7 @@ export interface ProxyInfo extends CoreProxyInfo {
93
87
  * An array of proxy groups to be used by the [Apify Proxy](https://docs.apify.com/proxy).
94
88
  * If not provided, the proxy will select the groups automatically.
95
89
  */
96
- groups: string[];
90
+ groups?: string[];
97
91
  /**
98
92
  * If set and relevant proxies are available in your Apify account, all proxied requests will
99
93
  * use IP addresses that are geolocated to the specified country. For example `GB` for IPs
@@ -170,51 +164,27 @@ export declare class ProxyConfiguration extends CoreProxyConfiguration {
170
164
  *
171
165
  * You should use the {@link createProxyConfiguration} function to create a pre-initialized
172
166
  * `ProxyConfiguration` instance instead of calling this manually.
173
- *
174
- * As part of the init process, we verify the configuration by checking the proxy status endpoint.
175
- * This can make the init slower, to opt-out of this, use `checkAccess: false`.
176
167
  */
177
168
  initialize(options?: {
178
169
  checkAccess?: boolean;
179
170
  }): Promise<boolean>;
180
171
  /**
181
- * This function creates a new {@link ProxyInfo} info object.
182
- * It is used by CheerioCrawler and PuppeteerCrawler to generate proxy URLs and also to allow the user to inspect
183
- * the currently used proxy via the requestHandler parameter `proxyInfo`.
184
- * Use it if you want to work with a rich representation of a proxy URL.
185
- * If you need the URL string only, use {@link ProxyConfiguration.newUrl}.
186
- * @param [sessionId]
187
- * Represents the identifier of user {@link Session} that can be managed by the {@link SessionPool} or
188
- * you can use the Apify Proxy [Session](https://docs.apify.com/proxy#sessions) identifier.
189
- * When the provided sessionId is a number, it's converted to a string. Property sessionId of
190
- * {@link ProxyInfo} is always returned as a type string.
191
- *
192
- * All the HTTP requests going through the proxy with the same session identifier
193
- * will use the same target proxy server (i.e. the same IP address).
194
- * The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`.
195
- * @return Represents information about used proxy and its configuration.
196
- */
197
- newProxyInfo(sessionId?: string | number, options?: Parameters<CoreProxyConfiguration['newProxyInfo']>[1]): Promise<ProxyInfo | undefined>;
198
- /**
199
- * Returns a new proxy URL based on provided configuration options and the `sessionId` parameter.
200
- * @param [sessionId]
201
- * Represents the identifier of user {@link Session} that can be managed by the {@link SessionPool} or
202
- * you can use the Apify Proxy [Session](https://docs.apify.com/proxy#sessions) identifier.
203
- * When the provided sessionId is a number, it's converted to a string.
204
- *
205
- * All the HTTP requests going through the proxy with the same session identifier
206
- * will use the same target proxy server (i.e. the same IP address).
207
- * The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`.
208
- * @return A string with a proxy URL, including authentication credentials and port number.
209
- * For example, `http://bob:password123@proxy.example.com:8000`
172
+ * Returns a new {@link ProxyInfo} object with a fresh proxy URL. Each call mints an
173
+ * independent URL; for Apify Proxy a random session id is embedded so consecutive
174
+ * calls resolve to different IPs.
175
+ */
176
+ newProxyInfo(options?: NewUrlOptions): Promise<ProxyInfo | undefined>;
177
+ /**
178
+ * Returns a new proxy URL. For Apify Proxy, each call generates a URL with a fresh
179
+ * random session id, so consecutive calls return independent URLs. For custom
180
+ * `proxyUrls`, the URLs are rotated round-robin.
210
181
  */
211
- newUrl(sessionId?: string | number, options?: Parameters<CoreProxyConfiguration['newUrl']>[1]): Promise<string | undefined>;
212
- protected _generateTieredProxyUrls(tieredProxyConfig: NonNullable<ProxyConfigurationOptions['tieredProxyConfig']>, globalOptions: ProxyConfigurationOptions): string[][];
182
+ newUrl(options?: NewUrlOptions): Promise<string | undefined>;
213
183
  /**
214
184
  * Returns proxy username.
215
185
  */
216
- protected _getUsername(sessionId?: string): string;
217
- protected composeDefaultUrl(sessionId?: string): string;
186
+ protected _getUsername(sessionId: string): string;
187
+ protected composeDefaultUrl(sessionId: string): string;
218
188
  /**
219
189
  * Fetch & set the proxy password from Apify API if an Apify token is provided.
220
190
  */
@@ -239,3 +209,4 @@ export declare class ProxyConfiguration extends CoreProxyConfiguration {
239
209
  */
240
210
  protected _throwCannotCombineCustomWithApify(): void;
241
211
  }
212
+ export {};