apify 4.0.0-beta.12 → 4.0.0-beta.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/README.md +12 -48
  2. package/dist/actor.d.ts +157 -61
  3. package/dist/actor.js +278 -91
  4. package/dist/apify_storage_client.d.ts +54 -0
  5. package/dist/apify_storage_client.js +152 -0
  6. package/dist/charging.d.ts +43 -2
  7. package/dist/charging.js +196 -54
  8. package/dist/configuration.d.ts +79 -132
  9. package/dist/configuration.js +114 -141
  10. package/dist/index.d.ts +2 -2
  11. package/dist/index.js +1 -2
  12. package/dist/input-schemas.d.ts +7 -0
  13. package/dist/input-schemas.js +58 -0
  14. package/dist/key_value_store.d.ts +8 -4
  15. package/dist/key_value_store.js +19 -11
  16. package/dist/platform_event_manager.d.ts +0 -1
  17. package/dist/platform_event_manager.js +5 -5
  18. package/dist/proxy_configuration.d.ts +41 -44
  19. package/dist/proxy_configuration.js +65 -103
  20. package/dist/storage.d.ts +58 -0
  21. package/dist/storage.js +79 -0
  22. package/dist/utils.d.ts +0 -1
  23. package/dist/utils.js +2 -4
  24. package/package.json +123 -73
  25. package/.turbo/turbo-build.log +0 -26
  26. package/.turbo/turbo-copy.log +0 -4
  27. package/dist/LICENSE.md +0 -201
  28. package/dist/README.md +0 -98
  29. package/dist/actor.d.ts.map +0 -1
  30. package/dist/actor.js.map +0 -1
  31. package/dist/charging.d.ts.map +0 -1
  32. package/dist/charging.js.map +0 -1
  33. package/dist/configuration.d.ts.map +0 -1
  34. package/dist/configuration.js.map +0 -1
  35. package/dist/index.d.ts.map +0 -1
  36. package/dist/index.js.map +0 -1
  37. package/dist/key_value_store.d.ts.map +0 -1
  38. package/dist/key_value_store.js.map +0 -1
  39. package/dist/package.json +0 -75
  40. package/dist/platform_event_manager.d.ts.map +0 -1
  41. package/dist/platform_event_manager.js.map +0 -1
  42. package/dist/proxy_configuration.d.ts.map +0 -1
  43. package/dist/proxy_configuration.js.map +0 -1
  44. package/dist/utils.d.ts.map +0 -1
  45. package/dist/utils.js.map +0 -1
@@ -1,7 +1,6 @@
1
1
  import { KeyValueStore as CoreKeyValueStore } from '@crawlee/core';
2
+ import { KeyValueStoreClient as RemoteKeyValueStoreClient } from 'apify-client';
2
3
  import { createHmacSignature } from '@apify/utilities';
3
- // @ts-ignore newer crawlee versions already declare this method in core
4
- const { getPublicUrl } = CoreKeyValueStore.prototype;
5
4
  /**
6
5
  * @inheritDoc
7
6
  */
@@ -9,15 +8,27 @@ export class KeyValueStore extends CoreKeyValueStore {
9
8
  /**
10
9
  * Returns a URL for the given key that may be used to publicly
11
10
  * access the value in the remote key-value store.
11
+ *
12
+ * On the Apify platform the URL is signed with the store's
13
+ * `urlSigningSecretKey` so that anyone with the URL can read the record
14
+ * without authentication. Locally we delegate to crawlee's default
15
+ * implementation (which produces a `file://` URL or returns `undefined`).
12
16
  */
13
- getPublicUrl(key) {
17
+ async getPublicUrl(key) {
14
18
  const config = this.config;
15
- if (!config.get('isAtHome') && getPublicUrl) {
16
- return getPublicUrl.call(this, key);
19
+ // Detect a remote (Apify) store by its client type rather than by
20
+ // `isAtHome`, so that a `forceCloud` store opened locally still gets a
21
+ // signed Apify URL (matching the platform behaviour). `client` is
22
+ // `private` on `CoreKeyValueStore`, so bypass the visibility check.
23
+ const { client } = this;
24
+ const isLocalStore = !(client instanceof RemoteKeyValueStoreClient);
25
+ if (isLocalStore) {
26
+ return super.getPublicUrl(key);
17
27
  }
18
- const publicUrl = new URL(`${config.get('apiPublicBaseUrl')}/v2/key-value-stores/${this.id}/records/${key}`);
19
- if (this.storageObject?.urlSigningSecretKey) {
20
- publicUrl.searchParams.append('signature', createHmacSignature(this.storageObject.urlSigningSecretKey, key));
28
+ const publicUrl = new URL(`${config.apiPublicBaseUrl}/v2/key-value-stores/${this.id}/records/${key}`);
29
+ const metadata = (await client.getMetadata());
30
+ if (metadata?.urlSigningSecretKey) {
31
+ publicUrl.searchParams.append('signature', createHmacSignature(metadata.urlSigningSecretKey, key));
21
32
  }
22
33
  return publicUrl.toString();
23
34
  }
@@ -28,6 +39,3 @@ export class KeyValueStore extends CoreKeyValueStore {
28
39
  return super.open(storeIdOrName, options);
29
40
  }
30
41
  }
31
- // @ts-ignore newer crawlee versions already declare this method in core
32
- CoreKeyValueStore.prototype.getPublicUrl = KeyValueStore.prototype.getPublicUrl;
33
- //# sourceMappingURL=key_value_store.js.map
@@ -56,4 +56,3 @@ export declare class PlatformEventManager extends EventManager {
56
56
  */
57
57
  close(): Promise<void>;
58
58
  }
59
- //# sourceMappingURL=platform_event_manager.d.ts.map
@@ -46,7 +46,9 @@ export class PlatformEventManager extends EventManager {
46
46
  /** Websocket connection to Actor events. */
47
47
  eventsWs;
48
48
  constructor(config = Configuration.getGlobalConfig()) {
49
- super();
49
+ super({
50
+ persistStateIntervalMillis: config.persistStateIntervalMillis,
51
+ });
50
52
  this.config = config;
51
53
  }
52
54
  /**
@@ -58,7 +60,7 @@ export class PlatformEventManager extends EventManager {
58
60
  return;
59
61
  }
60
62
  await super.init();
61
- const eventsWsUrl = this.config.get('actorEventsWsUrl');
63
+ const eventsWsUrl = this.config.actorEventsWsUrl;
62
64
  // Locally there is no web socket to connect, so just print a log message.
63
65
  if (!eventsWsUrl) {
64
66
  this.log.debug(`Environment variable ${ACTOR_ENV_VARS.EVENTS_WEBSOCKET_URL} is not set, no events from Apify platform will be emitted.`);
@@ -87,8 +89,7 @@ export class PlatformEventManager extends EventManager {
87
89
  });
88
90
  this.eventsWs.on('error', (err) => {
89
91
  // Don't print this error as this happens in the case of very short Actor.main().
90
- if (err.message ===
91
- 'WebSocket was closed before the connection was established')
92
+ if (err.message === 'WebSocket was closed before the connection was established')
92
93
  return;
93
94
  this.log.exception(err, 'web socket connection failed');
94
95
  });
@@ -110,4 +111,3 @@ export class PlatformEventManager extends EventManager {
110
111
  this.eventsWs?.close();
111
112
  }
112
113
  }
113
- //# sourceMappingURL=platform_event_manager.js.map
@@ -1,6 +1,8 @@
1
- import type { ProxyConfigurationOptions as CoreProxyConfigurationOptions, ProxyInfo as CoreProxyInfo } from '@crawlee/core';
1
+ import type { ProxyConfigurationOptions as CoreProxyConfigurationOptions } from '@crawlee/core';
2
2
  import { ProxyConfiguration as CoreProxyConfiguration } from '@crawlee/core';
3
+ import type { ProxyInfo as CoreProxyInfo } from '@crawlee/types';
3
4
  import { Configuration } from './configuration.js';
5
+ type NewUrlOptions = Parameters<CoreProxyConfiguration['newProxyInfo']>[0];
4
6
  export interface ProxyConfigurationOptions extends CoreProxyConfigurationOptions {
5
7
  /**
6
8
  * User's password for the proxy. By default, it is taken from the `APIFY_PROXY_PASSWORD`
@@ -24,6 +26,12 @@ export interface ProxyConfigurationOptions extends CoreProxyConfigurationOptions
24
26
  * on the Apify cloud, or when using the [Apify CLI](https://github.com/apify/apify-cli).
25
27
  */
26
28
  countryCode?: string;
29
+ /**
30
+ * If set, all proxied requests will use IP addresses geolocated to the specified subdivision (e.g. US state).
31
+ * Requires `countryCode` to be set. The value must follow the ISO 3166-2 subdivision code format,
32
+ * e.g. `'CA'` for California when `countryCode` is `'US'`.
33
+ */
34
+ subdivisionCode?: string;
27
35
  /**
28
36
  * Same option as `groups` which can be used to
29
37
  * configure the proxy by UI input schema. You should use the `groups` option in your crawler code.
@@ -35,10 +43,15 @@ export interface ProxyConfigurationOptions extends CoreProxyConfigurationOptions
35
43
  */
36
44
  apifyProxyCountry?: string;
37
45
  /**
38
- * Multiple different ProxyConfigurationOptions stratified into tiers. Crawlee crawlers will switch between those tiers
39
- * based on the blocked request statistics.
46
+ * Same option as `subdivisionCode` which can be used to
47
+ * configure the proxy by UI input schema. You should use the `subdivisionCode` option in your crawler code.
48
+ */
49
+ apifyProxySubdivision?: string;
50
+ /**
51
+ * As part of the init process, we verify the configuration by checking the proxy status endpoint.
52
+ * This can make the init slower, to opt-out of this, use `checkAccess: false` (defaults to `true`).
40
53
  */
41
- tieredProxyConfig?: Omit<ProxyConfigurationOptions, keyof CoreProxyConfigurationOptions | 'tieredProxyConfig'>[];
54
+ checkAccess?: boolean;
42
55
  }
43
56
  /**
44
57
  * The main purpose of the ProxyInfo object is to provide information
@@ -64,9 +77,6 @@ export interface ProxyConfigurationOptions extends CoreProxyConfigurationOptions
64
77
  * requestHandler({ proxyInfo }) {
65
78
  * // Getting used proxy URL
66
79
  * const proxyUrl = proxyInfo.url;
67
- *
68
- * // Getting ID of used Session
69
- * const sessionIdentifier = proxyInfo.sessionId;
70
80
  * }
71
81
  * })
72
82
  *
@@ -77,7 +87,7 @@ export interface ProxyInfo extends CoreProxyInfo {
77
87
  * An array of proxy groups to be used by the [Apify Proxy](https://docs.apify.com/proxy).
78
88
  * If not provided, the proxy will select the groups automatically.
79
89
  */
80
- groups: string[];
90
+ groups?: string[];
81
91
  /**
82
92
  * If set and relevant proxies are available in your Apify account, all proxied requests will
83
93
  * use IP addresses that are geolocated to the specified country. For example `GB` for IPs
@@ -89,6 +99,11 @@ export interface ProxyInfo extends CoreProxyInfo {
89
99
  * This parameter is optional, by default, the proxy uses all available proxy servers from all countries.
90
100
  */
91
101
  countryCode?: string;
102
+ /**
103
+ * If set, all proxied requests use IP addresses geolocated to the specified subdivision (e.g. US state).
104
+ * ISO 3166-2 subdivision code, e.g. `'CA'` when `countryCode` is `'US'`.
105
+ */
106
+ subdivisionCode?: string;
92
107
  /**
93
108
  * User's password for the proxy. By default, it is taken from the `APIFY_PROXY_PASSWORD`
94
109
  * environment variable, which is automatically set by the system when running the Actors
@@ -133,6 +148,7 @@ export declare class ProxyConfiguration extends CoreProxyConfiguration {
133
148
  readonly config: Configuration;
134
149
  private groups;
135
150
  private countryCode?;
151
+ private subdivisionCode?;
136
152
  private password?;
137
153
  private hostname;
138
154
  private port?;
@@ -149,45 +165,26 @@ export declare class ProxyConfiguration extends CoreProxyConfiguration {
149
165
  * You should use the {@link createProxyConfiguration} function to create a pre-initialized
150
166
  * `ProxyConfiguration` instance instead of calling this manually.
151
167
  */
152
- initialize(): Promise<boolean>;
153
- /**
154
- * This function creates a new {@link ProxyInfo} info object.
155
- * It is used by CheerioCrawler and PuppeteerCrawler to generate proxy URLs and also to allow the user to inspect
156
- * the currently used proxy via the requestHandler parameter `proxyInfo`.
157
- * Use it if you want to work with a rich representation of a proxy URL.
158
- * If you need the URL string only, use {@link ProxyConfiguration.newUrl}.
159
- * @param [sessionId]
160
- * Represents the identifier of user {@link Session} that can be managed by the {@link SessionPool} or
161
- * you can use the Apify Proxy [Session](https://docs.apify.com/proxy#sessions) identifier.
162
- * When the provided sessionId is a number, it's converted to a string. Property sessionId of
163
- * {@link ProxyInfo} is always returned as a type string.
164
- *
165
- * All the HTTP requests going through the proxy with the same session identifier
166
- * will use the same target proxy server (i.e. the same IP address).
167
- * The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`.
168
- * @return Represents information about used proxy and its configuration.
169
- */
170
- newProxyInfo(sessionId?: string | number, options?: Parameters<CoreProxyConfiguration['newProxyInfo']>[1]): Promise<ProxyInfo | undefined>;
171
- /**
172
- * Returns a new proxy URL based on provided configuration options and the `sessionId` parameter.
173
- * @param [sessionId]
174
- * Represents the identifier of user {@link Session} that can be managed by the {@link SessionPool} or
175
- * you can use the Apify Proxy [Session](https://docs.apify.com/proxy#sessions) identifier.
176
- * When the provided sessionId is a number, it's converted to a string.
177
- *
178
- * All the HTTP requests going through the proxy with the same session identifier
179
- * will use the same target proxy server (i.e. the same IP address).
180
- * The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`.
181
- * @return A string with a proxy URL, including authentication credentials and port number.
182
- * For example, `http://bob:password123@proxy.example.com:8000`
168
+ initialize(options?: {
169
+ checkAccess?: boolean;
170
+ }): Promise<boolean>;
171
+ /**
172
+ * Returns a new {@link ProxyInfo} object with a fresh proxy URL. Each call mints an
173
+ * independent URL; for Apify Proxy a random session id is embedded so consecutive
174
+ * calls resolve to different IPs.
175
+ */
176
+ newProxyInfo(options?: NewUrlOptions): Promise<ProxyInfo | undefined>;
177
+ /**
178
+ * Returns a new proxy URL. For Apify Proxy, each call generates a URL with a fresh
179
+ * random session id, so consecutive calls return independent URLs. For custom
180
+ * `proxyUrls`, the URLs are rotated round-robin.
183
181
  */
184
- newUrl(sessionId?: string | number, options?: Parameters<CoreProxyConfiguration['newUrl']>[1]): Promise<string | undefined>;
185
- protected _generateTieredProxyUrls(tieredProxyConfig: NonNullable<ProxyConfigurationOptions['tieredProxyConfig']>, globalOptions: ProxyConfigurationOptions): string[][];
182
+ newUrl(options?: NewUrlOptions): Promise<string | undefined>;
186
183
  /**
187
184
  * Returns proxy username.
188
185
  */
189
- protected _getUsername(sessionId?: string): string;
190
- protected composeDefaultUrl(sessionId?: string): string;
186
+ protected _getUsername(sessionId: string): string;
187
+ protected composeDefaultUrl(sessionId: string): string;
191
188
  /**
192
189
  * Fetch & set the proxy password from Apify API if an Apify token is provided.
193
190
  */
@@ -212,4 +209,4 @@ export declare class ProxyConfiguration extends CoreProxyConfiguration {
212
209
  */
213
210
  protected _throwCannotCombineCustomWithApify(): void;
214
211
  }
215
- //# sourceMappingURL=proxy_configuration.d.ts.map
212
+ export {};
@@ -5,11 +5,15 @@ import { APIFY_ENV_VARS, APIFY_PROXY_VALUE_REGEX } from '@apify/consts';
5
5
  import { cryptoRandomObjectId } from '@apify/utilities';
6
6
  import { Actor } from './actor.js';
7
7
  import { Configuration } from './configuration.js';
8
- // https://docs.apify.com/proxy/datacenter-proxy#username-parameters
9
- const MAX_SESSION_ID_LENGTH = 50;
10
8
  const CHECK_ACCESS_REQUEST_TIMEOUT_MILLIS = 4_000;
11
9
  const CHECK_ACCESS_MAX_ATTEMPTS = 2;
12
10
  const COUNTRY_CODE_REGEX = /^[A-Z]{2}$/;
11
+ // ISO 3166-2 subdivision codes are 1–3 uppercase alphanumeric characters, e.g. 'CA' (California), 'NSW' (New South Wales), '9' (Wien, AT-9)
12
+ const SUBDIVISION_CODE_REGEX = /^[A-Z0-9]{1,3}$/;
13
+ // Apify Proxy session identifier embedded in the proxy username — opaque to
14
+ // users; a fresh one is minted for every URL the SDK hands out so that the
15
+ // returned proxy URLs are independent.
16
+ const SESSION_ID_LENGTH = 12;
13
17
  /**
14
18
  * Configures connection to a proxy server with the provided options. Proxy servers are used to prevent target websites from blocking
15
19
  * your crawlers based on IP address rate limits or blacklists. Setting proxy configuration in your crawlers automatically configures
@@ -47,6 +51,7 @@ export class ProxyConfiguration extends CoreProxyConfiguration {
47
51
  config;
48
52
  groups;
49
53
  countryCode;
54
+ subdivisionCode;
50
55
  password;
51
56
  hostname;
52
57
  port;
@@ -67,35 +72,37 @@ export class ProxyConfiguration extends CoreProxyConfiguration {
67
72
  apifyProxyGroups: ow.optional.array.ofType(ow.string.matches(APIFY_PROXY_VALUE_REGEX)),
68
73
  countryCode: ow.optional.string.matches(COUNTRY_CODE_REGEX),
69
74
  apifyProxyCountry: ow.optional.string.matches(COUNTRY_CODE_REGEX),
75
+ subdivisionCode: ow.optional.string.matches(SUBDIVISION_CODE_REGEX),
76
+ apifyProxySubdivision: ow.optional.string.matches(SUBDIVISION_CODE_REGEX),
70
77
  password: ow.optional.string,
71
- tieredProxyUrls: ow.optional.array.ofType(ow.array.ofType(ow.string)),
72
- tieredProxyConfig: ow.optional.array.ofType(ow.object),
73
78
  }));
74
- const { groups = [], apifyProxyGroups = [], countryCode, apifyProxyCountry, password = config.get('proxyPassword'), tieredProxyConfig, tieredProxyUrls, } = options;
75
- this.tieredProxyUrls ??= tieredProxyUrls;
76
- if (tieredProxyConfig) {
77
- this.tieredProxyUrls = this._generateTieredProxyUrls(tieredProxyConfig, options);
78
- }
79
+ const { groups = [], apifyProxyGroups = [], countryCode, apifyProxyCountry, subdivisionCode, apifyProxySubdivision, password = config.proxyPassword, } = options;
79
80
  const groupsToUse = groups.length ? groups : apifyProxyGroups;
80
81
  const countryCodeToUse = countryCode || apifyProxyCountry;
81
- const hostname = config.get('proxyHostname');
82
- const port = config.get('proxyPort');
82
+ const subdivisionCodeToUse = subdivisionCode || apifyProxySubdivision;
83
+ const hostname = config.proxyHostname;
84
+ const port = config.proxyPort;
85
+ // The Apify Proxy subdivision is expressed as part of the country
86
+ // username parameter (`country-US_CA`), so a country is required.
87
+ if (subdivisionCodeToUse && !countryCodeToUse) {
88
+ throw new Error('ProxyConfiguration: "subdivisionCode" requires "countryCode" to be set.');
89
+ }
83
90
  // Validation
84
- if ((proxyUrls || newUrlFunction) &&
85
- (groupsToUse.length || countryCodeToUse)) {
91
+ if ((proxyUrls || newUrlFunction) && (groupsToUse.length || countryCodeToUse || subdivisionCodeToUse)) {
86
92
  this._throwCannotCombineCustomWithApify();
87
93
  }
88
94
  if (proxyUrls && newUrlFunction)
89
95
  this._throwCannotCombineCustomMethods();
90
96
  this.groups = groupsToUse;
91
97
  this.countryCode = countryCodeToUse;
98
+ this.subdivisionCode = subdivisionCodeToUse;
92
99
  this.password = password;
93
100
  this.hostname = hostname;
94
101
  this.port = port;
95
102
  this.usesApifyProxy = !this.proxyUrls && !this.newUrlFunction;
96
- if (proxyUrls && proxyUrls.some((url) => url.includes('apify.com'))) {
103
+ if (proxyUrls && proxyUrls.some((url) => url?.includes('apify.com'))) {
97
104
  this.log.warning('Some Apify proxy features may work incorrectly. Please consider setting up Apify properties instead of `proxyUrls`.\n' +
98
- 'See https://sdk.apify.com/docs/guides/proxy-management#apify-proxy-configuration');
105
+ 'See https://docs.apify.com/sdk/js/docs/concepts/proxy-management#apify-proxy-configuration');
99
106
  }
100
107
  }
101
108
  /**
@@ -106,7 +113,7 @@ export class ProxyConfiguration extends CoreProxyConfiguration {
106
113
  * You should use the {@link createProxyConfiguration} function to create a pre-initialized
107
114
  * `ProxyConfiguration` instance instead of calling this manually.
108
115
  */
109
- async initialize() {
116
+ async initialize(options) {
110
117
  if (this.usesApifyProxy) {
111
118
  if (!this.password) {
112
119
  await this._setPasswordIfToken();
@@ -124,111 +131,66 @@ export class ProxyConfiguration extends CoreProxyConfiguration {
124
131
  `so that the SDK can fetch the proxy password from Apify API, when ${APIFY_ENV_VARS.PROXY_PASSWORD} is not defined`);
125
132
  }
126
133
  }
127
- return this._checkAccess();
134
+ if (options?.checkAccess !== false) {
135
+ return this._checkAccess();
136
+ }
128
137
  }
129
138
  return true;
130
139
  }
131
140
  /**
132
- * This function creates a new {@link ProxyInfo} info object.
133
- * It is used by CheerioCrawler and PuppeteerCrawler to generate proxy URLs and also to allow the user to inspect
134
- * the currently used proxy via the requestHandler parameter `proxyInfo`.
135
- * Use it if you want to work with a rich representation of a proxy URL.
136
- * If you need the URL string only, use {@link ProxyConfiguration.newUrl}.
137
- * @param [sessionId]
138
- * Represents the identifier of user {@link Session} that can be managed by the {@link SessionPool} or
139
- * you can use the Apify Proxy [Session](https://docs.apify.com/proxy#sessions) identifier.
140
- * When the provided sessionId is a number, it's converted to a string. Property sessionId of
141
- * {@link ProxyInfo} is always returned as a type string.
142
- *
143
- * All the HTTP requests going through the proxy with the same session identifier
144
- * will use the same target proxy server (i.e. the same IP address).
145
- * The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`.
146
- * @return Represents information about used proxy and its configuration.
141
+ * Returns a new {@link ProxyInfo} object with a fresh proxy URL. Each call mints an
142
+ * independent URL; for Apify Proxy a random session id is embedded so consecutive
143
+ * calls resolve to different IPs.
147
144
  */
148
- async newProxyInfo(sessionId, options) {
149
- if (typeof sessionId === 'number')
150
- sessionId = `${sessionId}`;
151
- ow(sessionId, ow.optional.string
152
- .maxLength(MAX_SESSION_ID_LENGTH)
153
- .matches(APIFY_PROXY_VALUE_REGEX));
154
- const proxyInfo = await super.newProxyInfo(sessionId, options);
155
- if (!proxyInfo)
156
- return proxyInfo;
157
- const { groups, countryCode, password, port, hostname } = (this.usesApifyProxy ? this : new URL(proxyInfo.url));
158
- return {
159
- ...proxyInfo,
160
- sessionId,
161
- groups,
162
- countryCode,
163
- // this.password is not encoded, but the password from the URL will be, we need to normalize
164
- password: this.usesApifyProxy
165
- ? (password ?? '')
166
- : decodeURIComponent(password),
167
- hostname,
168
- port: port,
145
+ async newProxyInfo(options) {
146
+ const url = await this.newUrl(options);
147
+ if (!url)
148
+ return undefined;
149
+ const parsed = new URL(url);
150
+ const result = {
151
+ url,
152
+ username: decodeURIComponent(parsed.username),
153
+ password: decodeURIComponent(parsed.password),
154
+ hostname: parsed.hostname,
155
+ port: parsed.port,
169
156
  };
157
+ if (this.usesApifyProxy) {
158
+ result.groups = this.groups;
159
+ if (this.countryCode !== undefined)
160
+ result.countryCode = this.countryCode;
161
+ if (this.subdivisionCode !== undefined)
162
+ result.subdivisionCode = this.subdivisionCode;
163
+ }
164
+ return result;
170
165
  }
171
166
  /**
172
- * Returns a new proxy URL based on provided configuration options and the `sessionId` parameter.
173
- * @param [sessionId]
174
- * Represents the identifier of user {@link Session} that can be managed by the {@link SessionPool} or
175
- * you can use the Apify Proxy [Session](https://docs.apify.com/proxy#sessions) identifier.
176
- * When the provided sessionId is a number, it's converted to a string.
177
- *
178
- * All the HTTP requests going through the proxy with the same session identifier
179
- * will use the same target proxy server (i.e. the same IP address).
180
- * The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`.
181
- * @return A string with a proxy URL, including authentication credentials and port number.
182
- * For example, `http://bob:password123@proxy.example.com:8000`
167
+ * Returns a new proxy URL. For Apify Proxy, each call generates a URL with a fresh
168
+ * random session id, so consecutive calls return independent URLs. For custom
169
+ * `proxyUrls`, the URLs are rotated round-robin.
183
170
  */
184
- async newUrl(sessionId, options) {
185
- if (typeof sessionId === 'number')
186
- sessionId = `${sessionId}`;
187
- ow(sessionId, ow.optional.string
188
- .maxLength(MAX_SESSION_ID_LENGTH)
189
- .matches(APIFY_PROXY_VALUE_REGEX));
190
- if (this.newUrlFunction) {
191
- return ((await this._callNewUrlFunction(sessionId, {
192
- request: options?.request,
193
- })) ?? undefined);
194
- }
195
- if (this.proxyUrls) {
196
- return this._handleCustomUrl(sessionId);
171
+ async newUrl(options) {
172
+ if (this.newUrlFunction || this.proxyUrls) {
173
+ return super.newUrl(options);
197
174
  }
198
- if (this.tieredProxyUrls) {
199
- return (this._handleTieredUrl(sessionId ?? cryptoRandomObjectId(6), options).proxyUrl ?? undefined);
200
- }
201
- return this.composeDefaultUrl(sessionId);
202
- }
203
- _generateTieredProxyUrls(tieredProxyConfig, globalOptions) {
204
- return tieredProxyConfig.map((config) => [
205
- new ProxyConfiguration({
206
- ...globalOptions,
207
- ...config,
208
- tieredProxyConfig: undefined,
209
- }).composeDefaultUrl(),
210
- ]);
175
+ return this.composeDefaultUrl(cryptoRandomObjectId(SESSION_ID_LENGTH));
211
176
  }
212
177
  /**
213
178
  * Returns proxy username.
214
179
  */
215
180
  _getUsername(sessionId) {
216
- let username;
217
- const { groups, countryCode } = this;
181
+ const { groups, countryCode, subdivisionCode } = this;
218
182
  const parts = [];
219
183
  if (groups && groups.length) {
220
184
  parts.push(`groups-${groups.join('+')}`);
221
185
  }
222
- if (sessionId) {
223
- parts.push(`session-${sessionId}`);
186
+ parts.push(`session-${sessionId}`);
187
+ if (subdivisionCode) {
188
+ parts.push(`country-${countryCode}_${subdivisionCode}`);
224
189
  }
225
- if (countryCode) {
190
+ else if (countryCode) {
226
191
  parts.push(`country-${countryCode}`);
227
192
  }
228
- username = parts.join(',');
229
- if (parts.length === 0)
230
- username = 'auto';
231
- return username;
193
+ return parts.join(',');
232
194
  }
233
195
  composeDefaultUrl(sessionId) {
234
196
  const username = this._getUsername(sessionId);
@@ -243,7 +205,7 @@ export class ProxyConfiguration extends CoreProxyConfiguration {
243
205
  */
244
206
  // TODO: Make this private
245
207
  async _setPasswordIfToken() {
246
- const token = this.config.get('token');
208
+ const { token } = this.config;
247
209
  if (!token)
248
210
  return;
249
211
  try {
@@ -291,7 +253,7 @@ export class ProxyConfiguration extends CoreProxyConfiguration {
291
253
  * Apify Proxy can be down for a second or a minute, but this should not crash processes.
292
254
  */
293
255
  async _fetchStatus() {
294
- const proxyStatusUrl = this.config.get('proxyStatusUrl', 'http://proxy.apify.com');
256
+ const { proxyStatusUrl } = this.config;
295
257
  const requestOpts = {
296
258
  url: `${proxyStatusUrl}/?format=json`,
297
259
  proxyUrl: await this.newUrl(),
@@ -316,7 +278,7 @@ export class ProxyConfiguration extends CoreProxyConfiguration {
316
278
  _throwCannotCombineCustomWithApify() {
317
279
  throw new Error('Cannot combine custom proxies with Apify Proxy! ' +
318
280
  'It is not allowed to set "options.proxyUrls" or "options.newUrlFunction" combined with ' +
319
- '"options.groups" or "options.apifyProxyGroups" and "options.countryCode" or "options.apifyProxyCountry".');
281
+ '"options.groups", "options.apifyProxyGroups", "options.countryCode", "options.apifyProxyCountry", ' +
282
+ '"options.subdivisionCode" or "options.apifyProxySubdivision".');
320
283
  }
321
284
  }
322
- //# sourceMappingURL=proxy_configuration.js.map
@@ -0,0 +1,58 @@
1
+ import type { Constructor, IStorage, StorageOpenOptions } from '@crawlee/core';
2
+ import type { StorageClient } from '@crawlee/types';
3
+ import type { Configuration } from './configuration.js';
4
+ export interface OpenStorageOptions {
5
+ /**
6
+ * If set to `true` then the cloud storage is used even if the `CRAWLEE_STORAGE_DIR`
7
+ * environment variable is set. This way it is possible to combine local and cloud storage.
8
+ * @default false
9
+ */
10
+ forceCloud?: boolean;
11
+ }
12
+ /**
13
+ * Identifies a storage by its alias from the Actor's schema storages
14
+ * (resolved via the `ACTOR_STORAGES_JSON` environment variable).
15
+ */
16
+ export interface StorageAlias {
17
+ alias: string;
18
+ }
19
+ /**
20
+ * Identifies a storage by its platform ID.
21
+ */
22
+ export interface StorageId {
23
+ id: string;
24
+ }
25
+ /**
26
+ * Identifies a storage by its name.
27
+ */
28
+ export interface StorageName {
29
+ name: string;
30
+ }
31
+ /**
32
+ * Identifies a storage to open. Can be:
33
+ * - A plain `string` for backward compatibility (treated as ID or name)
34
+ * - `{ alias: string }` to resolve from the Actor's schema storages (`ACTOR_STORAGES_JSON`)
35
+ * - `{ id: string }` to open by explicit platform ID
36
+ * - `{ name: string }` to open by explicit name
37
+ */
38
+ export type StorageIdentifier = string | StorageAlias | StorageId | StorageName;
39
+ /**
40
+ * Identifies a storage to open, without alias support.
41
+ * Used for key-value stores and request queues, which do not support aliases.
42
+ * Can be:
43
+ * - A plain `string` for backward compatibility (treated as ID or name)
44
+ * - `{ id: string }` to open by explicit platform ID
45
+ * - `{ name: string }` to open by explicit name
46
+ */
47
+ export type StorageIdentifierWithoutAlias = string | StorageId | StorageName;
48
+ export interface OpenStorageContext {
49
+ config: Configuration;
50
+ client?: StorageClient;
51
+ purgedStorageAliases: Set<string>;
52
+ }
53
+ /**
54
+ * Opens a storage by its identifier, handling Apify alias resolution and local purging.
55
+ */
56
+ export declare function openStorage<T extends IStorage>(storageClass: Constructor<T> & {
57
+ open(id?: string | null, options?: StorageOpenOptions): Promise<T>;
58
+ }, identifier: StorageIdentifier | null | undefined, context: OpenStorageContext): Promise<T>;
@@ -0,0 +1,79 @@
1
+ import { ApifyStorageClient } from './apify_storage_client.js';
2
/**
 * Maps a storage class name to the key under which its aliases are listed
 * in the parsed `ACTOR_STORAGES_JSON` payload.
 */
const STORAGE_TYPE_KEYS = {
    Dataset: 'datasets',
    KeyValueStore: 'keyValueStores',
    RequestQueue: 'requestQueues',
};
/**
 * Cache of successfully parsed and validated `ACTOR_STORAGES_JSON` payloads,
 * keyed by the raw JSON string, so the same string is parsed only once.
 */
const parsedStoragesJson = new Map();
/**
 * Parses the raw `ACTOR_STORAGES_JSON` string, reusing a cached result when available.
 *
 * @param {string} storagesJson Raw JSON text.
 * @returns {object} The parsed payload (always a plain, non-array object).
 * @throws {Error} When the text is not valid JSON (the original `SyntaxError` is
 *   attached as `cause`) or when it does not describe a JSON object.
 */
function parseStoragesJson(storagesJson) {
    const cached = parsedStoragesJson.get(storagesJson);
    if (cached !== undefined) {
        return cached;
    }
    let parsed;
    try {
        parsed = JSON.parse(storagesJson);
    } catch (cause) {
        throw new Error(`Failed to parse ACTOR_STORAGES_JSON environment variable: ${storagesJson}`, { cause });
    }
    // Valid JSON such as `null`, `42` or `[]` would otherwise surface later as an
    // unrelated TypeError (or as a misleading "alias not found"), so reject it here.
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
        throw new Error(`ACTOR_STORAGES_JSON environment variable must be a JSON object: ${storagesJson}`);
    }
    parsedStoragesJson.set(storagesJson, parsed);
    return parsed;
}
/**
 * Resolves a {@link StorageIdentifier} to a plain string ID or name
 * that can be passed to crawlee v4's `<Storage>.open()`.
 *
 * @param {string} storageType Storage class name; looked up in `STORAGE_TYPE_KEYS`.
 * @param {string | object | null | undefined} identifier A plain string, or an object
 *   carrying `id`, `name` or `alias`.
 * @param {object} config Configuration; only `isAtHome` and `actorStoragesJson` are read.
 * @returns {string | undefined} The resolved ID or name, or `undefined` when no
 *   identifier was given.
 * @throws {Error} When an alias has to be resolved on the platform but
 *   `ACTOR_STORAGES_JSON` is missing, malformed, or does not declare the alias.
 */
function resolveStorageIdentifier(storageType, identifier, config) {
    if (identifier === null || identifier === undefined) {
        return undefined;
    }
    if (typeof identifier === 'string') {
        return identifier;
    }
    if ('id' in identifier) {
        return identifier.id;
    }
    if ('name' in identifier) {
        return identifier.name;
    }
    // { alias: string }
    const storagesJson = config.actorStoragesJson;
    if (config.isAtHome && storagesJson) {
        const storages = parseStoragesJson(storagesJson);
        const typeKey = STORAGE_TYPE_KEYS[storageType];
        const declared = storages[typeKey];
        const aliases = declared !== null && typeof declared === 'object' ? declared : {};
        // Own-property check: an alias such as "toString" or "constructor" must not
        // resolve to a member inherited from Object.prototype.
        if (Object.hasOwn(aliases, identifier.alias) && aliases[identifier.alias]) {
            return aliases[identifier.alias];
        }
        throw new Error(`Storage alias "${identifier.alias}" not found in ACTOR_STORAGES_JSON for storage type "${storageType}". ` +
            `Available aliases: ${Object.keys(aliases).join(', ') || '(none)'}`);
    }
    // When using local storage, just use the alias as a name.
    // When using platform storage, we can't just make up a name — the alias must be
    // in ACTOR_STORAGES_JSON.
    if (config.isAtHome) {
        throw new Error(`Storage alias "${identifier.alias}" cannot be resolved because ACTOR_STORAGES_JSON is not set. ` +
            `Aliases are only available for storages declared in the Actor's schema.`);
    }
    return identifier.alias;
}
55
/**
 * Opens the storage designated by `identifier`.
 *
 * Alias identifiers are translated through `resolveStorageIdentifier`. Outside
 * of the platform (`config.isAtHome` falsy) with `config.purgeOnStart` enabled,
 * an aliased storage is opened and dropped the first time its alias is seen in
 * `context.purgedStorageAliases`, and then opened again.
 *
 * @param storageClass Storage class exposing a static `open()`; its `name` selects the storage type.
 * @param identifier String, `{ id }`, `{ name }`, `{ alias }`, or `null`/`undefined`.
 * @param context Holds `config`, the optional storage `client` and the `purgedStorageAliases` set.
 * @returns Whatever `storageClass.open()` returns for the resolved identifier.
 * @throws {Error} When an alias is used outside of the platform with an `ApifyStorageClient`.
 */
export async function openStorage(storageClass, identifier, context) {
    const usesAlias = typeof identifier === 'object' && identifier !== null && 'alias' in identifier;
    if (usesAlias && !context.config.isAtHome && context.client instanceof ApifyStorageClient) {
        throw new Error('The `alias` option is not allowed for Apify-based storages running outside of Apify');
    }
    const target = resolveStorageIdentifier(storageClass.name, identifier, context.config) ?? null;
    const open = () => storageClass.open(target, { storageClient: context.client });
    // When running locally, purge aliased storages on first open
    // (similar to how crawlee purges default storages on start).
    if (usesAlias && !context.config.isAtHome && context.config.purgeOnStart) {
        const { alias } = identifier;
        if (!context.purgedStorageAliases.has(alias)) {
            // The alias is recorded before the drop, exactly once per context.
            context.purgedStorageAliases.add(alias);
            const staleStorage = await open();
            await staleStorage.drop();
        }
    }
    return open();
}
package/dist/utils.d.ts CHANGED
@@ -18,4 +18,3 @@ export declare function checkCrawleeVersion(): void;
18
18
  * @ignore
19
19
  */
20
20
  export declare function printOutdatedSdkWarning(): void;
21
- //# sourceMappingURL=utils.d.ts.map