apify 3.7.3-beta.9 → 4.0.0-beta.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,21 +1,19 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.ProxyConfiguration = void 0;
4
- const tslib_1 = require("tslib");
5
- const core_1 = require("@crawlee/core");
6
- const utils_1 = require("@crawlee/utils");
7
- const ow_1 = tslib_1.__importDefault(require("ow"));
8
- const consts_1 = require("@apify/consts");
9
- const utilities_1 = require("@apify/utilities");
10
- const actor_js_1 = require("./actor.js");
11
- const configuration_js_1 = require("./configuration.js");
12
- // https://docs.apify.com/proxy/datacenter-proxy#username-parameters
13
- const MAX_SESSION_ID_LENGTH = 50;
14
- const CHECK_ACCESS_REQUEST_TIMEOUT_MILLIS = 4000;
1
+ import { ProxyConfiguration as CoreProxyConfiguration } from '@crawlee/core';
2
+ import { gotScraping } from 'got-scraping';
3
+ import ow from 'ow';
4
+ import { APIFY_ENV_VARS, APIFY_PROXY_VALUE_REGEX } from '@apify/consts';
5
+ import { cryptoRandomObjectId } from '@apify/utilities';
6
+ import { Actor } from './actor.js';
7
+ import { Configuration } from './configuration.js';
8
+ const CHECK_ACCESS_REQUEST_TIMEOUT_MILLIS = 4_000;
15
9
  const CHECK_ACCESS_MAX_ATTEMPTS = 2;
16
10
  const COUNTRY_CODE_REGEX = /^[A-Z]{2}$/;
17
11
  // ISO 3166-2 subdivision codes are 1–3 uppercase alphanumeric characters, e.g. 'CA' (California), 'NSW' (New South Wales), '9' (Wien, AT-9)
18
12
  const SUBDIVISION_CODE_REGEX = /^[A-Z0-9]{1,3}$/;
13
+ // Apify Proxy session identifier embedded in the proxy username — opaque to
14
+ // users; a fresh one is minted for every URL the SDK hands out so that the
15
+ // returned proxy URLs are independent.
16
+ const SESSION_ID_LENGTH = 12;
19
17
  /**
20
18
  * Configures connection to a proxy server with the provided options. Proxy servers are used to prevent target websites from blocking
21
19
  * your crawlers based on IP address rate limits or blacklists. Setting proxy configuration in your crawlers automatically configures
@@ -49,86 +47,43 @@ const SUBDIVISION_CODE_REGEX = /^[A-Z0-9]{1,3}$/;
49
47
  * ```
50
48
  * @category Scaling
51
49
  */
52
- class ProxyConfiguration extends core_1.ProxyConfiguration {
50
+ export class ProxyConfiguration extends CoreProxyConfiguration {
51
+ config;
52
+ groups;
53
+ countryCode;
54
+ subdivisionCode;
55
+ password;
56
+ hostname;
57
+ port;
58
+ usesApifyProxy;
53
59
  /**
54
60
  * @internal
55
61
  */
56
- constructor(options = {}, config = configuration_js_1.Configuration.getGlobalConfig()) {
62
+ constructor(options = {}, config = Configuration.getGlobalConfig()) {
57
63
  const { proxyUrls, newUrlFunction, ...rest } = options;
58
64
  super({
59
65
  proxyUrls,
60
66
  newUrlFunction,
61
67
  ['validateRequired']: false,
62
68
  });
63
- Object.defineProperty(this, "config", {
64
- enumerable: true,
65
- configurable: true,
66
- writable: true,
67
- value: config
68
- });
69
- Object.defineProperty(this, "groups", {
70
- enumerable: true,
71
- configurable: true,
72
- writable: true,
73
- value: void 0
74
- });
75
- Object.defineProperty(this, "countryCode", {
76
- enumerable: true,
77
- configurable: true,
78
- writable: true,
79
- value: void 0
80
- });
81
- Object.defineProperty(this, "subdivisionCode", {
82
- enumerable: true,
83
- configurable: true,
84
- writable: true,
85
- value: void 0
86
- });
87
- Object.defineProperty(this, "password", {
88
- enumerable: true,
89
- configurable: true,
90
- writable: true,
91
- value: void 0
92
- });
93
- Object.defineProperty(this, "hostname", {
94
- enumerable: true,
95
- configurable: true,
96
- writable: true,
97
- value: void 0
98
- });
99
- Object.defineProperty(this, "port", {
100
- enumerable: true,
101
- configurable: true,
102
- writable: true,
103
- value: void 0
104
- });
105
- Object.defineProperty(this, "usesApifyProxy", {
106
- enumerable: true,
107
- configurable: true,
108
- writable: true,
109
- value: void 0
110
- });
111
- (0, ow_1.default)(rest, ow_1.default.object.partialShape({
112
- groups: ow_1.default.optional.array.ofType(ow_1.default.string.matches(consts_1.APIFY_PROXY_VALUE_REGEX)),
113
- apifyProxyGroups: ow_1.default.optional.array.ofType(ow_1.default.string.matches(consts_1.APIFY_PROXY_VALUE_REGEX)),
114
- countryCode: ow_1.default.optional.string.matches(COUNTRY_CODE_REGEX),
115
- apifyProxyCountry: ow_1.default.optional.string.matches(COUNTRY_CODE_REGEX),
116
- subdivisionCode: ow_1.default.optional.string.matches(SUBDIVISION_CODE_REGEX),
117
- apifyProxySubdivision: ow_1.default.optional.string.matches(SUBDIVISION_CODE_REGEX),
118
- password: ow_1.default.optional.string,
119
- tieredProxyUrls: ow_1.default.optional.array.ofType(ow_1.default.array.ofType(ow_1.default.string)),
120
- tieredProxyConfig: ow_1.default.optional.array.ofType(ow_1.default.object),
69
+ this.config = config;
70
+ ow(rest, ow.object.exactShape({
71
+ groups: ow.optional.array.ofType(ow.string.matches(APIFY_PROXY_VALUE_REGEX)),
72
+ apifyProxyGroups: ow.optional.array.ofType(ow.string.matches(APIFY_PROXY_VALUE_REGEX)),
73
+ countryCode: ow.optional.string.matches(COUNTRY_CODE_REGEX),
74
+ apifyProxyCountry: ow.optional.string.matches(COUNTRY_CODE_REGEX),
75
+ subdivisionCode: ow.optional.string.matches(SUBDIVISION_CODE_REGEX),
76
+ apifyProxySubdivision: ow.optional.string.matches(SUBDIVISION_CODE_REGEX),
77
+ password: ow.optional.string,
121
78
  }));
122
- const { groups = [], apifyProxyGroups = [], countryCode, apifyProxyCountry, subdivisionCode, apifyProxySubdivision, password = config.get('proxyPassword'), tieredProxyConfig, tieredProxyUrls, } = options;
123
- this.tieredProxyUrls ?? (this.tieredProxyUrls = tieredProxyUrls);
124
- if (tieredProxyConfig) {
125
- this.tieredProxyUrls = this._generateTieredProxyUrls(tieredProxyConfig, options);
126
- }
79
+ const { groups = [], apifyProxyGroups = [], countryCode, apifyProxyCountry, subdivisionCode, apifyProxySubdivision, password = config.proxyPassword, } = options;
127
80
  const groupsToUse = groups.length ? groups : apifyProxyGroups;
128
81
  const countryCodeToUse = countryCode || apifyProxyCountry;
129
82
  const subdivisionCodeToUse = subdivisionCode || apifyProxySubdivision;
130
- const hostname = config.get('proxyHostname');
131
- const port = config.get('proxyPort');
83
+ const hostname = config.proxyHostname;
84
+ const port = config.proxyPort;
85
+ // The Apify Proxy subdivision is expressed as part of the country
86
+ // username parameter (`country-US_CA`), so a country is required.
132
87
  if (subdivisionCodeToUse && !countryCodeToUse) {
133
88
  throw new Error('ProxyConfiguration: "subdivisionCode" requires "countryCode" to be set.');
134
89
  }
@@ -157,9 +112,6 @@ class ProxyConfiguration extends core_1.ProxyConfiguration {
157
112
  *
158
113
  * You should use the {@link createProxyConfiguration} function to create a pre-initialized
159
114
  * `ProxyConfiguration` instance instead of calling this manually.
160
- *
161
- * As part of the init process, we verify the configuration by checking the proxy status endpoint.
162
- * This can make the init slower, to opt-out of this, use `checkAccess: false`.
163
115
  */
164
116
  async initialize(options) {
165
117
  if (this.usesApifyProxy) {
@@ -167,16 +119,16 @@ class ProxyConfiguration extends core_1.ProxyConfiguration {
167
119
  await this._setPasswordIfToken();
168
120
  }
169
121
  if (!this.password) {
170
- if (actor_js_1.Actor.isAtHome()) {
171
- throw new Error(`Apify Proxy password must be provided using options.password or the "${consts_1.APIFY_ENV_VARS.PROXY_PASSWORD}" environment variable. ` +
172
- `You can also provide your Apify token via the "${consts_1.APIFY_ENV_VARS.TOKEN}" environment variable, ` +
173
- `so that the SDK can fetch the proxy password from Apify API, when ${consts_1.APIFY_ENV_VARS.PROXY_PASSWORD} is not defined`);
122
+ if (Actor.isAtHome()) {
123
+ throw new Error(`Apify Proxy password must be provided using options.password or the "${APIFY_ENV_VARS.PROXY_PASSWORD}" environment variable. ` +
124
+ `You can also provide your Apify token via the "${APIFY_ENV_VARS.TOKEN}" environment variable, ` +
125
+ `so that the SDK can fetch the proxy password from Apify API, when ${APIFY_ENV_VARS.PROXY_PASSWORD} is not defined`);
174
126
  }
175
127
  else {
176
128
  this.log.warning(`No proxy password or token detected, running without proxy. To use Apify Proxy locally, ` +
177
- `provide options.password or "${consts_1.APIFY_ENV_VARS.PROXY_PASSWORD}" environment variable. ` +
178
- `You can also provide your Apify token via the "${consts_1.APIFY_ENV_VARS.TOKEN}" environment variable, ` +
179
- `so that the SDK can fetch the proxy password from Apify API, when ${consts_1.APIFY_ENV_VARS.PROXY_PASSWORD} is not defined`);
129
+ `provide options.password or "${APIFY_ENV_VARS.PROXY_PASSWORD}" environment variable. ` +
130
+ `You can also provide your Apify token via the "${APIFY_ENV_VARS.TOKEN}" environment variable, ` +
131
+ `so that the SDK can fetch the proxy password from Apify API, when ${APIFY_ENV_VARS.PROXY_PASSWORD} is not defined`);
180
132
  }
181
133
  }
182
134
  if (options?.checkAccess !== false) {
@@ -186,104 +138,59 @@ class ProxyConfiguration extends core_1.ProxyConfiguration {
186
138
  return true;
187
139
  }
188
140
  /**
189
- * This function creates a new {@link ProxyInfo} info object.
190
- * It is used by CheerioCrawler and PuppeteerCrawler to generate proxy URLs and also to allow the user to inspect
191
- * the currently used proxy via the requestHandler parameter `proxyInfo`.
192
- * Use it if you want to work with a rich representation of a proxy URL.
193
- * If you need the URL string only, use {@link ProxyConfiguration.newUrl}.
194
- * @param [sessionId]
195
- * Represents the identifier of user {@link Session} that can be managed by the {@link SessionPool} or
196
- * you can use the Apify Proxy [Session](https://docs.apify.com/proxy#sessions) identifier.
197
- * When the provided sessionId is a number, it's converted to a string. Property sessionId of
198
- * {@link ProxyInfo} is always returned as a type string.
199
- *
200
- * All the HTTP requests going through the proxy with the same session identifier
201
- * will use the same target proxy server (i.e. the same IP address).
202
- * The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`.
203
- * @return Represents information about used proxy and its configuration.
141
+ * Returns a new {@link ProxyInfo} object with a fresh proxy URL. Each call mints an
142
+ * independent URL; for Apify Proxy a random session id is embedded so consecutive
143
+ * calls resolve to different IPs.
204
144
  */
205
- async newProxyInfo(sessionId, options) {
206
- if (typeof sessionId === 'number')
207
- sessionId = `${sessionId}`;
208
- (0, ow_1.default)(sessionId, ow_1.default.optional.string.maxLength(MAX_SESSION_ID_LENGTH).matches(consts_1.APIFY_PROXY_VALUE_REGEX));
209
- const proxyInfo = await super.newProxyInfo(sessionId, options);
210
- if (!proxyInfo)
211
- return proxyInfo;
212
- const { groups, countryCode, subdivisionCode, password, port, hostname } = (this.usesApifyProxy ? this : new URL(proxyInfo.url));
213
- return {
214
- ...proxyInfo,
215
- sessionId,
216
- groups,
217
- countryCode,
218
- subdivisionCode,
219
- // this.password is not encoded, but the password from the URL will be, we need to normalize
220
- password: this.usesApifyProxy ? (password ?? '') : decodeURIComponent(password),
221
- hostname,
222
- port: port,
145
+ async newProxyInfo(options) {
146
+ const url = await this.newUrl(options);
147
+ if (!url)
148
+ return undefined;
149
+ const parsed = new URL(url);
150
+ const result = {
151
+ url,
152
+ username: decodeURIComponent(parsed.username),
153
+ password: decodeURIComponent(parsed.password),
154
+ hostname: parsed.hostname,
155
+ port: parsed.port,
223
156
  };
157
+ if (this.usesApifyProxy) {
158
+ result.groups = this.groups;
159
+ if (this.countryCode !== undefined)
160
+ result.countryCode = this.countryCode;
161
+ if (this.subdivisionCode !== undefined)
162
+ result.subdivisionCode = this.subdivisionCode;
163
+ }
164
+ return result;
224
165
  }
225
166
  /**
226
- * Returns a new proxy URL based on provided configuration options and the `sessionId` parameter.
227
- * @param [sessionId]
228
- * Represents the identifier of user {@link Session} that can be managed by the {@link SessionPool} or
229
- * you can use the Apify Proxy [Session](https://docs.apify.com/proxy#sessions) identifier.
230
- * When the provided sessionId is a number, it's converted to a string.
231
- *
232
- * All the HTTP requests going through the proxy with the same session identifier
233
- * will use the same target proxy server (i.e. the same IP address).
234
- * The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`.
235
- * @return A string with a proxy URL, including authentication credentials and port number.
236
- * For example, `http://bob:password123@proxy.example.com:8000`
167
+ * Returns a new proxy URL. For Apify Proxy, each call generates a URL with a fresh
168
+ * random session id, so consecutive calls return independent URLs. For custom
169
+ * `proxyUrls`, the URLs are rotated round-robin.
237
170
  */
238
- async newUrl(sessionId, options) {
239
- if (typeof sessionId === 'number')
240
- sessionId = `${sessionId}`;
241
- (0, ow_1.default)(sessionId, ow_1.default.optional.string.maxLength(MAX_SESSION_ID_LENGTH).matches(consts_1.APIFY_PROXY_VALUE_REGEX));
242
- if (this.newUrlFunction) {
243
- return ((await this._callNewUrlFunction(sessionId, {
244
- request: options?.request,
245
- })) ?? undefined);
171
+ async newUrl(options) {
172
+ if (this.newUrlFunction || this.proxyUrls) {
173
+ return super.newUrl(options);
246
174
  }
247
- if (this.proxyUrls) {
248
- return this._handleCustomUrl(sessionId) ?? undefined;
249
- }
250
- if (this.tieredProxyUrls) {
251
- return this._handleTieredUrl(sessionId ?? (0, utilities_1.cryptoRandomObjectId)(6), options).proxyUrl ?? undefined;
252
- }
253
- return this.composeDefaultUrl(sessionId);
254
- }
255
- _generateTieredProxyUrls(tieredProxyConfig, globalOptions) {
256
- return tieredProxyConfig.map((config) => [
257
- new ProxyConfiguration({
258
- ...globalOptions,
259
- ...config,
260
- tieredProxyConfig: undefined,
261
- }).composeDefaultUrl(),
262
- ]);
175
+ return this.composeDefaultUrl(cryptoRandomObjectId(SESSION_ID_LENGTH));
263
176
  }
264
177
  /**
265
178
  * Returns proxy username.
266
179
  */
267
180
  _getUsername(sessionId) {
268
- let username;
269
181
  const { groups, countryCode, subdivisionCode } = this;
270
182
  const parts = [];
271
183
  if (groups && groups.length) {
272
184
  parts.push(`groups-${groups.join('+')}`);
273
185
  }
274
- if (sessionId) {
275
- parts.push(`session-${sessionId}`);
276
- }
186
+ parts.push(`session-${sessionId}`);
277
187
  if (subdivisionCode) {
278
188
  parts.push(`country-${countryCode}_${subdivisionCode}`);
279
189
  }
280
190
  else if (countryCode) {
281
191
  parts.push(`country-${countryCode}`);
282
192
  }
283
- username = parts.join(',');
284
- if (parts.length === 0)
285
- username = 'auto';
286
- return username;
193
+ return parts.join(',');
287
194
  }
288
195
  composeDefaultUrl(sessionId) {
289
196
  const username = this._getUsername(sessionId);
@@ -298,15 +205,15 @@ class ProxyConfiguration extends core_1.ProxyConfiguration {
298
205
  */
299
206
  // TODO: Make this private
300
207
  async _setPasswordIfToken() {
301
- const token = this.config.get('token');
208
+ const { token } = this.config;
302
209
  if (!token)
303
210
  return;
304
211
  try {
305
- const user = await actor_js_1.Actor.apifyClient.user().get();
212
+ const user = await Actor.apifyClient.user().get();
306
213
  this.password = user.proxy?.password;
307
214
  }
308
215
  catch (error) {
309
- if (actor_js_1.Actor.isAtHome()) {
216
+ if (Actor.isAtHome()) {
310
217
  throw error;
311
218
  }
312
219
  else {
@@ -336,7 +243,7 @@ class ProxyConfiguration extends core_1.ProxyConfiguration {
336
243
  // Throw only on the platform, locally we just print a warning and run requests without the proxy.
337
244
  // This is because the user might not have set up things correctly yet.
338
245
  // It still fails on the platform, where we don't want to allow this behavior.
339
- if (actor_js_1.Actor.isAtHome()) {
246
+ if (Actor.isAtHome()) {
340
247
  throw new Error(connectionError);
341
248
  }
342
249
  this.log.warning(connectionError);
@@ -346,7 +253,7 @@ class ProxyConfiguration extends core_1.ProxyConfiguration {
346
253
  * Apify Proxy can be down for a second or a minute, but this should not crash processes.
347
254
  */
348
255
  async _fetchStatus() {
349
- const proxyStatusUrl = this.config.get('proxyStatusUrl', 'http://proxy.apify.com');
256
+ const { proxyStatusUrl } = this.config;
350
257
  const requestOpts = {
351
258
  url: `${proxyStatusUrl}/?format=json`,
352
259
  proxyUrl: await this.newUrl(),
@@ -355,7 +262,7 @@ class ProxyConfiguration extends core_1.ProxyConfiguration {
355
262
  };
356
263
  for (let attempt = 1; attempt <= CHECK_ACCESS_MAX_ATTEMPTS; attempt++) {
357
264
  try {
358
- const response = await (0, utils_1.gotScraping)(requestOpts);
265
+ const response = await gotScraping(requestOpts);
359
266
  return response.body;
360
267
  }
361
268
  catch {
@@ -375,4 +282,3 @@ class ProxyConfiguration extends core_1.ProxyConfiguration {
375
282
  '"options.subdivisionCode" or "options.apifyProxySubdivision".');
376
283
  }
377
284
  }
378
- exports.ProxyConfiguration = ProxyConfiguration;
package/dist/storage.d.ts CHANGED
@@ -1,5 +1,5 @@
1
- import type { IStorage } from '@crawlee/core';
2
- import type { Constructor, StorageClient } from '@crawlee/types';
1
+ import type { Constructor, IStorage, StorageOpenOptions } from '@crawlee/core';
2
+ import type { StorageClient } from '@crawlee/types';
3
3
  import type { Configuration } from './configuration.js';
4
4
  export interface OpenStorageOptions {
5
5
  /**
@@ -51,6 +51,8 @@ export interface OpenStorageContext {
51
51
  purgedStorageAliases: Set<string>;
52
52
  }
53
53
  /**
54
- * Opens a storage by its identifier, handling alias resolution and local purging.
54
+ * Opens a storage by its identifier, handling Apify alias resolution and local purging.
55
55
  */
56
- export declare function openStorage<T extends IStorage>(storageClass: Constructor<T>, identifier: StorageIdentifier | null | undefined, context: OpenStorageContext): Promise<T>;
56
+ export declare function openStorage<T extends IStorage>(storageClass: Constructor<T> & {
57
+ open(id?: string | null, options?: StorageOpenOptions): Promise<T>;
58
+ }, identifier: StorageIdentifier | null | undefined, context: OpenStorageContext): Promise<T>;
package/dist/storage.js CHANGED
@@ -1,8 +1,4 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.openStorage = openStorage;
4
- const core_1 = require("@crawlee/core");
5
- const apify_client_1 = require("apify-client");
1
+ import { ApifyStorageClient } from './apify_storage_client.js';
6
2
  const STORAGE_TYPE_KEYS = {
7
3
  Dataset: 'datasets',
8
4
  KeyValueStore: 'keyValueStores',
@@ -11,7 +7,7 @@ const STORAGE_TYPE_KEYS = {
11
7
  const parsedStoragesJson = new Map();
12
8
  /**
13
9
  * Resolves a {@link StorageIdentifier} to a plain string ID or name
14
- * that can be passed to Crawlee's `StorageManager.openStorage()`.
10
+ * that can be passed to crawlee v4's `<Storage>.open()`.
15
11
  */
16
12
  function resolveStorageIdentifier(storageType, identifier, config) {
17
13
  if (identifier === null || identifier === undefined) {
@@ -27,8 +23,8 @@ function resolveStorageIdentifier(storageType, identifier, config) {
27
23
  return identifier.name;
28
24
  }
29
25
  // { alias: string }
30
- const storagesJson = config.get('actorStoragesJson');
31
- if (config.get('isAtHome') && storagesJson) {
26
+ const storagesJson = config.actorStoragesJson;
27
+ if (config.isAtHome && storagesJson) {
32
28
  let storages;
33
29
  try {
34
30
  if (!parsedStoragesJson.has(storagesJson)) {
@@ -50,30 +46,34 @@ function resolveStorageIdentifier(storageType, identifier, config) {
50
46
  // When using local storage, just use the alias as a name.
51
47
  // When using platform storage, we can't just make up a name — the alias must be
52
48
  // in ACTOR_STORAGES_JSON.
53
- if (config.get('isAtHome')) {
49
+ if (config.isAtHome) {
54
50
  throw new Error(`Storage alias "${identifier.alias}" cannot be resolved because ACTOR_STORAGES_JSON is not set. ` +
55
51
  `Aliases are only available for storages declared in the Actor's schema.`);
56
52
  }
57
53
  return identifier.alias;
58
54
  }
59
55
  /**
60
- * Opens a storage by its identifier, handling alias resolution and local purging.
56
+ * Opens a storage by its identifier, handling Apify alias resolution and local purging.
61
57
  */
62
- async function openStorage(storageClass, identifier, context) {
58
+ export async function openStorage(storageClass, identifier, context) {
63
59
  const isAlias = identifier !== null && identifier !== undefined && typeof identifier === 'object' && 'alias' in identifier;
64
- if (isAlias && !context.config.get('isAtHome') && context.client instanceof apify_client_1.ApifyClient) {
60
+ if (isAlias && !context.config.isAtHome && context.client instanceof ApifyStorageClient) {
65
61
  throw new Error('The `alias` option is not allowed for Apify-based storages running outside of Apify');
66
62
  }
67
63
  const resolvedIdOrName = resolveStorageIdentifier(storageClass.name, identifier, context.config);
68
64
  // When running locally, purge aliased storages on first open
69
- // (similar to how Crawlee purges default storages on start)
65
+ // (similar to how crawlee purges default storages on start).
70
66
  if (isAlias &&
71
- !context.config.get('isAtHome') &&
72
- context.config.get('purgeOnStart') &&
67
+ !context.config.isAtHome &&
68
+ context.config.purgeOnStart &&
73
69
  !context.purgedStorageAliases.has(identifier.alias)) {
74
70
  context.purgedStorageAliases.add(identifier.alias);
75
- const existingStorage = await core_1.StorageManager.openStorage(storageClass, resolvedIdOrName, context.client, context.config);
71
+ const existingStorage = await storageClass.open(resolvedIdOrName ?? null, {
72
+ storageClient: context.client,
73
+ });
76
74
  await existingStorage.drop();
77
75
  }
78
- return core_1.StorageManager.openStorage(storageClass, resolvedIdOrName, context.client, context.config);
76
+ return storageClass.open(resolvedIdOrName ?? null, {
77
+ storageClient: context.client,
78
+ });
79
79
  }
package/dist/utils.d.ts CHANGED
@@ -13,3 +13,8 @@ export declare function getSystemInfo(): {
13
13
  * @internal
14
14
  */
15
15
  export declare function checkCrawleeVersion(): void;
16
+ /**
17
+ * Prints a warning if this version of Apify SDK is outdated.
18
+ * @ignore
19
+ */
20
+ export declare function printOutdatedSdkWarning(): void;
package/dist/utils.js CHANGED
@@ -1,33 +1,36 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.getSystemInfo = getSystemInfo;
4
- exports.checkCrawleeVersion = checkCrawleeVersion;
5
- const node_os_1 = require("node:os");
6
- const node_path_1 = require("node:path");
1
+ import { existsSync } from 'node:fs';
2
+ import { createRequire } from 'node:module';
3
+ import { type } from 'node:os';
4
+ import { normalize } from 'node:path';
7
5
  // @ts-ignore if we enable resolveJsonModule, we end up with `src` folder in `dist`
8
- const package_json_1 = require("@crawlee/core/package.json");
6
+ import crawleePkgJson from '@crawlee/core/package.json' with { type: 'json' };
9
7
  // @ts-ignore if we enable resolveJsonModule, we end up with `src` folder in `dist`
10
- const package_json_2 = require("apify-client/package.json");
11
- const fs_extra_1 = require("fs-extra");
8
+ import apifyClientPkgJson from 'apify-client/package.json' with { type: 'json' };
9
+ // eslint-disable-next-line import/extensions
10
+ import { readJSONSync } from 'fs-extra/esm';
11
+ import semver from 'semver';
12
+ import { APIFY_ENV_VARS } from '@apify/consts';
13
+ import log from '@apify/log';
12
14
  // @ts-ignore if we enable resolveJsonModule, we end up with `src` folder in `dist`
13
- const package_json_3 = require("../package.json");
15
+ import apifyPkgJson from '../package.json' with { type: 'json' };
16
+ const require = createRequire(import.meta.url);
14
17
  /**
15
18
  * Gets info about system, node version and apify package version.
16
19
  * @internal
17
20
  */
18
- function getSystemInfo() {
21
+ export function getSystemInfo() {
19
22
  return {
20
- apifyVersion: package_json_3.version,
21
- apifyClientVersion: package_json_2.version,
22
- crawleeVersion: package_json_1.version,
23
- osType: (0, node_os_1.type)(),
23
+ apifyVersion: apifyPkgJson.version,
24
+ apifyClientVersion: apifyClientPkgJson.version,
25
+ crawleeVersion: crawleePkgJson.version,
26
+ osType: type(),
24
27
  nodeVersion: process.version,
25
28
  };
26
29
  }
27
30
  /**
28
31
  * @internal
29
32
  */
30
- function checkCrawleeVersion() {
33
+ export function checkCrawleeVersion() {
31
34
  const resolve = (name) => {
32
35
  try {
33
36
  return require.resolve(name);
@@ -38,28 +41,41 @@ function checkCrawleeVersion() {
38
41
  };
39
42
  const paths = [
40
43
  // when users install `crawlee` package, we need to check its core dependency
41
- (0, node_path_1.normalize)(`${process.cwd()}/node_modules/crawlee/node_modules/@crawlee/core/package.json`),
44
+ normalize(`${process.cwd()}/node_modules/crawlee/node_modules/@crawlee/core/package.json`),
42
45
  // when users install `@crawlee/cheerio` or other crawler package, we need to check the dependency under basic crawler package
43
- (0, node_path_1.normalize)(`${process.cwd()}/node_modules/@crawlee/basic/node_modules/@crawlee/core/package.json`),
46
+ normalize(`${process.cwd()}/node_modules/@crawlee/basic/node_modules/@crawlee/core/package.json`),
44
47
  // also check paths via `require.resolve` to support pnpm
45
48
  resolve('crawlee/package.json'),
46
49
  resolve('@crawlee/basic/package.json'),
47
50
  ];
48
51
  for (const path of paths) {
49
52
  // ignore unresolved paths or paths that are not in the project directory
50
- if (!(0, fs_extra_1.pathExistsSync)(path) || !path.startsWith(process.cwd())) {
53
+ if (!existsSync(path) || !path.startsWith(process.cwd())) {
51
54
  continue;
52
55
  }
53
56
  let version;
54
57
  try {
55
- version = (0, fs_extra_1.readJSONSync)(path).version;
58
+ version = readJSONSync(path).version;
56
59
  }
57
60
  catch {
58
61
  //
59
62
  }
60
- if (version != null && version !== package_json_1.version) {
61
- const details = `User installed version (${version}) found in ${path}.\nSDK uses ${package_json_1.version} from ${require.resolve('@crawlee/core')}`;
62
- throw new Error(`Detected incompatible Crawlee version used by the SDK. User installed ${version} but the SDK uses ${package_json_1.version}.\n\n${details}`);
63
+ if (version != null && version !== crawleePkgJson.version) {
64
+ const details = `User installed version (${version}) found in ${path}.\nSDK uses ${crawleePkgJson.version} from ${require.resolve('@crawlee/core')}`;
65
+ throw new Error(`Detected incompatible Crawlee version used by the SDK. User installed ${version} but the SDK uses ${crawleePkgJson.version}.\n\n${details}`);
63
66
  }
64
67
  }
65
68
  }
69
+ /**
70
+ * Prints a warning if this version of Apify SDK is outdated.
71
+ * @ignore
72
+ */
73
+ export function printOutdatedSdkWarning() {
74
+ if (process.env[APIFY_ENV_VARS.DISABLE_OUTDATED_WARNING])
75
+ return;
76
+ const latestApifyVersion = process.env[APIFY_ENV_VARS.SDK_LATEST_VERSION];
77
+ if (!latestApifyVersion || !semver.lt(apifyPkgJson.version, latestApifyVersion))
78
+ return;
79
+ log.warning(`You are using an outdated version (${apifyPkgJson.version}) of Apify SDK. We recommend you to update to the latest version (${latestApifyVersion}).
80
+ Read more about Apify SDK versioning at: https://help.apify.com/en/articles/3184510-updates-and-versioning-of-apify-sdk`);
81
+ }
package/package.json CHANGED
@@ -1,18 +1,17 @@
1
1
  {
2
2
  "name": "apify",
3
- "version": "3.7.3-beta.9",
3
+ "version": "4.0.0-beta.13",
4
4
  "description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
5
5
  "engines": {
6
- "node": ">=16.0.0"
6
+ "node": ">=22.0.0"
7
7
  },
8
+ "type": "module",
8
9
  "main": "./dist/index.js",
9
- "module": "./dist/index.mjs",
10
10
  "types": "./dist/index.d.ts",
11
11
  "exports": {
12
12
  ".": {
13
- "import": "./dist/index.mjs",
14
- "require": "./dist/index.js",
15
- "types": "./dist/index.d.ts"
13
+ "types": "./dist/index.d.ts",
14
+ "default": "./dist/index.js"
16
15
  },
17
16
  "./package.json": "./package.json"
18
17
  },
@@ -59,20 +58,23 @@
59
58
  "@apify/log": "^2.4.3",
60
59
  "@apify/timeout": "^0.3.0",
61
60
  "@apify/utilities": "^2.13.0",
62
- "@crawlee/core": "^3.14.1",
63
- "@crawlee/types": "^3.14.1",
64
- "@crawlee/utils": "^3.14.1",
61
+ "@crawlee/core": "^4.0.0-beta.61",
62
+ "@crawlee/types": "^4.0.0-beta.61",
63
+ "@crawlee/utils": "^4.0.0-beta.61",
65
64
  "apify-client": "^2.17.0",
66
65
  "fs-extra": "^11.2.0",
67
- "ow": "^0.28.2",
66
+ "got-scraping": "^4.2.1",
67
+ "ow": "^2.0.0",
68
68
  "semver": "^7.5.4",
69
69
  "tslib": "^2.6.2",
70
- "ws": "^8.18.0"
70
+ "ws": "^8.18.0",
71
+ "zod": "^3.24.0 || ^4.0.0"
71
72
  },
72
73
  "devDependencies": {
73
74
  "@apify/oxlint-config": "^0.2.5",
74
75
  "@apify/tsconfig": "^0.1.2",
75
76
  "@commitlint/config-conventional": "^21.0.0",
77
+ "@crawlee/memory-storage": "^4.0.0-beta.61",
76
78
  "@playwright/browser-chromium": "^1.60.0",
77
79
  "@types/content-type": "^1.1.8",
78
80
  "@types/fs-extra": "^11.0.4",
@@ -81,13 +83,12 @@
81
83
  "@types/tough-cookie": "^4.0.5",
82
84
  "@types/ws": "^8.5.12",
83
85
  "commitlint": "^21.0.0",
84
- "crawlee": "^3.13.5",
85
- "gen-esm-wrapper": "^1.1.3",
86
+ "crawlee": "^4.0.0-beta.61",
86
87
  "globby": "^16.0.0",
87
88
  "husky": "^9.1.7",
88
89
  "lint-staged": "^17.0.0",
89
90
  "oxfmt": "0.52.0",
90
- "oxlint": "1.68.0",
91
+ "oxlint": "1.67.0",
91
92
  "oxlint-tsgolint": "0.22.0",
92
93
  "playwright": "^1.60.0",
93
94
  "puppeteer": "^25.0.0",
@@ -106,7 +107,7 @@
106
107
  },
107
108
  "scripts": {
108
109
  "clean": "rimraf ./dist ./tsconfig.build.tsbuildinfo",
109
- "compile": "tsc -p tsconfig.build.json && gen-esm-wrapper ./dist/index.js ./dist/index.mjs",
110
+ "compile": "tsc -p tsconfig.build.json",
110
111
  "fixApifyExport": "node ./scripts/temp_fix_apify_exports.mjs",
111
112
  "build": "pnpm clean && pnpm compile && pnpm fixApifyExport",
112
113
  "ci:build": "pnpm build",