apify 4.0.0-beta.17 → 4.0.0-beta.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/actor.d.ts +13 -19
- package/dist/actor.js +36 -48
- package/dist/index.d.ts +1 -0
- package/dist/index.js +1 -0
- package/dist/proxy_configuration.d.ts +16 -5
- package/dist/proxy_configuration.js +57 -19
- package/dist/utils.d.ts +27 -0
- package/dist/utils.js +90 -4
- package/package.json +2 -6
package/dist/actor.d.ts
CHANGED
|
@@ -379,24 +379,22 @@ export declare class Actor<Data extends Dictionary = Dictionary> {
|
|
|
379
379
|
*
|
|
380
380
|
* If the user function returns a promise, it is considered asynchronous:
|
|
381
381
|
* ```js
|
|
382
|
-
* import { gotScraping } from 'got-scraping';
|
|
383
|
-
*
|
|
384
382
|
* await Actor.main(() => {
|
|
385
383
|
* // My asynchronous function that returns a promise
|
|
386
|
-
* return
|
|
387
|
-
*
|
|
388
|
-
*
|
|
384
|
+
* return fetch('http://www.example.com')
|
|
385
|
+
* .then((response) => response.text())
|
|
386
|
+
* .then((html) => {
|
|
387
|
+
* console.log(html);
|
|
388
|
+
* });
|
|
389
389
|
* });
|
|
390
390
|
* ```
|
|
391
391
|
*
|
|
392
392
|
* To simplify your code, you can take advantage of the `async`/`await` keywords:
|
|
393
393
|
*
|
|
394
394
|
* ```js
|
|
395
|
-
* import { gotScraping } from 'got-scraping';
|
|
396
|
-
*
|
|
397
395
|
* await Actor.main(async () => {
|
|
398
396
|
* // My asynchronous function
|
|
399
|
-
* const html = await
|
|
397
|
+
* const html = await fetch('http://www.example.com').then((response) => response.text());
|
|
400
398
|
* console.log(html);
|
|
401
399
|
* });
|
|
402
400
|
* ```
|
|
@@ -888,24 +886,22 @@ export declare class Actor<Data extends Dictionary = Dictionary> {
|
|
|
888
886
|
*
|
|
889
887
|
* If the user function returns a promise, it is considered asynchronous:
|
|
890
888
|
* ```js
|
|
891
|
-
* import { gotScraping } from 'got-scraping';
|
|
892
|
-
*
|
|
893
889
|
* await Actor.main(() => {
|
|
894
890
|
* // My asynchronous function that returns a promise
|
|
895
|
-
* return
|
|
896
|
-
*
|
|
897
|
-
*
|
|
891
|
+
* return fetch('http://www.example.com')
|
|
892
|
+
* .then((response) => response.text())
|
|
893
|
+
* .then((html) => {
|
|
894
|
+
* console.log(html);
|
|
895
|
+
* });
|
|
898
896
|
* });
|
|
899
897
|
* ```
|
|
900
898
|
*
|
|
901
899
|
* To simplify your code, you can take advantage of the `async`/`await` keywords:
|
|
902
900
|
*
|
|
903
901
|
* ```js
|
|
904
|
-
* import { gotScraping } from 'got-scraping';
|
|
905
|
-
*
|
|
906
902
|
* await Actor.main(async () => {
|
|
907
903
|
* // My asynchronous function
|
|
908
|
-
* const html = await
|
|
904
|
+
* const html = await fetch('http://www.example.com').then((response) => response.text());
|
|
909
905
|
* console.log(html);
|
|
910
906
|
* });
|
|
911
907
|
* ```
|
|
@@ -931,11 +927,9 @@ export declare class Actor<Data extends Dictionary = Dictionary> {
|
|
|
931
927
|
* behavior by setting `options.gracefulShutdown` to `false`.
|
|
932
928
|
*
|
|
933
929
|
* ```js
|
|
934
|
-
* import { gotScraping } from 'got-scraping';
|
|
935
|
-
*
|
|
936
930
|
* await Actor.init();
|
|
937
931
|
*
|
|
938
|
-
* const html = await
|
|
932
|
+
* const html = await fetch('http://www.example.com').then((response) => response.text());
|
|
939
933
|
* console.log(html);
|
|
940
934
|
*
|
|
941
935
|
* await Actor.exit();
|
package/dist/actor.js
CHANGED
|
@@ -2,7 +2,7 @@ import { createPrivateKey } from 'node:crypto';
|
|
|
2
2
|
import { Dataset, purgeDefaultStorages, RequestQueue, serviceLocator } from '@crawlee/core';
|
|
3
3
|
import { sleep, snakeCaseToCamelCase } from '@crawlee/utils';
|
|
4
4
|
import { ApifyClient } from 'apify-client';
|
|
5
|
-
import ow from 'ow';
|
|
5
|
+
import { z } from 'zod';
|
|
6
6
|
import { ACTOR_ENV_VARS, ACTOR_EVENT_NAMES, APIFY_ENV_VARS, INTEGER_ENV_VARS, } from '@apify/consts';
|
|
7
7
|
import { decryptInputSecrets } from '@apify/input_secrets';
|
|
8
8
|
import log from '@apify/log';
|
|
@@ -15,7 +15,7 @@ import { KeyValueStore } from './key_value_store.js';
|
|
|
15
15
|
import { PlatformEventManager } from './platform_event_manager.js';
|
|
16
16
|
import { ProxyConfiguration } from './proxy_configuration.js';
|
|
17
17
|
import { openStorage } from './storage.js';
|
|
18
|
-
import { checkCrawleeVersion, getSystemInfo, printOutdatedSdkWarning } from './utils.js';
|
|
18
|
+
import { checkCrawleeVersion, getSystemInfo, isNonEmptyObject, printOutdatedSdkWarning, validate } from './utils.js';
|
|
19
19
|
/**
|
|
20
20
|
* Exit codes for the Actor process.
|
|
21
21
|
* The error codes must be in the range 1-128, to avoid collision with signal exits
|
|
@@ -139,24 +139,22 @@ export class Actor {
|
|
|
139
139
|
*
|
|
140
140
|
* If the user function returns a promise, it is considered asynchronous:
|
|
141
141
|
* ```js
|
|
142
|
-
* import { gotScraping } from 'got-scraping';
|
|
143
|
-
*
|
|
144
142
|
* await Actor.main(() => {
|
|
145
143
|
* // My asynchronous function that returns a promise
|
|
146
|
-
* return
|
|
147
|
-
*
|
|
148
|
-
*
|
|
144
|
+
* return fetch('http://www.example.com')
|
|
145
|
+
* .then((response) => response.text())
|
|
146
|
+
* .then((html) => {
|
|
147
|
+
* console.log(html);
|
|
148
|
+
* });
|
|
149
149
|
* });
|
|
150
150
|
* ```
|
|
151
151
|
*
|
|
152
152
|
* To simplify your code, you can take advantage of the `async`/`await` keywords:
|
|
153
153
|
*
|
|
154
154
|
* ```js
|
|
155
|
-
* import { gotScraping } from 'got-scraping';
|
|
156
|
-
*
|
|
157
155
|
* await Actor.main(async () => {
|
|
158
156
|
* // My asynchronous function
|
|
159
|
-
* const html = await
|
|
157
|
+
* const html = await fetch('http://www.example.com').then((response) => response.text());
|
|
160
158
|
* console.log(html);
|
|
161
159
|
* });
|
|
162
160
|
* ```
|
|
@@ -534,18 +532,20 @@ export class Actor {
|
|
|
534
532
|
* @ignore
|
|
535
533
|
*/
|
|
536
534
|
async addWebhook(options) {
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
535
|
+
validate(z
|
|
536
|
+
.object({
|
|
537
|
+
eventTypes: z.array(z.string()),
|
|
538
|
+
requestUrl: z.string(),
|
|
539
|
+
payloadTemplate: z.string().optional(),
|
|
540
|
+
idempotencyKey: z.string().optional(),
|
|
541
|
+
headersTemplate: z.string().optional(),
|
|
542
|
+
description: z.string().optional(),
|
|
543
|
+
ignoreSslErrors: z.boolean().optional(),
|
|
544
|
+
doNotRetry: z.boolean().optional(),
|
|
545
|
+
shouldInterpolateStrings: z.boolean().optional(),
|
|
546
|
+
isApifyIntegration: z.boolean().optional(),
|
|
547
|
+
})
|
|
548
|
+
.strict(), options);
|
|
549
549
|
if (!this.isAtHome()) {
|
|
550
550
|
log.warning('Actor.addWebhook() is only supported when running on the Apify platform. The webhook will not be invoked.');
|
|
551
551
|
return undefined;
|
|
@@ -571,8 +571,8 @@ export class Actor {
|
|
|
571
571
|
*/
|
|
572
572
|
async setStatusMessage(statusMessage, options) {
|
|
573
573
|
const { isStatusMessageTerminal, level } = options || {};
|
|
574
|
-
|
|
575
|
-
|
|
574
|
+
validate(z.string(), statusMessage);
|
|
575
|
+
validate(z.boolean().optional(), isStatusMessageTerminal);
|
|
576
576
|
this._ensureActorInit('setStatusMessage');
|
|
577
577
|
const loggedStatusMessage = `[Status message]: ${statusMessage}`;
|
|
578
578
|
switch (level) {
|
|
@@ -665,9 +665,7 @@ export class Actor {
|
|
|
665
665
|
* @ignore
|
|
666
666
|
*/
|
|
667
667
|
async openDataset(datasetIdOrName, options = {}) {
|
|
668
|
-
|
|
669
|
-
forceCloud: ow.optional.boolean,
|
|
670
|
-
}));
|
|
668
|
+
validate(z.object({ forceCloud: z.boolean().optional() }).strict(), options);
|
|
671
669
|
this._ensureActorInit('openDataset');
|
|
672
670
|
return this._openStorage(Dataset, datasetIdOrName, options);
|
|
673
671
|
}
|
|
@@ -774,16 +772,14 @@ export class Actor {
|
|
|
774
772
|
const { inputSecretsPrivateKeyFile, inputSecretsPrivateKeyPassphrase } = this.config;
|
|
775
773
|
const rawInput = await this.getValue(this.config.inputKey);
|
|
776
774
|
let input = rawInput;
|
|
777
|
-
if (
|
|
778
|
-
inputSecretsPrivateKeyFile &&
|
|
779
|
-
inputSecretsPrivateKeyPassphrase) {
|
|
775
|
+
if (isNonEmptyObject(rawInput) && inputSecretsPrivateKeyFile && inputSecretsPrivateKeyPassphrase) {
|
|
780
776
|
const privateKey = createPrivateKey({
|
|
781
777
|
key: Buffer.from(inputSecretsPrivateKeyFile, 'base64'),
|
|
782
778
|
passphrase: inputSecretsPrivateKeyPassphrase,
|
|
783
779
|
});
|
|
784
780
|
input = decryptInputSecrets({ input: rawInput, privateKey });
|
|
785
781
|
}
|
|
786
|
-
if (
|
|
782
|
+
if (isNonEmptyObject(input) && !Buffer.isBuffer(input)) {
|
|
787
783
|
input = await this.inferDefaultsFromInputSchema(input);
|
|
788
784
|
}
|
|
789
785
|
return input;
|
|
@@ -816,9 +812,7 @@ export class Actor {
|
|
|
816
812
|
* @ignore
|
|
817
813
|
*/
|
|
818
814
|
async openKeyValueStore(storeIdOrName, options = {}) {
|
|
819
|
-
|
|
820
|
-
forceCloud: ow.optional.boolean,
|
|
821
|
-
}));
|
|
815
|
+
validate(z.object({ forceCloud: z.boolean().optional() }).strict(), options);
|
|
822
816
|
this._ensureActorInit('openKeyValueStore');
|
|
823
817
|
return this._openStorage(KeyValueStore, storeIdOrName, options);
|
|
824
818
|
}
|
|
@@ -841,9 +835,7 @@ export class Actor {
|
|
|
841
835
|
* @ignore
|
|
842
836
|
*/
|
|
843
837
|
async openRequestQueue(queueIdOrName, options = {}) {
|
|
844
|
-
|
|
845
|
-
forceCloud: ow.optional.boolean,
|
|
846
|
-
}));
|
|
838
|
+
validate(z.object({ forceCloud: z.boolean().optional() }).strict(), options);
|
|
847
839
|
this._ensureActorInit('openRequestQueue');
|
|
848
840
|
const queue = await this._openStorage(RequestQueue, queueIdOrName, options);
|
|
849
841
|
// eslint-disable-next-line dot-notation
|
|
@@ -1070,24 +1062,22 @@ export class Actor {
|
|
|
1070
1062
|
*
|
|
1071
1063
|
* If the user function returns a promise, it is considered asynchronous:
|
|
1072
1064
|
* ```js
|
|
1073
|
-
* import { gotScraping } from 'got-scraping';
|
|
1074
|
-
*
|
|
1075
1065
|
* await Actor.main(() => {
|
|
1076
1066
|
* // My asynchronous function that returns a promise
|
|
1077
|
-
* return
|
|
1078
|
-
*
|
|
1079
|
-
*
|
|
1067
|
+
* return fetch('http://www.example.com')
|
|
1068
|
+
* .then((response) => response.text())
|
|
1069
|
+
* .then((html) => {
|
|
1070
|
+
* console.log(html);
|
|
1071
|
+
* });
|
|
1080
1072
|
* });
|
|
1081
1073
|
* ```
|
|
1082
1074
|
*
|
|
1083
1075
|
* To simplify your code, you can take advantage of the `async`/`await` keywords:
|
|
1084
1076
|
*
|
|
1085
1077
|
* ```js
|
|
1086
|
-
* import { gotScraping } from 'got-scraping';
|
|
1087
|
-
*
|
|
1088
1078
|
* await Actor.main(async () => {
|
|
1089
1079
|
* // My asynchronous function
|
|
1090
|
-
* const html = await
|
|
1080
|
+
* const html = await fetch('http://www.example.com').then((response) => response.text());
|
|
1091
1081
|
* console.log(html);
|
|
1092
1082
|
* });
|
|
1093
1083
|
* ```
|
|
@@ -1115,11 +1105,9 @@ export class Actor {
|
|
|
1115
1105
|
* behavior by setting `options.gracefulShutdown` to `false`.
|
|
1116
1106
|
*
|
|
1117
1107
|
* ```js
|
|
1118
|
-
* import { gotScraping } from 'got-scraping';
|
|
1119
|
-
*
|
|
1120
1108
|
* await Actor.init();
|
|
1121
1109
|
*
|
|
1122
|
-
* const html = await
|
|
1110
|
+
* const html = await fetch('http://www.example.com').then((response) => response.text());
|
|
1123
1111
|
* console.log(html);
|
|
1124
1112
|
*
|
|
1125
1113
|
* await Actor.exit();
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
export * from './actor.js';
|
|
2
2
|
export { ApifyStorageClient } from './apify_storage_client.js';
|
|
3
|
+
export { ArgumentValidationError } from './utils.js';
|
|
3
4
|
export type { OpenStorageOptions, StorageAlias, StorageId, StorageName, StorageIdentifier, StorageIdentifierWithoutAlias, } from './storage.js';
|
|
4
5
|
export { ChargeOptions, ChargeResult, ActorPricingInfo, ChargingManager } from './charging.js';
|
|
5
6
|
export * from './configuration.js';
|
package/dist/index.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
export * from './actor.js';
|
|
2
2
|
export { ApifyStorageClient } from './apify_storage_client.js';
|
|
3
|
+
export { ArgumentValidationError } from './utils.js';
|
|
3
4
|
export { ChargingManager } from './charging.js';
|
|
4
5
|
export * from './configuration.js';
|
|
5
6
|
export * from './proxy_configuration.js';
|
|
package/dist/proxy_configuration.d.ts
CHANGED
|
@@ -2,6 +2,12 @@ import type { ProxyConfigurationOptions as CoreProxyConfigurationOptions } from
|
|
|
2
2
|
import { ProxyConfiguration as CoreProxyConfiguration } from '@crawlee/core';
|
|
3
3
|
import type { ProxyInfo as CoreProxyInfo } from '@crawlee/types';
|
|
4
4
|
import { Configuration } from './configuration.js';
|
|
5
|
+
/** Response of the Apify Proxy status endpoint (`proxy.apify.com/?format=json`). */
|
|
6
|
+
interface ProxyStatus {
|
|
7
|
+
connected: boolean;
|
|
8
|
+
connectionError: string;
|
|
9
|
+
isManInTheMiddle: boolean;
|
|
10
|
+
}
|
|
5
11
|
type NewUrlOptions = Parameters<CoreProxyConfiguration['newProxyInfo']>[0];
|
|
6
12
|
export interface ProxyConfigurationOptions extends CoreProxyConfigurationOptions {
|
|
7
13
|
/**
|
|
@@ -198,11 +204,16 @@ export declare class ProxyConfiguration extends CoreProxyConfiguration {
|
|
|
198
204
|
/**
|
|
199
205
|
* Apify Proxy can be down for a second or a minute, but this should not crash processes.
|
|
200
206
|
*/
|
|
201
|
-
protected _fetchStatus(): Promise<
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
207
|
+
protected _fetchStatus(): Promise<ProxyStatus | undefined>;
|
|
208
|
+
/**
|
|
209
|
+
* Fetches the Apify Proxy status endpoint once, *through* the proxy, so the
|
|
210
|
+
* response reports on this exact connection (auth + man-in-the-middle).
|
|
211
|
+
*
|
|
212
|
+
* Uses a native `node:http` forward-proxy request — an absolute request URL
|
|
213
|
+
* plus a `Proxy-Authorization` header — so no proxy-agent dependency is
|
|
214
|
+
* needed. The status endpoint (`http://proxy.apify.com`) is plain HTTP.
|
|
215
|
+
*/
|
|
216
|
+
protected _requestStatus(statusUrl: string, proxyUrl: string): Promise<ProxyStatus>;
|
|
206
217
|
/**
|
|
207
218
|
* Throws cannot combine custom proxies with Apify Proxy
|
|
208
219
|
* @internal
|
|
package/dist/proxy_configuration.js
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
|
+
import { once } from 'node:events';
|
|
2
|
+
import { request as httpRequest } from 'node:http';
|
|
3
|
+
import { json } from 'node:stream/consumers';
|
|
1
4
|
import { ProxyConfiguration as CoreProxyConfiguration } from '@crawlee/core';
|
|
2
|
-
import { gotScraping } from 'got-scraping';
|
|
3
|
-
import ow from 'ow';
|
|
5
|
+
import { z } from 'zod';
|
|
4
6
|
import { APIFY_ENV_VARS, APIFY_PROXY_VALUE_REGEX } from '@apify/consts';
|
|
5
7
|
import { cryptoRandomObjectId } from '@apify/utilities';
|
|
6
8
|
import { Actor } from './actor.js';
|
|
7
9
|
import { Configuration } from './configuration.js';
|
|
10
|
+
import { validate } from './utils.js';
|
|
8
11
|
const CHECK_ACCESS_REQUEST_TIMEOUT_MILLIS = 4_000;
|
|
9
12
|
const CHECK_ACCESS_MAX_ATTEMPTS = 2;
|
|
10
13
|
const COUNTRY_CODE_REGEX = /^[A-Z]{2}$/;
|
|
@@ -67,15 +70,17 @@ export class ProxyConfiguration extends CoreProxyConfiguration {
|
|
|
67
70
|
['validateRequired']: false,
|
|
68
71
|
});
|
|
69
72
|
this.config = config;
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
73
|
+
validate(z
|
|
74
|
+
.object({
|
|
75
|
+
groups: z.array(z.string().regex(APIFY_PROXY_VALUE_REGEX)).optional(),
|
|
76
|
+
apifyProxyGroups: z.array(z.string().regex(APIFY_PROXY_VALUE_REGEX)).optional(),
|
|
77
|
+
countryCode: z.string().regex(COUNTRY_CODE_REGEX).optional(),
|
|
78
|
+
apifyProxyCountry: z.string().regex(COUNTRY_CODE_REGEX).optional(),
|
|
79
|
+
subdivisionCode: z.string().regex(SUBDIVISION_CODE_REGEX).optional(),
|
|
80
|
+
apifyProxySubdivision: z.string().regex(SUBDIVISION_CODE_REGEX).optional(),
|
|
81
|
+
password: z.string().optional(),
|
|
82
|
+
})
|
|
83
|
+
.strict(), rest);
|
|
79
84
|
const { groups = [], apifyProxyGroups = [], countryCode, apifyProxyCountry, subdivisionCode, apifyProxySubdivision, password = config.proxyPassword, } = options;
|
|
80
85
|
const groupsToUse = groups.length ? groups : apifyProxyGroups;
|
|
81
86
|
const countryCodeToUse = countryCode || apifyProxyCountry;
|
|
@@ -254,16 +259,14 @@ export class ProxyConfiguration extends CoreProxyConfiguration {
|
|
|
254
259
|
*/
|
|
255
260
|
async _fetchStatus() {
|
|
256
261
|
const { proxyStatusUrl } = this.config;
|
|
257
|
-
const
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
};
|
|
262
|
+
const statusUrl = `${proxyStatusUrl}/?format=json`;
|
|
263
|
+
const proxyUrl = await this.newUrl();
|
|
264
|
+
// Without a proxy URL we can't perform the (proxied) status check.
|
|
265
|
+
if (!proxyUrl)
|
|
266
|
+
return undefined;
|
|
263
267
|
for (let attempt = 1; attempt <= CHECK_ACCESS_MAX_ATTEMPTS; attempt++) {
|
|
264
268
|
try {
|
|
265
|
-
|
|
266
|
-
return response.body;
|
|
269
|
+
return await this._requestStatus(statusUrl, proxyUrl);
|
|
267
270
|
}
|
|
268
271
|
catch {
|
|
269
272
|
// retry connection errors
|
|
@@ -271,6 +274,41 @@ export class ProxyConfiguration extends CoreProxyConfiguration {
|
|
|
271
274
|
}
|
|
272
275
|
return undefined;
|
|
273
276
|
}
|
|
277
|
+
/**
|
|
278
|
+
* Fetches the Apify Proxy status endpoint once, *through* the proxy, so the
|
|
279
|
+
* response reports on this exact connection (auth + man-in-the-middle).
|
|
280
|
+
*
|
|
281
|
+
* Uses a native `node:http` forward-proxy request — an absolute request URL
|
|
282
|
+
* plus a `Proxy-Authorization` header — so no proxy-agent dependency is
|
|
283
|
+
* needed. The status endpoint (`http://proxy.apify.com`) is plain HTTP.
|
|
284
|
+
*/
|
|
285
|
+
async _requestStatus(statusUrl, proxyUrl) {
|
|
286
|
+
const target = new URL(statusUrl);
|
|
287
|
+
const proxy = new URL(proxyUrl);
|
|
288
|
+
const headers = { host: target.host };
|
|
289
|
+
if (proxy.username) {
|
|
290
|
+
const credentials = `${decodeURIComponent(proxy.username)}:${decodeURIComponent(proxy.password)}`;
|
|
291
|
+
headers['proxy-authorization'] = `Basic ${Buffer.from(credentials).toString('base64')}`;
|
|
292
|
+
}
|
|
293
|
+
const request = httpRequest({
|
|
294
|
+
host: proxy.hostname,
|
|
295
|
+
port: proxy.port,
|
|
296
|
+
// Absolute-form request URI tells the proxy to forward the request.
|
|
297
|
+
path: target.href,
|
|
298
|
+
headers,
|
|
299
|
+
signal: AbortSignal.timeout(CHECK_ACCESS_REQUEST_TIMEOUT_MILLIS),
|
|
300
|
+
});
|
|
301
|
+
request.end();
|
|
302
|
+
// `once` rejects if the request emits `error` first (connection refused,
|
|
303
|
+
// timeout/abort), so failures propagate to the retry loop in `_fetchStatus`.
|
|
304
|
+
const [response] = (await once(request, 'response'));
|
|
305
|
+
const statusCode = response.statusCode ?? 0;
|
|
306
|
+
if (statusCode < 200 || statusCode >= 300) {
|
|
307
|
+
response.resume(); // drain so the socket can be freed
|
|
308
|
+
throw new Error(`Apify Proxy status check responded with status code ${statusCode}.`);
|
|
309
|
+
}
|
|
310
|
+
return (await json(response));
|
|
311
|
+
}
|
|
274
312
|
/**
|
|
275
313
|
* Throws cannot combine custom proxies with Apify Proxy
|
|
276
314
|
* @internal
|
package/dist/utils.d.ts
CHANGED
|
@@ -1,3 +1,30 @@
|
|
|
1
|
+
import type { z } from 'zod';
|
|
2
|
+
/**
|
|
3
|
+
* Returns `true` for a plain, non-empty object (not `null`, not an array).
|
|
4
|
+
* Mirrors the `ow.object.nonEmpty` predicate the SDK used previously.
|
|
5
|
+
* @internal
|
|
6
|
+
*/
|
|
7
|
+
export declare function isNonEmptyObject(value: unknown): value is Record<string, unknown>;
|
|
8
|
+
/**
|
|
9
|
+
* Error thrown when an argument fails validation (e.g. by `Actor.addWebhook()`
|
|
10
|
+
* or the `ProxyConfiguration` constructor).
|
|
11
|
+
*
|
|
12
|
+
* Its `message` is a human-readable sentence naming the offending field and the
|
|
13
|
+
* value it received (see {@link formatZodError}) — not a raw JSON dump. The
|
|
14
|
+
* structured zod {@link https://zod.dev | zod} issues are available on `issues`
|
|
15
|
+
* (and the original `ZodError` on `cause`) for programmatic inspection.
|
|
16
|
+
*/
|
|
17
|
+
export declare class ArgumentValidationError extends Error {
|
|
18
|
+
/** Structured issues from the underlying schema check. */
|
|
19
|
+
readonly issues: z.ZodError['issues'];
|
|
20
|
+
constructor(error: z.ZodError, value: unknown);
|
|
21
|
+
}
|
|
22
|
+
/**
|
|
23
|
+
* Validates `value` against a zod `schema`, returning the parsed value, or
|
|
24
|
+
* throwing an {@link ArgumentValidationError} if it doesn't match.
|
|
25
|
+
* @internal
|
|
26
|
+
*/
|
|
27
|
+
export declare function validate<Schema extends z.ZodType>(schema: Schema, value: unknown): z.infer<Schema>;
|
|
1
28
|
/**
|
|
2
29
|
* Gets info about system, node version and apify package version.
|
|
3
30
|
* @internal
|
package/dist/utils.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { existsSync } from 'node:fs';
|
|
1
|
+
import { existsSync, readFileSync } from 'node:fs';
|
|
2
2
|
import { createRequire } from 'node:module';
|
|
3
3
|
import { type } from 'node:os';
|
|
4
4
|
import { normalize } from 'node:path';
|
|
@@ -6,14 +6,100 @@ import { normalize } from 'node:path';
|
|
|
6
6
|
import crawleePkgJson from '@crawlee/core/package.json' with { type: 'json' };
|
|
7
7
|
// @ts-ignore if we enable resolveJsonModule, we end up with `src` folder in `dist`
|
|
8
8
|
import apifyClientPkgJson from 'apify-client/package.json' with { type: 'json' };
|
|
9
|
-
// eslint-disable-next-line import/extensions
|
|
10
|
-
import { readJSONSync } from 'fs-extra/esm';
|
|
11
9
|
import semver from 'semver';
|
|
12
10
|
import { APIFY_ENV_VARS } from '@apify/consts';
|
|
13
11
|
import log from '@apify/log';
|
|
14
12
|
// @ts-ignore if we enable resolveJsonModule, we end up with `src` folder in `dist`
|
|
15
13
|
import apifyPkgJson from '../package.json' with { type: 'json' };
|
|
16
14
|
const require = createRequire(import.meta.url);
|
|
15
|
+
/**
|
|
16
|
+
* Returns `true` for a plain, non-empty object (not `null`, not an array).
|
|
17
|
+
* Mirrors the `ow.object.nonEmpty` predicate the SDK used previously.
|
|
18
|
+
* @internal
|
|
19
|
+
*/
|
|
20
|
+
export function isNonEmptyObject(value) {
|
|
21
|
+
return typeof value === 'object' && value !== null && !Array.isArray(value) && Object.keys(value).length > 0;
|
|
22
|
+
}
|
|
23
|
+
/** Formats a zod issue path like `groups[0]` or `countryCode`. */
|
|
24
|
+
function formatIssuePath(path) {
|
|
25
|
+
let out = '';
|
|
26
|
+
for (const key of path) {
|
|
27
|
+
if (typeof key === 'number')
|
|
28
|
+
out += `[${key}]`;
|
|
29
|
+
else
|
|
30
|
+
out += out ? `.${String(key)}` : String(key);
|
|
31
|
+
}
|
|
32
|
+
return out;
|
|
33
|
+
}
|
|
34
|
+
/** Reads the value at `path` from the validated input, to include in the error. */
|
|
35
|
+
function valueAtPath(root, path) {
|
|
36
|
+
let current = root;
|
|
37
|
+
for (const key of path) {
|
|
38
|
+
if (current === null || typeof current !== 'object')
|
|
39
|
+
return undefined;
|
|
40
|
+
current = current[key];
|
|
41
|
+
}
|
|
42
|
+
return current;
|
|
43
|
+
}
|
|
44
|
+
/** Renders a primitive received value for an error; skips objects/Dates (noisy). */
|
|
45
|
+
function describeReceived(value) {
|
|
46
|
+
switch (typeof value) {
|
|
47
|
+
case 'string':
|
|
48
|
+
return value;
|
|
49
|
+
case 'number':
|
|
50
|
+
case 'boolean':
|
|
51
|
+
case 'bigint':
|
|
52
|
+
return String(value);
|
|
53
|
+
default:
|
|
54
|
+
return undefined;
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Formats a `ZodError` as a plain, human-readable message that names the
|
|
59
|
+
* offending field *and* the value it received (e.g. ``must match pattern
|
|
60
|
+
* /^[A-Z]{2}$/ at `countryCode`, got `CZE` ``) — closer to the old `ow` errors
|
|
61
|
+
* than zod's default, which omits the received value.
|
|
62
|
+
*/
|
|
63
|
+
function formatZodError(error, root) {
|
|
64
|
+
return error.issues
|
|
65
|
+
.map((issue) => {
|
|
66
|
+
const location = issue.path.length ? ` at \`${formatIssuePath(issue.path)}\`` : '';
|
|
67
|
+
const received = describeReceived(valueAtPath(root, issue.path));
|
|
68
|
+
const got = received === undefined ? '' : `, got \`${received}\``;
|
|
69
|
+
return `${issue.message}${location}${got}`;
|
|
70
|
+
})
|
|
71
|
+
.join('\n');
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* Error thrown when an argument fails validation (e.g. by `Actor.addWebhook()`
|
|
75
|
+
* or the `ProxyConfiguration` constructor).
|
|
76
|
+
*
|
|
77
|
+
* Its `message` is a human-readable sentence naming the offending field and the
|
|
78
|
+
* value it received (see {@link formatZodError}) — not a raw JSON dump. The
|
|
79
|
+
* structured zod {@link https://zod.dev | zod} issues are available on `issues`
|
|
80
|
+
* (and the original `ZodError` on `cause`) for programmatic inspection.
|
|
81
|
+
*/
|
|
82
|
+
export class ArgumentValidationError extends Error {
|
|
83
|
+
/** Structured issues from the underlying schema check. */
|
|
84
|
+
issues;
|
|
85
|
+
constructor(error, value) {
|
|
86
|
+
super(formatZodError(error, value), { cause: error });
|
|
87
|
+
this.name = 'ArgumentValidationError';
|
|
88
|
+
this.issues = error.issues;
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
/**
|
|
92
|
+
* Validates `value` against a zod `schema`, returning the parsed value, or
|
|
93
|
+
* throwing an {@link ArgumentValidationError} if it doesn't match.
|
|
94
|
+
* @internal
|
|
95
|
+
*/
|
|
96
|
+
export function validate(schema, value) {
|
|
97
|
+
const result = schema.safeParse(value);
|
|
98
|
+
if (!result.success) {
|
|
99
|
+
throw new ArgumentValidationError(result.error, value);
|
|
100
|
+
}
|
|
101
|
+
return result.data;
|
|
102
|
+
}
|
|
17
103
|
/**
|
|
18
104
|
* Gets info about system, node version and apify package version.
|
|
19
105
|
* @internal
|
|
@@ -55,7 +141,7 @@ export function checkCrawleeVersion() {
|
|
|
55
141
|
}
|
|
56
142
|
let version;
|
|
57
143
|
try {
|
|
58
|
-
version = readJSONSync(path).version;
|
|
144
|
+
version = JSON.parse(readFileSync(path, 'utf8')).version;
|
|
59
145
|
}
|
|
60
146
|
catch {
|
|
61
147
|
//
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "apify",
|
|
3
|
-
"version": "4.0.0-beta.17",
|
|
3
|
+
"version": "4.0.0-beta.19",
|
|
4
4
|
"description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
|
|
5
5
|
"engines": {
|
|
6
6
|
"node": ">=22.0.0"
|
|
@@ -61,10 +61,7 @@
|
|
|
61
61
|
"@crawlee/core": "^4.0.0-beta.61",
|
|
62
62
|
"@crawlee/types": "^4.0.0-beta.61",
|
|
63
63
|
"@crawlee/utils": "^4.0.0-beta.61",
|
|
64
|
-
"apify-client": "^2.
|
|
65
|
-
"fs-extra": "^11.2.0",
|
|
66
|
-
"got-scraping": "^4.2.1",
|
|
67
|
-
"ow": "^2.0.0",
|
|
64
|
+
"apify-client": "^2.23.4",
|
|
68
65
|
"semver": "^7.5.4",
|
|
69
66
|
"tslib": "^2.6.2",
|
|
70
67
|
"ws": "^8.18.0",
|
|
@@ -77,7 +74,6 @@
|
|
|
77
74
|
"@crawlee/memory-storage": "^4.0.0-beta.61",
|
|
78
75
|
"@playwright/browser-chromium": "^1.60.0",
|
|
79
76
|
"@types/content-type": "^1.1.8",
|
|
80
|
-
"@types/fs-extra": "^11.0.4",
|
|
81
77
|
"@types/node": "^24.0.0",
|
|
82
78
|
"@types/semver": "^7.5.8",
|
|
83
79
|
"@types/tough-cookie": "^4.0.5",
|