apify 4.0.0-beta.18 → 4.0.0-beta.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,12 @@ import type { ProxyConfigurationOptions as CoreProxyConfigurationOptions } from
2
2
  import { ProxyConfiguration as CoreProxyConfiguration } from '@crawlee/core';
3
3
  import type { ProxyInfo as CoreProxyInfo } from '@crawlee/types';
4
4
  import { Configuration } from './configuration.js';
5
+ /** Response of the Apify Proxy status endpoint (`proxy.apify.com/?format=json`). */
6
+ interface ProxyStatus {
7
+ connected: boolean;
8
+ connectionError: string;
9
+ isManInTheMiddle: boolean;
10
+ }
5
11
  type NewUrlOptions = Parameters<CoreProxyConfiguration['newProxyInfo']>[0];
6
12
  export interface ProxyConfigurationOptions extends CoreProxyConfigurationOptions {
7
13
  /**
@@ -198,11 +204,16 @@ export declare class ProxyConfiguration extends CoreProxyConfiguration {
198
204
  /**
199
205
  * Apify Proxy can be down for a second or a minute, but this should not crash processes.
200
206
  */
201
- protected _fetchStatus(): Promise<{
202
- connected: boolean;
203
- connectionError: string;
204
- isManInTheMiddle: boolean;
205
- } | undefined>;
207
+ protected _fetchStatus(): Promise<ProxyStatus | undefined>;
208
+ /**
209
+ * Fetches the Apify Proxy status endpoint once, *through* the proxy, so the
210
+ * response reports on this exact connection (auth + man-in-the-middle).
211
+ *
212
+ * Uses a native `node:http` forward-proxy request — an absolute request URL
213
+ * plus a `Proxy-Authorization` header — so no proxy-agent dependency is
214
+ * needed. The status endpoint (`http://proxy.apify.com`) is plain HTTP.
215
+ */
216
+ protected _requestStatus(statusUrl: string, proxyUrl: string): Promise<ProxyStatus>;
206
217
  /**
207
218
  * Throws an error stating that custom proxies cannot be combined with Apify Proxy.
208
219
  * @internal
@@ -1,5 +1,7 @@
1
+ import { once } from 'node:events';
2
+ import { request as httpRequest } from 'node:http';
3
+ import { json } from 'node:stream/consumers';
1
4
  import { ProxyConfiguration as CoreProxyConfiguration } from '@crawlee/core';
2
- import { fetch, ProxyAgent } from 'undici';
3
5
  import { z } from 'zod';
4
6
  import { APIFY_ENV_VARS, APIFY_PROXY_VALUE_REGEX } from '@apify/consts';
5
7
  import { cryptoRandomObjectId } from '@apify/utilities';
@@ -257,37 +259,55 @@ export class ProxyConfiguration extends CoreProxyConfiguration {
257
259
  */
258
260
  async _fetchStatus() {
259
261
  const { proxyStatusUrl } = this.config;
260
- const url = `${proxyStatusUrl}/?format=json`;
261
- // The status endpoint (`proxy.apify.com`) is requested *through* the proxy
262
- // so it can report on this exact connection (auth + man-in-the-middle).
263
- // `undici`'s `fetch` + `ProxyAgent` come from the same package, so the
264
- // dispatcher is recognized (Node's global `fetch` uses a separate internal
265
- // copy of undici that would reject this dispatcher instance).
262
+ const statusUrl = `${proxyStatusUrl}/?format=json`;
266
263
  const proxyUrl = await this.newUrl();
267
264
  // Without a proxy URL we can't perform the (proxied) status check.
268
265
  if (!proxyUrl)
269
266
  return undefined;
270
- const dispatcher = new ProxyAgent(proxyUrl);
271
- try {
272
- for (let attempt = 1; attempt <= CHECK_ACCESS_MAX_ATTEMPTS; attempt++) {
273
- try {
274
- const response = await fetch(url, {
275
- dispatcher,
276
- signal: AbortSignal.timeout(CHECK_ACCESS_REQUEST_TIMEOUT_MILLIS),
277
- });
278
- if (!response.ok)
279
- continue;
280
- return (await response.json());
281
- }
282
- catch {
283
- // retry connection errors
284
- }
267
+ for (let attempt = 1; attempt <= CHECK_ACCESS_MAX_ATTEMPTS; attempt++) {
268
+ try {
269
+ return await this._requestStatus(statusUrl, proxyUrl);
270
+ }
271
+ catch {
272
+ // retry connection errors
285
273
  }
286
- return undefined;
287
274
  }
288
- finally {
289
- await dispatcher.close();
275
+ return undefined;
276
+ }
277
+ /**
278
+ * Fetches the Apify Proxy status endpoint once, *through* the proxy, so the
279
+ * response reports on this exact connection (auth + man-in-the-middle).
280
+ *
281
+ * Uses a native `node:http` forward-proxy request — an absolute request URL
282
+ * plus a `Proxy-Authorization` header — so no proxy-agent dependency is
283
+ * needed. The status endpoint (`http://proxy.apify.com`) is plain HTTP.
284
+ */
285
+ async _requestStatus(statusUrl, proxyUrl) {
286
+ const target = new URL(statusUrl);
287
+ const proxy = new URL(proxyUrl);
288
+ const headers = { host: target.host };
289
+ if (proxy.username) {
290
+ const credentials = `${decodeURIComponent(proxy.username)}:${decodeURIComponent(proxy.password)}`;
291
+ headers['proxy-authorization'] = `Basic ${Buffer.from(credentials).toString('base64')}`;
292
+ }
293
+ const request = httpRequest({
294
+ host: proxy.hostname,
295
+ port: proxy.port,
296
+ // Absolute-form request URI tells the proxy to forward the request.
297
+ path: target.href,
298
+ headers,
299
+ signal: AbortSignal.timeout(CHECK_ACCESS_REQUEST_TIMEOUT_MILLIS),
300
+ });
301
+ request.end();
302
+ // `once` rejects if the request emits `error` first (connection refused,
303
+ // timeout/abort), so failures propagate to the retry loop in `_fetchStatus`.
304
+ const [response] = (await once(request, 'response'));
305
+ const statusCode = response.statusCode ?? 0;
306
+ if (statusCode < 200 || statusCode >= 300) {
307
+ response.resume(); // drain so the socket can be freed
308
+ throw new Error(`Apify Proxy status check responded with status code ${statusCode}.`);
290
309
  }
310
+ return (await json(response));
291
311
  }
292
312
  /**
293
313
  * Throws an error stating that custom proxies cannot be combined with Apify Proxy.
package/package.json CHANGED
@@ -1,9 +1,9 @@
1
1
  {
2
2
  "name": "apify",
3
- "version": "4.0.0-beta.18",
3
+ "version": "4.0.0-beta.19",
4
4
  "description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
5
5
  "engines": {
6
- "node": ">=22.19.0"
6
+ "node": ">=22.0.0"
7
7
  },
8
8
  "type": "module",
9
9
  "main": "./dist/index.js",
@@ -64,7 +64,6 @@
64
64
  "apify-client": "^2.23.4",
65
65
  "semver": "^7.5.4",
66
66
  "tslib": "^2.6.2",
67
- "undici": "^8.0.0",
68
67
  "ws": "^8.18.0",
69
68
  "zod": "^4.0.0"
70
69
  },