undici 7.15.0 → 7.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/README.md +48 -2
  2. package/docs/docs/api/Agent.md +1 -0
  3. package/docs/docs/api/Client.md +1 -0
  4. package/docs/docs/api/DiagnosticsChannel.md +57 -0
  5. package/docs/docs/api/Dispatcher.md +86 -0
  6. package/docs/docs/api/Errors.md +0 -1
  7. package/docs/docs/api/RoundRobinPool.md +145 -0
  8. package/docs/docs/api/WebSocket.md +21 -0
  9. package/docs/docs/best-practices/crawling.md +58 -0
  10. package/index-fetch.js +2 -2
  11. package/index.js +8 -9
  12. package/lib/api/api-request.js +22 -8
  13. package/lib/api/api-upgrade.js +2 -1
  14. package/lib/api/readable.js +7 -5
  15. package/lib/core/connect.js +4 -1
  16. package/lib/core/diagnostics.js +28 -1
  17. package/lib/core/errors.js +217 -13
  18. package/lib/core/request.js +5 -1
  19. package/lib/core/symbols.js +3 -0
  20. package/lib/core/util.js +61 -41
  21. package/lib/dispatcher/agent.js +19 -7
  22. package/lib/dispatcher/balanced-pool.js +10 -0
  23. package/lib/dispatcher/client-h1.js +18 -23
  24. package/lib/dispatcher/client-h2.js +166 -26
  25. package/lib/dispatcher/client.js +64 -59
  26. package/lib/dispatcher/dispatcher-base.js +20 -16
  27. package/lib/dispatcher/env-http-proxy-agent.js +12 -16
  28. package/lib/dispatcher/fixed-queue.js +15 -39
  29. package/lib/dispatcher/h2c-client.js +7 -78
  30. package/lib/dispatcher/pool-base.js +60 -43
  31. package/lib/dispatcher/pool.js +2 -2
  32. package/lib/dispatcher/proxy-agent.js +27 -11
  33. package/lib/dispatcher/round-robin-pool.js +137 -0
  34. package/lib/encoding/index.js +33 -0
  35. package/lib/global.js +19 -1
  36. package/lib/handler/cache-handler.js +84 -27
  37. package/lib/handler/deduplication-handler.js +216 -0
  38. package/lib/handler/retry-handler.js +0 -2
  39. package/lib/interceptor/cache.js +94 -15
  40. package/lib/interceptor/decompress.js +2 -1
  41. package/lib/interceptor/deduplicate.js +109 -0
  42. package/lib/interceptor/dns.js +55 -13
  43. package/lib/mock/mock-agent.js +4 -4
  44. package/lib/mock/mock-errors.js +10 -0
  45. package/lib/mock/mock-utils.js +13 -12
  46. package/lib/mock/snapshot-agent.js +11 -5
  47. package/lib/mock/snapshot-recorder.js +12 -4
  48. package/lib/mock/snapshot-utils.js +4 -4
  49. package/lib/util/cache.js +29 -1
  50. package/lib/util/date.js +534 -140
  51. package/lib/util/runtime-features.js +124 -0
  52. package/lib/web/cookies/index.js +1 -1
  53. package/lib/web/cookies/parse.js +1 -1
  54. package/lib/web/eventsource/eventsource-stream.js +2 -2
  55. package/lib/web/eventsource/eventsource.js +34 -29
  56. package/lib/web/eventsource/util.js +1 -9
  57. package/lib/web/fetch/body.js +45 -61
  58. package/lib/web/fetch/data-url.js +12 -160
  59. package/lib/web/fetch/formdata-parser.js +204 -127
  60. package/lib/web/fetch/index.js +21 -19
  61. package/lib/web/fetch/request.js +6 -0
  62. package/lib/web/fetch/response.js +4 -7
  63. package/lib/web/fetch/util.js +10 -79
  64. package/lib/web/infra/index.js +229 -0
  65. package/lib/web/subresource-integrity/subresource-integrity.js +6 -5
  66. package/lib/web/webidl/index.js +207 -44
  67. package/lib/web/websocket/connection.js +33 -22
  68. package/lib/web/websocket/events.js +1 -1
  69. package/lib/web/websocket/frame.js +9 -15
  70. package/lib/web/websocket/stream/websocketerror.js +22 -1
  71. package/lib/web/websocket/stream/websocketstream.js +17 -8
  72. package/lib/web/websocket/util.js +2 -1
  73. package/lib/web/websocket/websocket.js +32 -42
  74. package/package.json +9 -7
  75. package/types/agent.d.ts +2 -1
  76. package/types/api.d.ts +2 -2
  77. package/types/balanced-pool.d.ts +2 -1
  78. package/types/cache-interceptor.d.ts +1 -0
  79. package/types/client.d.ts +1 -1
  80. package/types/connector.d.ts +2 -2
  81. package/types/diagnostics-channel.d.ts +2 -2
  82. package/types/dispatcher.d.ts +12 -12
  83. package/types/errors.d.ts +5 -15
  84. package/types/fetch.d.ts +4 -4
  85. package/types/formdata.d.ts +1 -1
  86. package/types/h2c-client.d.ts +1 -1
  87. package/types/index.d.ts +9 -1
  88. package/types/interceptors.d.ts +36 -2
  89. package/types/pool.d.ts +1 -1
  90. package/types/readable.d.ts +2 -2
  91. package/types/round-robin-pool.d.ts +41 -0
  92. package/types/webidl.d.ts +82 -21
  93. package/types/websocket.d.ts +9 -9
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # undici
2
2
 
3
- [![Node CI](https://github.com/nodejs/undici/actions/workflows/nodejs.yml/badge.svg)](https://github.com/nodejs/undici/actions/workflows/nodejs.yml) [![neostandard javascript style](https://img.shields.io/badge/neo-standard-7fffff?style=flat\&labelColor=ff80ff)](https://github.com/neostandard/neostandard) [![npm version](https://badge.fury.io/js/undici.svg)](https://badge.fury.io/js/undici) [![codecov](https://codecov.io/gh/nodejs/undici/branch/main/graph/badge.svg?token=yZL6LtXkOA)](https://codecov.io/gh/nodejs/undici)
3
+ [![Node CI](https://github.com/nodejs/undici/actions/workflows/ci.yml/badge.svg)](https://github.com/nodejs/undici/actions/workflows/ci.yml) [![neostandard javascript style](https://img.shields.io/badge/neo-standard-7fffff?style=flat\&labelColor=ff80ff)](https://github.com/neostandard/neostandard) [![npm version](https://badge.fury.io/js/undici.svg)](https://badge.fury.io/js/undici) [![codecov](https://codecov.io/gh/nodejs/undici/branch/main/graph/badge.svg?token=yZL6LtXkOA)](https://codecov.io/gh/nodejs/undici)
4
4
 
5
5
  An HTTP/1.1 client, written from scratch for Node.js.
6
6
 
@@ -166,6 +166,8 @@ Installing undici as a module allows you to use a newer version than what's bund
166
166
 
167
167
  ## Quick Start
168
168
 
169
+ ### Basic Request
170
+
169
171
  ```js
170
172
  import { request } from 'undici'
171
173
 
@@ -184,6 +186,50 @@ for await (const data of body) { console.log('data', data) }
184
186
  console.log('trailers', trailers)
185
187
  ```
186
188
 
189
+ ### Using Cache Interceptor
190
+
191
+ Undici provides a powerful HTTP caching interceptor that follows HTTP caching best practices. Here's how to use it:
192
+
193
+ ```js
194
+ import { fetch, Agent, interceptors, cacheStores, setGlobalDispatcher } from 'undici';
195
+
196
+ // Create a client with cache interceptor
197
+ const client = new Agent().compose(interceptors.cache({
198
+ // Optional: Configure cache store (defaults to MemoryCacheStore)
199
+ store: new cacheStores.MemoryCacheStore({
200
+ maxSize: 100 * 1024 * 1024, // 100MB
201
+ maxCount: 1000,
202
+ maxEntrySize: 5 * 1024 * 1024 // 5MB
203
+ }),
204
+
205
+ // Optional: Specify which HTTP methods to cache (default: ['GET', 'HEAD'])
206
+ methods: ['GET', 'HEAD']
207
+ }));
208
+
209
+ // Set the global dispatcher to use our caching client
210
+ setGlobalDispatcher(client);
211
+
212
+ // Now all fetch requests will use the cache
213
+ async function getData() {
214
+ const response = await fetch('https://api.example.com/data');
215
+ // The server should set appropriate Cache-Control headers in the response
216
+ // which the cache will respect based on the cache policy
217
+ return response.json();
218
+ }
219
+
220
+ // First request - fetches from origin
221
+ const data1 = await getData();
222
+
223
+ // Second request - served from cache if within max-age
224
+ const data2 = await getData();
225
+ ```
226
+
227
+ #### Key Features:
228
+ - **Automatic Caching**: Respects `Cache-Control` and `Expires` headers
229
+ - **Validation**: Supports `ETag` and `Last-Modified` validation
230
+ - **Storage Options**: In-memory or persistent SQLite storage
231
+ - **Flexible**: Configure cache size, TTL, and more
232
+
187
233
  ## Global Installation
188
234
 
189
235
  Undici provides an `install()` function to add all WHATWG fetch classes to `globalThis`, making them available globally:
@@ -472,7 +518,7 @@ Note that consuming the response body is _mandatory_ for `request`:
472
518
  ```js
473
519
  // Do
474
520
  const { body, headers } = await request(url);
475
- await res.body.dump(); // force consumption of body
521
+ await body.dump(); // force consumption of body
476
522
 
477
523
  // Do not
478
524
  const { headers } = await request(url);
@@ -19,6 +19,7 @@ Returns: `Agent`
19
19
  Extends: [`PoolOptions`](/docs/docs/api/Pool.md#parameter-pooloptions)
20
20
 
21
21
  * **factory** `(origin: URL, opts: Object) => Dispatcher` - Default: `(origin, opts) => new Pool(origin, opts)`
22
+ * **maxOrigins** `number` (optional) - Default: `Infinity` - Limits the total number of origins that can receive requests at a time. A `MaxOriginsReachedError` is thrown when a dispatch is attempted after the limit has been reached. If `Infinity`, no limit is enforced.
22
23
 
23
24
  ## Instance Properties
24
25
 
@@ -30,6 +30,7 @@ Returns: `Client`
30
30
  * **autoSelectFamily**: `boolean` (optional) - Default: depends on local Node version, on Node 18.13.0 and above is `false`. Enables a family autodetection algorithm that loosely implements section 5 of [RFC 8305](https://tools.ietf.org/html/rfc8305#section-5). See [here](https://nodejs.org/api/net.html#socketconnectoptions-connectlistener) for more details. This option is ignored if not supported by the current Node version.
31
31
  * **autoSelectFamilyAttemptTimeout**: `number` - Default: depends on local Node version, on Node 18.13.0 and above is `250`. The amount of time in milliseconds to wait for a connection attempt to finish before trying the next address when using the `autoSelectFamily` option. See [here](https://nodejs.org/api/net.html#socketconnectoptions-connectlistener) for more details.
32
32
  * **allowH2**: `boolean` - Default: `false`. Enables support for H2 if the server has assigned bigger priority to it through ALPN negotiation.
33
+ * **useH2c**: `boolean` - Default: `false`. Enforces h2c for non-https connections.
33
34
  * **maxConcurrentStreams**: `number` - Default: `100`. Dictates the maximum number of concurrent streams for a single H2 session. It can be overridden by a SETTINGS remote frame.
34
35
 
35
36
  > **Notes about HTTP/2**
@@ -254,3 +254,60 @@ diagnosticsChannel.channel('undici:websocket:pong').subscribe(({ payload, websoc
254
254
  console.log(websocket) // the WebSocket instance
255
255
  })
256
256
  ```
257
+
258
+ ## `undici:proxy:connected`
259
+
260
+ This message is published after the `ProxyAgent` establishes a connection to the proxy server.
261
+
262
+ ```js
263
+ import diagnosticsChannel from 'diagnostics_channel'
264
+
265
+ diagnosticsChannel.channel('undici:proxy:connected').subscribe(({ socket, connectParams }) => {
266
+ console.log(socket)
267
+ console.log(connectParams)
268
+ // const { origin, port, path, signal, headers, servername } = connectParams
269
+ })
270
+ ```
271
+
272
+ ## `undici:request:pending-requests`
273
+
274
+ This message is published when the deduplicate interceptor's pending request map changes. This is useful for monitoring and debugging request deduplication behavior.
275
+
276
+ The deduplicate interceptor automatically deduplicates concurrent requests for the same resource. When multiple identical requests are made while one is already in-flight, only one request is sent to the origin server, and all waiting handlers receive the same response.
277
+
278
+ ```js
279
+ import diagnosticsChannel from 'diagnostics_channel'
280
+
281
+ diagnosticsChannel.channel('undici:request:pending-requests').subscribe(({ type, size, key }) => {
282
+ console.log(type) // 'added' or 'removed'
283
+ console.log(size) // current number of pending requests
284
+ console.log(key) // the deduplication key for this request
285
+ })
286
+ ```
287
+
288
+ ### Event Properties
289
+
290
+ - `type` (`string`): Either `'added'` when a new pending request is registered, or `'removed'` when a pending request completes (successfully or with an error).
291
+ - `size` (`number`): The current number of pending requests after the change.
292
+ - `key` (`string`): The deduplication key for the request, composed of the origin, method, path, and request headers.
293
+
294
+ ### Example: Monitoring Request Deduplication
295
+
296
+ ```js
297
+ import diagnosticsChannel from 'diagnostics_channel'
298
+
299
+ const channel = diagnosticsChannel.channel('undici:request:pending-requests')
300
+
301
+ channel.subscribe(({ type, size, key }) => {
302
+ if (type === 'added') {
303
+ console.log(`New pending request: ${key} (${size} total pending)`)
304
+ } else {
305
+ console.log(`Request completed: ${key} (${size} remaining)`)
306
+ }
307
+ })
308
+ ```
309
+
310
+ This can be useful for:
311
+ - Verifying that request deduplication is working as expected
312
+ - Monitoring the number of concurrent in-flight requests
313
+ - Debugging deduplication behavior in production environments
@@ -1043,6 +1043,7 @@ The `dns` interceptor enables you to cache DNS lookups for a given duration, per
1043
1043
  - The function should return a single record from the records array.
1044
1044
  - By default a simplified version of Round Robin is used.
1045
1045
  - The `records` property can be mutated to store the state of the balancing algorithm.
1046
+ - `storage: DNSStorage` - Custom storage for resolved DNS records
1046
1047
 
1047
1048
  > The `Dispatcher#options` also gets extended with the options `dns.affinity`, `dns.dualStack`, `dns.lookup` and `dns.pick` which can be used to configure the interceptor at a request-per-request basis.
1048
1049
 
@@ -1057,6 +1058,14 @@ It represents a map of DNS IP addresses records for a single origin.
1057
1058
  - `4.ips` - (`DNSInterceptorRecord[] | null`) The IPv4 addresses.
1058
1059
  - `6.ips` - (`DNSInterceptorRecord[] | null`) The IPv6 addresses.
1059
1060
 
1061
+ **DNSStorage**
1062
+ It represents a storage object for resolved DNS records.
1063
+ - `size` - (`number`) current size of the storage.
1064
+ - `get` - (`(origin: string) => DNSInterceptorOriginRecords | null`) method to get the records for a given origin.
1065
+ - `set` - (`(origin: string, records: DNSInterceptorOriginRecords | null, options: { ttl: number }) => void`) method to set the records for a given origin.
1066
+ - `delete` - (`(origin: string) => void`) method to delete records for a given origin.
1067
+ - `full` - (`() => boolean`) method to check whether the storage is full. If it returns `true`, the DNS lookup is skipped in this interceptor and new records are not stored.
1068
+
1060
1069
  **Example - Basic DNS Interceptor**
1061
1070
 
1062
1071
  ```js
@@ -1073,6 +1082,45 @@ const response = await client.request({
1073
1082
  })
1074
1083
  ```
1075
1084
 
1085
+ **Example - DNS Interceptor and LRU cache as a storage**
1086
+
1087
+ ```js
1088
+ const { Agent, interceptors } = require("undici");
1089
+ const QuickLRU = require("quick-lru");
1090
+ const { dns } = interceptors;
1091
+
1092
+ const lru = new QuickLRU({ maxSize: 100 });
1093
+
1094
+ const lruAdapter = {
1095
+ get size() {
1096
+ return lru.size;
1097
+ },
1098
+ get(origin) {
1099
+ return lru.get(origin);
1100
+ },
1101
+ set(origin, records, { ttl }) {
1102
+ lru.set(origin, records, { maxAge: ttl });
1103
+ },
1104
+ delete(origin) {
1105
+ lru.delete(origin);
1106
+ },
1107
+ full() {
1108
+ // For LRU cache, we can always store new records,
1109
+ // old records will be evicted automatically
1110
+ return false;
1111
+ }
1112
+ }
1113
+
1114
+ const client = new Agent().compose([
1115
+ dns({ storage: lruAdapter })
1116
+ ])
1117
+
1118
+ const response = await client.request({
1119
+ origin: `http://localhost:3030`,
1120
+ ...requestOpts
1121
+ })
1122
+ ```
1123
+
1076
1124
  ##### `responseError`
1077
1125
 
1078
1126
  The `responseError` interceptor throws an error for responses with status code errors (>= 400).
@@ -1165,6 +1213,44 @@ The `cache` interceptor implements client-side response caching as described in
1165
1213
  - `cacheByDefault` - The default expiration time to cache responses by if they don't have an explicit expiration and cannot have an heuristic expiry computed. If this isn't present, responses neither with an explicit expiration nor heuristically cacheable will not be cached. Default `undefined`.
1166
1214
  - `type` - The [type of cache](https://developer.mozilla.org/en-US/docs/Web/HTTP/Guides/Caching#types_of_caches) for Undici to act as. Can be `shared` or `private`. Default `shared`. `private` implies privately cacheable responses will be cached and potentially shared with other users of your application.
1167
1215
 
1216
+ ##### `Deduplicate Interceptor`
1217
+
1218
+ The `deduplicate` interceptor deduplicates concurrent identical requests. When multiple identical requests are made while one is already in-flight, only one request is sent to the origin server, and all waiting handlers receive the same response. This reduces server load and improves performance.
1219
+
1220
+ **Options**
1221
+
1222
+ - `methods` - The [**safe** HTTP methods](https://www.rfc-editor.org/rfc/rfc9110#section-9.2.1) to deduplicate. Default `['GET']`.
1223
+ - `skipHeaderNames` - Header names that, if present in a request, will cause the request to skip deduplication entirely. Useful for headers like `idempotency-key` where presence indicates unique processing. Header name matching is case-insensitive. Default `[]`.
1224
+ - `excludeHeaderNames` - Header names to exclude from the deduplication key. Requests with different values for these headers will still be deduplicated together. Useful for headers like `x-request-id` that vary per request but shouldn't affect deduplication. Header name matching is case-insensitive. Default `[]`.
1225
+
1226
+ **Usage**
1227
+
1228
+ ```js
1229
+ const { Client, interceptors } = require("undici");
1230
+ const { deduplicate, cache } = interceptors;
1231
+
1232
+ // Deduplicate only
1233
+ const client = new Client("http://example.com").compose(
1234
+ deduplicate()
1235
+ );
1236
+
1237
+ // Deduplicate with caching
1238
+ const clientWithCache = new Client("http://example.com").compose(
1239
+ deduplicate(),
1240
+ cache()
1241
+ );
1242
+ ```
1243
+
1244
+ Requests are considered identical if they have the same:
1245
+ - Origin
1246
+ - HTTP method
1247
+ - Path
1248
+ - Request headers (excluding any headers specified in `excludeHeaderNames`)
1249
+
1250
+ All deduplicated requests receive the complete response including status code, headers, and body.
1251
+
1252
+ For observability, request deduplication events are published to the `undici:request:pending-requests` [diagnostic channel](/docs/docs/api/DiagnosticsChannel.md#undicirequestpending-requests).
1253
+
1168
1254
  ## Instance Events
1169
1255
 
1170
1256
  ### Event: `'connect'`
@@ -14,7 +14,6 @@ import { errors } from 'undici'
14
14
  | `HeadersTimeoutError` | `UND_ERR_HEADERS_TIMEOUT` | socket is destroyed due to headers timeout. |
15
15
  | `HeadersOverflowError` | `UND_ERR_HEADERS_OVERFLOW` | socket is destroyed due to headers' max size being exceeded. |
16
16
  | `BodyTimeoutError` | `UND_ERR_BODY_TIMEOUT` | socket is destroyed due to body timeout. |
17
- | `ResponseStatusCodeError` | `UND_ERR_RESPONSE_STATUS_CODE` | an error is thrown when `throwOnError` is `true` for status codes >= 400. |
18
17
  | `InvalidArgumentError` | `UND_ERR_INVALID_ARG` | passed an invalid argument. |
19
18
  | `InvalidReturnValueError` | `UND_ERR_INVALID_RETURN_VALUE` | returned an invalid value. |
20
19
  | `RequestAbortedError` | `UND_ERR_ABORTED` | the request has been aborted by the user |
@@ -0,0 +1,145 @@
1
+ # Class: RoundRobinPool
2
+
3
+ Extends: `undici.Dispatcher`
4
+
5
+ A pool of [Client](/docs/docs/api/Client.md) instances connected to the same upstream target with round-robin client selection.
6
+
7
+ Unlike [`Pool`](/docs/docs/api/Pool.md), which always selects the first available client, `RoundRobinPool` cycles through clients in a round-robin fashion. This ensures even distribution of requests across all connections, which is particularly useful when the upstream target is behind a load balancer that round-robins TCP connections across multiple backend servers (e.g., Kubernetes Services).
8
+
9
+ Requests are not guaranteed to be dispatched in order of invocation.
10
+
11
+ ## `new RoundRobinPool(url[, options])`
12
+
13
+ Arguments:
14
+
15
+ * **url** `URL | string` - It should only include the **protocol, hostname, and port**.
16
+ * **options** `RoundRobinPoolOptions` (optional)
17
+
18
+ ### Parameter: `RoundRobinPoolOptions`
19
+
20
+ Extends: [`ClientOptions`](/docs/docs/api/Client.md#parameter-clientoptions)
21
+
22
+ * **factory** `(origin: URL, opts: Object) => Dispatcher` - Default: `(origin, opts) => new Client(origin, opts)`
23
+ * **connections** `number | null` (optional) - Default: `null` - The number of `Client` instances to create. When set to `null`, the `RoundRobinPool` instance will create an unlimited number of `Client` instances.
24
+ * **clientTtl** `number | null` (optional) - Default: `null` - The amount of time before a `Client` instance is removed from the `RoundRobinPool` and closed. When set to `null`, `Client` instances will not be removed or closed based on age.
25
+
26
+ ## Use Case
27
+
28
+ `RoundRobinPool` is designed for scenarios where:
29
+
30
+ 1. You connect to a single origin (e.g., `http://my-service.namespace.svc`)
31
+ 2. That origin is backed by a load balancer distributing TCP connections across multiple servers
32
+ 3. You want requests evenly distributed across all backend servers
33
+
34
+ **Example**: In Kubernetes, when using a Service DNS name with multiple Pod replicas, kube-proxy load balances TCP connections. `RoundRobinPool` ensures each connection (and thus each Pod) receives an equal share of requests.
35
+
36
+ ### Important: Backend Distribution Considerations
37
+
38
+ `RoundRobinPool` distributes **HTTP requests** evenly across **TCP connections**. Whether this translates to even backend server distribution depends on the load balancer's behavior:
39
+
40
+ **✓ Works when the load balancer**:
41
+ - Assigns different backends to different TCP connections from the same client
42
+ - Uses algorithms like: round-robin, random, least-connections (without client affinity)
43
+ - Example: Default Kubernetes Services without `sessionAffinity`
44
+
45
+ **✗ Does NOT work when**:
46
+ - Load balancer has client/source IP affinity (all connections from one IP → same backend)
47
+ - Load balancer uses source-IP-hash or sticky sessions
48
+
49
+ **How it works:**
50
+ 1. `RoundRobinPool` creates N TCP connections to the load balancer endpoint
51
+ 2. Load balancer assigns each TCP connection to a backend (per its algorithm)
52
+ 3. `RoundRobinPool` cycles HTTP requests across those N connections
53
+ 4. Result: Requests distributed proportionally to how the LB distributed the connections
54
+
55
+ If the load balancer assigns all connections to the same backend (e.g., due to session affinity), `RoundRobinPool` cannot overcome this. In such cases, consider using [`BalancedPool`](/docs/docs/api/BalancedPool.md) with direct backend addresses (e.g., individual pod IPs) instead of a load-balanced endpoint.
56
+
57
+ ## Instance Properties
58
+
59
+ ### `RoundRobinPool.closed`
60
+
61
+ Implements [Client.closed](/docs/docs/api/Client.md#clientclosed)
62
+
63
+ ### `RoundRobinPool.destroyed`
64
+
65
+ Implements [Client.destroyed](/docs/docs/api/Client.md#clientdestroyed)
66
+
67
+ ### `RoundRobinPool.stats`
68
+
69
+ Returns [`PoolStats`](PoolStats.md) instance for this pool.
70
+
71
+ ## Instance Methods
72
+
73
+ ### `RoundRobinPool.close([callback])`
74
+
75
+ Implements [`Dispatcher.close([callback])`](/docs/docs/api/Dispatcher.md#dispatcherclosecallback-promise).
76
+
77
+ ### `RoundRobinPool.destroy([error, callback])`
78
+
79
+ Implements [`Dispatcher.destroy([error, callback])`](/docs/docs/api/Dispatcher.md#dispatcherdestroyerror-callback-promise).
80
+
81
+ ### `RoundRobinPool.connect(options[, callback])`
82
+
83
+ See [`Dispatcher.connect(options[, callback])`](/docs/docs/api/Dispatcher.md#dispatcherconnectoptions-callback).
84
+
85
+ ### `RoundRobinPool.dispatch(options, handler)`
86
+
87
+ Implements [`Dispatcher.dispatch(options, handler)`](/docs/docs/api/Dispatcher.md#dispatcherdispatchoptions-handler).
88
+
89
+ ### `RoundRobinPool.pipeline(options, handler)`
90
+
91
+ See [`Dispatcher.pipeline(options, handler)`](/docs/docs/api/Dispatcher.md#dispatcherpipelineoptions-handler).
92
+
93
+ ### `RoundRobinPool.request(options[, callback])`
94
+
95
+ See [`Dispatcher.request(options [, callback])`](/docs/docs/api/Dispatcher.md#dispatcherrequestoptions-callback).
96
+
97
+ ### `RoundRobinPool.stream(options, factory[, callback])`
98
+
99
+ See [`Dispatcher.stream(options, factory[, callback])`](/docs/docs/api/Dispatcher.md#dispatcherstreamoptions-factory-callback).
100
+
101
+ ### `RoundRobinPool.upgrade(options[, callback])`
102
+
103
+ See [`Dispatcher.upgrade(options[, callback])`](/docs/docs/api/Dispatcher.md#dispatcherupgradeoptions-callback).
104
+
105
+ ## Instance Events
106
+
107
+ ### Event: `'connect'`
108
+
109
+ See [Dispatcher Event: `'connect'`](/docs/docs/api/Dispatcher.md#event-connect).
110
+
111
+ ### Event: `'disconnect'`
112
+
113
+ See [Dispatcher Event: `'disconnect'`](/docs/docs/api/Dispatcher.md#event-disconnect).
114
+
115
+ ### Event: `'drain'`
116
+
117
+ See [Dispatcher Event: `'drain'`](/docs/docs/api/Dispatcher.md#event-drain).
118
+
119
+ ## Example
120
+
121
+ ```javascript
122
+ import { RoundRobinPool } from 'undici'
123
+
124
+ const pool = new RoundRobinPool('http://my-service.default.svc.cluster.local', {
125
+ connections: 10
126
+ })
127
+
128
+ // Requests will be distributed evenly across all 10 connections
129
+ for (let i = 0; i < 100; i++) {
130
+ const { body } = await pool.request({
131
+ path: '/api/data',
132
+ method: 'GET'
133
+ })
134
+ console.log(await body.json())
135
+ }
136
+
137
+ await pool.close()
138
+ ```
139
+
140
+ ## See Also
141
+
142
+ - [Pool](/docs/docs/api/Pool.md) - Connection pool without round-robin
143
+ - [BalancedPool](/docs/docs/api/BalancedPool.md) - Load balancing across multiple origins
144
+ - [Issue #3648](https://github.com/nodejs/undici/issues/3648) - Original issue describing uneven distribution
145
+
@@ -34,6 +34,27 @@ import { WebSocket } from 'undici'
34
34
  const ws = new WebSocket('wss://echo.websocket.events', ['echo', 'chat'])
35
35
  ```
36
36
 
37
+ ### Example with HTTP/2:
38
+
39
+ > ⚠️ Warning: WebSocket over HTTP/2 is experimental and is likely to change in the future.
40
+
41
+ > 🗒️ Note: WebSocket over HTTP/2 may be enabled by default in a future version,
42
+ > this will happen by enabling HTTP/2 connections as the default behavior of Undici's Agent as well as the global dispatcher.
43
+ > Stay tuned to the changelog for more information.
44
+
45
+ This example will not work in browsers or other platforms that don't allow passing an object.
46
+
47
+ ```mjs
48
+ import { Agent } from 'undici'
49
+
50
+ const agent = new Agent({ allowH2: true })
51
+
52
+ const ws = new WebSocket('wss://echo.websocket.events', {
53
+ dispatcher: agent,
54
+ protocols: ['echo', 'chat']
55
+ })
56
+ ```
57
+
37
58
  # Class: WebSocketStream
38
59
 
39
60
  > ⚠️ Warning: the WebSocketStream API has not been finalized and is likely to change.
@@ -0,0 +1,58 @@
1
+ # Crawling
2
+
3
+ [RFC 9309](https://datatracker.ietf.org/doc/html/rfc9309) defines crawlers as automated clients.
4
+
5
+ Some web servers may reject requests that omit the `User-Agent` header or that use common defaults such as `'curl/7.79.1'`.
6
+
7
+ In **undici**, the default user agent is `'undici'`. Since undici is integrated into Node.js core as the implementation of `fetch()`, requests made via `fetch()` use `'node'` as the default user agent.
8
+
9
+ It is recommended to specify a **custom `User-Agent` header** when implementing crawlers. Providing a descriptive user agent allows servers to correctly identify the client and reduces the likelihood of requests being denied.
10
+
11
+ A user agent string should include sufficient detail to identify the crawler and provide contact information. For example:
12
+
13
+ ```
14
+ AcmeCo Crawler - acme.co - contact@acme.co
15
+ ```
16
+
17
+ When adding contact details, avoid using personal identifiers such as your own name or a private email address—especially in a professional or employment context. Instead, use a role-based or organizational contact (e.g., crawler-team@company.com) to protect individual privacy while still enabling communication.
18
+
19
+ If a crawler behaves unexpectedly—for example, due to misconfiguration or implementation errors—server administrators can use the information in the user agent to contact the operator and coordinate an appropriate resolution.
20
+
21
+ The `User-Agent` header can be set on individual requests or applied globally by configuring a custom dispatcher.
22
+
23
+ **Example: setting a `User-Agent` per request**
24
+
25
+ ```js
26
+ import { fetch } from 'undici'
27
+
28
+ const headers = {
29
+ 'User-Agent': 'AcmeCo Crawler - acme.co - contact@acme.co'
30
+ }
31
+
32
+ const res = await fetch('https://example.com', { headers })
33
+ ```
34
+
35
+ ## Best Practices for Crawlers
36
+
37
+ When developing a crawler, the following practices are recommended in addition to setting a descriptive `User-Agent` header:
38
+
39
+ * **Respect `robots.txt`**
40
+ Follow the directives defined in the target site’s `robots.txt` file, including disallowed paths and optional crawl-delay settings (see [W3C guidelines](https://www.w3.org/wiki/Write_Web_Crawler)).
41
+
42
+ * **Rate limiting**
43
+ Regulate request frequency to avoid imposing excessive load on servers. Introduce delays between requests or limit the number of concurrent requests. The W3C suggests at least one second between requests.
44
+
45
+ * **Error handling**
46
+ Implement retry logic with exponential backoff for transient failures, and stop requests when persistent errors occur (e.g., HTTP 403 or 429).
47
+
48
+ * **Monitoring and logging**
49
+ Track request volume, response codes, and error rates to detect misbehavior and address issues proactively.
50
+
51
+ * **Contact information**
52
+ Always include valid and current contact details in the `User-Agent` string so that administrators can reach the crawler operator if necessary.
53
+
54
+ ## References and Further Reading
55
+
56
+ * [RFC 9309: The Robots Exclusion Protocol](https://datatracker.ietf.org/doc/html/rfc9309)
57
+ * [W3C Wiki: Write Web Crawler](https://www.w3.org/wiki/Write_Web_Crawler)
58
+ * [Ethical Web Crawling (WWW 2010 Conference Paper)](https://archives.iw3c2.org/www2010/proceedings/www/p1101.pdf)
package/index-fetch.js CHANGED
@@ -4,8 +4,8 @@ const { getGlobalDispatcher, setGlobalDispatcher } = require('./lib/global')
4
4
  const EnvHttpProxyAgent = require('./lib/dispatcher/env-http-proxy-agent')
5
5
  const fetchImpl = require('./lib/web/fetch').fetch
6
6
 
7
- module.exports.fetch = function fetch (resource, init = undefined) {
8
- return fetchImpl(resource, init).catch((err) => {
7
+ module.exports.fetch = function fetch (init, options = undefined) {
8
+ return fetchImpl(init, options).catch(err => {
9
9
  if (err && typeof err === 'object') {
10
10
  Error.captureStackTrace(err)
11
11
  }
package/index.js CHANGED
@@ -4,6 +4,7 @@ const Client = require('./lib/dispatcher/client')
4
4
  const Dispatcher = require('./lib/dispatcher/dispatcher')
5
5
  const Pool = require('./lib/dispatcher/pool')
6
6
  const BalancedPool = require('./lib/dispatcher/balanced-pool')
7
+ const RoundRobinPool = require('./lib/dispatcher/round-robin-pool')
7
8
  const Agent = require('./lib/dispatcher/agent')
8
9
  const ProxyAgent = require('./lib/dispatcher/proxy-agent')
9
10
  const EnvHttpProxyAgent = require('./lib/dispatcher/env-http-proxy-agent')
@@ -31,6 +32,7 @@ module.exports.Dispatcher = Dispatcher
31
32
  module.exports.Client = Client
32
33
  module.exports.Pool = Pool
33
34
  module.exports.BalancedPool = BalancedPool
35
+ module.exports.RoundRobinPool = RoundRobinPool
34
36
  module.exports.Agent = Agent
35
37
  module.exports.ProxyAgent = ProxyAgent
36
38
  module.exports.EnvHttpProxyAgent = EnvHttpProxyAgent
@@ -47,7 +49,8 @@ module.exports.interceptors = {
47
49
  dump: require('./lib/interceptor/dump'),
48
50
  dns: require('./lib/interceptor/dns'),
49
51
  cache: require('./lib/interceptor/cache'),
50
- decompress: require('./lib/interceptor/decompress')
52
+ decompress: require('./lib/interceptor/decompress'),
53
+ deduplicate: require('./lib/interceptor/deduplicate')
51
54
  }
52
55
 
53
56
  module.exports.cacheStores = {
@@ -117,16 +120,14 @@ module.exports.setGlobalDispatcher = setGlobalDispatcher
117
120
  module.exports.getGlobalDispatcher = getGlobalDispatcher
118
121
 
119
122
  const fetchImpl = require('./lib/web/fetch').fetch
120
- module.exports.fetch = async function fetch (init, options = undefined) {
121
- try {
122
- return await fetchImpl(init, options)
123
- } catch (err) {
123
+
124
+ module.exports.fetch = function fetch (init, options = undefined) {
125
+ return fetchImpl(init, options).catch(err => {
124
126
  if (err && typeof err === 'object') {
125
127
  Error.captureStackTrace(err)
126
128
  }
127
-
128
129
  throw err
129
- }
130
+ })
130
131
  }
131
132
  module.exports.Headers = require('./lib/web/fetch/headers').Headers
132
133
  module.exports.Response = require('./lib/web/fetch/response').Response
@@ -141,8 +142,6 @@ module.exports.getGlobalOrigin = getGlobalOrigin
141
142
  const { CacheStorage } = require('./lib/web/cache/cachestorage')
142
143
  const { kConstruct } = require('./lib/core/symbols')
143
144
 
144
- // Cache & CacheStorage are tightly coupled with fetch. Even if it may run
145
- // in an older version of Node, it doesn't have any use without fetch.
146
145
  module.exports.caches = new CacheStorage(kConstruct)
147
146
 
148
147
  const { deleteCookie, getCookies, getSetCookies, setCookie, parseCookie } = require('./lib/web/cookies')
@@ -118,14 +118,28 @@ class RequestHandler extends AsyncResource {
118
118
  this.callback = null
119
119
  this.res = res
120
120
  if (callback !== null) {
121
- this.runInAsyncScope(callback, null, null, {
122
- statusCode,
123
- headers,
124
- trailers: this.trailers,
125
- opaque,
126
- body: res,
127
- context
128
- })
121
+ try {
122
+ this.runInAsyncScope(callback, null, null, {
123
+ statusCode,
124
+ headers,
125
+ trailers: this.trailers,
126
+ opaque,
127
+ body: res,
128
+ context
129
+ })
130
+ } catch (err) {
131
+ // If the callback throws synchronously, release and destroy the response here, then re-throw
132
+ // Remove the reference to res so that it can be garbage collected
133
+ this.res = null
134
+
135
+ // Destroy the response stream
136
+ util.destroy(res.on('error', noop), err)
137
+
138
+ // Use queueMicrotask to re-throw the error so it reaches uncaughtException
139
+ queueMicrotask(() => {
140
+ throw err
141
+ })
142
+ }
129
143
  }
130
144
  }
131
145
 
@@ -4,6 +4,7 @@ const { InvalidArgumentError, SocketError } = require('../core/errors')
4
4
  const { AsyncResource } = require('node:async_hooks')
5
5
  const assert = require('node:assert')
6
6
  const util = require('../core/util')
7
+ const { kHTTP2Stream } = require('../core/symbols')
7
8
  const { addSignal, removeSignal } = require('./abort-signal')
8
9
 
9
10
  class UpgradeHandler extends AsyncResource {
@@ -50,7 +51,7 @@ class UpgradeHandler extends AsyncResource {
50
51
  }
51
52
 
52
53
  onUpgrade (statusCode, rawHeaders, socket) {
53
- assert(statusCode === 101)
54
+ assert(socket[kHTTP2Stream] === true ? statusCode === 200 : statusCode === 101)
54
55
 
55
56
  const { callback, opaque, context } = this
56
57
 
@@ -262,24 +262,26 @@ class BodyReadable extends Readable {
262
262
  * @param {AbortSignal} [opts.signal] An AbortSignal to cancel the dump.
263
263
  * @returns {Promise<null>}
264
264
  */
265
- async dump (opts) {
265
+ dump (opts) {
266
266
  const signal = opts?.signal
267
267
 
268
268
  if (signal != null && (typeof signal !== 'object' || !('aborted' in signal))) {
269
- throw new InvalidArgumentError('signal must be an AbortSignal')
269
+ return Promise.reject(new InvalidArgumentError('signal must be an AbortSignal'))
270
270
  }
271
271
 
272
272
  const limit = opts?.limit && Number.isFinite(opts.limit)
273
273
  ? opts.limit
274
274
  : 128 * 1024
275
275
 
276
- signal?.throwIfAborted()
276
+ if (signal?.aborted) {
277
+ return Promise.reject(signal.reason ?? new AbortError())
278
+ }
277
279
 
278
280
  if (this._readableState.closeEmitted) {
279
- return null
281
+ return Promise.resolve(null)
280
282
  }
281
283
 
282
- return await new Promise((resolve, reject) => {
284
+ return new Promise((resolve, reject) => {
283
285
  if (
284
286
  (this[kContentLength] && (this[kContentLength] > limit)) ||
285
287
  this[kBytesRead] > limit