undici 7.16.0 → 7.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/README.md +47 -1
  2. package/docs/docs/api/Client.md +1 -0
  3. package/docs/docs/api/DiagnosticsChannel.md +57 -0
  4. package/docs/docs/api/Dispatcher.md +86 -0
  5. package/docs/docs/api/RoundRobinPool.md +145 -0
  6. package/docs/docs/api/WebSocket.md +21 -0
  7. package/docs/docs/best-practices/crawling.md +58 -0
  8. package/index.js +4 -1
  9. package/lib/api/api-upgrade.js +2 -1
  10. package/lib/core/connect.js +4 -1
  11. package/lib/core/diagnostics.js +28 -1
  12. package/lib/core/symbols.js +3 -0
  13. package/lib/core/util.js +29 -31
  14. package/lib/dispatcher/balanced-pool.js +10 -0
  15. package/lib/dispatcher/client-h1.js +0 -16
  16. package/lib/dispatcher/client-h2.js +153 -23
  17. package/lib/dispatcher/client.js +7 -2
  18. package/lib/dispatcher/dispatcher-base.js +11 -12
  19. package/lib/dispatcher/h2c-client.js +7 -78
  20. package/lib/dispatcher/pool-base.js +1 -1
  21. package/lib/dispatcher/proxy-agent.js +13 -2
  22. package/lib/dispatcher/round-robin-pool.js +137 -0
  23. package/lib/encoding/index.js +33 -0
  24. package/lib/handler/cache-handler.js +84 -27
  25. package/lib/handler/deduplication-handler.js +216 -0
  26. package/lib/handler/retry-handler.js +0 -2
  27. package/lib/interceptor/cache.js +35 -17
  28. package/lib/interceptor/decompress.js +2 -1
  29. package/lib/interceptor/deduplicate.js +109 -0
  30. package/lib/interceptor/dns.js +55 -13
  31. package/lib/mock/mock-utils.js +1 -2
  32. package/lib/mock/snapshot-agent.js +11 -5
  33. package/lib/mock/snapshot-recorder.js +12 -4
  34. package/lib/mock/snapshot-utils.js +4 -4
  35. package/lib/util/cache.js +29 -1
  36. package/lib/util/runtime-features.js +124 -0
  37. package/lib/web/cookies/parse.js +1 -1
  38. package/lib/web/fetch/body.js +29 -39
  39. package/lib/web/fetch/data-url.js +12 -160
  40. package/lib/web/fetch/formdata-parser.js +204 -127
  41. package/lib/web/fetch/index.js +9 -6
  42. package/lib/web/fetch/request.js +6 -0
  43. package/lib/web/fetch/response.js +2 -3
  44. package/lib/web/fetch/util.js +2 -65
  45. package/lib/web/infra/index.js +229 -0
  46. package/lib/web/subresource-integrity/subresource-integrity.js +6 -5
  47. package/lib/web/webidl/index.js +4 -2
  48. package/lib/web/websocket/connection.js +31 -21
  49. package/lib/web/websocket/frame.js +9 -15
  50. package/lib/web/websocket/stream/websocketstream.js +1 -1
  51. package/lib/web/websocket/util.js +2 -1
  52. package/package.json +5 -4
  53. package/types/agent.d.ts +1 -1
  54. package/types/api.d.ts +2 -2
  55. package/types/balanced-pool.d.ts +2 -1
  56. package/types/cache-interceptor.d.ts +1 -0
  57. package/types/client.d.ts +1 -1
  58. package/types/connector.d.ts +2 -2
  59. package/types/diagnostics-channel.d.ts +2 -2
  60. package/types/dispatcher.d.ts +12 -12
  61. package/types/fetch.d.ts +4 -4
  62. package/types/formdata.d.ts +1 -1
  63. package/types/h2c-client.d.ts +1 -1
  64. package/types/index.d.ts +9 -1
  65. package/types/interceptors.d.ts +36 -2
  66. package/types/pool.d.ts +1 -1
  67. package/types/readable.d.ts +2 -2
  68. package/types/round-robin-pool.d.ts +41 -0
  69. package/types/websocket.d.ts +9 -9
package/README.md CHANGED
@@ -166,6 +166,8 @@ Installing undici as a module allows you to use a newer version than what's bund
166
166
 
167
167
  ## Quick Start
168
168
 
169
+ ### Basic Request
170
+
169
171
  ```js
170
172
  import { request } from 'undici'
171
173
 
@@ -184,6 +186,50 @@ for await (const data of body) { console.log('data', data) }
184
186
  console.log('trailers', trailers)
185
187
  ```
186
188
 
189
+ ### Using Cache Interceptor
190
+
191
+ Undici provides a powerful HTTP caching interceptor that follows HTTP caching best practices. Here's how to use it:
192
+
193
+ ```js
194
+ import { fetch, Agent, interceptors, cacheStores, setGlobalDispatcher } from 'undici';
195
+
196
+ // Create a client with cache interceptor
197
+ const client = new Agent().compose(interceptors.cache({
198
+ // Optional: Configure cache store (defaults to MemoryCacheStore)
199
+ store: new cacheStores.MemoryCacheStore({
200
+ maxSize: 100 * 1024 * 1024, // 100MB
201
+ maxCount: 1000,
202
+ maxEntrySize: 5 * 1024 * 1024 // 5MB
203
+ }),
204
+
205
+ // Optional: Specify which HTTP methods to cache (default: ['GET', 'HEAD'])
206
+ methods: ['GET', 'HEAD']
207
+ }));
208
+
209
+ // Set the global dispatcher to use our caching client
210
+ setGlobalDispatcher(client);
211
+
212
+ // Now all fetch requests will use the cache
213
+ async function getData() {
214
+ const response = await fetch('https://api.example.com/data');
215
+ // The server should set appropriate Cache-Control headers in the response
216
+ // which the cache will respect based on the cache policy
217
+ return response.json();
218
+ }
219
+
220
+ // First request - fetches from origin
221
+ const data1 = await getData();
222
+
223
+ // Second request - served from cache if within max-age
224
+ const data2 = await getData();
225
+ ```
226
+
227
+ #### Key Features:
228
+ - **Automatic Caching**: Respects `Cache-Control` and `Expires` headers
229
+ - **Validation**: Supports `ETag` and `Last-Modified` validation
230
+ - **Storage Options**: In-memory or persistent SQLite storage
231
+ - **Flexible**: Configure cache size, TTL, and more
232
+
187
233
  ## Global Installation
188
234
 
189
235
  Undici provides an `install()` function to add all WHATWG fetch classes to `globalThis`, making them available globally:
@@ -472,7 +518,7 @@ Note that consuming the response body is _mandatory_ for `request`:
472
518
  ```js
473
519
  // Do
474
520
  const { body, headers } = await request(url);
475
- await res.body.dump(); // force consumption of body
521
+ await body.dump(); // force consumption of body
476
522
 
477
523
  // Do not
478
524
  const { headers } = await request(url);
@@ -30,6 +30,7 @@ Returns: `Client`
30
30
  * **autoSelectFamily**: `boolean` (optional) - Default: depends on local Node version, on Node 18.13.0 and above is `false`. Enables a family autodetection algorithm that loosely implements section 5 of [RFC 8305](https://tools.ietf.org/html/rfc8305#section-5). See [here](https://nodejs.org/api/net.html#socketconnectoptions-connectlistener) for more details. This option is ignored if not supported by the current Node version.
31
31
  * **autoSelectFamilyAttemptTimeout**: `number` - Default: depends on local Node version, on Node 18.13.0 and above is `250`. The amount of time in milliseconds to wait for a connection attempt to finish before trying the next address when using the `autoSelectFamily` option. See [here](https://nodejs.org/api/net.html#socketconnectoptions-connectlistener) for more details.
32
32
  * **allowH2**: `boolean` - Default: `false`. Enables support for H2 if the server has assigned bigger priority to it through ALPN negotiation.
33
+ * **useH2c**: `boolean` - Default: `false`. Enforces h2c for non-https connections.
33
34
  * **maxConcurrentStreams**: `number` - Default: `100`. Dictates the maximum number of concurrent streams for a single H2 session. It can be overridden by a SETTINGS remote frame.
34
35
 
35
36
  > **Notes about HTTP/2**
@@ -254,3 +254,60 @@ diagnosticsChannel.channel('undici:websocket:pong').subscribe(({ payload, websoc
254
254
  console.log(websocket) // the WebSocket instance
255
255
  })
256
256
  ```
257
+
258
+ ## `undici:proxy:connected`
259
+
260
+ This message is published after the `ProxyAgent` establishes a connection to the proxy server.
261
+
262
+ ```js
263
+ import diagnosticsChannel from 'diagnostics_channel'
264
+
265
+ diagnosticsChannel.channel('undici:proxy:connected').subscribe(({ socket, connectParams }) => {
266
+ console.log(socket)
267
+ console.log(connectParams)
268
+ // const { origin, port, path, signal, headers, servername } = connectParams
269
+ })
270
+ ```
271
+
272
+ ## `undici:request:pending-requests`
273
+
274
+ This message is published when the deduplicate interceptor's pending request map changes. This is useful for monitoring and debugging request deduplication behavior.
275
+
276
+ The deduplicate interceptor automatically deduplicates concurrent requests for the same resource. When multiple identical requests are made while one is already in-flight, only one request is sent to the origin server, and all waiting handlers receive the same response.
277
+
278
+ ```js
279
+ import diagnosticsChannel from 'diagnostics_channel'
280
+
281
+ diagnosticsChannel.channel('undici:request:pending-requests').subscribe(({ type, size, key }) => {
282
+ console.log(type) // 'added' or 'removed'
283
+ console.log(size) // current number of pending requests
284
+ console.log(key) // the deduplication key for this request
285
+ })
286
+ ```
287
+
288
+ ### Event Properties
289
+
290
+ - `type` (`string`): Either `'added'` when a new pending request is registered, or `'removed'` when a pending request completes (successfully or with an error).
291
+ - `size` (`number`): The current number of pending requests after the change.
292
+ - `key` (`string`): The deduplication key for the request, composed of the origin, method, path, and request headers.
293
+
294
+ ### Example: Monitoring Request Deduplication
295
+
296
+ ```js
297
+ import diagnosticsChannel from 'diagnostics_channel'
298
+
299
+ const channel = diagnosticsChannel.channel('undici:request:pending-requests')
300
+
301
+ channel.subscribe(({ type, size, key }) => {
302
+ if (type === 'added') {
303
+ console.log(`New pending request: ${key} (${size} total pending)`)
304
+ } else {
305
+ console.log(`Request completed: ${key} (${size} remaining)`)
306
+ }
307
+ })
308
+ ```
309
+
310
+ This can be useful for:
311
+ - Verifying that request deduplication is working as expected
312
+ - Monitoring the number of concurrent in-flight requests
313
+ - Debugging deduplication behavior in production environments
@@ -1043,6 +1043,7 @@ The `dns` interceptor enables you to cache DNS lookups for a given duration, per
1043
1043
  - The function should return a single record from the records array.
1044
1044
  - By default a simplified version of Round Robin is used.
1045
1045
  - The `records` property can be mutated to store the state of the balancing algorithm.
1046
+ - `storage: DNSStorage` - Custom storage for resolved DNS records
1046
1047
 
1047
1048
  > The `Dispatcher#options` also gets extended with the options `dns.affinity`, `dns.dualStack`, `dns.lookup` and `dns.pick` which can be used to configure the interceptor at a request-per-request basis.
1048
1049
 
@@ -1057,6 +1058,14 @@ It represents a map of DNS IP addresses records for a single origin.
1057
1058
  - `4.ips` - (`DNSInterceptorRecord[] | null`) The IPv4 addresses.
1058
1059
  - `6.ips` - (`DNSInterceptorRecord[] | null`) The IPv6 addresses.
1059
1060
 
1061
+ **DNSStorage**
1062
+ It represents a storage object for resolved DNS records.
1063
+ - `size` - (`number`) current size of the storage.
1064
+ - `get` - (`(origin: string) => DNSInterceptorOriginRecords | null`) method to get the records for a given origin.
1065
+ - `set` - (`(origin: string, records: DNSInterceptorOriginRecords | null, options: { ttl: number }) => void`) method to set the records for a given origin.
1066
+ - `delete` - (`(origin: string) => void`) method to delete records for a given origin.
1067
+ - `full` - (`() => boolean`) method to check if the storage is full. If it returns `true`, the DNS lookup is skipped in this interceptor and new records are not stored.
1068
+
1060
1069
  **Example - Basic DNS Interceptor**
1061
1070
 
1062
1071
  ```js
@@ -1073,6 +1082,45 @@ const response = await client.request({
1073
1082
  })
1074
1083
  ```
1075
1084
 
1085
+ **Example - DNS Interceptor and LRU cache as a storage**
1086
+
1087
+ ```js
1088
+ const { Agent, interceptors } = require("undici");
1089
+ const QuickLRU = require("quick-lru");
1090
+ const { dns } = interceptors;
1091
+
1092
+ const lru = new QuickLRU({ maxSize: 100 });
1093
+
1094
+ const lruAdapter = {
1095
+ get size() {
1096
+ return lru.size;
1097
+ },
1098
+ get(origin) {
1099
+ return lru.get(origin);
1100
+ },
1101
+ set(origin, records, { ttl }) {
1102
+ lru.set(origin, records, { maxAge: ttl });
1103
+ },
1104
+ delete(origin) {
1105
+ lru.delete(origin);
1106
+ },
1107
+ full() {
1108
+ // For LRU cache, we can always store new records,
1109
+ // old records will be evicted automatically
1110
+ return false;
1111
+ }
1112
+ }
1113
+
1114
+ const client = new Agent().compose([
1115
+ dns({ storage: lruAdapter })
1116
+ ])
1117
+
1118
+ const response = await client.request({
1119
+ origin: `http://localhost:3030`,
1120
+ ...requestOpts
1121
+ })
1122
+ ```
1123
+
1076
1124
  ##### `responseError`
1077
1125
 
1078
1126
  The `responseError` interceptor throws an error for responses with status code errors (>= 400).
@@ -1165,6 +1213,44 @@ The `cache` interceptor implements client-side response caching as described in
1165
1213
  - `cacheByDefault` - The default expiration time to cache responses by if they don't have an explicit expiration and cannot have a heuristic expiry computed. If this isn't present, responses that have no explicit expiration and are not heuristically cacheable will not be cached. Default `undefined`.
1166
1214
  - `type` - The [type of cache](https://developer.mozilla.org/en-US/docs/Web/HTTP/Guides/Caching#types_of_caches) for Undici to act as. Can be `shared` or `private`. Default `shared`. `private` implies privately cacheable responses will be cached and potentially shared with other users of your application.
1167
1215
 
1216
+ ##### `Deduplicate Interceptor`
1217
+
1218
+ The `deduplicate` interceptor deduplicates concurrent identical requests. When multiple identical requests are made while one is already in-flight, only one request is sent to the origin server, and all waiting handlers receive the same response. This reduces server load and improves performance.
1219
+
1220
+ **Options**
1221
+
1222
+ - `methods` - The [**safe** HTTP methods](https://www.rfc-editor.org/rfc/rfc9110#section-9.2.1) to deduplicate. Default `['GET']`.
1223
+ - `skipHeaderNames` - Header names that, if present in a request, will cause the request to skip deduplication entirely. Useful for headers like `idempotency-key` where presence indicates unique processing. Header name matching is case-insensitive. Default `[]`.
1224
+ - `excludeHeaderNames` - Header names to exclude from the deduplication key. Requests with different values for these headers will still be deduplicated together. Useful for headers like `x-request-id` that vary per request but shouldn't affect deduplication. Header name matching is case-insensitive. Default `[]`.
1225
+
1226
+ **Usage**
1227
+
1228
+ ```js
1229
+ const { Client, interceptors } = require("undici");
1230
+ const { deduplicate, cache } = interceptors;
1231
+
1232
+ // Deduplicate only
1233
+ const client = new Client("http://example.com").compose(
1234
+ deduplicate()
1235
+ );
1236
+
1237
+ // Deduplicate with caching
1238
+ const clientWithCache = new Client("http://example.com").compose(
1239
+ deduplicate(),
1240
+ cache()
1241
+ );
1242
+ ```
1243
+
1244
+ Requests are considered identical if they have the same:
1245
+ - Origin
1246
+ - HTTP method
1247
+ - Path
1248
+ - Request headers (excluding any headers specified in `excludeHeaderNames`)
1249
+
1250
+ All deduplicated requests receive the complete response including status code, headers, and body.
1251
+
1252
+ For observability, request deduplication events are published to the `undici:request:pending-requests` [diagnostic channel](/docs/docs/api/DiagnosticsChannel.md#undicirequestpending-requests).
1253
+
1168
1254
  ## Instance Events
1169
1255
 
1170
1256
  ### Event: `'connect'`
@@ -0,0 +1,145 @@
1
+ # Class: RoundRobinPool
2
+
3
+ Extends: `undici.Dispatcher`
4
+
5
+ A pool of [Client](/docs/docs/api/Client.md) instances connected to the same upstream target with round-robin client selection.
6
+
7
+ Unlike [`Pool`](/docs/docs/api/Pool.md), which always selects the first available client, `RoundRobinPool` cycles through clients in a round-robin fashion. This ensures even distribution of requests across all connections, which is particularly useful when the upstream target is behind a load balancer that round-robins TCP connections across multiple backend servers (e.g., Kubernetes Services).
8
+
9
+ Requests are not guaranteed to be dispatched in order of invocation.
10
+
11
+ ## `new RoundRobinPool(url[, options])`
12
+
13
+ Arguments:
14
+
15
+ * **url** `URL | string` - It should only include the **protocol, hostname, and port**.
16
+ * **options** `RoundRobinPoolOptions` (optional)
17
+
18
+ ### Parameter: `RoundRobinPoolOptions`
19
+
20
+ Extends: [`ClientOptions`](/docs/docs/api/Client.md#parameter-clientoptions)
21
+
22
+ * **factory** `(origin: URL, opts: Object) => Dispatcher` - Default: `(origin, opts) => new Client(origin, opts)`
23
+ * **connections** `number | null` (optional) - Default: `null` - The number of `Client` instances to create. When set to `null`, the `RoundRobinPool` instance will create an unlimited number of `Client` instances.
24
+ * **clientTtl** `number | null` (optional) - Default: `null` - The amount of time before a `Client` instance is removed from the `RoundRobinPool` and closed. When set to `null`, `Client` instances will not be removed or closed based on age.
25
+
26
+ ## Use Case
27
+
28
+ `RoundRobinPool` is designed for scenarios where:
29
+
30
+ 1. You connect to a single origin (e.g., `http://my-service.namespace.svc`)
31
+ 2. That origin is backed by a load balancer distributing TCP connections across multiple servers
32
+ 3. You want requests evenly distributed across all backend servers
33
+
34
+ **Example**: In Kubernetes, when using a Service DNS name with multiple Pod replicas, kube-proxy load balances TCP connections. `RoundRobinPool` ensures each connection (and thus each Pod) receives an equal share of requests.
35
+
36
+ ### Important: Backend Distribution Considerations
37
+
38
+ `RoundRobinPool` distributes **HTTP requests** evenly across **TCP connections**. Whether this translates to even backend server distribution depends on the load balancer's behavior:
39
+
40
+ **✓ Works when the load balancer**:
41
+ - Assigns different backends to different TCP connections from the same client
42
+ - Uses algorithms like: round-robin, random, least-connections (without client affinity)
43
+ - Example: Default Kubernetes Services without `sessionAffinity`
44
+
45
+ **✗ Does NOT work when**:
46
+ - Load balancer has client/source IP affinity (all connections from one IP → same backend)
47
+ - Load balancer uses source-IP-hash or sticky sessions
48
+
49
+ **How it works:**
50
+ 1. `RoundRobinPool` creates N TCP connections to the load balancer endpoint
51
+ 2. Load balancer assigns each TCP connection to a backend (per its algorithm)
52
+ 3. `RoundRobinPool` cycles HTTP requests across those N connections
53
+ 4. Result: Requests distributed proportionally to how the LB distributed the connections
54
+
55
+ If the load balancer assigns all connections to the same backend (e.g., due to session affinity), `RoundRobinPool` cannot overcome this. In such cases, consider using [`BalancedPool`](/docs/docs/api/BalancedPool.md) with direct backend addresses (e.g., individual pod IPs) instead of a load-balanced endpoint.
56
+
57
+ ## Instance Properties
58
+
59
+ ### `RoundRobinPool.closed`
60
+
61
+ Implements [Client.closed](/docs/docs/api/Client.md#clientclosed)
62
+
63
+ ### `RoundRobinPool.destroyed`
64
+
65
+ Implements [Client.destroyed](/docs/docs/api/Client.md#clientdestroyed)
66
+
67
+ ### `RoundRobinPool.stats`
68
+
69
+ Returns [`PoolStats`](PoolStats.md) instance for this pool.
70
+
71
+ ## Instance Methods
72
+
73
+ ### `RoundRobinPool.close([callback])`
74
+
75
+ Implements [`Dispatcher.close([callback])`](/docs/docs/api/Dispatcher.md#dispatcherclosecallback-promise).
76
+
77
+ ### `RoundRobinPool.destroy([error, callback])`
78
+
79
+ Implements [`Dispatcher.destroy([error, callback])`](/docs/docs/api/Dispatcher.md#dispatcherdestroyerror-callback-promise).
80
+
81
+ ### `RoundRobinPool.connect(options[, callback])`
82
+
83
+ See [`Dispatcher.connect(options[, callback])`](/docs/docs/api/Dispatcher.md#dispatcherconnectoptions-callback).
84
+
85
+ ### `RoundRobinPool.dispatch(options, handler)`
86
+
87
+ Implements [`Dispatcher.dispatch(options, handler)`](/docs/docs/api/Dispatcher.md#dispatcherdispatchoptions-handler).
88
+
89
+ ### `RoundRobinPool.pipeline(options, handler)`
90
+
91
+ See [`Dispatcher.pipeline(options, handler)`](/docs/docs/api/Dispatcher.md#dispatcherpipelineoptions-handler).
92
+
93
+ ### `RoundRobinPool.request(options[, callback])`
94
+
95
+ See [`Dispatcher.request(options [, callback])`](/docs/docs/api/Dispatcher.md#dispatcherrequestoptions-callback).
96
+
97
+ ### `RoundRobinPool.stream(options, factory[, callback])`
98
+
99
+ See [`Dispatcher.stream(options, factory[, callback])`](/docs/docs/api/Dispatcher.md#dispatcherstreamoptions-factory-callback).
100
+
101
+ ### `RoundRobinPool.upgrade(options[, callback])`
102
+
103
+ See [`Dispatcher.upgrade(options[, callback])`](/docs/docs/api/Dispatcher.md#dispatcherupgradeoptions-callback).
104
+
105
+ ## Instance Events
106
+
107
+ ### Event: `'connect'`
108
+
109
+ See [Dispatcher Event: `'connect'`](/docs/docs/api/Dispatcher.md#event-connect).
110
+
111
+ ### Event: `'disconnect'`
112
+
113
+ See [Dispatcher Event: `'disconnect'`](/docs/docs/api/Dispatcher.md#event-disconnect).
114
+
115
+ ### Event: `'drain'`
116
+
117
+ See [Dispatcher Event: `'drain'`](/docs/docs/api/Dispatcher.md#event-drain).
118
+
119
+ ## Example
120
+
121
+ ```javascript
122
+ import { RoundRobinPool } from 'undici'
123
+
124
+ const pool = new RoundRobinPool('http://my-service.default.svc.cluster.local', {
125
+ connections: 10
126
+ })
127
+
128
+ // Requests will be distributed evenly across all 10 connections
129
+ for (let i = 0; i < 100; i++) {
130
+ const { body } = await pool.request({
131
+ path: '/api/data',
132
+ method: 'GET'
133
+ })
134
+ console.log(await body.json())
135
+ }
136
+
137
+ await pool.close()
138
+ ```
139
+
140
+ ## See Also
141
+
142
+ - [Pool](/docs/docs/api/Pool.md) - Connection pool without round-robin
143
+ - [BalancedPool](/docs/docs/api/BalancedPool.md) - Load balancing across multiple origins
144
+ - [Issue #3648](https://github.com/nodejs/undici/issues/3648) - Original issue describing uneven distribution
145
+
@@ -34,6 +34,27 @@ import { WebSocket } from 'undici'
34
34
  const ws = new WebSocket('wss://echo.websocket.events', ['echo', 'chat'])
35
35
  ```
36
36
 
37
+ ### Example with HTTP/2:
38
+
39
+ > ⚠️ Warning: WebSocket over HTTP/2 is experimental, it is likely to change in the future.
40
+
41
+ > 🗒️ Note: WebSocket over HTTP/2 may be enabled by default in a future version,
42
+ > this will happen by enabling HTTP/2 connections as the default behavior of Undici's Agent as well as the global dispatcher.
43
+ > Stay tuned to the changelog for more information.
44
+
45
+ This example will not work in browsers or other platforms whose `WebSocket` constructor does not accept an options object as its second argument.
46
+
47
+ ```mjs
48
+ import { Agent, WebSocket } from 'undici'
49
+
50
+ const agent = new Agent({ allowH2: true })
51
+
52
+ const ws = new WebSocket('wss://echo.websocket.events', {
53
+ dispatcher: agent,
54
+ protocols: ['echo', 'chat']
55
+ })
56
+ ```
57
+
37
58
  # Class: WebSocketStream
38
59
 
39
60
  > ⚠️ Warning: the WebSocketStream API has not been finalized and is likely to change.
@@ -0,0 +1,58 @@
1
+ # Crawling
2
+
3
+ [RFC 9309](https://datatracker.ietf.org/doc/html/rfc9309) defines crawlers as automated clients.
4
+
5
+ Some web servers may reject requests that omit the `User-Agent` header or that use common defaults such as `'curl/7.79.1'`.
6
+
7
+ In **undici**, the default user agent is `'undici'`. Since undici is integrated into Node.js core as the implementation of `fetch()`, requests made via `fetch()` use `'node'` as the default user agent.
8
+
9
+ It is recommended to specify a **custom `User-Agent` header** when implementing crawlers. Providing a descriptive user agent allows servers to correctly identify the client and reduces the likelihood of requests being denied.
10
+
11
+ A user agent string should include sufficient detail to identify the crawler and provide contact information. For example:
12
+
13
+ ```
14
+ AcmeCo Crawler - acme.co - contact@acme.co
15
+ ```
16
+
17
+ When adding contact details, avoid using personal identifiers such as your own name or a private email address—especially in a professional or employment context. Instead, use a role-based or organizational contact (e.g., crawler-team@company.com) to protect individual privacy while still enabling communication.
18
+
19
+ If a crawler behaves unexpectedly—for example, due to misconfiguration or implementation errors—server administrators can use the information in the user agent to contact the operator and coordinate an appropriate resolution.
20
+
21
+ The `User-Agent` header can be set on individual requests or applied globally by configuring a custom dispatcher.
22
+
23
+ **Example: setting a `User-Agent` per request**
24
+
25
+ ```js
26
+ import { fetch } from 'undici'
27
+
28
+ const headers = {
29
+ 'User-Agent': 'AcmeCo Crawler - acme.co - contact@acme.co'
30
+ }
31
+
32
+ const res = await fetch('https://example.com', { headers })
33
+ ```
34
+
35
+ ## Best Practices for Crawlers
36
+
37
+ When developing a crawler, the following practices are recommended in addition to setting a descriptive `User-Agent` header:
38
+
39
+ * **Respect `robots.txt`**
40
+ Follow the directives defined in the target site’s `robots.txt` file, including disallowed paths and optional crawl-delay settings (see [W3C guidelines](https://www.w3.org/wiki/Write_Web_Crawler)).
41
+
42
+ * **Rate limiting**
43
+ Regulate request frequency to avoid imposing excessive load on servers. Introduce delays between requests or limit the number of concurrent requests. The W3C suggests at least one second between requests.
44
+
45
+ * **Error handling**
46
+ Implement retry logic with exponential backoff for transient failures, and stop requests when persistent errors occur (e.g., HTTP 403 or 429).
47
+
48
+ * **Monitoring and logging**
49
+ Track request volume, response codes, and error rates to detect misbehavior and address issues proactively.
50
+
51
+ * **Contact information**
52
+ Always include valid and current contact details in the `User-Agent` string so that administrators can reach the crawler operator if necessary.
53
+
54
+ ## References and Further Reading
55
+
56
+ * [RFC 9309: The Robots Exclusion Protocol](https://datatracker.ietf.org/doc/html/rfc9309)
57
+ * [W3C Wiki: Write Web Crawler](https://www.w3.org/wiki/Write_Web_Crawler)
58
+ * [Ethical Web Crawling (WWW 2010 Conference Paper)](https://archives.iw3c2.org/www2010/proceedings/www/p1101.pdf)
package/index.js CHANGED
@@ -4,6 +4,7 @@ const Client = require('./lib/dispatcher/client')
4
4
  const Dispatcher = require('./lib/dispatcher/dispatcher')
5
5
  const Pool = require('./lib/dispatcher/pool')
6
6
  const BalancedPool = require('./lib/dispatcher/balanced-pool')
7
+ const RoundRobinPool = require('./lib/dispatcher/round-robin-pool')
7
8
  const Agent = require('./lib/dispatcher/agent')
8
9
  const ProxyAgent = require('./lib/dispatcher/proxy-agent')
9
10
  const EnvHttpProxyAgent = require('./lib/dispatcher/env-http-proxy-agent')
@@ -31,6 +32,7 @@ module.exports.Dispatcher = Dispatcher
31
32
  module.exports.Client = Client
32
33
  module.exports.Pool = Pool
33
34
  module.exports.BalancedPool = BalancedPool
35
+ module.exports.RoundRobinPool = RoundRobinPool
34
36
  module.exports.Agent = Agent
35
37
  module.exports.ProxyAgent = ProxyAgent
36
38
  module.exports.EnvHttpProxyAgent = EnvHttpProxyAgent
@@ -47,7 +49,8 @@ module.exports.interceptors = {
47
49
  dump: require('./lib/interceptor/dump'),
48
50
  dns: require('./lib/interceptor/dns'),
49
51
  cache: require('./lib/interceptor/cache'),
50
- decompress: require('./lib/interceptor/decompress')
52
+ decompress: require('./lib/interceptor/decompress'),
53
+ deduplicate: require('./lib/interceptor/deduplicate')
51
54
  }
52
55
 
53
56
  module.exports.cacheStores = {
@@ -4,6 +4,7 @@ const { InvalidArgumentError, SocketError } = require('../core/errors')
4
4
  const { AsyncResource } = require('node:async_hooks')
5
5
  const assert = require('node:assert')
6
6
  const util = require('../core/util')
7
+ const { kHTTP2Stream } = require('../core/symbols')
7
8
  const { addSignal, removeSignal } = require('./abort-signal')
8
9
 
9
10
  class UpgradeHandler extends AsyncResource {
@@ -50,7 +51,7 @@ class UpgradeHandler extends AsyncResource {
50
51
  }
51
52
 
52
53
  onUpgrade (statusCode, rawHeaders, socket) {
53
- assert(statusCode === 101)
54
+ assert(socket[kHTTP2Stream] === true ? statusCode === 200 : statusCode === 101)
54
55
 
55
56
  const { callback, opaque, context } = this
56
57
 
@@ -43,7 +43,7 @@ const SessionCache = class WeakSessionCache {
43
43
  }
44
44
  }
45
45
 
46
- function buildConnector ({ allowH2, maxCachedSessions, socketPath, timeout, session: customSession, ...opts }) {
46
+ function buildConnector ({ allowH2, useH2c, maxCachedSessions, socketPath, timeout, session: customSession, ...opts }) {
47
47
  if (maxCachedSessions != null && (!Number.isInteger(maxCachedSessions) || maxCachedSessions < 0)) {
48
48
  throw new InvalidArgumentError('maxCachedSessions must be a positive integer or zero')
49
49
  }
@@ -96,6 +96,9 @@ function buildConnector ({ allowH2, maxCachedSessions, socketPath, timeout, sess
96
96
  port,
97
97
  host: hostname
98
98
  })
99
+ if (useH2c === true) {
100
+ socket.alpnProtocol = 'h2'
101
+ }
99
102
  }
100
103
 
101
104
  // Set TCP keep alive options on the socket here instead of in connect() for the case of assigning the socket
@@ -26,7 +26,9 @@ const channels = {
26
26
  close: diagnosticsChannel.channel('undici:websocket:close'),
27
27
  socketError: diagnosticsChannel.channel('undici:websocket:socket_error'),
28
28
  ping: diagnosticsChannel.channel('undici:websocket:ping'),
29
- pong: diagnosticsChannel.channel('undici:websocket:pong')
29
+ pong: diagnosticsChannel.channel('undici:websocket:pong'),
30
+ // ProxyAgent
31
+ proxyConnected: diagnosticsChannel.channel('undici:proxy:connected')
30
32
  }
31
33
 
32
34
  let isTrackingClientEvents = false
@@ -36,6 +38,14 @@ function trackClientEvents (debugLog = undiciDebugLog) {
36
38
  return
37
39
  }
38
40
 
41
+ // Check if any of the channels already have subscribers to prevent duplicate subscriptions
42
+ // This can happen when both Node.js built-in undici and undici as a dependency are present
43
+ if (channels.beforeConnect.hasSubscribers || channels.connected.hasSubscribers ||
44
+ channels.connectError.hasSubscribers || channels.sendHeaders.hasSubscribers) {
45
+ isTrackingClientEvents = true
46
+ return
47
+ }
48
+
39
49
  isTrackingClientEvents = true
40
50
 
41
51
  diagnosticsChannel.subscribe('undici:client:beforeConnect',
@@ -98,6 +108,14 @@ function trackRequestEvents (debugLog = undiciDebugLog) {
98
108
  return
99
109
  }
100
110
 
111
+ // Check if any of the channels already have subscribers to prevent duplicate subscriptions
112
+ // This can happen when both Node.js built-in undici and undici as a dependency are present
113
+ if (channels.headers.hasSubscribers || channels.trailers.hasSubscribers ||
114
+ channels.error.hasSubscribers) {
115
+ isTrackingRequestEvents = true
116
+ return
117
+ }
118
+
101
119
  isTrackingRequestEvents = true
102
120
 
103
121
  diagnosticsChannel.subscribe('undici:request:headers',
@@ -146,6 +164,15 @@ function trackWebSocketEvents (debugLog = websocketDebuglog) {
146
164
  return
147
165
  }
148
166
 
167
+ // Check if any of the channels already have subscribers to prevent duplicate subscriptions
168
+ // This can happen when both Node.js built-in undici and undici as a dependency are present
169
+ if (channels.open.hasSubscribers || channels.close.hasSubscribers ||
170
+ channels.socketError.hasSubscribers || channels.ping.hasSubscribers ||
171
+ channels.pong.hasSubscribers) {
172
+ isTrackingWebSocketEvents = true
173
+ return
174
+ }
175
+
149
176
  isTrackingWebSocketEvents = true
150
177
 
151
178
  diagnosticsChannel.subscribe('undici:websocket:open',
@@ -62,6 +62,9 @@ module.exports = {
62
62
  kListeners: Symbol('listeners'),
63
63
  kHTTPContext: Symbol('http context'),
64
64
  kMaxConcurrentStreams: Symbol('max concurrent streams'),
65
+ kEnableConnectProtocol: Symbol('http2session connect protocol'),
66
+ kRemoteSettings: Symbol('http2session remote settings'),
67
+ kHTTP2Stream: Symbol('http2session client stream'),
65
68
  kNoProxyAgent: Symbol('no proxy agent'),
66
69
  kHttpProxyAgent: Symbol('http proxy agent'),
67
70
  kHttpsProxyAgent: Symbol('https proxy agent')