undici 7.16.0 → 7.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +53 -1
- package/docs/docs/api/Client.md +1 -0
- package/docs/docs/api/DiagnosticsChannel.md +57 -0
- package/docs/docs/api/Dispatcher.md +86 -0
- package/docs/docs/api/RoundRobinPool.md +145 -0
- package/docs/docs/api/WebSocket.md +21 -0
- package/docs/docs/best-practices/crawling.md +58 -0
- package/index.js +4 -1
- package/lib/api/api-upgrade.js +2 -1
- package/lib/core/connect.js +4 -1
- package/lib/core/diagnostics.js +28 -1
- package/lib/core/symbols.js +3 -0
- package/lib/core/util.js +29 -31
- package/lib/dispatcher/balanced-pool.js +10 -0
- package/lib/dispatcher/client-h1.js +0 -16
- package/lib/dispatcher/client-h2.js +153 -23
- package/lib/dispatcher/client.js +7 -2
- package/lib/dispatcher/dispatcher-base.js +11 -12
- package/lib/dispatcher/h2c-client.js +7 -78
- package/lib/dispatcher/pool-base.js +1 -1
- package/lib/dispatcher/proxy-agent.js +13 -2
- package/lib/dispatcher/round-robin-pool.js +137 -0
- package/lib/encoding/index.js +33 -0
- package/lib/handler/cache-handler.js +84 -27
- package/lib/handler/deduplication-handler.js +216 -0
- package/lib/handler/retry-handler.js +0 -2
- package/lib/interceptor/cache.js +35 -17
- package/lib/interceptor/decompress.js +2 -1
- package/lib/interceptor/deduplicate.js +109 -0
- package/lib/interceptor/dns.js +55 -13
- package/lib/mock/mock-utils.js +1 -2
- package/lib/mock/snapshot-agent.js +11 -5
- package/lib/mock/snapshot-recorder.js +12 -4
- package/lib/mock/snapshot-utils.js +4 -4
- package/lib/util/cache.js +29 -1
- package/lib/util/runtime-features.js +124 -0
- package/lib/web/cookies/parse.js +1 -1
- package/lib/web/fetch/body.js +29 -39
- package/lib/web/fetch/data-url.js +12 -160
- package/lib/web/fetch/formdata-parser.js +204 -127
- package/lib/web/fetch/index.js +18 -6
- package/lib/web/fetch/request.js +6 -0
- package/lib/web/fetch/response.js +2 -3
- package/lib/web/fetch/util.js +2 -65
- package/lib/web/infra/index.js +229 -0
- package/lib/web/subresource-integrity/subresource-integrity.js +6 -5
- package/lib/web/webidl/index.js +4 -2
- package/lib/web/websocket/connection.js +31 -21
- package/lib/web/websocket/frame.js +9 -15
- package/lib/web/websocket/stream/websocketstream.js +1 -1
- package/lib/web/websocket/util.js +2 -1
- package/package.json +5 -4
- package/types/agent.d.ts +1 -1
- package/types/api.d.ts +2 -2
- package/types/balanced-pool.d.ts +2 -1
- package/types/cache-interceptor.d.ts +1 -0
- package/types/client.d.ts +1 -1
- package/types/connector.d.ts +2 -2
- package/types/diagnostics-channel.d.ts +2 -2
- package/types/dispatcher.d.ts +12 -12
- package/types/fetch.d.ts +4 -4
- package/types/formdata.d.ts +1 -1
- package/types/h2c-client.d.ts +1 -1
- package/types/index.d.ts +9 -1
- package/types/interceptors.d.ts +36 -2
- package/types/pool.d.ts +1 -1
- package/types/readable.d.ts +2 -2
- package/types/round-robin-pool.d.ts +41 -0
- package/types/websocket.d.ts +9 -9
package/README.md
CHANGED
|
@@ -166,6 +166,8 @@ Installing undici as a module allows you to use a newer version than what's bund
|
|
|
166
166
|
|
|
167
167
|
## Quick Start
|
|
168
168
|
|
|
169
|
+
### Basic Request
|
|
170
|
+
|
|
169
171
|
```js
|
|
170
172
|
import { request } from 'undici'
|
|
171
173
|
|
|
@@ -184,6 +186,50 @@ for await (const data of body) { console.log('data', data) }
|
|
|
184
186
|
console.log('trailers', trailers)
|
|
185
187
|
```
|
|
186
188
|
|
|
189
|
+
### Using Cache Interceptor
|
|
190
|
+
|
|
191
|
+
Undici provides a powerful HTTP caching interceptor that follows HTTP caching best practices. Here's how to use it:
|
|
192
|
+
|
|
193
|
+
```js
|
|
194
|
+
import { fetch, Agent, interceptors, cacheStores, setGlobalDispatcher } from 'undici';
|
|
195
|
+
|
|
196
|
+
// Create a client with cache interceptor
|
|
197
|
+
const client = new Agent().compose(interceptors.cache({
|
|
198
|
+
// Optional: Configure cache store (defaults to MemoryCacheStore)
|
|
199
|
+
store: new cacheStores.MemoryCacheStore({
|
|
200
|
+
maxSize: 100 * 1024 * 1024, // 100MB
|
|
201
|
+
maxCount: 1000,
|
|
202
|
+
maxEntrySize: 5 * 1024 * 1024 // 5MB
|
|
203
|
+
}),
|
|
204
|
+
|
|
205
|
+
// Optional: Specify which HTTP methods to cache (default: ['GET', 'HEAD'])
|
|
206
|
+
methods: ['GET', 'HEAD']
|
|
207
|
+
}));
|
|
208
|
+
|
|
209
|
+
// Set the global dispatcher to use our caching client
|
|
210
|
+
setGlobalDispatcher(client);
|
|
211
|
+
|
|
212
|
+
// Now all fetch requests will use the cache
|
|
213
|
+
async function getData() {
|
|
214
|
+
const response = await fetch('https://api.example.com/data');
|
|
215
|
+
// The server should set appropriate Cache-Control headers in the response
|
|
216
|
+
// which the cache will respect based on the cache policy
|
|
217
|
+
return response.json();
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
// First request - fetches from origin
|
|
221
|
+
const data1 = await getData();
|
|
222
|
+
|
|
223
|
+
// Second request - served from cache if within max-age
|
|
224
|
+
const data2 = await getData();
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
#### Key Features:
|
|
228
|
+
- **Automatic Caching**: Respects `Cache-Control` and `Expires` headers
|
|
229
|
+
- **Validation**: Supports `ETag` and `Last-Modified` validation
|
|
230
|
+
- **Storage Options**: In-memory or persistent SQLite storage
|
|
231
|
+
- **Flexible**: Configure cache size, TTL, and more
|
|
232
|
+
|
|
187
233
|
## Global Installation
|
|
188
234
|
|
|
189
235
|
Undici provides an `install()` function to add all WHATWG fetch classes to `globalThis`, making them available globally:
|
|
@@ -472,7 +518,7 @@ Note that consuming the response body is _mandatory_ for `request`:
|
|
|
472
518
|
```js
|
|
473
519
|
// Do
|
|
474
520
|
const { body, headers } = await request(url);
|
|
475
|
-
await
|
|
521
|
+
await body.dump(); // force consumption of body
|
|
476
522
|
|
|
477
523
|
// Do not
|
|
478
524
|
const { headers } = await request(url);
|
|
@@ -487,6 +533,12 @@ const { headers } = await request(url);
|
|
|
487
533
|
|
|
488
534
|
The [Fetch Standard](https://fetch.spec.whatwg.org) requires implementations to exclude certain headers from requests and responses. In browser environments, some headers are forbidden so the user agent remains in full control over them. In Undici, these constraints are removed to give more control to the user.
|
|
489
535
|
|
|
536
|
+
#### Content-Encoding
|
|
537
|
+
|
|
538
|
+
* https://www.rfc-editor.org/rfc/rfc9110#field.content-encoding
|
|
539
|
+
|
|
540
|
+
Undici limits the number of `Content-Encoding` layers in a response to **5** to prevent resource exhaustion attacks. If a server responds with more than 5 content-encodings (e.g., `Content-Encoding: gzip, gzip, gzip, gzip, gzip, gzip`), the fetch will be rejected with an error. This limit matches the approach taken by [curl](https://curl.se/docs/CVE-2022-32206.html) and [urllib3](https://github.com/advisories/GHSA-gm62-xv2j-4rw9).
|
|
541
|
+
|
|
490
542
|
#### `undici.upgrade([url, options]): Promise`
|
|
491
543
|
|
|
492
544
|
Upgrade to a different protocol. See [MDN - HTTP - Protocol upgrade mechanism](https://developer.mozilla.org/en-US/docs/Web/HTTP/Protocol_upgrade_mechanism) for more details.
|
package/docs/docs/api/Client.md
CHANGED
|
@@ -30,6 +30,7 @@ Returns: `Client`
|
|
|
30
30
|
* **autoSelectFamily**: `boolean` (optional) - Default: depends on local Node version, on Node 18.13.0 and above is `false`. Enables a family autodetection algorithm that loosely implements section 5 of [RFC 8305](https://tools.ietf.org/html/rfc8305#section-5). See [here](https://nodejs.org/api/net.html#socketconnectoptions-connectlistener) for more details. This option is ignored if not supported by the current Node version.
|
|
31
31
|
* **autoSelectFamilyAttemptTimeout**: `number` - Default: depends on local Node version, on Node 18.13.0 and above is `250`. The amount of time in milliseconds to wait for a connection attempt to finish before trying the next address when using the `autoSelectFamily` option. See [here](https://nodejs.org/api/net.html#socketconnectoptions-connectlistener) for more details.
|
|
32
32
|
* **allowH2**: `boolean` - Default: `false`. Enables support for H2 if the server has assigned bigger priority to it through ALPN negotiation.
|
|
33
|
+
* **useH2c**: `boolean` - Default: `false`. Enforces h2c for non-https connections.
|
|
33
34
|
* **maxConcurrentStreams**: `number` - Default: `100`. Dictates the maximum number of concurrent streams for a single H2 session. It can be overridden by a SETTINGS remote frame.
|
|
34
35
|
|
|
35
36
|
> **Notes about HTTP/2**
|
|
@@ -254,3 +254,60 @@ diagnosticsChannel.channel('undici:websocket:pong').subscribe(({ payload, websoc
|
|
|
254
254
|
console.log(websocket) // the WebSocket instance
|
|
255
255
|
})
|
|
256
256
|
```
|
|
257
|
+
|
|
258
|
+
## `undici:proxy:connected`
|
|
259
|
+
|
|
260
|
+
This message is published after the `ProxyAgent` establishes a connection to the proxy server.
|
|
261
|
+
|
|
262
|
+
```js
|
|
263
|
+
import diagnosticsChannel from 'diagnostics_channel'
|
|
264
|
+
|
|
265
|
+
diagnosticsChannel.channel('undici:proxy:connected').subscribe(({ socket, connectParams }) => {
|
|
266
|
+
console.log(socket)
|
|
267
|
+
console.log(connectParams)
|
|
268
|
+
// const { origin, port, path, signal, headers, servername } = connectParams
|
|
269
|
+
})
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
## `undici:request:pending-requests`
|
|
273
|
+
|
|
274
|
+
This message is published when the deduplicate interceptor's pending request map changes. This is useful for monitoring and debugging request deduplication behavior.
|
|
275
|
+
|
|
276
|
+
The deduplicate interceptor automatically deduplicates concurrent requests for the same resource. When multiple identical requests are made while one is already in-flight, only one request is sent to the origin server, and all waiting handlers receive the same response.
|
|
277
|
+
|
|
278
|
+
```js
|
|
279
|
+
import diagnosticsChannel from 'diagnostics_channel'
|
|
280
|
+
|
|
281
|
+
diagnosticsChannel.channel('undici:request:pending-requests').subscribe(({ type, size, key }) => {
|
|
282
|
+
console.log(type) // 'added' or 'removed'
|
|
283
|
+
console.log(size) // current number of pending requests
|
|
284
|
+
console.log(key) // the deduplication key for this request
|
|
285
|
+
})
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
### Event Properties
|
|
289
|
+
|
|
290
|
+
- `type` (`string`): Either `'added'` when a new pending request is registered, or `'removed'` when a pending request completes (successfully or with an error).
|
|
291
|
+
- `size` (`number`): The current number of pending requests after the change.
|
|
292
|
+
- `key` (`string`): The deduplication key for the request, composed of the origin, method, path, and request headers.
|
|
293
|
+
|
|
294
|
+
### Example: Monitoring Request Deduplication
|
|
295
|
+
|
|
296
|
+
```js
|
|
297
|
+
import diagnosticsChannel from 'diagnostics_channel'
|
|
298
|
+
|
|
299
|
+
const channel = diagnosticsChannel.channel('undici:request:pending-requests')
|
|
300
|
+
|
|
301
|
+
channel.subscribe(({ type, size, key }) => {
|
|
302
|
+
if (type === 'added') {
|
|
303
|
+
console.log(`New pending request: ${key} (${size} total pending)`)
|
|
304
|
+
} else {
|
|
305
|
+
console.log(`Request completed: ${key} (${size} remaining)`)
|
|
306
|
+
}
|
|
307
|
+
})
|
|
308
|
+
```
|
|
309
|
+
|
|
310
|
+
This can be useful for:
|
|
311
|
+
- Verifying that request deduplication is working as expected
|
|
312
|
+
- Monitoring the number of concurrent in-flight requests
|
|
313
|
+
- Debugging deduplication behavior in production environments
|
|
@@ -1043,6 +1043,7 @@ The `dns` interceptor enables you to cache DNS lookups for a given duration, per
|
|
|
1043
1043
|
- The function should return a single record from the records array.
|
|
1044
1044
|
- By default a simplified version of Round Robin is used.
|
|
1045
1045
|
- The `records` property can be mutated to store the state of the balancing algorithm.
|
|
1046
|
+
- `storage: DNSStorage` - Custom storage for resolved DNS records
|
|
1046
1047
|
|
|
1047
1048
|
> The `Dispatcher#options` also gets extended with the options `dns.affinity`, `dns.dualStack`, `dns.lookup` and `dns.pick` which can be used to configure the interceptor at a request-per-request basis.
|
|
1048
1049
|
|
|
@@ -1057,6 +1058,14 @@ It represents a map of DNS IP addresses records for a single origin.
|
|
|
1057
1058
|
- `4.ips` - (`DNSInterceptorRecord[] | null`) The IPv4 addresses.
|
|
1058
1059
|
- `6.ips` - (`DNSInterceptorRecord[] | null`) The IPv6 addresses.
|
|
1059
1060
|
|
|
1061
|
+
**DNSStorage**
|
|
1062
|
+
It represents a storage object for resolved DNS records.
|
|
1063
|
+
- `size` - (`number`) current size of the storage.
|
|
1064
|
+
- `get` - (`(origin: string) => DNSInterceptorOriginRecords | null`) method to get the records for a given origin.
|
|
1065
|
+
- `set` - (`(origin: string, records: DNSInterceptorOriginRecords | null, options: { ttl: number }) => void`) method to set the records for a given origin.
|
|
1066
|
+
- `delete` - (`(origin: string) => void`) method to delete records for a given origin.
|
|
1067
|
+
- `full` - (`() => boolean`) method to check if the storage is full; if it returns `true`, DNS lookup will be skipped in this interceptor and new records will not be stored.
|
|
1068
|
+
|
|
1060
1069
|
**Example - Basic DNS Interceptor**
|
|
1061
1070
|
|
|
1062
1071
|
```js
|
|
@@ -1073,6 +1082,45 @@ const response = await client.request({
|
|
|
1073
1082
|
})
|
|
1074
1083
|
```
|
|
1075
1084
|
|
|
1085
|
+
**Example - DNS Interceptor and LRU cache as a storage**
|
|
1086
|
+
|
|
1087
|
+
```js
|
|
1088
|
+
const { Agent, interceptors } = require("undici");
|
|
1089
|
+
const QuickLRU = require("quick-lru");
|
|
1090
|
+
const { dns } = interceptors;
|
|
1091
|
+
|
|
1092
|
+
const lru = new QuickLRU({ maxSize: 100 });
|
|
1093
|
+
|
|
1094
|
+
const lruAdapter = {
|
|
1095
|
+
get size() {
|
|
1096
|
+
return lru.size;
|
|
1097
|
+
},
|
|
1098
|
+
get(origin) {
|
|
1099
|
+
return lru.get(origin);
|
|
1100
|
+
},
|
|
1101
|
+
set(origin, records, { ttl }) {
|
|
1102
|
+
lru.set(origin, records, { maxAge: ttl });
|
|
1103
|
+
},
|
|
1104
|
+
delete(origin) {
|
|
1105
|
+
lru.delete(origin);
|
|
1106
|
+
},
|
|
1107
|
+
full() {
|
|
1108
|
+
// For LRU cache, we can always store new records,
|
|
1109
|
+
// old records will be evicted automatically
|
|
1110
|
+
return false;
|
|
1111
|
+
}
|
|
1112
|
+
}
|
|
1113
|
+
|
|
1114
|
+
const client = new Agent().compose([
|
|
1115
|
+
dns({ storage: lruAdapter })
|
|
1116
|
+
])
|
|
1117
|
+
|
|
1118
|
+
const response = await client.request({
|
|
1119
|
+
origin: `http://localhost:3030`,
|
|
1120
|
+
...requestOpts
|
|
1121
|
+
})
|
|
1122
|
+
```
|
|
1123
|
+
|
|
1076
1124
|
##### `responseError`
|
|
1077
1125
|
|
|
1078
1126
|
The `responseError` interceptor throws an error for responses with status code errors (>= 400).
|
|
@@ -1165,6 +1213,44 @@ The `cache` interceptor implements client-side response caching as described in
|
|
|
1165
1213
|
- `cacheByDefault` - The default expiration time to cache responses by if they don't have an explicit expiration and cannot have an heuristic expiry computed. If this isn't present, responses neither with an explicit expiration nor heuristically cacheable will not be cached. Default `undefined`.
|
|
1166
1214
|
- `type` - The [type of cache](https://developer.mozilla.org/en-US/docs/Web/HTTP/Guides/Caching#types_of_caches) for Undici to act as. Can be `shared` or `private`. Default `shared`. `private` implies privately cacheable responses will be cached and potentially shared with other users of your application.
|
|
1167
1215
|
|
|
1216
|
+
##### `Deduplicate Interceptor`
|
|
1217
|
+
|
|
1218
|
+
The `deduplicate` interceptor deduplicates concurrent identical requests. When multiple identical requests are made while one is already in-flight, only one request is sent to the origin server, and all waiting handlers receive the same response. This reduces server load and improves performance.
|
|
1219
|
+
|
|
1220
|
+
**Options**
|
|
1221
|
+
|
|
1222
|
+
- `methods` - The [**safe** HTTP methods](https://www.rfc-editor.org/rfc/rfc9110#section-9.2.1) to deduplicate. Default `['GET']`.
|
|
1223
|
+
- `skipHeaderNames` - Header names that, if present in a request, will cause the request to skip deduplication entirely. Useful for headers like `idempotency-key` where presence indicates unique processing. Header name matching is case-insensitive. Default `[]`.
|
|
1224
|
+
- `excludeHeaderNames` - Header names to exclude from the deduplication key. Requests with different values for these headers will still be deduplicated together. Useful for headers like `x-request-id` that vary per request but shouldn't affect deduplication. Header name matching is case-insensitive. Default `[]`.
|
|
1225
|
+
|
|
1226
|
+
**Usage**
|
|
1227
|
+
|
|
1228
|
+
```js
|
|
1229
|
+
const { Client, interceptors } = require("undici");
|
|
1230
|
+
const { deduplicate, cache } = interceptors;
|
|
1231
|
+
|
|
1232
|
+
// Deduplicate only
|
|
1233
|
+
const client = new Client("http://example.com").compose(
|
|
1234
|
+
deduplicate()
|
|
1235
|
+
);
|
|
1236
|
+
|
|
1237
|
+
// Deduplicate with caching
|
|
1238
|
+
const clientWithCache = new Client("http://example.com").compose(
|
|
1239
|
+
deduplicate(),
|
|
1240
|
+
cache()
|
|
1241
|
+
);
|
|
1242
|
+
```
|
|
1243
|
+
|
|
1244
|
+
Requests are considered identical if they have the same:
|
|
1245
|
+
- Origin
|
|
1246
|
+
- HTTP method
|
|
1247
|
+
- Path
|
|
1248
|
+
- Request headers (excluding any headers specified in `excludeHeaderNames`)
|
|
1249
|
+
|
|
1250
|
+
All deduplicated requests receive the complete response including status code, headers, and body.
|
|
1251
|
+
|
|
1252
|
+
For observability, request deduplication events are published to the `undici:request:pending-requests` [diagnostic channel](/docs/docs/api/DiagnosticsChannel.md#undicirequestpending-requests).
|
|
1253
|
+
|
|
1168
1254
|
## Instance Events
|
|
1169
1255
|
|
|
1170
1256
|
### Event: `'connect'`
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
# Class: RoundRobinPool
|
|
2
|
+
|
|
3
|
+
Extends: `undici.Dispatcher`
|
|
4
|
+
|
|
5
|
+
A pool of [Client](/docs/docs/api/Client.md) instances connected to the same upstream target with round-robin client selection.
|
|
6
|
+
|
|
7
|
+
Unlike [`Pool`](/docs/docs/api/Pool.md), which always selects the first available client, `RoundRobinPool` cycles through clients in a round-robin fashion. This ensures even distribution of requests across all connections, which is particularly useful when the upstream target is behind a load balancer that round-robins TCP connections across multiple backend servers (e.g., Kubernetes Services).
|
|
8
|
+
|
|
9
|
+
Requests are not guaranteed to be dispatched in order of invocation.
|
|
10
|
+
|
|
11
|
+
## `new RoundRobinPool(url[, options])`
|
|
12
|
+
|
|
13
|
+
Arguments:
|
|
14
|
+
|
|
15
|
+
* **url** `URL | string` - It should only include the **protocol, hostname, and port**.
|
|
16
|
+
* **options** `RoundRobinPoolOptions` (optional)
|
|
17
|
+
|
|
18
|
+
### Parameter: `RoundRobinPoolOptions`
|
|
19
|
+
|
|
20
|
+
Extends: [`ClientOptions`](/docs/docs/api/Client.md#parameter-clientoptions)
|
|
21
|
+
|
|
22
|
+
* **factory** `(origin: URL, opts: Object) => Dispatcher` - Default: `(origin, opts) => new Client(origin, opts)`
|
|
23
|
+
* **connections** `number | null` (optional) - Default: `null` - The number of `Client` instances to create. When set to `null`, the `RoundRobinPool` instance will create an unlimited number of `Client` instances.
|
|
24
|
+
* **clientTtl** `number | null` (optional) - Default: `null` - The amount of time before a `Client` instance is removed from the `RoundRobinPool` and closed. When set to `null`, `Client` instances will not be removed or closed based on age.
|
|
25
|
+
|
|
26
|
+
## Use Case
|
|
27
|
+
|
|
28
|
+
`RoundRobinPool` is designed for scenarios where:
|
|
29
|
+
|
|
30
|
+
1. You connect to a single origin (e.g., `http://my-service.namespace.svc`)
|
|
31
|
+
2. That origin is backed by a load balancer distributing TCP connections across multiple servers
|
|
32
|
+
3. You want requests evenly distributed across all backend servers
|
|
33
|
+
|
|
34
|
+
**Example**: In Kubernetes, when using a Service DNS name with multiple Pod replicas, kube-proxy load balances TCP connections. `RoundRobinPool` ensures each connection (and thus each Pod) receives an equal share of requests.
|
|
35
|
+
|
|
36
|
+
### Important: Backend Distribution Considerations
|
|
37
|
+
|
|
38
|
+
`RoundRobinPool` distributes **HTTP requests** evenly across **TCP connections**. Whether this translates to even backend server distribution depends on the load balancer's behavior:
|
|
39
|
+
|
|
40
|
+
**✓ Works when the load balancer**:
|
|
41
|
+
- Assigns different backends to different TCP connections from the same client
|
|
42
|
+
- Uses algorithms like: round-robin, random, least-connections (without client affinity)
|
|
43
|
+
- Example: Default Kubernetes Services without `sessionAffinity`
|
|
44
|
+
|
|
45
|
+
**✗ Does NOT work when**:
|
|
46
|
+
- Load balancer has client/source IP affinity (all connections from one IP → same backend)
|
|
47
|
+
- Load balancer uses source-IP-hash or sticky sessions
|
|
48
|
+
|
|
49
|
+
**How it works:**
|
|
50
|
+
1. `RoundRobinPool` creates N TCP connections to the load balancer endpoint
|
|
51
|
+
2. Load balancer assigns each TCP connection to a backend (per its algorithm)
|
|
52
|
+
3. `RoundRobinPool` cycles HTTP requests across those N connections
|
|
53
|
+
4. Result: Requests distributed proportionally to how the LB distributed the connections
|
|
54
|
+
|
|
55
|
+
If the load balancer assigns all connections to the same backend (e.g., due to session affinity), `RoundRobinPool` cannot overcome this. In such cases, consider using [`BalancedPool`](/docs/docs/api/BalancedPool.md) with direct backend addresses (e.g., individual pod IPs) instead of a load-balanced endpoint.
|
|
56
|
+
|
|
57
|
+
## Instance Properties
|
|
58
|
+
|
|
59
|
+
### `RoundRobinPool.closed`
|
|
60
|
+
|
|
61
|
+
Implements [Client.closed](/docs/docs/api/Client.md#clientclosed)
|
|
62
|
+
|
|
63
|
+
### `RoundRobinPool.destroyed`
|
|
64
|
+
|
|
65
|
+
Implements [Client.destroyed](/docs/docs/api/Client.md#clientdestroyed)
|
|
66
|
+
|
|
67
|
+
### `RoundRobinPool.stats`
|
|
68
|
+
|
|
69
|
+
Returns the [`PoolStats`](PoolStats.md) instance for this pool.
|
|
70
|
+
|
|
71
|
+
## Instance Methods
|
|
72
|
+
|
|
73
|
+
### `RoundRobinPool.close([callback])`
|
|
74
|
+
|
|
75
|
+
Implements [`Dispatcher.close([callback])`](/docs/docs/api/Dispatcher.md#dispatcherclosecallback-promise).
|
|
76
|
+
|
|
77
|
+
### `RoundRobinPool.destroy([error, callback])`
|
|
78
|
+
|
|
79
|
+
Implements [`Dispatcher.destroy([error, callback])`](/docs/docs/api/Dispatcher.md#dispatcherdestroyerror-callback-promise).
|
|
80
|
+
|
|
81
|
+
### `RoundRobinPool.connect(options[, callback])`
|
|
82
|
+
|
|
83
|
+
See [`Dispatcher.connect(options[, callback])`](/docs/docs/api/Dispatcher.md#dispatcherconnectoptions-callback).
|
|
84
|
+
|
|
85
|
+
### `RoundRobinPool.dispatch(options, handler)`
|
|
86
|
+
|
|
87
|
+
Implements [`Dispatcher.dispatch(options, handler)`](/docs/docs/api/Dispatcher.md#dispatcherdispatchoptions-handler).
|
|
88
|
+
|
|
89
|
+
### `RoundRobinPool.pipeline(options, handler)`
|
|
90
|
+
|
|
91
|
+
See [`Dispatcher.pipeline(options, handler)`](/docs/docs/api/Dispatcher.md#dispatcherpipelineoptions-handler).
|
|
92
|
+
|
|
93
|
+
### `RoundRobinPool.request(options[, callback])`
|
|
94
|
+
|
|
95
|
+
See [`Dispatcher.request(options [, callback])`](/docs/docs/api/Dispatcher.md#dispatcherrequestoptions-callback).
|
|
96
|
+
|
|
97
|
+
### `RoundRobinPool.stream(options, factory[, callback])`
|
|
98
|
+
|
|
99
|
+
See [`Dispatcher.stream(options, factory[, callback])`](/docs/docs/api/Dispatcher.md#dispatcherstreamoptions-factory-callback).
|
|
100
|
+
|
|
101
|
+
### `RoundRobinPool.upgrade(options[, callback])`
|
|
102
|
+
|
|
103
|
+
See [`Dispatcher.upgrade(options[, callback])`](/docs/docs/api/Dispatcher.md#dispatcherupgradeoptions-callback).
|
|
104
|
+
|
|
105
|
+
## Instance Events
|
|
106
|
+
|
|
107
|
+
### Event: `'connect'`
|
|
108
|
+
|
|
109
|
+
See [Dispatcher Event: `'connect'`](/docs/docs/api/Dispatcher.md#event-connect).
|
|
110
|
+
|
|
111
|
+
### Event: `'disconnect'`
|
|
112
|
+
|
|
113
|
+
See [Dispatcher Event: `'disconnect'`](/docs/docs/api/Dispatcher.md#event-disconnect).
|
|
114
|
+
|
|
115
|
+
### Event: `'drain'`
|
|
116
|
+
|
|
117
|
+
See [Dispatcher Event: `'drain'`](/docs/docs/api/Dispatcher.md#event-drain).
|
|
118
|
+
|
|
119
|
+
## Example
|
|
120
|
+
|
|
121
|
+
```javascript
|
|
122
|
+
import { RoundRobinPool } from 'undici'
|
|
123
|
+
|
|
124
|
+
const pool = new RoundRobinPool('http://my-service.default.svc.cluster.local', {
|
|
125
|
+
connections: 10
|
|
126
|
+
})
|
|
127
|
+
|
|
128
|
+
// Requests will be distributed evenly across all 10 connections
|
|
129
|
+
for (let i = 0; i < 100; i++) {
|
|
130
|
+
const { body } = await pool.request({
|
|
131
|
+
path: '/api/data',
|
|
132
|
+
method: 'GET'
|
|
133
|
+
})
|
|
134
|
+
console.log(await body.json())
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
await pool.close()
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## See Also
|
|
141
|
+
|
|
142
|
+
- [Pool](/docs/docs/api/Pool.md) - Connection pool without round-robin
|
|
143
|
+
- [BalancedPool](/docs/docs/api/BalancedPool.md) - Load balancing across multiple origins
|
|
144
|
+
- [Issue #3648](https://github.com/nodejs/undici/issues/3648) - Original issue describing uneven distribution
|
|
145
|
+
|
|
@@ -34,6 +34,27 @@ import { WebSocket } from 'undici'
|
|
|
34
34
|
const ws = new WebSocket('wss://echo.websocket.events', ['echo', 'chat'])
|
|
35
35
|
```
|
|
36
36
|
|
|
37
|
+
### Example with HTTP/2:
|
|
38
|
+
|
|
39
|
+
> ⚠️ Warning: WebSocket over HTTP/2 is experimental, it is likely to change in the future.
|
|
40
|
+
|
|
41
|
+
> 🗒️ Note: WebSocket over HTTP/2 may be enabled by default in a future version,
|
|
42
|
+
> this will happen by enabling HTTP/2 connections as the default behavior of Undici's Agent as well as the global dispatcher.
|
|
43
|
+
> Stay tuned to the changelog for more information.
|
|
44
|
+
|
|
45
|
+
This example will not work in browsers or other platforms that don't allow passing an options object as the second argument to the `WebSocket` constructor.
|
|
46
|
+
|
|
47
|
+
```mjs
|
|
48
|
+
import { Agent, WebSocket } from 'undici'
|
|
49
|
+
|
|
50
|
+
const agent = new Agent({ allowH2: true })
|
|
51
|
+
|
|
52
|
+
const ws = new WebSocket('wss://echo.websocket.events', {
|
|
53
|
+
dispatcher: agent,
|
|
54
|
+
protocols: ['echo', 'chat']
|
|
55
|
+
})
|
|
56
|
+
```
|
|
57
|
+
|
|
37
58
|
# Class: WebSocketStream
|
|
38
59
|
|
|
39
60
|
> ⚠️ Warning: the WebSocketStream API has not been finalized and is likely to change.
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# Crawling
|
|
2
|
+
|
|
3
|
+
[RFC 9309](https://datatracker.ietf.org/doc/html/rfc9309) defines crawlers as automated clients.
|
|
4
|
+
|
|
5
|
+
Some web servers may reject requests that omit the `User-Agent` header or that use common defaults such as `'curl/7.79.1'`.
|
|
6
|
+
|
|
7
|
+
In **undici**, the default user agent is `'undici'`. Since undici is integrated into Node.js core as the implementation of `fetch()`, requests made via `fetch()` use `'node'` as the default user agent.
|
|
8
|
+
|
|
9
|
+
It is recommended to specify a **custom `User-Agent` header** when implementing crawlers. Providing a descriptive user agent allows servers to correctly identify the client and reduces the likelihood of requests being denied.
|
|
10
|
+
|
|
11
|
+
A user agent string should include sufficient detail to identify the crawler and provide contact information. For example:
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
AcmeCo Crawler - acme.co - contact@acme.co
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
When adding contact details, avoid using personal identifiers such as your own name or a private email address—especially in a professional or employment context. Instead, use a role-based or organizational contact (e.g., crawler-team@company.com) to protect individual privacy while still enabling communication.
|
|
18
|
+
|
|
19
|
+
If a crawler behaves unexpectedly—for example, due to misconfiguration or implementation errors—server administrators can use the information in the user agent to contact the operator and coordinate an appropriate resolution.
|
|
20
|
+
|
|
21
|
+
The `User-Agent` header can be set on individual requests or applied globally by configuring a custom dispatcher.
|
|
22
|
+
|
|
23
|
+
**Example: setting a `User-Agent` per request**
|
|
24
|
+
|
|
25
|
+
```js
|
|
26
|
+
import { fetch } from 'undici'
|
|
27
|
+
|
|
28
|
+
const headers = {
|
|
29
|
+
'User-Agent': 'AcmeCo Crawler - acme.co - contact@acme.co'
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
const res = await fetch('https://example.com', { headers })
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Best Practices for Crawlers
|
|
36
|
+
|
|
37
|
+
When developing a crawler, the following practices are recommended in addition to setting a descriptive `User-Agent` header:
|
|
38
|
+
|
|
39
|
+
* **Respect `robots.txt`**
|
|
40
|
+
Follow the directives defined in the target site’s `robots.txt` file, including disallowed paths and optional crawl-delay settings (see [W3C guidelines](https://www.w3.org/wiki/Write_Web_Crawler)).
|
|
41
|
+
|
|
42
|
+
* **Rate limiting**
|
|
43
|
+
Regulate request frequency to avoid imposing excessive load on servers. Introduce delays between requests or limit the number of concurrent requests. The W3C suggests at least one second between requests.
|
|
44
|
+
|
|
45
|
+
* **Error handling**
|
|
46
|
+
Implement retry logic with exponential backoff for transient failures, and stop requests when persistent errors occur (e.g., HTTP 403 or 429).
|
|
47
|
+
|
|
48
|
+
* **Monitoring and logging**
|
|
49
|
+
Track request volume, response codes, and error rates to detect misbehavior and address issues proactively.
|
|
50
|
+
|
|
51
|
+
* **Contact information**
|
|
52
|
+
Always include valid and current contact details in the `User-Agent` string so that administrators can reach the crawler operator if necessary.
|
|
53
|
+
|
|
54
|
+
## References and Further Reading
|
|
55
|
+
|
|
56
|
+
* [RFC 9309: The Robots Exclusion Protocol](https://datatracker.ietf.org/doc/html/rfc9309)
|
|
57
|
+
* [W3C Wiki: Write Web Crawler](https://www.w3.org/wiki/Write_Web_Crawler)
|
|
58
|
+
* [Ethical Web Crawling (WWW 2010 Conference Paper)](https://archives.iw3c2.org/www2010/proceedings/www/p1101.pdf)
|
package/index.js
CHANGED
|
@@ -4,6 +4,7 @@ const Client = require('./lib/dispatcher/client')
|
|
|
4
4
|
const Dispatcher = require('./lib/dispatcher/dispatcher')
|
|
5
5
|
const Pool = require('./lib/dispatcher/pool')
|
|
6
6
|
const BalancedPool = require('./lib/dispatcher/balanced-pool')
|
|
7
|
+
const RoundRobinPool = require('./lib/dispatcher/round-robin-pool')
|
|
7
8
|
const Agent = require('./lib/dispatcher/agent')
|
|
8
9
|
const ProxyAgent = require('./lib/dispatcher/proxy-agent')
|
|
9
10
|
const EnvHttpProxyAgent = require('./lib/dispatcher/env-http-proxy-agent')
|
|
@@ -31,6 +32,7 @@ module.exports.Dispatcher = Dispatcher
|
|
|
31
32
|
module.exports.Client = Client
|
|
32
33
|
module.exports.Pool = Pool
|
|
33
34
|
module.exports.BalancedPool = BalancedPool
|
|
35
|
+
module.exports.RoundRobinPool = RoundRobinPool
|
|
34
36
|
module.exports.Agent = Agent
|
|
35
37
|
module.exports.ProxyAgent = ProxyAgent
|
|
36
38
|
module.exports.EnvHttpProxyAgent = EnvHttpProxyAgent
|
|
@@ -47,7 +49,8 @@ module.exports.interceptors = {
|
|
|
47
49
|
dump: require('./lib/interceptor/dump'),
|
|
48
50
|
dns: require('./lib/interceptor/dns'),
|
|
49
51
|
cache: require('./lib/interceptor/cache'),
|
|
50
|
-
decompress: require('./lib/interceptor/decompress')
|
|
52
|
+
decompress: require('./lib/interceptor/decompress'),
|
|
53
|
+
deduplicate: require('./lib/interceptor/deduplicate')
|
|
51
54
|
}
|
|
52
55
|
|
|
53
56
|
module.exports.cacheStores = {
|
package/lib/api/api-upgrade.js
CHANGED
|
@@ -4,6 +4,7 @@ const { InvalidArgumentError, SocketError } = require('../core/errors')
|
|
|
4
4
|
const { AsyncResource } = require('node:async_hooks')
|
|
5
5
|
const assert = require('node:assert')
|
|
6
6
|
const util = require('../core/util')
|
|
7
|
+
const { kHTTP2Stream } = require('../core/symbols')
|
|
7
8
|
const { addSignal, removeSignal } = require('./abort-signal')
|
|
8
9
|
|
|
9
10
|
class UpgradeHandler extends AsyncResource {
|
|
@@ -50,7 +51,7 @@ class UpgradeHandler extends AsyncResource {
|
|
|
50
51
|
}
|
|
51
52
|
|
|
52
53
|
onUpgrade (statusCode, rawHeaders, socket) {
|
|
53
|
-
assert(statusCode === 101)
|
|
54
|
+
assert(socket[kHTTP2Stream] === true ? statusCode === 200 : statusCode === 101)
|
|
54
55
|
|
|
55
56
|
const { callback, opaque, context } = this
|
|
56
57
|
|
package/lib/core/connect.js
CHANGED
|
@@ -43,7 +43,7 @@ const SessionCache = class WeakSessionCache {
|
|
|
43
43
|
}
|
|
44
44
|
}
|
|
45
45
|
|
|
46
|
-
function buildConnector ({ allowH2, maxCachedSessions, socketPath, timeout, session: customSession, ...opts }) {
|
|
46
|
+
function buildConnector ({ allowH2, useH2c, maxCachedSessions, socketPath, timeout, session: customSession, ...opts }) {
|
|
47
47
|
if (maxCachedSessions != null && (!Number.isInteger(maxCachedSessions) || maxCachedSessions < 0)) {
|
|
48
48
|
throw new InvalidArgumentError('maxCachedSessions must be a positive integer or zero')
|
|
49
49
|
}
|
|
@@ -96,6 +96,9 @@ function buildConnector ({ allowH2, maxCachedSessions, socketPath, timeout, sess
|
|
|
96
96
|
port,
|
|
97
97
|
host: hostname
|
|
98
98
|
})
|
|
99
|
+
if (useH2c === true) {
|
|
100
|
+
socket.alpnProtocol = 'h2'
|
|
101
|
+
}
|
|
99
102
|
}
|
|
100
103
|
|
|
101
104
|
// Set TCP keep alive options on the socket here instead of in connect() for the case of assigning the socket
|
package/lib/core/diagnostics.js
CHANGED
|
@@ -26,7 +26,9 @@ const channels = {
|
|
|
26
26
|
close: diagnosticsChannel.channel('undici:websocket:close'),
|
|
27
27
|
socketError: diagnosticsChannel.channel('undici:websocket:socket_error'),
|
|
28
28
|
ping: diagnosticsChannel.channel('undici:websocket:ping'),
|
|
29
|
-
pong: diagnosticsChannel.channel('undici:websocket:pong')
|
|
29
|
+
pong: diagnosticsChannel.channel('undici:websocket:pong'),
|
|
30
|
+
// ProxyAgent
|
|
31
|
+
proxyConnected: diagnosticsChannel.channel('undici:proxy:connected')
|
|
30
32
|
}
|
|
31
33
|
|
|
32
34
|
let isTrackingClientEvents = false
|
|
@@ -36,6 +38,14 @@ function trackClientEvents (debugLog = undiciDebugLog) {
|
|
|
36
38
|
return
|
|
37
39
|
}
|
|
38
40
|
|
|
41
|
+
// Check if any of the channels already have subscribers to prevent duplicate subscriptions
|
|
42
|
+
// This can happen when both Node.js built-in undici and undici as a dependency are present
|
|
43
|
+
if (channels.beforeConnect.hasSubscribers || channels.connected.hasSubscribers ||
|
|
44
|
+
channels.connectError.hasSubscribers || channels.sendHeaders.hasSubscribers) {
|
|
45
|
+
isTrackingClientEvents = true
|
|
46
|
+
return
|
|
47
|
+
}
|
|
48
|
+
|
|
39
49
|
isTrackingClientEvents = true
|
|
40
50
|
|
|
41
51
|
diagnosticsChannel.subscribe('undici:client:beforeConnect',
|
|
@@ -98,6 +108,14 @@ function trackRequestEvents (debugLog = undiciDebugLog) {
|
|
|
98
108
|
return
|
|
99
109
|
}
|
|
100
110
|
|
|
111
|
+
// Check if any of the channels already have subscribers to prevent duplicate subscriptions
|
|
112
|
+
// This can happen when both Node.js built-in undici and undici as a dependency are present
|
|
113
|
+
if (channels.headers.hasSubscribers || channels.trailers.hasSubscribers ||
|
|
114
|
+
channels.error.hasSubscribers) {
|
|
115
|
+
isTrackingRequestEvents = true
|
|
116
|
+
return
|
|
117
|
+
}
|
|
118
|
+
|
|
101
119
|
isTrackingRequestEvents = true
|
|
102
120
|
|
|
103
121
|
diagnosticsChannel.subscribe('undici:request:headers',
|
|
@@ -146,6 +164,15 @@ function trackWebSocketEvents (debugLog = websocketDebuglog) {
|
|
|
146
164
|
return
|
|
147
165
|
}
|
|
148
166
|
|
|
167
|
+
// Check if any of the channels already have subscribers to prevent duplicate subscriptions
|
|
168
|
+
// This can happen when both Node.js built-in undici and undici as a dependency are present
|
|
169
|
+
if (channels.open.hasSubscribers || channels.close.hasSubscribers ||
|
|
170
|
+
channels.socketError.hasSubscribers || channels.ping.hasSubscribers ||
|
|
171
|
+
channels.pong.hasSubscribers) {
|
|
172
|
+
isTrackingWebSocketEvents = true
|
|
173
|
+
return
|
|
174
|
+
}
|
|
175
|
+
|
|
149
176
|
isTrackingWebSocketEvents = true
|
|
150
177
|
|
|
151
178
|
diagnosticsChannel.subscribe('undici:websocket:open',
|
package/lib/core/symbols.js
CHANGED
|
@@ -62,6 +62,9 @@ module.exports = {
|
|
|
62
62
|
kListeners: Symbol('listeners'),
|
|
63
63
|
kHTTPContext: Symbol('http context'),
|
|
64
64
|
kMaxConcurrentStreams: Symbol('max concurrent streams'),
|
|
65
|
+
kEnableConnectProtocol: Symbol('http2session connect protocol'),
|
|
66
|
+
kRemoteSettings: Symbol('http2session remote settings'),
|
|
67
|
+
kHTTP2Stream: Symbol('http2session client stream'),
|
|
65
68
|
kNoProxyAgent: Symbol('no proxy agent'),
|
|
66
69
|
kHttpProxyAgent: Symbol('http proxy agent'),
|
|
67
70
|
kHttpsProxyAgent: Symbol('https proxy agent')
|