undici 7.15.0 → 7.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. package/README.md +48 -2
  2. package/docs/docs/api/Agent.md +1 -0
  3. package/docs/docs/api/Client.md +1 -0
  4. package/docs/docs/api/DiagnosticsChannel.md +57 -0
  5. package/docs/docs/api/Dispatcher.md +86 -0
  6. package/docs/docs/api/Errors.md +0 -1
  7. package/docs/docs/api/RoundRobinPool.md +145 -0
  8. package/docs/docs/api/WebSocket.md +21 -0
  9. package/docs/docs/best-practices/crawling.md +58 -0
  10. package/index-fetch.js +2 -2
  11. package/index.js +8 -9
  12. package/lib/api/api-request.js +22 -8
  13. package/lib/api/api-upgrade.js +2 -1
  14. package/lib/api/readable.js +7 -5
  15. package/lib/core/connect.js +4 -1
  16. package/lib/core/diagnostics.js +28 -1
  17. package/lib/core/errors.js +217 -13
  18. package/lib/core/request.js +5 -1
  19. package/lib/core/symbols.js +3 -0
  20. package/lib/core/util.js +61 -41
  21. package/lib/dispatcher/agent.js +19 -7
  22. package/lib/dispatcher/balanced-pool.js +10 -0
  23. package/lib/dispatcher/client-h1.js +18 -23
  24. package/lib/dispatcher/client-h2.js +166 -26
  25. package/lib/dispatcher/client.js +64 -59
  26. package/lib/dispatcher/dispatcher-base.js +20 -16
  27. package/lib/dispatcher/env-http-proxy-agent.js +12 -16
  28. package/lib/dispatcher/fixed-queue.js +15 -39
  29. package/lib/dispatcher/h2c-client.js +7 -78
  30. package/lib/dispatcher/pool-base.js +60 -43
  31. package/lib/dispatcher/pool.js +2 -2
  32. package/lib/dispatcher/proxy-agent.js +27 -11
  33. package/lib/dispatcher/round-robin-pool.js +137 -0
  34. package/lib/encoding/index.js +33 -0
  35. package/lib/global.js +19 -1
  36. package/lib/handler/cache-handler.js +84 -27
  37. package/lib/handler/deduplication-handler.js +216 -0
  38. package/lib/handler/retry-handler.js +0 -2
  39. package/lib/interceptor/cache.js +94 -15
  40. package/lib/interceptor/decompress.js +2 -1
  41. package/lib/interceptor/deduplicate.js +109 -0
  42. package/lib/interceptor/dns.js +55 -13
  43. package/lib/mock/mock-agent.js +4 -4
  44. package/lib/mock/mock-errors.js +10 -0
  45. package/lib/mock/mock-utils.js +13 -12
  46. package/lib/mock/snapshot-agent.js +11 -5
  47. package/lib/mock/snapshot-recorder.js +12 -4
  48. package/lib/mock/snapshot-utils.js +4 -4
  49. package/lib/util/cache.js +29 -1
  50. package/lib/util/date.js +534 -140
  51. package/lib/util/runtime-features.js +124 -0
  52. package/lib/web/cookies/index.js +1 -1
  53. package/lib/web/cookies/parse.js +1 -1
  54. package/lib/web/eventsource/eventsource-stream.js +2 -2
  55. package/lib/web/eventsource/eventsource.js +34 -29
  56. package/lib/web/eventsource/util.js +1 -9
  57. package/lib/web/fetch/body.js +45 -61
  58. package/lib/web/fetch/data-url.js +12 -160
  59. package/lib/web/fetch/formdata-parser.js +204 -127
  60. package/lib/web/fetch/index.js +21 -19
  61. package/lib/web/fetch/request.js +6 -0
  62. package/lib/web/fetch/response.js +4 -7
  63. package/lib/web/fetch/util.js +10 -79
  64. package/lib/web/infra/index.js +229 -0
  65. package/lib/web/subresource-integrity/subresource-integrity.js +6 -5
  66. package/lib/web/webidl/index.js +207 -44
  67. package/lib/web/websocket/connection.js +33 -22
  68. package/lib/web/websocket/events.js +1 -1
  69. package/lib/web/websocket/frame.js +9 -15
  70. package/lib/web/websocket/stream/websocketerror.js +22 -1
  71. package/lib/web/websocket/stream/websocketstream.js +17 -8
  72. package/lib/web/websocket/util.js +2 -1
  73. package/lib/web/websocket/websocket.js +32 -42
  74. package/package.json +9 -7
  75. package/types/agent.d.ts +2 -1
  76. package/types/api.d.ts +2 -2
  77. package/types/balanced-pool.d.ts +2 -1
  78. package/types/cache-interceptor.d.ts +1 -0
  79. package/types/client.d.ts +1 -1
  80. package/types/connector.d.ts +2 -2
  81. package/types/diagnostics-channel.d.ts +2 -2
  82. package/types/dispatcher.d.ts +12 -12
  83. package/types/errors.d.ts +5 -15
  84. package/types/fetch.d.ts +4 -4
  85. package/types/formdata.d.ts +1 -1
  86. package/types/h2c-client.d.ts +1 -1
  87. package/types/index.d.ts +9 -1
  88. package/types/interceptors.d.ts +36 -2
  89. package/types/pool.d.ts +1 -1
  90. package/types/readable.d.ts +2 -2
  91. package/types/round-robin-pool.d.ts +41 -0
  92. package/types/webidl.d.ts +82 -21
  93. package/types/websocket.d.ts +9 -9
@@ -1,19 +1,20 @@
1
1
  'use strict'
2
2
 
3
3
  const assert = require('node:assert')
4
+ const { forgivingBase64, collectASequenceOfCodePoints, collectASequenceOfCodePointsFast, isomorphicDecode, removeASCIIWhitespace, removeChars } = require('../infra')
4
5
 
5
6
  const encoder = new TextEncoder()
6
7
 
7
8
  /**
8
9
  * @see https://mimesniff.spec.whatwg.org/#http-token-code-point
9
10
  */
10
- const HTTP_TOKEN_CODEPOINTS = /^[!#$%&'*+\-.^_|~A-Za-z0-9]+$/
11
- const HTTP_WHITESPACE_REGEX = /[\u000A\u000D\u0009\u0020]/ // eslint-disable-line
12
- const ASCII_WHITESPACE_REPLACE_REGEX = /[\u0009\u000A\u000C\u000D\u0020]/g // eslint-disable-line
11
+ const HTTP_TOKEN_CODEPOINTS = /^[-!#$%&'*+.^_|~A-Za-z0-9]+$/u
12
+ const HTTP_WHITESPACE_REGEX = /[\u000A\u000D\u0009\u0020]/u // eslint-disable-line
13
+
13
14
  /**
14
15
  * @see https://mimesniff.spec.whatwg.org/#http-quoted-string-token-code-point
15
16
  */
16
- const HTTP_QUOTED_STRING_TOKENS = /^[\u0009\u0020-\u007E\u0080-\u00FF]+$/ // eslint-disable-line
17
+ const HTTP_QUOTED_STRING_TOKENS = /^[\u0009\u0020-\u007E\u0080-\u00FF]+$/u // eslint-disable-line
17
18
 
18
19
  // https://fetch.spec.whatwg.org/#data-url-processor
19
20
  /** @param {URL} dataURL */
@@ -68,7 +69,7 @@ function dataURLProcessor (dataURL) {
68
69
  // 11. If mimeType ends with U+003B (;), followed by
69
70
  // zero or more U+0020 SPACE, followed by an ASCII
70
71
  // case-insensitive match for "base64", then:
71
- if (/;(\u0020){0,}base64$/i.test(mimeType)) {
72
+ if (/;(?:\u0020*)base64$/ui.test(mimeType)) {
72
73
  // 1. Let stringBody be the isomorphic decode of body.
73
74
  const stringBody = isomorphicDecode(body)
74
75
 
@@ -86,7 +87,7 @@ function dataURLProcessor (dataURL) {
86
87
 
87
88
  // 5. Remove trailing U+0020 SPACE code points from mimeType,
88
89
  // if any.
89
- mimeType = mimeType.replace(/(\u0020)+$/, '')
90
+ mimeType = mimeType.replace(/(\u0020+)$/u, '')
90
91
 
91
92
  // 6. Remove the last U+003B (;) code point from mimeType.
92
93
  mimeType = mimeType.slice(0, -1)
@@ -136,49 +137,6 @@ function URLSerializer (url, excludeFragment = false) {
136
137
  return serialized
137
138
  }
138
139
 
139
// https://infra.spec.whatwg.org/#collect-a-sequence-of-code-points
/**
 * Collects consecutive characters of `input`, starting at `position.position`,
 * for as long as `condition` accepts them. `position.position` is advanced in
 * place and ends up on the first rejected character (or at `input.length`).
 *
 * @param {(char: string) => boolean} condition
 * @param {string} input
 * @param {{ position: number }} position
 * @returns {string} the collected characters (possibly the empty string)
 */
function collectASequenceOfCodePoints (condition, input, position) {
  let collected = ''

  for (; position.position < input.length; position.position++) {
    const codePoint = input[position.position]

    // Stop at the first code point that does not meet the condition.
    if (!condition(codePoint)) {
      break
    }

    collected += codePoint
  }

  return collected
}
162
-
163
/**
 * Variant of collectASequenceOfCodePoints for the common case where collection
 * stops at one specific character: everything from `position.position` up to
 * (not including) the next occurrence of `char` is returned, and
 * `position.position` is moved onto that occurrence. When `char` does not
 * occur, the rest of `input` is returned and the position moves to the end.
 *
 * @param {string} char
 * @param {string} input
 * @param {{ position: number }} position
 * @returns {string}
 */
function collectASequenceOfCodePointsFast (char, input, position) {
  const begin = position.position
  const found = input.indexOf(char, begin)
  const end = found === -1 ? input.length : found

  position.position = end
  return input.slice(begin, end)
}
181
-
182
140
  // https://url.spec.whatwg.org/#string-percent-decode
183
141
  /** @param {string} input */
184
142
  function stringPercentDecode (input) {
@@ -219,8 +177,9 @@ function percentDecode (input) {
219
177
  /** @type {Uint8Array} */
220
178
  const output = new Uint8Array(length)
221
179
  let j = 0
180
+ let i = 0
222
181
  // 2. For each byte byte in input:
223
- for (let i = 0; i < length; ++i) {
182
+ while (i < length) {
224
183
  const byte = input[i]
225
184
 
226
185
  // 1. If byte is not 0x25 (%), then append byte to output.
@@ -248,6 +207,7 @@ function percentDecode (input) {
248
207
  // 3. Skip the next two bytes in input.
249
208
  i += 2
250
209
  }
210
+ ++i
251
211
  }
252
212
 
253
213
  // 3. Return output.
@@ -427,45 +387,6 @@ function parseMIMEType (input) {
427
387
  return mimeType
428
388
  }
429
389
 
430
// https://infra.spec.whatwg.org/#forgiving-base64-decode
/**
 * Forgiving base64 decode: ASCII whitespace is ignored and the trailing `=`
 * padding is optional.
 *
 * @param {string} data
 * @returns {Uint8Array | 'failure'} the decoded bytes, or the string
 *   'failure' when `data` is not acceptable base64.
 */
function forgivingBase64 (data) {
  // 1. Remove all ASCII whitespace from data.
  const text = data.replace(/[\u0009\u000A\u000C\u000D\u0020]/g, '') // eslint-disable-line no-control-regex

  // 2. If the length is a multiple of 4, discount one or two trailing
  //    U+003D (=) code points. Only the logical length shrinks; `text`
  //    itself keeps its padding for the final decode.
  let significantLength = text.length
  if (significantLength % 4 === 0) {
    for (
      let dropped = 0;
      dropped < 2 && text.charCodeAt(significantLength - 1) === 0x003D;
      dropped++
    ) {
      significantLength--
    }
  }

  // 3. A remainder of 1 can never be produced by a base64 encoder.
  if (significantLength % 4 === 1) {
    return 'failure'
  }

  // 4. Everything before the padding must be `+`, `/` or ASCII alphanumeric.
  const payload = significantLength === text.length
    ? text
    : text.substring(0, significantLength)
  if (/[^+/0-9A-Za-z]/.test(payload)) {
    return 'failure'
  }

  const decoded = Buffer.from(text, 'base64')
  return new Uint8Array(decoded.buffer, decoded.byteOffset, decoded.byteLength)
}
468
-
469
390
  // https://fetch.spec.whatwg.org/#collect-an-http-quoted-string
470
391
  // tests: https://fetch.spec.whatwg.org/#example-http-quoted-string
471
392
  /**
@@ -572,7 +493,7 @@ function serializeAMimeType (mimeType) {
572
493
  if (!HTTP_TOKEN_CODEPOINTS.test(value)) {
573
494
  // 1. Precede each occurrence of U+0022 (") or
574
495
  // U+005C (\) in value with U+005C (\).
575
- value = value.replace(/(\\|")/g, '\\$1')
496
+ value = value.replace(/[\\"]/ug, '\\$&')
576
497
 
577
498
  // 2. Prepend U+0022 (") to value.
578
499
  value = '"' + value
@@ -608,71 +529,6 @@ function removeHTTPWhitespace (str, leading = true, trailing = true) {
608
529
  return removeChars(str, leading, trailing, isHTTPWhiteSpace)
609
530
  }
610
531
 
611
/**
 * Tells whether a character code is ASCII whitespace: TAB, LF, FF, CR or SPACE.
 *
 * @see https://infra.spec.whatwg.org/#ascii-whitespace
 * @param {number} char
 * @returns {boolean}
 */
function isASCIIWhitespace (char) {
  switch (char) {
    case 0x009: // \t
    case 0x00a: // \n
    case 0x00c: // \f
    case 0x00d: // \r
    case 0x020: // space
      return true
    default:
      return false
  }
}
619
-
620
/**
 * Strips ASCII whitespace from the start and/or the end of `str` by delegating
 * to removeChars with the isASCIIWhitespace predicate.
 *
 * @see https://infra.spec.whatwg.org/#strip-leading-and-trailing-ascii-whitespace
 * @param {string} str
 * @param {boolean} [leading=true] strip at the start of the string
 * @param {boolean} [trailing=true] strip at the end of the string
 * @returns {string}
 */
function removeASCIIWhitespace (str, leading = true, trailing = true) {
  const stripped = removeChars(str, leading, trailing, isASCIIWhitespace)
  return stripped
}
629
-
630
/**
 * Strips characters accepted by `predicate` from the start and/or the end of
 * `str`. Characters in the middle of the string are never touched.
 *
 * @param {string} str
 * @param {boolean} leading strip matching characters at the start
 * @param {boolean} trailing strip matching characters at the end
 * @param {(charCode: number) => boolean} predicate receives a UTF-16 code unit
 * @returns {string} `str` itself when nothing was stripped, otherwise the
 *   trimmed substring (the empty string when every character was stripped)
 */
function removeChars (str, leading, trailing, predicate) {
  let lead = 0
  let trail = str.length - 1

  if (leading) {
    while (lead < str.length && predicate(str.charCodeAt(lead))) lead++
  }

  if (trailing) {
    // Scan back down to `lead` inclusive. Stopping at `trail > 0` would leave
    // index 0 unexamined, so a string made only of removable characters kept
    // its first character when `leading` was false.
    while (trail >= lead && predicate(str.charCodeAt(trail))) trail--
  }

  return lead === 0 && trail === str.length - 1 ? str : str.slice(lead, trail + 1)
}
651
-
652
/**
 * Isomorphic decode: builds a string whose code points have the same values
 * as the bytes of `input`, in the same order.
 *
 * @see https://infra.spec.whatwg.org/#isomorphic-decode
 * @param {Uint8Array} input
 * @returns {string}
 */
function isomorphicDecode (input) {
  // Upper bound on how many bytes are handed to a single
  // String.fromCharCode.apply call.
  const chunkSize = (2 << 15) - 1
  const total = input.length

  // Short inputs are converted in one call.
  if (total < chunkSize) {
    return String.fromCharCode.apply(null, input)
  }

  let decoded = ''
  for (let offset = 0; offset < total; offset += chunkSize) {
    const end = Math.min(offset + chunkSize, total)
    decoded += String.fromCharCode.apply(null, input.subarray(offset, end))
  }
  return decoded
}
675
-
676
532
  /**
677
533
  * @see https://mimesniff.spec.whatwg.org/#minimize-a-supported-mime-type
678
534
  * @param {Exclude<ReturnType<typeof parseMIMEType>, 'failure'>} mimeType
@@ -730,15 +586,11 @@ function minimizeSupportedMimeType (mimeType) {
730
586
  module.exports = {
731
587
  dataURLProcessor,
732
588
  URLSerializer,
733
- collectASequenceOfCodePoints,
734
- collectASequenceOfCodePointsFast,
735
589
  stringPercentDecode,
736
590
  parseMIMEType,
737
591
  collectAnHTTPQuotedString,
738
592
  serializeAMimeType,
739
- removeChars,
740
593
  removeHTTPWhitespace,
741
594
  minimizeSupportedMimeType,
742
- HTTP_TOKEN_CODEPOINTS,
743
- isomorphicDecode
595
+ HTTP_TOKEN_CODEPOINTS
744
596
  }
@@ -1,16 +1,15 @@
1
1
  'use strict'
2
2
 
3
3
  const { bufferToLowerCasedHeaderName } = require('../../core/util')
4
- const { utf8DecodeBytes } = require('./util')
5
- const { HTTP_TOKEN_CODEPOINTS, isomorphicDecode } = require('./data-url')
4
+ const { HTTP_TOKEN_CODEPOINTS } = require('./data-url')
6
5
  const { makeEntry } = require('./formdata')
7
6
  const { webidl } = require('../webidl')
8
7
  const assert = require('node:assert')
8
+ const { isomorphicDecode } = require('../infra')
9
+ const { utf8DecodeBytes } = require('../../encoding')
9
10
 
10
- const formDataNameBuffer = Buffer.from('form-data; name="')
11
- const filenameBuffer = Buffer.from('filename')
12
11
  const dd = Buffer.from('--')
13
- const ddcrlf = Buffer.from('--\r\n')
12
+ const decoder = new TextDecoder()
14
13
 
15
14
  /**
16
15
  * @param {string} chars
@@ -84,20 +83,16 @@ function multipartFormDataParser (input, mimeType) {
84
83
  // the first byte.
85
84
  const position = { position: 0 }
86
85
 
87
- // Note: undici addition, allows leading and trailing CRLFs.
88
- while (input[position.position] === 0x0d && input[position.position + 1] === 0x0a) {
89
- position.position += 2
90
- }
91
-
92
- let trailing = input.length
86
+ // Note: Per RFC 2046 Section 5.1.1, we must ignore anything before the
87
+ // first boundary delimiter line (preamble). Search for the first boundary.
88
+ const firstBoundaryIndex = input.indexOf(boundary)
93
89
 
94
- while (input[trailing - 1] === 0x0a && input[trailing - 2] === 0x0d) {
95
- trailing -= 2
90
+ if (firstBoundaryIndex === -1) {
91
+ throw parsingError('no boundary found in multipart body')
96
92
  }
97
93
 
98
- if (trailing !== input.length) {
99
- input = input.subarray(0, trailing)
100
- }
94
+ // Start parsing from the first boundary, ignoring any preamble
95
+ position.position = firstBoundaryIndex
101
96
 
102
97
  // 5. While true:
103
98
  while (true) {
@@ -113,11 +108,11 @@ function multipartFormDataParser (input, mimeType) {
113
108
 
114
109
  // 5.2. If position points to the sequence of bytes 0x2D 0x2D 0x0D 0x0A
115
110
  // (`--` followed by CR LF) followed by the end of input, return entry list.
116
- // Note: a body does NOT need to end with CRLF. It can end with --.
117
- if (
118
- (position.position === input.length - 2 && bufferStartsWith(input, dd, position)) ||
119
- (position.position === input.length - 4 && bufferStartsWith(input, ddcrlf, position))
120
- ) {
111
+ // Note: Per RFC 2046 Section 5.1.1, we must ignore anything after the
112
+ // final boundary delimiter (epilogue). Check for -- or --CRLF and return
113
+ // regardless of what follows.
114
+ if (bufferStartsWith(input, dd, position)) {
115
+ // Found closing boundary delimiter (--), ignore any epilogue
121
116
  return entryList
122
117
  }
123
118
 
@@ -205,6 +200,113 @@ function multipartFormDataParser (input, mimeType) {
205
200
  }
206
201
  }
207
202
 
203
/**
 * Parses one Content-Disposition parameter starting at `position`, e.g.
 * `; name="value"`, `; filename=token` or `; filename*=utf-8''encoded`.
 * `position.position` is advanced past everything that was consumed.
 *
 * @param {Buffer} input
 * @param {{ position: number }} position
 * @returns {{ name: string, value: string } | null} the lowercased parameter
 *   name and its decoded value, or `null` when no `name=` pair starts here
 * @throws {TypeError} (from parsingError) when an extended value is not
 *   `utf-8'[language]'value`, when its percent-encoding is invalid, or when a
 *   quoted string has no closing quote
 */
function parseContentDispositionAttribute (input, position) {
  const isOptionalWhitespace = (char) => char === 0x20 || char === 0x09

  // Skip whitespace, the `;` separator, then whitespace again. Whitespace is
  // also accepted before the separator so that `name="a" ; filename="b"` does
  // not lose its second parameter.
  collectASequenceOfBytes(isOptionalWhitespace, input, position)
  if (input[position.position] === 0x3b /* ; */) {
    position.position++
  }
  collectASequenceOfBytes(isOptionalWhitespace, input, position)

  // Collect attribute name (token characters)
  const attributeName = collectASequenceOfBytes(
    (char) => isToken(char) && char !== 0x3d && char !== 0x2a, // not = or *
    input,
    position
  )

  if (attributeName.length === 0) {
    return null
  }

  const attrNameStr = attributeName.toString('ascii').toLowerCase()

  // Check for extended notation (attribute*)
  const isExtended = input[position.position] === 0x2a /* * */
  if (isExtended) {
    position.position++ // skip *
  }

  // Expect = sign
  if (input[position.position] !== 0x3d /* = */) {
    return null
  }
  position.position++ // skip =

  // Skip whitespace
  collectASequenceOfBytes(isOptionalWhitespace, input, position)

  let value

  if (isExtended) {
    // Extended attribute format: charset'language'encoded-value
    const headerValue = collectASequenceOfBytes(
      (char) => char !== 0x20 && char !== 0x0d && char !== 0x0a && char !== 0x3b, // not space, CRLF, or ;
      input,
      position
    )

    // Only the utf-8 charset is supported (case insensitive); it must be
    // followed by the `'` that opens the language tag.
    if (
      (headerValue[0] !== 0x75 && headerValue[0] !== 0x55) || // u or U
      (headerValue[1] !== 0x74 && headerValue[1] !== 0x54) || // t or T
      (headerValue[2] !== 0x66 && headerValue[2] !== 0x46) || // f or F
      headerValue[3] !== 0x2d || // -
      headerValue[4] !== 0x38 || // 8
      headerValue[5] !== 0x27 // '
    ) {
      throw parsingError('unknown encoding, expected utf-8\'\'')
    }

    // The language tag may be empty (`utf-8''…`) or not (`utf-8'en'…`); the
    // encoded value starts after the quote that closes it.
    const languageEnd = headerValue.indexOf(0x27, 6)
    if (languageEnd === -1) {
      throw parsingError('expected charset\'language\'value in extended attribute')
    }

    try {
      value = decodeURIComponent(decoder.decode(headerValue.subarray(languageEnd + 1)))
    } catch {
      // decodeURIComponent throws URIError on malformed escapes; surface it as
      // the parser's own error type like every other failure.
      throw parsingError('invalid percent-encoding in extended attribute value')
    }
  } else if (input[position.position] === 0x22 /* " */) {
    // Quoted string
    position.position++ // skip opening quote

    const quotedValue = collectASequenceOfBytes(
      (char) => char !== 0x0a && char !== 0x0d && char !== 0x22, // not LF, CR, or "
      input,
      position
    )

    if (input[position.position] !== 0x22) {
      throw parsingError('Closing quote not found')
    }
    position.position++ // skip closing quote

    // Undo the escaping that multipart/form-data applies to LF, CR and ".
    value = decoder.decode(quotedValue)
      .replace(/%0A/ig, '\n')
      .replace(/%0D/ig, '\r')
      .replace(/%22/g, '"')
  } else {
    // Token value (no quotes)
    const tokenValue = collectASequenceOfBytes(
      (char) => isToken(char) && char !== 0x3b, // not ;
      input,
      position
    )

    value = decoder.decode(tokenValue)
  }

  return { name: attrNameStr, value }
}
309
+
208
310
  /**
209
311
  * @see https://andreubotella.github.io/multipart-form-data/#parse-multipart-form-data-headers
210
312
  * @param {Buffer} input
@@ -265,80 +367,40 @@ function parseMultipartFormDataHeaders (input, position) {
265
367
  // 2.8. Byte-lowercase header name and switch on the result:
266
368
  switch (bufferToLowerCasedHeaderName(headerName)) {
267
369
  case 'content-disposition': {
268
- // 1. Set name and filename to null.
269
370
  name = filename = null
270
371
 
271
- // 2. If position does not point to a sequence of bytes starting with
272
- // `form-data; name="`, return failure.
273
- if (!bufferStartsWith(input, formDataNameBuffer, position)) {
274
- throw parsingError('expected form-data; name=" for content-disposition header')
372
+ // Collect the disposition type (should be "form-data")
373
+ const dispositionType = collectASequenceOfBytes(
374
+ (char) => isToken(char),
375
+ input,
376
+ position
377
+ )
378
+
379
+ if (dispositionType.toString('ascii').toLowerCase() !== 'form-data') {
380
+ throw parsingError('expected form-data for content-disposition header')
275
381
  }
276
382
 
277
- // 3. Advance position so it points at the byte after the next 0x22 (")
278
- // byte (the one in the sequence of bytes matched above).
279
- position.position += 17
280
-
281
- // 4. Set name to the result of parsing a multipart/form-data name given
282
- // input and position, if the result is not failure. Otherwise, return
283
- // failure.
284
- name = parseMultipartFormDataName(input, position)
285
-
286
- // 5. If position points to a sequence of bytes starting with `; filename="`:
287
- if (input[position.position] === 0x3b /* ; */ && input[position.position + 1] === 0x20 /* ' ' */) {
288
- const at = { position: position.position + 2 }
289
-
290
- if (bufferStartsWith(input, filenameBuffer, at)) {
291
- if (input[at.position + 8] === 0x2a /* '*' */) {
292
- at.position += 10 // skip past filename*=
293
-
294
- // Remove leading http tab and spaces. See RFC for examples.
295
- // https://datatracker.ietf.org/doc/html/rfc6266#section-5
296
- collectASequenceOfBytes(
297
- (char) => char === 0x20 || char === 0x09,
298
- input,
299
- at
300
- )
301
-
302
- const headerValue = collectASequenceOfBytes(
303
- (char) => char !== 0x20 && char !== 0x0d && char !== 0x0a, // ' ' or CRLF
304
- input,
305
- at
306
- )
307
-
308
- if (
309
- (headerValue[0] !== 0x75 && headerValue[0] !== 0x55) || // u or U
310
- (headerValue[1] !== 0x74 && headerValue[1] !== 0x54) || // t or T
311
- (headerValue[2] !== 0x66 && headerValue[2] !== 0x46) || // f or F
312
- headerValue[3] !== 0x2d || // -
313
- headerValue[4] !== 0x38 // 8
314
- ) {
315
- throw parsingError('unknown encoding, expected utf-8\'\'')
316
- }
317
-
318
- // skip utf-8''
319
- filename = decodeURIComponent(new TextDecoder().decode(headerValue.subarray(7)))
320
-
321
- position.position = at.position
322
- } else {
323
- // 1. Advance position so it points at the byte after the next 0x22 (") byte
324
- // (the one in the sequence of bytes matched above).
325
- position.position += 11
326
-
327
- // Remove leading http tab and spaces. See RFC for examples.
328
- // https://datatracker.ietf.org/doc/html/rfc6266#section-5
329
- collectASequenceOfBytes(
330
- (char) => char === 0x20 || char === 0x09,
331
- input,
332
- position
333
- )
334
-
335
- position.position++ // skip past " after removing whitespace
336
-
337
- // 2. Set filename to the result of parsing a multipart/form-data name given
338
- // input and position, if the result is not failure. Otherwise, return failure.
339
- filename = parseMultipartFormDataName(input, position)
340
- }
383
+ // Parse attributes one at a time until CRLF
384
+ while (
385
+ position.position < input.length &&
386
+ input[position.position] !== 0x0d &&
387
+ input[position.position + 1] !== 0x0a
388
+ ) {
389
+ const attribute = parseContentDispositionAttribute(input, position)
390
+
391
+ if (!attribute) {
392
+ break
341
393
  }
394
+
395
+ if (attribute.name === 'name') {
396
+ name = attribute.value
397
+ } else if (attribute.name === 'filename') {
398
+ filename = attribute.value
399
+ }
400
+ }
401
+
402
+ if (name === null) {
403
+ throw parsingError('name attribute is required in content-disposition header')
342
404
  }
343
405
 
344
406
  break
@@ -394,43 +456,6 @@ function parseMultipartFormDataHeaders (input, position) {
394
456
  }
395
457
  }
396
458
 
397
/**
 * Reads a quoted multipart/form-data name whose opening quote has just been
 * consumed, moves `position` past the closing quote, and returns the name
 * decoded as UTF-8 with the `%0A`, `%0D` and `%22` escapes resolved.
 *
 * @see https://andreubotella.github.io/multipart-form-data/#parse-a-multipart-form-data-name
 * @param {Buffer} input
 * @param {{ position: number }} position
 * @returns {string}
 * @throws {TypeError} (from parsingError) when the closing quote is missing
 */
function parseMultipartFormDataName (input, position) {
  // 1. Assert: The byte at (position - 1) is 0x22 (").
  assert(input[position.position - 1] === 0x22)

  // 2. Collect every byte up to the next LF, CR or ".
  const rawName = collectASequenceOfBytes(
    (byte) => !(byte === 0x0a || byte === 0x0d || byte === 0x22),
    input,
    position
  )

  // 3. The name must be terminated by a closing quote; step over it.
  if (input[position.position] !== 0x22) {
    throw parsingError('expected "')
  }
  position.position++

  // 4. + 5. UTF-8 decode, then map the escaped sequences back to LF, CR and ".
  const decodedName = new TextDecoder().decode(rawName)

  return decodedName
    .replace(/%0A/ig, '\n')
    .replace(/%0D/ig, '\r')
    .replace(/%22/g, '"')
}
433
-
434
459
  /**
435
460
  * @param {(char: number) => boolean} condition
436
461
  * @param {Buffer} input
@@ -492,6 +517,58 @@ function parsingError (cause) {
492
517
  return new TypeError('Failed to parse body as FormData.', { cause: new TypeError(cause) })
493
518
  }
494
519
 
520
/**
 * Tells whether a byte is a control character.
 *
 * CTL = <any US-ASCII control character
 *       (octets 0 - 31) and DEL (127)>
 * @param {number} char
 * @returns {boolean}
 */
function isCTL (char) {
  if (char === 0x7f) { // DEL
    return true
  }

  return char <= 0x1f
}
528
+
529
/**
 * Tells whether a byte is one of the MIME "tspecials", i.e. a character that
 * may only appear inside a quoted-string in a parameter value.
 *
 * tspecials := "(" / ")" / "<" / ">" / "@" /
 *              "," / ";" / ":" / "\" / <"> /
 *              "/" / "[" / "]" / "?" / "="
 * @param {number} char
 * @returns {boolean}
 */
function isTSpecial (char) {
  switch (char) {
    case 0x22: // "
    case 0x28: // (
    case 0x29: // )
    case 0x2c: // ,
    case 0x2f: // /
    case 0x3a: // :
    case 0x3b: // ;
    case 0x3c: // <
    case 0x3d: // =
    case 0x3e: // >
    case 0x3f: // ?
    case 0x40: // @
    case 0x5b: // [
    case 0x5c: // \
    case 0x5d: // ]
      return true
    default:
      return false
  }
}
556
+
557
/**
 * Tells whether a byte may appear in a MIME token.
 *
 * token := 1*<any (US-ASCII) CHAR except SPACE, CTLs,
 *            or tspecials>
 * @param {number} char
 * @returns {boolean}
 */
function isToken (char) {
  // Must be US-ASCII. Written as a negated `<=` so that a non-numeric value
  // such as `undefined` is rejected here as well.
  if (!(char <= 0x7f)) {
    return false
  }

  // SPACE and horizontal TAB never belong to a token.
  if (char === 0x20 || char === 0x09) {
    return false
  }

  return !isCTL(char) && !isTSpecial(char)
}
571
+
495
572
  module.exports = {
496
573
  multipartFormDataParser,
497
574
  validateBoundary