file-type 21.3.1 → 21.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. package/core.js +227 -14
  2. package/index.js +1 -1
  3. package/package.json +1 -1
  4. package/readme.md +5 -4
package/core.js CHANGED
@@ -17,12 +17,20 @@ export const reasonableDetectionSizeInBytes = 4100; // A fair amount of file-typ
17
17
  // Keep defensive limits small enough to avoid accidental memory spikes from untrusted inputs.
18
18
  const maximumMpegOffsetTolerance = reasonableDetectionSizeInBytes - 2;
19
19
  const maximumZipEntrySizeInBytes = 1024 * 1024;
20
+ const maximumZipEntryCount = 1024;
21
+ const maximumZipBufferedReadSizeInBytes = (2 ** 31) - 1;
20
22
  const maximumUntrustedSkipSizeInBytes = 16 * 1024 * 1024;
23
+ const maximumZipTextEntrySizeInBytes = maximumZipEntrySizeInBytes;
21
24
  const maximumNestedGzipDetectionSizeInBytes = maximumUntrustedSkipSizeInBytes;
25
+ const maximumNestedGzipProbeDepth = 1;
22
26
  const maximumId3HeaderSizeInBytes = maximumUntrustedSkipSizeInBytes;
23
27
  const maximumEbmlDocumentTypeSizeInBytes = 64;
24
28
  const maximumEbmlElementPayloadSizeInBytes = maximumUntrustedSkipSizeInBytes;
25
29
  const maximumEbmlElementCount = 256;
30
+ const maximumPngChunkCount = 512;
31
+ const maximumAsfHeaderObjectCount = 512;
32
+ const maximumTiffTagCount = 512;
33
+ const maximumDetectionReentryCount = 256;
26
34
  const maximumPngChunkSizeInBytes = maximumUntrustedSkipSizeInBytes;
27
35
  const maximumTiffIfdOffsetInBytes = maximumUntrustedSkipSizeInBytes;
28
36
  const recoverableZipErrorMessages = new Set([
@@ -31,7 +39,9 @@ const recoverableZipErrorMessages = new Set([
31
39
  'Expected Central-File-Header signature',
32
40
  ]);
33
41
  const recoverableZipErrorMessagePrefixes = [
42
+ 'ZIP entry count exceeds ',
34
43
  'Unsupported ZIP compression method:',
44
+ 'ZIP entry compressed data exceeds ',
35
45
  'ZIP entry decompressed data exceeds ',
36
46
  ];
37
47
  const recoverableZipErrorCodes = new Set([
@@ -109,6 +119,114 @@ async function decompressDeflateRawWithLimit(data, {maximumLength = maximumZipEn
109
119
  return uncompressedData;
110
120
  }
111
121
 
122
+ const zipDataDescriptorSignature = 0x08_07_4B_50;
123
+ const zipDataDescriptorLengthInBytes = 16;
124
+ const zipDataDescriptorOverlapLengthInBytes = zipDataDescriptorLengthInBytes - 1;
125
+
126
+ function findZipDataDescriptorOffset(buffer, bytesConsumed) {
127
+ if (buffer.length < zipDataDescriptorLengthInBytes) {
128
+ return -1;
129
+ }
130
+
131
+ const lastPossibleDescriptorOffset = buffer.length - zipDataDescriptorLengthInBytes;
132
+ for (let index = 0; index <= lastPossibleDescriptorOffset; index++) {
133
+ if (
134
+ Token.UINT32_LE.get(buffer, index) === zipDataDescriptorSignature
135
+ && Token.UINT32_LE.get(buffer, index + 8) === bytesConsumed + index
136
+ ) {
137
+ return index;
138
+ }
139
+ }
140
+
141
+ return -1;
142
+ }
143
+
144
+ function mergeByteChunks(chunks, totalLength) {
145
+ const merged = new Uint8Array(totalLength);
146
+ let offset = 0;
147
+
148
+ for (const chunk of chunks) {
149
+ merged.set(chunk, offset);
150
+ offset += chunk.length;
151
+ }
152
+
153
+ return merged;
154
+ }
155
+
156
+ async function readZipDataDescriptorEntryWithLimit(zipHandler, {shouldBuffer, maximumLength = maximumZipEntrySizeInBytes} = {}) {
157
+ const {syncBuffer} = zipHandler;
158
+ const {length: syncBufferLength} = syncBuffer;
159
+ const chunks = [];
160
+ let bytesConsumed = 0;
161
+
162
+ for (;;) {
163
+ const length = await zipHandler.tokenizer.peekBuffer(syncBuffer, {mayBeLess: true});
164
+ const dataDescriptorOffset = findZipDataDescriptorOffset(syncBuffer.subarray(0, length), bytesConsumed);
165
+ const retainedLength = dataDescriptorOffset >= 0
166
+ ? 0
167
+ : (
168
+ length === syncBufferLength
169
+ ? Math.min(zipDataDescriptorOverlapLengthInBytes, length - 1)
170
+ : 0
171
+ );
172
+ const chunkLength = dataDescriptorOffset >= 0 ? dataDescriptorOffset : length - retainedLength;
173
+
174
+ if (chunkLength === 0) {
175
+ break;
176
+ }
177
+
178
+ bytesConsumed += chunkLength;
179
+ if (bytesConsumed > maximumLength) {
180
+ throw new Error(`ZIP entry compressed data exceeds ${maximumLength} bytes`);
181
+ }
182
+
183
+ if (shouldBuffer) {
184
+ const data = new Uint8Array(chunkLength);
185
+ await zipHandler.tokenizer.readBuffer(data);
186
+ chunks.push(data);
187
+ } else {
188
+ await zipHandler.tokenizer.ignore(chunkLength);
189
+ }
190
+
191
+ if (dataDescriptorOffset >= 0) {
192
+ break;
193
+ }
194
+ }
195
+
196
+ if (!shouldBuffer) {
197
+ return;
198
+ }
199
+
200
+ return mergeByteChunks(chunks, bytesConsumed);
201
+ }
202
+
203
+ async function readZipEntryData(zipHandler, zipHeader, {shouldBuffer} = {}) {
204
+ if (
205
+ zipHeader.dataDescriptor
206
+ && zipHeader.compressedSize === 0
207
+ ) {
208
+ return readZipDataDescriptorEntryWithLimit(zipHandler, {shouldBuffer});
209
+ }
210
+
211
+ if (!shouldBuffer) {
212
+ await zipHandler.tokenizer.ignore(zipHeader.compressedSize);
213
+ return;
214
+ }
215
+
216
+ const maximumLength = getMaximumZipBufferedReadLength(zipHandler.tokenizer);
217
+ if (
218
+ !Number.isFinite(zipHeader.compressedSize)
219
+ || zipHeader.compressedSize < 0
220
+ || zipHeader.compressedSize > maximumLength
221
+ ) {
222
+ throw new Error(`ZIP entry compressed data exceeds ${maximumLength} bytes`);
223
+ }
224
+
225
+ const fileData = new Uint8Array(zipHeader.compressedSize);
226
+ await zipHandler.tokenizer.readBuffer(fileData);
227
+ return fileData;
228
+ }
229
+
112
230
  // Override the default inflate to enforce decompression size limits, since @tokenizer/inflate does not expose a configuration hook for this.
113
231
  ZipHandler.prototype.inflate = async function (zipHeader, fileData, callback) {
114
232
  if (zipHeader.compressedMethod === 0) {
@@ -119,11 +237,45 @@ ZipHandler.prototype.inflate = async function (zipHeader, fileData, callback) {
119
237
  throw new Error(`Unsupported ZIP compression method: ${zipHeader.compressedMethod}`);
120
238
  }
121
239
 
122
- const maximumLength = hasUnknownFileSize(this.tokenizer) ? maximumZipEntrySizeInBytes : Number.MAX_SAFE_INTEGER;
123
- const uncompressedData = await decompressDeflateRawWithLimit(fileData, {maximumLength});
240
+ const uncompressedData = await decompressDeflateRawWithLimit(fileData, {maximumLength: maximumZipEntrySizeInBytes});
124
241
  return callback(uncompressedData);
125
242
  };
126
243
 
244
+ ZipHandler.prototype.unzip = async function (fileCallback) {
245
+ let stop = false;
246
+ let zipEntryCount = 0;
247
+ do {
248
+ const zipHeader = await this.readLocalFileHeader();
249
+ if (!zipHeader) {
250
+ break;
251
+ }
252
+
253
+ zipEntryCount++;
254
+ if (zipEntryCount > maximumZipEntryCount) {
255
+ throw new Error(`ZIP entry count exceeds ${maximumZipEntryCount}`);
256
+ }
257
+
258
+ const next = fileCallback(zipHeader);
259
+ stop = Boolean(next.stop);
260
+ await this.tokenizer.ignore(zipHeader.extraFieldLength);
261
+ const fileData = await readZipEntryData(this, zipHeader, {
262
+ shouldBuffer: Boolean(next.handler),
263
+ });
264
+
265
+ if (next.handler) {
266
+ await this.inflate(zipHeader, fileData, next.handler);
267
+ }
268
+
269
+ if (zipHeader.dataDescriptor) {
270
+ const dataDescriptor = new Uint8Array(zipDataDescriptorLengthInBytes);
271
+ await this.tokenizer.readBuffer(dataDescriptor);
272
+ if (Token.UINT32_LE.get(dataDescriptor, 0) !== zipDataDescriptorSignature) {
273
+ throw new Error(`Expected data-descriptor-signature at position ${this.tokenizer.position - dataDescriptor.length}`);
274
+ }
275
+ }
276
+ } while (!stop);
277
+ };
278
+
127
279
  function createByteLimitedReadableStream(stream, maximumBytes) {
128
280
  const reader = stream.getReader();
129
281
  let emittedBytes = 0;
@@ -384,6 +536,15 @@ function hasExceededUnknownSizeScanBudget(tokenizer, startOffset, maximumBytes)
384
536
  );
385
537
  }
386
538
 
539
+ function getMaximumZipBufferedReadLength(tokenizer) {
540
+ const fileSize = tokenizer.fileInfo.size;
541
+ const remainingBytes = Number.isFinite(fileSize)
542
+ ? Math.max(0, fileSize - tokenizer.position)
543
+ : Number.MAX_SAFE_INTEGER;
544
+
545
+ return Math.min(remainingBytes, maximumZipBufferedReadSizeInBytes);
546
+ }
547
+
387
548
  function isRecoverableZipError(error) {
388
549
  if (error instanceof strtok3.EndOfStreamError) {
389
550
  return true;
@@ -401,10 +562,7 @@ function isRecoverableZipError(error) {
401
562
  return true;
402
563
  }
403
564
 
404
- if (
405
- error instanceof TypeError
406
- && recoverableZipErrorCodes.has(error.code)
407
- ) {
565
+ if (recoverableZipErrorCodes.has(error.code)) {
408
566
  return true;
409
567
  }
410
568
 
@@ -548,9 +706,17 @@ export class FileTypeParser {
548
706
  this.tokenizerOptions = {
549
707
  abortSignal: this.options.signal,
550
708
  };
709
+ this.gzipProbeDepth = 0;
710
+ }
711
+
712
+ getTokenizerOptions() {
713
+ return {
714
+ ...this.tokenizerOptions,
715
+ };
551
716
  }
552
717
 
553
- async fromTokenizer(tokenizer) {
718
+ async fromTokenizer(tokenizer, detectionReentryCount = 0) {
719
+ this.detectionReentryCount = detectionReentryCount;
554
720
  const initialPosition = tokenizer.position;
555
721
  // Iterate through all file-type detectors
556
722
  for (const detector of this.detectors) {
@@ -590,11 +756,11 @@ export class FileTypeParser {
590
756
  return;
591
757
  }
592
758
 
593
- return this.fromTokenizer(strtok3.fromBuffer(buffer, this.tokenizerOptions));
759
+ return this.fromTokenizer(strtok3.fromBuffer(buffer, this.getTokenizerOptions()));
594
760
  }
595
761
 
596
762
  async fromBlob(blob) {
597
- const tokenizer = strtok3.fromBlob(blob, this.tokenizerOptions);
763
+ const tokenizer = strtok3.fromBlob(blob, this.getTokenizerOptions());
598
764
  try {
599
765
  return await this.fromTokenizer(tokenizer);
600
766
  } finally {
@@ -603,7 +769,7 @@ export class FileTypeParser {
603
769
  }
604
770
 
605
771
  async fromStream(stream) {
606
- const tokenizer = strtok3.fromWebStream(stream, this.tokenizerOptions);
772
+ const tokenizer = strtok3.fromWebStream(stream, this.getTokenizerOptions());
607
773
  try {
608
774
  return await this.fromTokenizer(tokenizer);
609
775
  } finally {
@@ -753,6 +919,11 @@ export class FileTypeParser {
753
919
  // -- 3-byte signatures --
754
920
 
755
921
  if (this.check([0xEF, 0xBB, 0xBF])) { // UTF-8-BOM
922
+ if (this.detectionReentryCount >= maximumDetectionReentryCount) {
923
+ return;
924
+ }
925
+
926
+ this.detectionReentryCount++;
756
927
  // Strip off UTF-8-BOM
757
928
  await this.tokenizer.ignore(3);
758
929
  return this.detectConfident(tokenizer);
@@ -773,10 +944,18 @@ export class FileTypeParser {
773
944
  }
774
945
 
775
946
  if (this.check([0x1F, 0x8B, 0x8])) {
947
+ if (this.gzipProbeDepth >= maximumNestedGzipProbeDepth) {
948
+ return {
949
+ ext: 'gz',
950
+ mime: 'application/gzip',
951
+ };
952
+ }
953
+
776
954
  const gzipHandler = new GzipHandler(tokenizer);
777
955
  const limitedInflatedStream = createByteLimitedReadableStream(gzipHandler.inflate(), maximumNestedGzipDetectionSizeInBytes);
778
956
  let compressedFileType;
779
957
  try {
958
+ this.gzipProbeDepth++;
780
959
  compressedFileType = await this.fromStream(limitedInflatedStream);
781
960
  } catch (error) {
782
961
  if (error?.name === 'AbortError') {
@@ -784,6 +963,8 @@ export class FileTypeParser {
784
963
  }
785
964
 
786
965
  // Decompression or inner-detection failures are expected for non-tar gzip files.
966
+ } finally {
967
+ this.gzipProbeDepth--;
787
968
  }
788
969
 
789
970
  // We only need enough inflated bytes to confidently decide whether this is tar.gz.
@@ -850,7 +1031,12 @@ export class FileTypeParser {
850
1031
  throw error;
851
1032
  }
852
1033
 
853
- return this.fromTokenizer(tokenizer); // Skip ID3 header, recursion
1034
+ if (this.detectionReentryCount >= maximumDetectionReentryCount) {
1035
+ return;
1036
+ }
1037
+
1038
+ this.detectionReentryCount++;
1039
+ return this.fromTokenizer(tokenizer, this.detectionReentryCount); // Skip ID3 header, recursion
854
1040
  }
855
1041
 
856
1042
  // Musepack, SV7
@@ -971,7 +1157,7 @@ export class FileTypeParser {
971
1157
  stop: true,
972
1158
  };
973
1159
  case 'mimetype':
974
- if (!canReadZipEntryForDetection(zipHeader)) {
1160
+ if (!canReadZipEntryForDetection(zipHeader, maximumZipTextEntrySizeInBytes)) {
975
1161
  return {};
976
1162
  }
977
1163
 
@@ -987,8 +1173,7 @@ export class FileTypeParser {
987
1173
  case '[Content_Types].xml': {
988
1174
  openXmlState.hasContentTypesEntry = true;
989
1175
 
990
- const maximumContentTypesEntrySize = hasUnknownFileSize(tokenizer) ? maximumZipEntrySizeInBytes : Number.MAX_SAFE_INTEGER;
991
- if (!canReadZipEntryForDetection(zipHeader, maximumContentTypesEntrySize)) {
1176
+ if (!canReadZipEntryForDetection(zipHeader, maximumZipTextEntrySizeInBytes)) {
992
1177
  openXmlState.hasUnparseableContentTypes = true;
993
1178
  return {};
994
1179
  }
@@ -1269,6 +1454,7 @@ export class FileTypeParser {
1269
1454
  return;
1270
1455
  }
1271
1456
 
1457
+ const previousPosition = tokenizer.position;
1272
1458
  const element = await readElement();
1273
1459
 
1274
1460
  if (element.id === 0x42_82) {
@@ -1298,6 +1484,11 @@ export class FileTypeParser {
1298
1484
  reason: 'EBML payload',
1299
1485
  }); // ignore payload
1300
1486
  --children;
1487
+
1488
+ // Safeguard against malformed files: bail if the position did not advance.
1489
+ if (tokenizer.position <= previousPosition) {
1490
+ return;
1491
+ }
1301
1492
  }
1302
1493
  }
1303
1494
 
@@ -1683,11 +1874,18 @@ export class FileTypeParser {
1683
1874
 
1684
1875
  const isUnknownPngStream = hasUnknownFileSize(tokenizer);
1685
1876
  const pngScanStart = tokenizer.position;
1877
+ let pngChunkCount = 0;
1686
1878
  do {
1879
+ pngChunkCount++;
1880
+ if (pngChunkCount > maximumPngChunkCount) {
1881
+ break;
1882
+ }
1883
+
1687
1884
  if (hasExceededUnknownSizeScanBudget(tokenizer, pngScanStart, maximumPngChunkSizeInBytes)) {
1688
1885
  break;
1689
1886
  }
1690
1887
 
1888
+ const previousPosition = tokenizer.position;
1691
1889
  const chunk = await readChunkHeader();
1692
1890
  if (chunk.length < 0) {
1693
1891
  return; // Invalid chunk length
@@ -1726,6 +1924,11 @@ export class FileTypeParser {
1726
1924
  throw error;
1727
1925
  }
1728
1926
  }
1927
+
1928
+ // Safeguard against malformed files: bail if the position did not advance.
1929
+ if (tokenizer.position <= previousPosition) {
1930
+ break;
1931
+ }
1729
1932
  } while (tokenizer.position + 8 < tokenizer.fileInfo.size);
1730
1933
 
1731
1934
  return pngFileType;
@@ -1906,7 +2109,13 @@ export class FileTypeParser {
1906
2109
  });
1907
2110
  const isUnknownFileSize = hasUnknownFileSize(tokenizer);
1908
2111
  const asfHeaderScanStart = tokenizer.position;
2112
+ let asfHeaderObjectCount = 0;
1909
2113
  while (tokenizer.position + 24 < tokenizer.fileInfo.size) {
2114
+ asfHeaderObjectCount++;
2115
+ if (asfHeaderObjectCount > maximumAsfHeaderObjectCount) {
2116
+ break;
2117
+ }
2118
+
1910
2119
  if (hasExceededUnknownSizeScanBudget(tokenizer, asfHeaderScanStart, maximumUntrustedSkipSizeInBytes)) {
1911
2120
  break;
1912
2121
  }
@@ -2363,6 +2572,10 @@ export class FileTypeParser {
2363
2572
 
2364
2573
  async readTiffIFD(bigEndian) {
2365
2574
  const numberOfTags = await this.tokenizer.readToken(bigEndian ? Token.UINT16_BE : Token.UINT16_LE);
2575
+ if (numberOfTags > maximumTiffTagCount) {
2576
+ return;
2577
+ }
2578
+
2366
2579
  if (
2367
2580
  hasUnknownFileSize(this.tokenizer)
2368
2581
  && (2 + (numberOfTags * 12)) > maximumTiffIfdOffsetInBytes
package/index.js CHANGED
@@ -27,7 +27,7 @@ function isTokenizerStreamBoundsError(error) {
27
27
 
28
28
  export class FileTypeParser extends DefaultFileTypeParser {
29
29
  async fromStream(stream) {
30
- const tokenizer = await (stream instanceof WebReadableStream ? strtok3.fromWebStream(stream, this.tokenizerOptions) : strtok3.fromStream(stream, this.tokenizerOptions));
30
+ const tokenizer = await (stream instanceof WebReadableStream ? strtok3.fromWebStream(stream, this.getTokenizerOptions()) : strtok3.fromStream(stream, this.getTokenizerOptions()));
31
31
  try {
32
32
  return await super.fromTokenizer(tokenizer);
33
33
  } catch (error) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "file-type",
3
- "version": "21.3.1",
3
+ "version": "21.3.2",
4
4
  "description": "Detect the file type of a file, stream, or data",
5
5
  "license": "MIT",
6
6
  "repository": "sindresorhus/file-type",
package/readme.md CHANGED
@@ -16,11 +16,12 @@ We accept contributions for commonly used modern file formats, not historical or
16
16
  npm install file-type
17
17
  ```
18
18
 
19
- **This package is an ESM package. Your project needs to be ESM too. [Read more](https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c). For TypeScript + CommonJS, see [`load-esm`](https://github.com/Borewit/load-esm).**
19
+ **This package is an ESM package. Your project needs to be ESM too. [Read more](https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c). For TypeScript + CommonJS, see [`load-esm`](https://github.com/Borewit/load-esm).** If you use it with Webpack, you need the latest Webpack version and ensure you configure it correctly for ESM.
20
20
 
21
- If you use it with Webpack, you need the latest Webpack version and ensure you configure it correctly for ESM.
22
-
23
- File type detection is based on binary signatures (magic numbers) and should be treated as a best-effort hint, not a guarantee.
21
+ > [!IMPORTANT]
22
+ > File type detection is based on binary signatures (magic numbers) and is a best-effort hint. It does not guarantee the file is actually of that type or that the file is valid/not malformed.
23
+ >
24
+ > Robustness against malformed input is best-effort. When processing untrusted files on a server, enforce a reasonable file size limit and use a worker thread with a timeout (e.g., [`make-asynchronous`](https://github.com/sindresorhus/make-asynchronous)). These are not considered security issues in this package.
24
25
 
25
26
  ## Usage
26
27