file-type 21.3.0 → 21.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. package/core.js +925 -148
  2. package/index.js +27 -3
  3. package/package.json +4 -4
  4. package/readme.md +9 -7
package/core.js CHANGED
@@ -14,6 +14,321 @@ import {
14
14
  import {extensions, mimeTypes} from './supported.js';
15
15
 
16
16
  export const reasonableDetectionSizeInBytes = 4100; // A fair amount of file-types are detectable within this range.
17
// Hard ceilings applied while parsing untrusted input. They are kept small so a hostile file cannot cause accidental memory spikes.
const maximumMpegOffsetTolerance = reasonableDetectionSizeInBytes - 2;
const maximumZipEntrySizeInBytes = 2 ** 20; // 1 MiB
const maximumZipEntryCount = 2 ** 10;
const maximumZipBufferedReadSizeInBytes = 0x7F_FF_FF_FF; // (2 ** 31) - 1
const maximumUntrustedSkipSizeInBytes = 16 * (2 ** 20); // 16 MiB
const maximumZipTextEntrySizeInBytes = maximumZipEntrySizeInBytes;
const maximumNestedGzipDetectionSizeInBytes = maximumUntrustedSkipSizeInBytes;
const maximumNestedGzipProbeDepth = 1;
const maximumId3HeaderSizeInBytes = maximumUntrustedSkipSizeInBytes;
const maximumEbmlDocumentTypeSizeInBytes = 64;
const maximumEbmlElementPayloadSizeInBytes = maximumUntrustedSkipSizeInBytes;
const maximumEbmlElementCount = 256;
const maximumPngChunkCount = 512;
const maximumAsfHeaderObjectCount = 512;
const maximumTiffTagCount = 512;
const maximumDetectionReentryCount = 256;
const maximumPngChunkSizeInBytes = maximumUntrustedSkipSizeInBytes;
const maximumTiffIfdOffsetInBytes = maximumUntrustedSkipSizeInBytes;

// Exact error messages that `isRecoverableZipError` treats as recoverable.
const recoverableZipErrorMessages = new Set([
	'Unexpected signature',
	'Encrypted ZIP',
	'Expected Central-File-Header signature',
]);

// Error message prefixes that `isRecoverableZipError` treats as recoverable (the suffix carries a variable value).
const recoverableZipErrorMessagePrefixes = [
	'ZIP entry count exceeds ',
	'Unsupported ZIP compression method:',
	'ZIP entry compressed data exceeds ',
	'ZIP entry decompressed data exceeds ',
];

// `error.code` values that `isRecoverableZipError` treats as recoverable.
const recoverableZipErrorCodes = new Set([
	'Z_BUF_ERROR',
	'Z_DATA_ERROR',
	'ERR_INVALID_STATE',
]);
52
+
53
/**
Error raised when an untrusted size or length fails a defensive bound check (see `getSafeBound`).

The `name` property is set explicitly so stack traces and logs identify the error type instead of showing a plain `Error`.
*/
class ParserHardLimitError extends Error {
	constructor(message, options) {
		super(message, options);
		this.name = 'ParserHardLimitError';
	}
}
54
+
55
/**
Validate a size taken from untrusted input.

@param {number} value - Size to validate.
@param {number} maximum - Largest accepted size.
@param {string} reason - Label used as the start of the error message.
@returns {number} `value`, unchanged, when it is finite, non-negative and not above `maximum`.
@throws {ParserHardLimitError} When the value is out of bounds.
*/
function getSafeBound(value, maximum, reason) {
	const isOutOfBounds = !Number.isFinite(value) || value < 0 || value > maximum;
	if (!isOutOfBounds) {
		return value;
	}

	throw new ParserHardLimitError(`${reason} has invalid size ${value} (maximum ${maximum} bytes)`);
}
66
+
67
/**
Skip `length` bytes on the tokenizer, but only after the length has passed `getSafeBound`.

@param tokenizer - Object exposing an async `ignore(length)` method.
@param {number} length - Number of bytes to skip.
@param {object} [options]
@param {number} [options.maximumLength] - Largest accepted skip; defaults to `maximumUntrustedSkipSizeInBytes`.
@param {string} [options.reason] - Label passed on to `getSafeBound` for its error message.
*/
async function safeIgnore(tokenizer, length, {maximumLength = maximumUntrustedSkipSizeInBytes, reason = 'skip'} = {}) {
	// The bound check runs first, so an out-of-range length never reaches the tokenizer.
	await tokenizer.ignore(getSafeBound(length, maximumLength, reason));
}
71
+
72
/**
Read into `buffer` through the tokenizer after validating the requested length with `getSafeBound`.

@param tokenizer - Object exposing `readBuffer(buffer, options)`.
@param {Uint8Array} buffer - Destination buffer.
@param {object} [options] - Read options forwarded to the tokenizer; `length` defaults to `buffer.length`.
@param {object} [limits]
@param {number} [limits.maximumLength] - Largest accepted read; defaults to `buffer.length`.
@param {string} [limits.reason] - Label passed on to `getSafeBound` for its error message.
@returns Whatever `tokenizer.readBuffer` returns.
*/
async function safeReadBuffer(tokenizer, buffer, options, {maximumLength = buffer.length, reason = 'read'} = {}) {
	const requestedLength = options?.length ?? buffer.length;
	const checkedLength = getSafeBound(requestedLength, maximumLength, reason);
	// The validated length overrides whatever `options.length` carried.
	const readOptions = {...options, length: checkedLength};
	return tokenizer.readBuffer(buffer, readOptions);
}
80
+
81
+ async function decompressDeflateRawWithLimit(data, {maximumLength = maximumZipEntrySizeInBytes} = {}) {
82
+ const input = new ReadableStream({
83
+ start(controller) {
84
+ controller.enqueue(data);
85
+ controller.close();
86
+ },
87
+ });
88
+ const output = input.pipeThrough(new DecompressionStream('deflate-raw'));
89
+ const reader = output.getReader();
90
+ const chunks = [];
91
+ let totalLength = 0;
92
+
93
+ try {
94
+ for (;;) {
95
+ const {done, value} = await reader.read();
96
+ if (done) {
97
+ break;
98
+ }
99
+
100
+ totalLength += value.length;
101
+ if (totalLength > maximumLength) {
102
+ await reader.cancel();
103
+ throw new Error(`ZIP entry decompressed data exceeds ${maximumLength} bytes`);
104
+ }
105
+
106
+ chunks.push(value);
107
+ }
108
+ } finally {
109
+ reader.releaseLock();
110
+ }
111
+
112
+ const uncompressedData = new Uint8Array(totalLength);
113
+ let offset = 0;
114
+ for (const chunk of chunks) {
115
+ uncompressedData.set(chunk, offset);
116
+ offset += chunk.length;
117
+ }
118
+
119
+ return uncompressedData;
120
+ }
121
+
122
// Data-descriptor marker as compared against a little-endian 32-bit read of the stream.
const zipDataDescriptorSignature = 0x0807_4B50;
// Number of bytes read for one data descriptor.
const zipDataDescriptorLengthInBytes = 16;
// Bytes carried over between scan windows so a descriptor straddling two windows is still found.
const zipDataDescriptorOverlapLengthInBytes = zipDataDescriptorLengthInBytes - 1;
125
+
126
/**
Scan `buffer` for a data descriptor that closes the entry currently being read.

A position matches when the 32-bit little-endian value there equals the descriptor signature and the 32-bit value 8 bytes later equals the number of entry bytes that would precede it (`bytesConsumed + offset`).

@param {Uint8Array} buffer - Window of upcoming bytes.
@param {number} bytesConsumed - Entry bytes already consumed before this window.
@returns {number} Offset of the descriptor inside `buffer`, or `-1` when none is found.
*/
function findZipDataDescriptorOffset(buffer, bytesConsumed) {
	// Negative when the buffer is shorter than one descriptor, in which case the loop never runs.
	const lastCandidateOffset = buffer.length - zipDataDescriptorLengthInBytes;

	for (let offset = 0; offset <= lastCandidateOffset; offset++) {
		if (Token.UINT32_LE.get(buffer, offset) !== zipDataDescriptorSignature) {
			continue;
		}

		if (Token.UINT32_LE.get(buffer, offset + 8) === bytesConsumed + offset) {
			return offset;
		}
	}

	return -1;
}
143
+
144
/**
Copy `chunks` back-to-back into a single new `Uint8Array` of `totalLength` bytes.

@param {Iterable<Uint8Array>} chunks - Pieces to concatenate, in order.
@param {number} totalLength - Size of the resulting array.
@returns {Uint8Array} The concatenated bytes.
*/
function mergeByteChunks(chunks, totalLength) {
	const result = new Uint8Array(totalLength);
	let writePosition = 0;

	for (const piece of chunks) {
		result.set(piece, writePosition);
		writePosition += piece.length;
	}

	return result;
}
155
+
156
/**
Consume a ZIP entry whose end is only marked by a trailing data descriptor, enforcing a size ceiling.

The tokenizer is peeked one window (`zipHandler.syncBuffer`) at a time. Bytes before a located descriptor, or the whole window minus a small overlap, are consumed until the descriptor is found or no more bytes can be consumed. The descriptor itself is left unread.

@param zipHandler - Object providing `syncBuffer` and `tokenizer`.
@param {object} [options]
@param {boolean} [options.shouldBuffer] - When truthy, the entry bytes are collected and returned; otherwise they are skipped.
@param {number} [options.maximumLength] - Ceiling on consumed bytes; defaults to `maximumZipEntrySizeInBytes`.
@returns {Promise<Uint8Array|undefined>} The entry bytes when buffering, otherwise `undefined`.
@throws {Error} `ZIP entry compressed data exceeds … bytes` when the ceiling is crossed.
*/
async function readZipDataDescriptorEntryWithLimit(zipHandler, {shouldBuffer, maximumLength = maximumZipEntrySizeInBytes} = {}) {
	const {syncBuffer} = zipHandler;
	const windowCapacity = syncBuffer.length;
	const bufferedChunks = [];
	let bytesConsumed = 0;

	for (;;) {
		const peekedLength = await zipHandler.tokenizer.peekBuffer(syncBuffer, {mayBeLess: true});
		const descriptorOffset = findZipDataDescriptorOffset(syncBuffer.subarray(0, peekedLength), bytesConsumed);
		const isDescriptorFound = descriptorOffset >= 0;

		let chunkLength;
		if (isDescriptorFound) {
			// Consume only the bytes in front of the descriptor.
			chunkLength = descriptorOffset;
		} else if (peekedLength === windowCapacity) {
			// Full window without a match: hold back a tail so a descriptor split across windows is still seen.
			chunkLength = peekedLength - Math.min(zipDataDescriptorOverlapLengthInBytes, peekedLength - 1);
		} else {
			// Short window: nothing is held back.
			chunkLength = peekedLength;
		}

		if (chunkLength === 0) {
			break;
		}

		bytesConsumed += chunkLength;
		if (bytesConsumed > maximumLength) {
			throw new Error(`ZIP entry compressed data exceeds ${maximumLength} bytes`);
		}

		if (shouldBuffer) {
			const chunk = new Uint8Array(chunkLength);
			await zipHandler.tokenizer.readBuffer(chunk);
			bufferedChunks.push(chunk);
		} else {
			await zipHandler.tokenizer.ignore(chunkLength);
		}

		if (isDescriptorFound) {
			break;
		}
	}

	return shouldBuffer ? mergeByteChunks(bufferedChunks, bytesConsumed) : undefined;
}
202
+
203
/**
Consume the data of one ZIP entry, optionally returning it.

Entries flagged with a data descriptor and a zero `compressedSize` are handed to `readZipDataDescriptorEntryWithLimit`. Otherwise `compressedSize` bytes are skipped, or read into a new buffer after a bound check.

@param zipHandler - Object providing the `tokenizer`.
@param zipHeader - Local file header with `dataDescriptor` and `compressedSize`.
@param {object} [options]
@param {boolean} [options.shouldBuffer] - When truthy, the entry bytes are returned instead of skipped.
@returns {Promise<Uint8Array|undefined>} The entry bytes when buffering, otherwise `undefined`.
@throws {Error} `ZIP entry compressed data exceeds … bytes` when the declared size is invalid or too large to buffer.
*/
async function readZipEntryData(zipHandler, zipHeader, {shouldBuffer} = {}) {
	const usesDescriptorScan = zipHeader.dataDescriptor && zipHeader.compressedSize === 0;
	if (usesDescriptorScan) {
		return readZipDataDescriptorEntryWithLimit(zipHandler, {shouldBuffer});
	}

	const {compressedSize} = zipHeader;
	if (!shouldBuffer) {
		await zipHandler.tokenizer.ignore(compressedSize);
		return;
	}

	const maximumLength = getMaximumZipBufferedReadLength(zipHandler.tokenizer);
	const isInvalidSize = !Number.isFinite(compressedSize)
		|| compressedSize < 0
		|| compressedSize > maximumLength;
	if (isInvalidSize) {
		throw new Error(`ZIP entry compressed data exceeds ${maximumLength} bytes`);
	}

	const fileData = new Uint8Array(compressedSize);
	await zipHandler.tokenizer.readBuffer(fileData);
	return fileData;
}
229
+
230
// Replace the stock inflate so decompressed output is size-limited; @tokenizer/inflate offers no configuration hook for that.
// Method 0 passes the data through untouched, method 8 is inflated with a ceiling, anything else is rejected.
ZipHandler.prototype.inflate = async function (zipHeader, fileData, callback) {
	switch (zipHeader.compressedMethod) {
		case 0: {
			return callback(fileData);
		}

		case 8: {
			const uncompressedData = await decompressDeflateRawWithLimit(fileData, {maximumLength: maximumZipEntrySizeInBytes});
			return callback(uncompressedData);
		}

		default: {
			throw new Error(`Unsupported ZIP compression method: ${zipHeader.compressedMethod}`);
		}
	}
};
243
+
244
// Walk the local file headers of the archive, invoking `fileCallback` for each entry.
// The callback result may carry `stop` (end the walk after this entry) and `handler` (receives the inflated entry data).
// Throws once more than `maximumZipEntryCount` entries have been seen, or when an expected data descriptor lacks its signature.
ZipHandler.prototype.unzip = async function (fileCallback) {
	let zipEntryCount = 0;

	for (;;) {
		const zipHeader = await this.readLocalFileHeader();
		if (!zipHeader) {
			break;
		}

		zipEntryCount++;
		if (zipEntryCount > maximumZipEntryCount) {
			throw new Error(`ZIP entry count exceeds ${maximumZipEntryCount}`);
		}

		const next = fileCallback(zipHeader);
		const shouldStop = Boolean(next.stop);
		await this.tokenizer.ignore(zipHeader.extraFieldLength);

		// The entry is only buffered when a handler wants to look at it.
		const fileData = await readZipEntryData(this, zipHeader, {
			shouldBuffer: Boolean(next.handler),
		});

		if (next.handler) {
			await this.inflate(zipHeader, fileData, next.handler);
		}

		if (zipHeader.dataDescriptor) {
			const dataDescriptor = new Uint8Array(zipDataDescriptorLengthInBytes);
			await this.tokenizer.readBuffer(dataDescriptor);
			if (Token.UINT32_LE.get(dataDescriptor, 0) !== zipDataDescriptorSignature) {
				throw new Error(`Expected data-descriptor-signature at position ${this.tokenizer.position - dataDescriptor.length}`);
			}
		}

		if (shouldStop) {
			break;
		}
	}
};
278
+
279
/**
Wrap `stream` so that at most `maximumBytes` bytes are emitted.

Once the budget is used up the wrapper closes and the source is canceled. The chunk that crosses the budget is truncated with `subarray`.

@param {ReadableStream} stream - Source stream; a reader is acquired on it immediately.
@param {number} maximumBytes - Byte budget.
@returns {ReadableStream} The limited stream.
*/
function createByteLimitedReadableStream(stream, maximumBytes) {
	const sourceReader = stream.getReader();
	let bytesEmitted = 0;
	// True once the source has ended or has been canceled; no further cancel is issued after that.
	let isSourceSettled = false;

	async function cancelSource(reason) {
		if (isSourceSettled) {
			return;
		}

		isSourceSettled = true;
		await sourceReader.cancel(reason);
	}

	async function closeAndCancelSource(controller) {
		controller.close();
		await cancelSource();
	}

	return new ReadableStream({
		async pull(controller) {
			if (bytesEmitted >= maximumBytes) {
				await closeAndCancelSource(controller);
				return;
			}

			const {done, value: chunk} = await sourceReader.read();
			if (done || !chunk) {
				isSourceSettled = true;
				controller.close();
				return;
			}

			const bytesAllowed = maximumBytes - bytesEmitted;
			if (chunk.length > bytesAllowed) {
				// Emit only the part that still fits, then finish.
				controller.enqueue(chunk.subarray(0, bytesAllowed));
				bytesEmitted += bytesAllowed;
				await closeAndCancelSource(controller);
				return;
			}

			controller.enqueue(chunk);
			bytesEmitted += chunk.length;
		},
		async cancel(reason) {
			await cancelSource(reason);
		},
	});
}
17
332
 
18
333
  export async function fileTypeFromStream(stream, options) {
19
334
  return new FileTypeParser(options).fromStream(stream);
@@ -180,6 +495,195 @@ function _check(buffer, headers, options) {
180
495
  return true;
181
496
  }
182
497
 
498
/**
Turn a caller-supplied sample size into a usable positive integer.

Values that are not finite numbers fall back to `reasonableDetectionSizeInBytes`. Finite values are truncated and raised to at least 1, so a valid caller-requested probe depth is preserved.

@param {*} sampleSize - Caller input.
@returns {number} The normalized sample size.
*/
export function normalizeSampleSize(sampleSize) {
	return Number.isFinite(sampleSize)
		? Math.max(1, Math.trunc(sampleSize))
		: reasonableDetectionSizeInBytes;
}
506
+
507
/**
Clamp the `mpegOffsetTolerance` option into `[0, maximumMpegOffsetTolerance]`.

The value controls scan depth and therefore worst-case CPU work, so it is capped. Input that is not a finite number yields 0.

@param {*} mpegOffsetTolerance - Caller input.
@returns {number} The truncated, clamped tolerance.
*/
function normalizeMpegOffsetTolerance(mpegOffsetTolerance) {
	if (!Number.isFinite(mpegOffsetTolerance)) {
		return 0;
	}

	const wholeTolerance = Math.trunc(mpegOffsetTolerance);
	const cappedTolerance = Math.min(maximumMpegOffsetTolerance, wholeTolerance);
	return Math.max(0, cappedTolerance);
}
515
+
516
/**
Return a usable file size: the given size floored at 0, or `Number.MAX_SAFE_INTEGER` when the size is not a finite number.

@param {*} fileSize - Reported file size.
@returns {number}
*/
function getKnownFileSizeOrMaximum(fileSize) {
	return Number.isFinite(fileSize)
		? Math.max(0, fileSize)
		: Number.MAX_SAFE_INTEGER;
}
523
+
524
/**
Tell whether the tokenizer lacks a usable file size.

The size counts as unknown when `tokenizer.fileInfo.size` is not a finite number or equals `Number.MAX_SAFE_INTEGER`.

@param tokenizer - Object exposing `fileInfo.size`.
@returns {boolean}
*/
function hasUnknownFileSize(tokenizer) {
	const {size} = tokenizer.fileInfo;
	if (size === Number.MAX_SAFE_INTEGER) {
		return true;
	}

	return !Number.isFinite(size);
}
531
+
532
/**
Tell whether a scan over a stream of unknown size has moved more than `maximumBytes` past `startOffset`.

Always `false` when the file size is known.

@param tokenizer - Object exposing `position` and `fileInfo.size`.
@param {number} startOffset - Tokenizer position at which the scan started.
@param {number} maximumBytes - Scan budget.
@returns {boolean}
*/
function hasExceededUnknownSizeScanBudget(tokenizer, startOffset, maximumBytes) {
	if (!hasUnknownFileSize(tokenizer)) {
		return false;
	}

	const scannedBytes = tokenizer.position - startOffset;
	return scannedBytes > maximumBytes;
}
538
+
539
/**
Compute the largest ZIP entry that may be buffered from the current tokenizer position.

The result is the number of bytes left in the file (when the size is a finite number), capped at `maximumZipBufferedReadSizeInBytes`.

@param tokenizer - Object exposing `position` and `fileInfo.size`.
@returns {number}
*/
function getMaximumZipBufferedReadLength(tokenizer) {
	const {size} = tokenizer.fileInfo;

	let remainingBytes = Number.MAX_SAFE_INTEGER;
	if (Number.isFinite(size)) {
		remainingBytes = Math.max(0, size - tokenizer.position);
	}

	return Math.min(remainingBytes, maximumZipBufferedReadSizeInBytes);
}
547
+
548
/**
Decide whether an error raised while walking a ZIP archive can be tolerated.

Recoverable errors are end-of-stream errors, `ParserHardLimitError`, and `Error` instances whose message or `code` appears in the recoverable tables (exact message, known code, or message prefix).

@param {*} error - The caught value.
@returns {boolean}
*/
function isRecoverableZipError(error) {
	if (
		error instanceof strtok3.EndOfStreamError
		|| error instanceof ParserHardLimitError
	) {
		return true;
	}

	// Non-Error throwables are never recoverable.
	if (!(error instanceof Error)) {
		return false;
	}

	return recoverableZipErrorMessages.has(error.message)
		|| recoverableZipErrorCodes.has(error.code)
		|| recoverableZipErrorMessagePrefixes.some(prefix => error.message.startsWith(prefix));
}
577
+
578
/**
Check that both the compressed and the uncompressed size declared in a ZIP header are finite, non-negative and not above `maximumSize`.

@param zipHeader - Header exposing `compressedSize` and `uncompressedSize`.
@param {number} [maximumSize] - Ceiling for either size; defaults to `maximumZipEntrySizeInBytes`.
@returns {boolean} `true` when both sizes are acceptable.
*/
function canReadZipEntryForDetection(zipHeader, maximumSize = maximumZipEntrySizeInBytes) {
	const isUnacceptableSize = size => !Number.isFinite(size) || size < 0 || size > maximumSize;
	const declaredSizes = [zipHeader.compressedSize, zipHeader.uncompressedSize];
	return !declaredSizes.some(size => isUnacceptableSize(size));
}
592
+
593
/**
Create a fresh tracking object for OpenXML detection inside a ZIP archive, with every flag cleared.

@returns {object} A new state object whose flags are all `false`.
*/
function createOpenXmlZipDetectionState() {
	const flagNames = [
		'hasContentTypesEntry',
		'hasParsedContentTypesEntry',
		'isParsingContentTypes',
		'hasUnparseableContentTypes',
		'hasWordDirectory',
		'hasPresentationDirectory',
		'hasSpreadsheetDirectory',
		'hasThreeDimensionalModelEntry',
	];

	return Object.fromEntries(flagNames.map(flagName => [flagName, false]));
}
605
+
606
/**
Record on `openXmlState` which OpenXML-style paths a ZIP entry name matches.

Flags are only ever switched on; a non-matching name leaves the state untouched.

@param {object} openXmlState - State object to mutate.
@param {string} filename - ZIP entry name.
*/
function updateOpenXmlZipDetectionStateFromFilename(openXmlState, filename) {
	const directoryFlags = [
		['word/', 'hasWordDirectory'],
		['ppt/', 'hasPresentationDirectory'],
		['xl/', 'hasSpreadsheetDirectory'],
	];

	for (const [directoryPrefix, flagName] of directoryFlags) {
		if (filename.startsWith(directoryPrefix)) {
			openXmlState[flagName] = true;
		}
	}

	// 3D models need both the directory and the file extension to match.
	const isThreeDimensionalModel = filename.startsWith('3D/') && filename.endsWith('.model');
	if (isThreeDimensionalModel) {
		openXmlState.hasThreeDimensionalModelEntry = true;
	}
}
626
+
627
/**
Guess an OpenXML file type from the directory names seen in the archive.

The guess is only made when `[Content_Types].xml` was present but its handler was skipped: not parsed, not currently being parsed, and not marked unparseable. That avoids guessing when content-type parsing already gave an answer or failed.

@param {object} openXmlState - State built while walking the archive.
@returns {{ext: string, mime: string}|undefined} The guessed type, or `undefined` when no guess applies.
*/
function getOpenXmlFileTypeFromZipEntries(openXmlState) {
	const shouldUseDirectoryHeuristic = openXmlState.hasContentTypesEntry
		&& !openXmlState.hasUnparseableContentTypes
		&& !openXmlState.isParsingContentTypes
		&& !openXmlState.hasParsedContentTypesEntry;
	if (!shouldUseDirectoryHeuristic) {
		return;
	}

	// Checked in priority order; the first flag that is set wins.
	const candidates = [
		['hasWordDirectory', 'docx', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
		['hasPresentationDirectory', 'pptx', 'application/vnd.openxmlformats-officedocument.presentationml.presentation'],
		['hasSpreadsheetDirectory', 'xlsx', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'],
		['hasThreeDimensionalModelEntry', '3mf', 'model/3mf'],
	];

	for (const [flagName, ext, mime] of candidates) {
		if (openXmlState[flagName]) {
			return {ext, mime};
		}
	}
}
668
+
669
/**
Pull the main content type out of a `[Content_Types].xml` document with a plain string scan (no XML parsing).

The text between the last `"` before the first `.main+xml"` and that marker is returned. If the marker is absent, the 3MF model content type is returned when it appears as a `ContentType` attribute value.

@param {string} xmlContent - Decoded XML text.
@returns {string|undefined} The MIME type, or `undefined` when nothing matches.
*/
function getOpenXmlMimeTypeFromContentTypesXml(xmlContent) {
	const markerPosition = xmlContent.indexOf('.main+xml"');

	if (markerPosition !== -1) {
		const contentBeforeMarker = xmlContent.slice(0, markerPosition);
		// With no quote present `lastIndexOf` yields -1, so the slice intentionally starts at 0 and returns the whole prefix.
		const openingQuotePosition = contentBeforeMarker.lastIndexOf('"');
		return contentBeforeMarker.slice(openingQuotePosition + 1);
	}

	const threeDimensionalModelMimeType = 'application/vnd.ms-package.3dmanufacturing-3dmodel+xml';
	return xmlContent.includes(`ContentType="${threeDimensionalModelMimeType}"`)
		? threeDimensionalModelMimeType
		: undefined;
}
686
+
183
687
  export async function fileTypeFromTokenizer(tokenizer, options) {
184
688
  return new FileTypeParser(options).fromTokenizer(tokenizer);
185
689
  }
@@ -190,25 +694,47 @@ export async function fileTypeStream(webStream, options) {
190
694
 
191
695
  export class FileTypeParser {
192
696
  constructor(options) {
697
+ const normalizedMpegOffsetTolerance = normalizeMpegOffsetTolerance(options?.mpegOffsetTolerance);
193
698
  this.options = {
194
- mpegOffsetTolerance: 0,
195
699
  ...options,
700
+ mpegOffsetTolerance: normalizedMpegOffsetTolerance,
196
701
  };
197
702
 
198
- this.detectors = [...(options?.customDetectors ?? []),
703
+ this.detectors = [...(this.options.customDetectors ?? []),
199
704
  {id: 'core', detect: this.detectConfident},
200
705
  {id: 'core.imprecise', detect: this.detectImprecise}];
201
706
  this.tokenizerOptions = {
202
- abortSignal: options?.signal,
707
+ abortSignal: this.options.signal,
203
708
  };
709
+ this.gzipProbeDepth = 0;
204
710
  }
205
711
 
206
- async fromTokenizer(tokenizer) {
207
- const initialPosition = tokenizer.position;
712
+ getTokenizerOptions() {
713
+ return {
714
+ ...this.tokenizerOptions,
715
+ };
716
+ }
208
717
 
718
+ async fromTokenizer(tokenizer, detectionReentryCount = 0) {
719
+ this.detectionReentryCount = detectionReentryCount;
720
+ const initialPosition = tokenizer.position;
209
721
  // Iterate through all file-type detectors
210
722
  for (const detector of this.detectors) {
211
- const fileType = await detector.detect(tokenizer);
723
+ let fileType;
724
+ try {
725
+ fileType = await detector.detect(tokenizer);
726
+ } catch (error) {
727
+ if (error instanceof strtok3.EndOfStreamError) {
728
+ return;
729
+ }
730
+
731
+ if (error instanceof ParserHardLimitError) {
732
+ return;
733
+ }
734
+
735
+ throw error;
736
+ }
737
+
212
738
  if (fileType) {
213
739
  return fileType;
214
740
  }
@@ -230,11 +756,11 @@ export class FileTypeParser {
230
756
  return;
231
757
  }
232
758
 
233
- return this.fromTokenizer(strtok3.fromBuffer(buffer, this.tokenizerOptions));
759
+ return this.fromTokenizer(strtok3.fromBuffer(buffer, this.getTokenizerOptions()));
234
760
  }
235
761
 
236
762
  async fromBlob(blob) {
237
- const tokenizer = strtok3.fromBlob(blob, this.tokenizerOptions);
763
+ const tokenizer = strtok3.fromBlob(blob, this.getTokenizerOptions());
238
764
  try {
239
765
  return await this.fromTokenizer(tokenizer);
240
766
  } finally {
@@ -243,7 +769,7 @@ export class FileTypeParser {
243
769
  }
244
770
 
245
771
  async fromStream(stream) {
246
- const tokenizer = strtok3.fromWebStream(stream, this.tokenizerOptions);
772
+ const tokenizer = strtok3.fromWebStream(stream, this.getTokenizerOptions());
247
773
  try {
248
774
  return await this.fromTokenizer(tokenizer);
249
775
  } finally {
@@ -252,7 +778,7 @@ export class FileTypeParser {
252
778
  }
253
779
 
254
780
  async toDetectionStream(stream, options) {
255
- const {sampleSize = reasonableDetectionSizeInBytes} = options;
781
+ const sampleSize = normalizeSampleSize(options?.sampleSize ?? reasonableDetectionSizeInBytes);
256
782
  let detectedFileType;
257
783
  let firstChunk;
258
784
 
@@ -393,8 +919,13 @@ export class FileTypeParser {
393
919
  // -- 3-byte signatures --
394
920
 
395
921
  if (this.check([0xEF, 0xBB, 0xBF])) { // UTF-8-BOM
922
+ if (this.detectionReentryCount >= maximumDetectionReentryCount) {
923
+ return;
924
+ }
925
+
926
+ this.detectionReentryCount++;
396
927
  // Strip off UTF-8-BOM
397
- this.tokenizer.ignore(3);
928
+ await this.tokenizer.ignore(3);
398
929
  return this.detectConfident(tokenizer);
399
930
  }
400
931
 
@@ -413,28 +944,35 @@ export class FileTypeParser {
413
944
  }
414
945
 
415
946
  if (this.check([0x1F, 0x8B, 0x8])) {
416
- const gzipHandler = new GzipHandler(tokenizer);
947
+ if (this.gzipProbeDepth >= maximumNestedGzipProbeDepth) {
948
+ return {
949
+ ext: 'gz',
950
+ mime: 'application/gzip',
951
+ };
952
+ }
417
953
 
418
- const stream = gzipHandler.inflate();
419
- let shouldCancelStream = true;
954
+ const gzipHandler = new GzipHandler(tokenizer);
955
+ const limitedInflatedStream = createByteLimitedReadableStream(gzipHandler.inflate(), maximumNestedGzipDetectionSizeInBytes);
956
+ let compressedFileType;
420
957
  try {
421
- let compressedFileType;
422
- try {
423
- compressedFileType = await this.fromStream(stream);
424
- } catch {
425
- shouldCancelStream = false;
958
+ this.gzipProbeDepth++;
959
+ compressedFileType = await this.fromStream(limitedInflatedStream);
960
+ } catch (error) {
961
+ if (error?.name === 'AbortError') {
962
+ throw error;
426
963
  }
427
964
 
428
- if (compressedFileType && compressedFileType.ext === 'tar') {
429
- return {
430
- ext: 'tar.gz',
431
- mime: 'application/gzip',
432
- };
433
- }
965
+ // Decompression or inner-detection failures are expected for non-tar gzip files.
434
966
  } finally {
435
- if (shouldCancelStream) {
436
- await stream.cancel();
437
- }
967
+ this.gzipProbeDepth--;
968
+ }
969
+
970
+ // We only need enough inflated bytes to confidently decide whether this is tar.gz.
971
+ if (compressedFileType?.ext === 'tar') {
972
+ return {
973
+ ext: 'tar.gz',
974
+ mime: 'application/gzip',
975
+ };
438
976
  }
439
977
 
440
978
  return {
@@ -451,18 +989,54 @@ export class FileTypeParser {
451
989
  }
452
990
 
453
991
  if (this.checkString('ID3')) {
454
- await tokenizer.ignore(6); // Skip ID3 header until the header size
992
+ await safeIgnore(tokenizer, 6, {
993
+ maximumLength: 6,
994
+ reason: 'ID3 header prefix',
995
+ }); // Skip ID3 header until the header size
455
996
  const id3HeaderLength = await tokenizer.readToken(uint32SyncSafeToken);
997
+ const isUnknownFileSize = hasUnknownFileSize(tokenizer);
998
+ if (
999
+ !Number.isFinite(id3HeaderLength)
1000
+ || id3HeaderLength < 0
1001
+ // Keep ID3 probing bounded for unknown-size streams to avoid attacker-controlled large skips.
1002
+ || (
1003
+ isUnknownFileSize
1004
+ && id3HeaderLength > maximumId3HeaderSizeInBytes
1005
+ )
1006
+ ) {
1007
+ return;
1008
+ }
1009
+
456
1010
  if (tokenizer.position + id3HeaderLength > tokenizer.fileInfo.size) {
457
- // Guess file type based on ID3 header for backward compatibility
1011
+ if (isUnknownFileSize) {
1012
+ return;
1013
+ }
1014
+
458
1015
  return {
459
1016
  ext: 'mp3',
460
1017
  mime: 'audio/mpeg',
461
1018
  };
462
1019
  }
463
1020
 
464
- await tokenizer.ignore(id3HeaderLength);
465
- return this.fromTokenizer(tokenizer); // Skip ID3 header, recursion
1021
+ try {
1022
+ await safeIgnore(tokenizer, id3HeaderLength, {
1023
+ maximumLength: isUnknownFileSize ? maximumId3HeaderSizeInBytes : tokenizer.fileInfo.size,
1024
+ reason: 'ID3 payload',
1025
+ });
1026
+ } catch (error) {
1027
+ if (error instanceof strtok3.EndOfStreamError) {
1028
+ return;
1029
+ }
1030
+
1031
+ throw error;
1032
+ }
1033
+
1034
+ if (this.detectionReentryCount >= maximumDetectionReentryCount) {
1035
+ return;
1036
+ }
1037
+
1038
+ this.detectionReentryCount++;
1039
+ return this.fromTokenizer(tokenizer, this.detectionReentryCount); // Skip ID3 header, recursion
466
1040
  }
467
1041
 
468
1042
  // Musepack, SV7
@@ -547,72 +1121,104 @@ export class FileTypeParser {
547
1121
  // Need to be before the `zip` check
548
1122
  if (this.check([0x50, 0x4B, 0x3, 0x4])) { // Local file header signature
549
1123
  let fileType;
550
- await new ZipHandler(tokenizer).unzip(zipHeader => {
551
- switch (zipHeader.filename) {
552
- case 'META-INF/mozilla.rsa':
553
- fileType = {
554
- ext: 'xpi',
555
- mime: 'application/x-xpinstall',
556
- };
557
- return {
558
- stop: true,
559
- };
560
- case 'META-INF/MANIFEST.MF':
561
- fileType = {
562
- ext: 'jar',
563
- mime: 'application/java-archive',
564
- };
565
- return {
566
- stop: true,
567
- };
568
- case 'mimetype':
1124
+ const openXmlState = createOpenXmlZipDetectionState();
1125
+
1126
+ try {
1127
+ await new ZipHandler(tokenizer).unzip(zipHeader => {
1128
+ updateOpenXmlZipDetectionStateFromFilename(openXmlState, zipHeader.filename);
1129
+
1130
+ const isOpenXmlContentTypesEntry = zipHeader.filename === '[Content_Types].xml';
1131
+ const openXmlFileTypeFromEntries = getOpenXmlFileTypeFromZipEntries(openXmlState);
1132
+ if (
1133
+ !isOpenXmlContentTypesEntry
1134
+ && openXmlFileTypeFromEntries
1135
+ ) {
1136
+ fileType = openXmlFileTypeFromEntries;
569
1137
  return {
570
- async handler(fileData) {
571
- // Use TextDecoder to decode the UTF-8 encoded data
572
- const mimeType = new TextDecoder('utf-8').decode(fileData).trim();
573
- fileType = getFileTypeFromMimeType(mimeType);
574
- },
575
1138
  stop: true,
576
1139
  };
1140
+ }
577
1141
 
578
- case '[Content_Types].xml':
579
- return {
580
- async handler(fileData) {
581
- // Use TextDecoder to decode the UTF-8 encoded data
582
- let xmlContent = new TextDecoder('utf-8').decode(fileData);
583
- const endPos = xmlContent.indexOf('.main+xml"');
584
- if (endPos === -1) {
585
- const mimeType = 'application/vnd.ms-package.3dmanufacturing-3dmodel+xml';
586
- if (xmlContent.includes(`ContentType="${mimeType}"`)) {
1142
+ switch (zipHeader.filename) {
1143
+ case 'META-INF/mozilla.rsa':
1144
+ fileType = {
1145
+ ext: 'xpi',
1146
+ mime: 'application/x-xpinstall',
1147
+ };
1148
+ return {
1149
+ stop: true,
1150
+ };
1151
+ case 'META-INF/MANIFEST.MF':
1152
+ fileType = {
1153
+ ext: 'jar',
1154
+ mime: 'application/java-archive',
1155
+ };
1156
+ return {
1157
+ stop: true,
1158
+ };
1159
+ case 'mimetype':
1160
+ if (!canReadZipEntryForDetection(zipHeader, maximumZipTextEntrySizeInBytes)) {
1161
+ return {};
1162
+ }
1163
+
1164
+ return {
1165
+ async handler(fileData) {
1166
+ // Use TextDecoder to decode the UTF-8 encoded data
1167
+ const mimeType = new TextDecoder('utf-8').decode(fileData).trim();
1168
+ fileType = getFileTypeFromMimeType(mimeType);
1169
+ },
1170
+ stop: true,
1171
+ };
1172
+
1173
+ case '[Content_Types].xml': {
1174
+ openXmlState.hasContentTypesEntry = true;
1175
+
1176
+ if (!canReadZipEntryForDetection(zipHeader, maximumZipTextEntrySizeInBytes)) {
1177
+ openXmlState.hasUnparseableContentTypes = true;
1178
+ return {};
1179
+ }
1180
+
1181
+ openXmlState.isParsingContentTypes = true;
1182
+ return {
1183
+ async handler(fileData) {
1184
+ // Use TextDecoder to decode the UTF-8 encoded data
1185
+ const xmlContent = new TextDecoder('utf-8').decode(fileData);
1186
+ const mimeType = getOpenXmlMimeTypeFromContentTypesXml(xmlContent);
1187
+ if (mimeType) {
587
1188
  fileType = getFileTypeFromMimeType(mimeType);
588
1189
  }
589
- } else {
590
- xmlContent = xmlContent.slice(0, Math.max(0, endPos));
591
- const firstPos = xmlContent.lastIndexOf('"');
592
- const mimeType = xmlContent.slice(Math.max(0, firstPos + 1));
593
- fileType = getFileTypeFromMimeType(mimeType);
594
- }
595
- },
596
- stop: true,
597
- };
598
- default:
599
- if (/classes\d*\.dex/.test(zipHeader.filename)) {
600
- fileType = {
601
- ext: 'apk',
602
- mime: 'application/vnd.android.package-archive',
1190
+
1191
+ openXmlState.hasParsedContentTypesEntry = true;
1192
+ openXmlState.isParsingContentTypes = false;
1193
+ },
1194
+ stop: true,
603
1195
  };
604
- return {stop: true};
605
1196
  }
606
1197
 
607
- return {};
1198
+ default:
1199
+ if (/classes\d*\.dex/.test(zipHeader.filename)) {
1200
+ fileType = {
1201
+ ext: 'apk',
1202
+ mime: 'application/vnd.android.package-archive',
1203
+ };
1204
+ return {stop: true};
1205
+ }
1206
+
1207
+ return {};
1208
+ }
1209
+ });
1210
+ } catch (error) {
1211
+ if (!isRecoverableZipError(error)) {
1212
+ throw error;
608
1213
  }
609
- }).catch(error => {
610
- if (!(error instanceof strtok3.EndOfStreamError)) {
611
- throw error; // Re-throw non-EndOfStreamError
1214
+
1215
+ if (openXmlState.isParsingContentTypes) {
1216
+ openXmlState.isParsingContentTypes = false;
1217
+ openXmlState.hasUnparseableContentTypes = true;
612
1218
  }
613
- });
1219
+ }
614
1220
 
615
- return fileType ?? {
1221
+ return fileType ?? getOpenXmlFileTypeFromZipEntries(openXmlState) ?? {
616
1222
  ext: 'zip',
617
1223
  mime: 'application/zip',
618
1224
  };
@@ -817,7 +1423,10 @@ export class FileTypeParser {
817
1423
  }
818
1424
 
819
1425
  const id = new Uint8Array(ic + 1);
820
- await tokenizer.readBuffer(id);
1426
+ await safeReadBuffer(tokenizer, id, undefined, {
1427
+ maximumLength: id.length,
1428
+ reason: 'EBML field',
1429
+ });
821
1430
  return id;
822
1431
  }
823
1432
 
@@ -838,20 +1447,53 @@ export class FileTypeParser {
838
1447
  }
839
1448
 
840
1449
  async function readChildren(children) {
1450
+ let ebmlElementCount = 0;
841
1451
  while (children > 0) {
1452
+ ebmlElementCount++;
1453
+ if (ebmlElementCount > maximumEbmlElementCount) {
1454
+ return;
1455
+ }
1456
+
1457
+ const previousPosition = tokenizer.position;
842
1458
  const element = await readElement();
1459
+
843
1460
  if (element.id === 0x42_82) {
844
- const rawValue = await tokenizer.readToken(new Token.StringType(element.len));
1461
+ // `DocType` is a short string ("webm", "matroska", ...), reject implausible lengths to avoid large allocations.
1462
+ if (element.len > maximumEbmlDocumentTypeSizeInBytes) {
1463
+ return;
1464
+ }
1465
+
1466
+ const documentTypeLength = getSafeBound(element.len, maximumEbmlDocumentTypeSizeInBytes, 'EBML DocType');
1467
+ const rawValue = await tokenizer.readToken(new Token.StringType(documentTypeLength));
845
1468
  return rawValue.replaceAll(/\00.*$/g, ''); // Return DocType
846
1469
  }
847
1470
 
848
- await tokenizer.ignore(element.len); // ignore payload
1471
+ if (
1472
+ hasUnknownFileSize(tokenizer)
1473
+ && (
1474
+ !Number.isFinite(element.len)
1475
+ || element.len < 0
1476
+ || element.len > maximumEbmlElementPayloadSizeInBytes
1477
+ )
1478
+ ) {
1479
+ return;
1480
+ }
1481
+
1482
+ await safeIgnore(tokenizer, element.len, {
1483
+ maximumLength: hasUnknownFileSize(tokenizer) ? maximumEbmlElementPayloadSizeInBytes : tokenizer.fileInfo.size,
1484
+ reason: 'EBML payload',
1485
+ }); // ignore payload
849
1486
  --children;
1487
+
1488
+ // Safeguard against malformed files: bail if the position did not advance.
1489
+ if (tokenizer.position <= previousPosition) {
1490
+ return;
1491
+ }
850
1492
  }
851
1493
  }
852
1494
 
853
- const re = await readElement();
854
- const documentType = await readChildren(re.len);
1495
+ const rootElement = await readElement();
1496
+ const documentType = await readChildren(rootElement.len);
855
1497
 
856
1498
  switch (documentType) {
857
1499
  case 'webm':
@@ -1203,6 +1845,16 @@ export class FileTypeParser {
1203
1845
  // -- 8-byte signatures --
1204
1846
 
1205
1847
  if (this.check([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A])) {
1848
+ const pngFileType = {
1849
+ ext: 'png',
1850
+ mime: 'image/png',
1851
+ };
1852
+
1853
+ const apngFileType = {
1854
+ ext: 'apng',
1855
+ mime: 'image/apng',
1856
+ };
1857
+
1206
1858
  // APNG format (https://wiki.mozilla.org/APNG_Specification)
1207
1859
  // 1. Find the first IDAT (image data) chunk (49 44 41 54)
1208
1860
  // 2. Check if there is an "acTL" chunk before the IDAT one (61 63 54 4C)
@@ -1220,7 +1872,20 @@ export class FileTypeParser {
1220
1872
  };
1221
1873
  }
1222
1874
 
1875
+ const isUnknownPngStream = hasUnknownFileSize(tokenizer);
1876
+ const pngScanStart = tokenizer.position;
1877
+ let pngChunkCount = 0;
1223
1878
  do {
1879
+ pngChunkCount++;
1880
+ if (pngChunkCount > maximumPngChunkCount) {
1881
+ break;
1882
+ }
1883
+
1884
+ if (hasExceededUnknownSizeScanBudget(tokenizer, pngScanStart, maximumPngChunkSizeInBytes)) {
1885
+ break;
1886
+ }
1887
+
1888
+ const previousPosition = tokenizer.position;
1224
1889
  const chunk = await readChunkHeader();
1225
1890
  if (chunk.length < 0) {
1226
1891
  return; // Invalid chunk length
@@ -1228,24 +1893,45 @@ export class FileTypeParser {
1228
1893
 
1229
1894
  switch (chunk.type) {
1230
1895
  case 'IDAT':
1231
- return {
1232
- ext: 'png',
1233
- mime: 'image/png',
1234
- };
1896
+ return pngFileType;
1235
1897
  case 'acTL':
1236
- return {
1237
- ext: 'apng',
1238
- mime: 'image/apng',
1239
- };
1898
+ return apngFileType;
1240
1899
  default:
1241
- await tokenizer.ignore(chunk.length + 4); // Ignore chunk-data + CRC
1900
+ if (
1901
+ isUnknownPngStream
1902
+ && chunk.length > maximumPngChunkSizeInBytes
1903
+ ) {
1904
+ // Avoid huge attacker-controlled skips when probing unknown-size streams.
1905
+ return;
1906
+ }
1907
+
1908
+ try {
1909
+ await safeIgnore(tokenizer, chunk.length + 4, {
1910
+ maximumLength: isUnknownPngStream ? maximumPngChunkSizeInBytes + 4 : tokenizer.fileInfo.size,
1911
+ reason: 'PNG chunk payload',
1912
+ }); // Ignore chunk-data + CRC
1913
+ } catch (error) {
1914
+ if (
1915
+ !isUnknownPngStream
1916
+ && (
1917
+ error instanceof ParserHardLimitError
1918
+ || error instanceof strtok3.EndOfStreamError
1919
+ )
1920
+ ) {
1921
+ return pngFileType;
1922
+ }
1923
+
1924
+ throw error;
1925
+ }
1926
+ }
1927
+
1928
+ // Safeguard against malformed files: bail if the position did not advance.
1929
+ if (tokenizer.position <= previousPosition) {
1930
+ break;
1242
1931
  }
1243
1932
  } while (tokenizer.position + 8 < tokenizer.fileInfo.size);
1244
1933
 
1245
- return {
1246
- ext: 'png',
1247
- mime: 'image/png',
1248
- };
1934
+ return pngFileType;
1249
1935
  }
1250
1936
 
1251
1937
  if (this.check([0x41, 0x52, 0x52, 0x4F, 0x57, 0x31, 0x00, 0x00])) {
@@ -1403,45 +2089,101 @@ export class FileTypeParser {
1403
2089
 
1404
2090
  // ASF_Header_Object first 80 bytes
1405
2091
  if (this.check([0x30, 0x26, 0xB2, 0x75, 0x8E, 0x66, 0xCF, 0x11, 0xA6, 0xD9])) {
1406
- async function readHeader() {
1407
- const guid = new Uint8Array(16);
1408
- await tokenizer.readBuffer(guid);
1409
- return {
1410
- id: guid,
1411
- size: Number(await tokenizer.readToken(Token.UINT64_LE)),
1412
- };
1413
- }
2092
+ let isMalformedAsf = false;
2093
+ try {
2094
+ async function readHeader() {
2095
+ const guid = new Uint8Array(16);
2096
+ await safeReadBuffer(tokenizer, guid, undefined, {
2097
+ maximumLength: guid.length,
2098
+ reason: 'ASF header GUID',
2099
+ });
2100
+ return {
2101
+ id: guid,
2102
+ size: Number(await tokenizer.readToken(Token.UINT64_LE)),
2103
+ };
2104
+ }
1414
2105
 
1415
- await tokenizer.ignore(30);
1416
- // Search for header should be in first 1KB of file.
1417
- while (tokenizer.position + 24 < tokenizer.fileInfo.size) {
1418
- const header = await readHeader();
1419
- let payload = header.size - 24;
1420
- if (_check(header.id, [0x91, 0x07, 0xDC, 0xB7, 0xB7, 0xA9, 0xCF, 0x11, 0x8E, 0xE6, 0x00, 0xC0, 0x0C, 0x20, 0x53, 0x65])) {
1421
- // Sync on Stream-Properties-Object (B7DC0791-A9B7-11CF-8EE6-00C00C205365)
1422
- const typeId = new Uint8Array(16);
1423
- payload -= await tokenizer.readBuffer(typeId);
2106
+ await safeIgnore(tokenizer, 30, {
2107
+ maximumLength: 30,
2108
+ reason: 'ASF header prelude',
2109
+ });
2110
+ const isUnknownFileSize = hasUnknownFileSize(tokenizer);
2111
+ const asfHeaderScanStart = tokenizer.position;
2112
+ let asfHeaderObjectCount = 0;
2113
+ while (tokenizer.position + 24 < tokenizer.fileInfo.size) {
2114
+ asfHeaderObjectCount++;
2115
+ if (asfHeaderObjectCount > maximumAsfHeaderObjectCount) {
2116
+ break;
2117
+ }
1424
2118
 
1425
- if (_check(typeId, [0x40, 0x9E, 0x69, 0xF8, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
1426
- // Found audio:
1427
- return {
1428
- ext: 'asf',
1429
- mime: 'audio/x-ms-asf',
1430
- };
2119
+ if (hasExceededUnknownSizeScanBudget(tokenizer, asfHeaderScanStart, maximumUntrustedSkipSizeInBytes)) {
2120
+ break;
1431
2121
  }
1432
2122
 
1433
- if (_check(typeId, [0xC0, 0xEF, 0x19, 0xBC, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
1434
- // Found video:
1435
- return {
1436
- ext: 'asf',
1437
- mime: 'video/x-ms-asf',
1438
- };
2123
+ const previousPosition = tokenizer.position;
2124
+ const header = await readHeader();
2125
+ let payload = header.size - 24;
2126
+ if (
2127
+ !Number.isFinite(payload)
2128
+ || payload < 0
2129
+ ) {
2130
+ isMalformedAsf = true;
2131
+ break;
1439
2132
  }
1440
2133
 
1441
- break;
2134
+ if (_check(header.id, [0x91, 0x07, 0xDC, 0xB7, 0xB7, 0xA9, 0xCF, 0x11, 0x8E, 0xE6, 0x00, 0xC0, 0x0C, 0x20, 0x53, 0x65])) {
2135
+ // Sync on Stream-Properties-Object (B7DC0791-A9B7-11CF-8EE6-00C00C205365)
2136
+ const typeId = new Uint8Array(16);
2137
+ payload -= await safeReadBuffer(tokenizer, typeId, undefined, {
2138
+ maximumLength: typeId.length,
2139
+ reason: 'ASF stream type GUID',
2140
+ });
2141
+
2142
+ if (_check(typeId, [0x40, 0x9E, 0x69, 0xF8, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
2143
+ // Found audio:
2144
+ return {
2145
+ ext: 'asf',
2146
+ mime: 'audio/x-ms-asf',
2147
+ };
2148
+ }
2149
+
2150
+ if (_check(typeId, [0xC0, 0xEF, 0x19, 0xBC, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
2151
+ // Found video:
2152
+ return {
2153
+ ext: 'asf',
2154
+ mime: 'video/x-ms-asf',
2155
+ };
2156
+ }
2157
+
2158
+ break;
2159
+ }
2160
+
2161
+ await safeIgnore(tokenizer, payload, {
2162
+ maximumLength: isUnknownFileSize ? maximumUntrustedSkipSizeInBytes : tokenizer.fileInfo.size,
2163
+ reason: 'ASF header payload',
2164
+ });
2165
+
2166
+ // Safeguard against malformed files: break if the position did not advance.
2167
+ if (tokenizer.position <= previousPosition) {
2168
+ isMalformedAsf = true;
2169
+ break;
2170
+ }
1442
2171
  }
2172
+ } catch (error) {
2173
+ if (
2174
+ error instanceof strtok3.EndOfStreamError
2175
+ || error instanceof ParserHardLimitError
2176
+ ) {
2177
+ if (hasUnknownFileSize(tokenizer)) {
2178
+ isMalformedAsf = true;
2179
+ }
2180
+ } else {
2181
+ throw error;
2182
+ }
2183
+ }
1443
2184
 
1444
- await tokenizer.ignore(payload);
2185
+ if (isMalformedAsf) {
2186
+ return;
1445
2187
  }
1446
2188
 
1447
2189
  // Default to ASF generic extension
@@ -1760,9 +2502,10 @@ export class FileTypeParser {
1760
2502
  // Detections with limited supporting data, resulting in a higher likelihood of false positives
1761
2503
  detectImprecise = async tokenizer => {
1762
2504
  this.buffer = new Uint8Array(reasonableDetectionSizeInBytes);
2505
+ const fileSize = getKnownFileSizeOrMaximum(tokenizer.fileInfo.size);
1763
2506
 
1764
2507
  // Read initial sample size of 8 bytes
1765
- await tokenizer.peekBuffer(this.buffer, {length: Math.min(8, tokenizer.fileInfo.size), mayBeLess: true});
2508
+ await tokenizer.peekBuffer(this.buffer, {length: Math.min(8, fileSize), mayBeLess: true});
1766
2509
 
1767
2510
  if (
1768
2511
  this.check([0x0, 0x0, 0x1, 0xBA])
@@ -1796,7 +2539,7 @@ export class FileTypeParser {
1796
2539
  }
1797
2540
 
1798
2541
  // Adjust buffer to `mpegOffsetTolerance`
1799
- await tokenizer.peekBuffer(this.buffer, {length: Math.min(2 + this.options.mpegOffsetTolerance, tokenizer.fileInfo.size), mayBeLess: true});
2542
+ await tokenizer.peekBuffer(this.buffer, {length: Math.min(2 + this.options.mpegOffsetTolerance, fileSize), mayBeLess: true});
1800
2543
 
1801
2544
  // Check MPEG 1 or 2 Layer 3 header, or 'layer 0' for ADTS (MPEG sync-word 0xFFE)
1802
2545
  if (this.buffer.length >= (2 + this.options.mpegOffsetTolerance)) {
@@ -1811,7 +2554,7 @@ export class FileTypeParser {
1811
2554
 
1812
2555
  async readTiffTag(bigEndian) {
1813
2556
  const tagId = await this.tokenizer.readToken(bigEndian ? Token.UINT16_BE : Token.UINT16_LE);
1814
- this.tokenizer.ignore(10);
2557
+ await this.tokenizer.ignore(10);
1815
2558
  switch (tagId) {
1816
2559
  case 50_341:
1817
2560
  return {
@@ -1829,6 +2572,17 @@ export class FileTypeParser {
1829
2572
 
1830
2573
  async readTiffIFD(bigEndian) {
1831
2574
  const numberOfTags = await this.tokenizer.readToken(bigEndian ? Token.UINT16_BE : Token.UINT16_LE);
2575
+ if (numberOfTags > maximumTiffTagCount) {
2576
+ return;
2577
+ }
2578
+
2579
+ if (
2580
+ hasUnknownFileSize(this.tokenizer)
2581
+ && (2 + (numberOfTags * 12)) > maximumTiffIfdOffsetInBytes
2582
+ ) {
2583
+ return;
2584
+ }
2585
+
1832
2586
  for (let n = 0; n < numberOfTags; ++n) {
1833
2587
  const fileType = await this.readTiffTag(bigEndian);
1834
2588
  if (fileType) {
@@ -1838,6 +2592,11 @@ export class FileTypeParser {
1838
2592
  }
1839
2593
 
1840
2594
  async readTiffHeader(bigEndian) {
2595
+ const tiffFileType = {
2596
+ ext: 'tif',
2597
+ mime: 'image/tiff',
2598
+ };
2599
+
1841
2600
  const version = (bigEndian ? Token.UINT16_BE : Token.UINT16_LE).get(this.buffer, 2);
1842
2601
  const ifdOffset = (bigEndian ? Token.UINT32_BE : Token.UINT32_LE).get(this.buffer, 4);
1843
2602
 
@@ -1866,19 +2625,37 @@ export class FileTypeParser {
1866
2625
  }
1867
2626
  }
1868
2627
 
1869
- await this.tokenizer.ignore(ifdOffset);
1870
- const fileType = await this.readTiffIFD(bigEndian);
1871
- return fileType ?? {
1872
- ext: 'tif',
1873
- mime: 'image/tiff',
1874
- };
2628
+ const maximumTiffOffset = hasUnknownFileSize(this.tokenizer) ? maximumTiffIfdOffsetInBytes : this.tokenizer.fileInfo.size;
2629
+
2630
+ try {
2631
+ await safeIgnore(this.tokenizer, ifdOffset, {
2632
+ maximumLength: maximumTiffOffset,
2633
+ reason: 'TIFF IFD offset',
2634
+ });
2635
+ } catch (error) {
2636
+ if (error instanceof strtok3.EndOfStreamError) {
2637
+ return;
2638
+ }
2639
+
2640
+ throw error;
2641
+ }
2642
+
2643
+ let fileType;
2644
+ try {
2645
+ fileType = await this.readTiffIFD(bigEndian);
2646
+ } catch (error) {
2647
+ if (error instanceof strtok3.EndOfStreamError) {
2648
+ return;
2649
+ }
2650
+
2651
+ throw error;
2652
+ }
2653
+
2654
+ return fileType ?? tiffFileType;
1875
2655
  }
1876
2656
 
1877
2657
  if (version === 43) { // Big TIFF file header
1878
- return {
1879
- ext: 'tif',
1880
- mime: 'image/tiff',
1881
- };
2658
+ return tiffFileType;
1882
2659
  }
1883
2660
  }
1884
2661