file-type 21.3.0 → 21.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. package/core.js +709 -145
  2. package/index.js +26 -2
  3. package/package.json +4 -4
  4. package/readme.md +4 -3
package/core.js CHANGED
@@ -14,6 +14,169 @@ import {
14
14
  import {extensions, mimeTypes} from './supported.js';
15
15
 
16
16
  export const reasonableDetectionSizeInBytes = 4100; // A fair amount of file-types are detectable within this range.
17
+ // Keep defensive limits small enough to avoid accidental memory spikes from untrusted inputs.
18
+ const maximumMpegOffsetTolerance = reasonableDetectionSizeInBytes - 2;
19
+ const maximumZipEntrySizeInBytes = 1024 * 1024;
20
+ const maximumUntrustedSkipSizeInBytes = 16 * 1024 * 1024;
21
+ const maximumNestedGzipDetectionSizeInBytes = maximumUntrustedSkipSizeInBytes;
22
+ const maximumId3HeaderSizeInBytes = maximumUntrustedSkipSizeInBytes;
23
+ const maximumEbmlDocumentTypeSizeInBytes = 64;
24
+ const maximumEbmlElementPayloadSizeInBytes = maximumUntrustedSkipSizeInBytes;
25
+ const maximumEbmlElementCount = 256;
26
+ const maximumPngChunkSizeInBytes = maximumUntrustedSkipSizeInBytes;
27
+ const maximumTiffIfdOffsetInBytes = maximumUntrustedSkipSizeInBytes;
28
+ const recoverableZipErrorMessages = new Set([
29
+ 'Unexpected signature',
30
+ 'Encrypted ZIP',
31
+ 'Expected Central-File-Header signature',
32
+ ]);
33
+ const recoverableZipErrorMessagePrefixes = [
34
+ 'Unsupported ZIP compression method:',
35
+ 'ZIP entry decompressed data exceeds ',
36
+ ];
37
+ const recoverableZipErrorCodes = new Set([
38
+ 'Z_BUF_ERROR',
39
+ 'Z_DATA_ERROR',
40
+ 'ERR_INVALID_STATE',
41
+ ]);
42
+
43
+ class ParserHardLimitError extends Error {}
44
+
45
+ function getSafeBound(value, maximum, reason) {
46
+ if (
47
+ !Number.isFinite(value)
48
+ || value < 0
49
+ || value > maximum
50
+ ) {
51
+ throw new ParserHardLimitError(`${reason} has invalid size ${value} (maximum ${maximum} bytes)`);
52
+ }
53
+
54
+ return value;
55
+ }
56
+
57
+ async function safeIgnore(tokenizer, length, {maximumLength = maximumUntrustedSkipSizeInBytes, reason = 'skip'} = {}) {
58
+ const safeLength = getSafeBound(length, maximumLength, reason);
59
+ await tokenizer.ignore(safeLength);
60
+ }
61
+
62
+ async function safeReadBuffer(tokenizer, buffer, options, {maximumLength = buffer.length, reason = 'read'} = {}) {
63
+ const length = options?.length ?? buffer.length;
64
+ const safeLength = getSafeBound(length, maximumLength, reason);
65
+ return tokenizer.readBuffer(buffer, {
66
+ ...options,
67
+ length: safeLength,
68
+ });
69
+ }
70
+
71
+ async function decompressDeflateRawWithLimit(data, {maximumLength = maximumZipEntrySizeInBytes} = {}) {
72
+ const input = new ReadableStream({
73
+ start(controller) {
74
+ controller.enqueue(data);
75
+ controller.close();
76
+ },
77
+ });
78
+ const output = input.pipeThrough(new DecompressionStream('deflate-raw'));
79
+ const reader = output.getReader();
80
+ const chunks = [];
81
+ let totalLength = 0;
82
+
83
+ try {
84
+ for (;;) {
85
+ const {done, value} = await reader.read();
86
+ if (done) {
87
+ break;
88
+ }
89
+
90
+ totalLength += value.length;
91
+ if (totalLength > maximumLength) {
92
+ await reader.cancel();
93
+ throw new Error(`ZIP entry decompressed data exceeds ${maximumLength} bytes`);
94
+ }
95
+
96
+ chunks.push(value);
97
+ }
98
+ } finally {
99
+ reader.releaseLock();
100
+ }
101
+
102
+ const uncompressedData = new Uint8Array(totalLength);
103
+ let offset = 0;
104
+ for (const chunk of chunks) {
105
+ uncompressedData.set(chunk, offset);
106
+ offset += chunk.length;
107
+ }
108
+
109
+ return uncompressedData;
110
+ }
111
+
112
+ // Override the default inflate to enforce decompression size limits, since @tokenizer/inflate does not expose a configuration hook for this.
113
+ ZipHandler.prototype.inflate = async function (zipHeader, fileData, callback) {
114
+ if (zipHeader.compressedMethod === 0) {
115
+ return callback(fileData);
116
+ }
117
+
118
+ if (zipHeader.compressedMethod !== 8) {
119
+ throw new Error(`Unsupported ZIP compression method: ${zipHeader.compressedMethod}`);
120
+ }
121
+
122
+ const maximumLength = hasUnknownFileSize(this.tokenizer) ? maximumZipEntrySizeInBytes : Number.MAX_SAFE_INTEGER;
123
+ const uncompressedData = await decompressDeflateRawWithLimit(fileData, {maximumLength});
124
+ return callback(uncompressedData);
125
+ };
126
+
127
+ function createByteLimitedReadableStream(stream, maximumBytes) {
128
+ const reader = stream.getReader();
129
+ let emittedBytes = 0;
130
+ let sourceDone = false;
131
+ let sourceCanceled = false;
132
+
133
+ const cancelSource = async reason => {
134
+ if (
135
+ sourceDone
136
+ || sourceCanceled
137
+ ) {
138
+ return;
139
+ }
140
+
141
+ sourceCanceled = true;
142
+ await reader.cancel(reason);
143
+ };
144
+
145
+ return new ReadableStream({
146
+ async pull(controller) {
147
+ if (emittedBytes >= maximumBytes) {
148
+ controller.close();
149
+ await cancelSource();
150
+ return;
151
+ }
152
+
153
+ const {done, value} = await reader.read();
154
+ if (
155
+ done
156
+ || !value
157
+ ) {
158
+ sourceDone = true;
159
+ controller.close();
160
+ return;
161
+ }
162
+
163
+ const remainingBytes = maximumBytes - emittedBytes;
164
+ if (value.length > remainingBytes) {
165
+ controller.enqueue(value.subarray(0, remainingBytes));
166
+ emittedBytes += remainingBytes;
167
+ controller.close();
168
+ await cancelSource();
169
+ return;
170
+ }
171
+
172
+ controller.enqueue(value);
173
+ emittedBytes += value.length;
174
+ },
175
+ async cancel(reason) {
176
+ await cancelSource(reason);
177
+ },
178
+ });
179
+ }
17
180
 
18
181
  export async function fileTypeFromStream(stream, options) {
19
182
  return new FileTypeParser(options).fromStream(stream);
@@ -180,6 +343,189 @@ function _check(buffer, headers, options) {
180
343
  return true;
181
344
  }
182
345
 
346
+ export function normalizeSampleSize(sampleSize) {
347
+ // Accept odd caller input, but preserve valid caller-requested probe depth.
348
+ if (!Number.isFinite(sampleSize)) {
349
+ return reasonableDetectionSizeInBytes;
350
+ }
351
+
352
+ return Math.max(1, Math.trunc(sampleSize));
353
+ }
354
+
355
+ function normalizeMpegOffsetTolerance(mpegOffsetTolerance) {
356
+ // This value controls scan depth and therefore worst-case CPU work.
357
+ if (!Number.isFinite(mpegOffsetTolerance)) {
358
+ return 0;
359
+ }
360
+
361
+ return Math.max(0, Math.min(maximumMpegOffsetTolerance, Math.trunc(mpegOffsetTolerance)));
362
+ }
363
+
364
+ function getKnownFileSizeOrMaximum(fileSize) {
365
+ if (!Number.isFinite(fileSize)) {
366
+ return Number.MAX_SAFE_INTEGER;
367
+ }
368
+
369
+ return Math.max(0, fileSize);
370
+ }
371
+
372
+ function hasUnknownFileSize(tokenizer) {
373
+ const fileSize = tokenizer.fileInfo.size;
374
+ return (
375
+ !Number.isFinite(fileSize)
376
+ || fileSize === Number.MAX_SAFE_INTEGER
377
+ );
378
+ }
379
+
380
+ function hasExceededUnknownSizeScanBudget(tokenizer, startOffset, maximumBytes) {
381
+ return (
382
+ hasUnknownFileSize(tokenizer)
383
+ && tokenizer.position - startOffset > maximumBytes
384
+ );
385
+ }
386
+
387
+ function isRecoverableZipError(error) {
388
+ if (error instanceof strtok3.EndOfStreamError) {
389
+ return true;
390
+ }
391
+
392
+ if (error instanceof ParserHardLimitError) {
393
+ return true;
394
+ }
395
+
396
+ if (!(error instanceof Error)) {
397
+ return false;
398
+ }
399
+
400
+ if (recoverableZipErrorMessages.has(error.message)) {
401
+ return true;
402
+ }
403
+
404
+ if (
405
+ error instanceof TypeError
406
+ && recoverableZipErrorCodes.has(error.code)
407
+ ) {
408
+ return true;
409
+ }
410
+
411
+ for (const prefix of recoverableZipErrorMessagePrefixes) {
412
+ if (error.message.startsWith(prefix)) {
413
+ return true;
414
+ }
415
+ }
416
+
417
+ return false;
418
+ }
419
+
420
+ function canReadZipEntryForDetection(zipHeader, maximumSize = maximumZipEntrySizeInBytes) {
421
+ const sizes = [zipHeader.compressedSize, zipHeader.uncompressedSize];
422
+ for (const size of sizes) {
423
+ if (
424
+ !Number.isFinite(size)
425
+ || size < 0
426
+ || size > maximumSize
427
+ ) {
428
+ return false;
429
+ }
430
+ }
431
+
432
+ return true;
433
+ }
434
+
435
+ function createOpenXmlZipDetectionState() {
436
+ return {
437
+ hasContentTypesEntry: false,
438
+ hasParsedContentTypesEntry: false,
439
+ isParsingContentTypes: false,
440
+ hasUnparseableContentTypes: false,
441
+ hasWordDirectory: false,
442
+ hasPresentationDirectory: false,
443
+ hasSpreadsheetDirectory: false,
444
+ hasThreeDimensionalModelEntry: false,
445
+ };
446
+ }
447
+
448
+ function updateOpenXmlZipDetectionStateFromFilename(openXmlState, filename) {
449
+ if (filename.startsWith('word/')) {
450
+ openXmlState.hasWordDirectory = true;
451
+ }
452
+
453
+ if (filename.startsWith('ppt/')) {
454
+ openXmlState.hasPresentationDirectory = true;
455
+ }
456
+
457
+ if (filename.startsWith('xl/')) {
458
+ openXmlState.hasSpreadsheetDirectory = true;
459
+ }
460
+
461
+ if (
462
+ filename.startsWith('3D/')
463
+ && filename.endsWith('.model')
464
+ ) {
465
+ openXmlState.hasThreeDimensionalModelEntry = true;
466
+ }
467
+ }
468
+
469
+ function getOpenXmlFileTypeFromZipEntries(openXmlState) {
470
+ // Only use directory-name heuristic when [Content_Types].xml was present in the archive
471
+ // but its handler was skipped (not invoked, not currently running, and not already resolved).
472
+ // This avoids guessing from directory names when content-type parsing already gave a definitive answer or failed.
473
+ if (
474
+ !openXmlState.hasContentTypesEntry
475
+ || openXmlState.hasUnparseableContentTypes
476
+ || openXmlState.isParsingContentTypes
477
+ || openXmlState.hasParsedContentTypesEntry
478
+ ) {
479
+ return;
480
+ }
481
+
482
+ if (openXmlState.hasWordDirectory) {
483
+ return {
484
+ ext: 'docx',
485
+ mime: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
486
+ };
487
+ }
488
+
489
+ if (openXmlState.hasPresentationDirectory) {
490
+ return {
491
+ ext: 'pptx',
492
+ mime: 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
493
+ };
494
+ }
495
+
496
+ if (openXmlState.hasSpreadsheetDirectory) {
497
+ return {
498
+ ext: 'xlsx',
499
+ mime: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
500
+ };
501
+ }
502
+
503
+ if (openXmlState.hasThreeDimensionalModelEntry) {
504
+ return {
505
+ ext: '3mf',
506
+ mime: 'model/3mf',
507
+ };
508
+ }
509
+ }
510
+
511
+ function getOpenXmlMimeTypeFromContentTypesXml(xmlContent) {
512
+ // We only need the `ContentType="...main+xml"` value, so a small string scan is enough and avoids full XML parsing.
513
+ const endPosition = xmlContent.indexOf('.main+xml"');
514
+ if (endPosition === -1) {
515
+ const mimeType = 'application/vnd.ms-package.3dmanufacturing-3dmodel+xml';
516
+ if (xmlContent.includes(`ContentType="${mimeType}"`)) {
517
+ return mimeType;
518
+ }
519
+
520
+ return;
521
+ }
522
+
523
+ const truncatedContent = xmlContent.slice(0, endPosition);
524
+ const firstQuotePosition = truncatedContent.lastIndexOf('"');
525
+ // If no quote is found, `lastIndexOf` returns -1 and this intentionally falls back to the full truncated prefix.
526
+ return truncatedContent.slice(firstQuotePosition + 1);
527
+ }
528
+
183
529
  export async function fileTypeFromTokenizer(tokenizer, options) {
184
530
  return new FileTypeParser(options).fromTokenizer(tokenizer);
185
531
  }
@@ -190,25 +536,39 @@ export async function fileTypeStream(webStream, options) {
190
536
 
191
537
  export class FileTypeParser {
192
538
  constructor(options) {
539
+ const normalizedMpegOffsetTolerance = normalizeMpegOffsetTolerance(options?.mpegOffsetTolerance);
193
540
  this.options = {
194
- mpegOffsetTolerance: 0,
195
541
  ...options,
542
+ mpegOffsetTolerance: normalizedMpegOffsetTolerance,
196
543
  };
197
544
 
198
- this.detectors = [...(options?.customDetectors ?? []),
545
+ this.detectors = [...(this.options.customDetectors ?? []),
199
546
  {id: 'core', detect: this.detectConfident},
200
547
  {id: 'core.imprecise', detect: this.detectImprecise}];
201
548
  this.tokenizerOptions = {
202
- abortSignal: options?.signal,
549
+ abortSignal: this.options.signal,
203
550
  };
204
551
  }
205
552
 
206
553
  async fromTokenizer(tokenizer) {
207
554
  const initialPosition = tokenizer.position;
208
-
209
555
  // Iterate through all file-type detectors
210
556
  for (const detector of this.detectors) {
211
- const fileType = await detector.detect(tokenizer);
557
+ let fileType;
558
+ try {
559
+ fileType = await detector.detect(tokenizer);
560
+ } catch (error) {
561
+ if (error instanceof strtok3.EndOfStreamError) {
562
+ return;
563
+ }
564
+
565
+ if (error instanceof ParserHardLimitError) {
566
+ return;
567
+ }
568
+
569
+ throw error;
570
+ }
571
+
212
572
  if (fileType) {
213
573
  return fileType;
214
574
  }
@@ -252,7 +612,7 @@ export class FileTypeParser {
252
612
  }
253
613
 
254
614
  async toDetectionStream(stream, options) {
255
- const {sampleSize = reasonableDetectionSizeInBytes} = options;
615
+ const sampleSize = normalizeSampleSize(options?.sampleSize ?? reasonableDetectionSizeInBytes);
256
616
  let detectedFileType;
257
617
  let firstChunk;
258
618
 
@@ -394,7 +754,7 @@ export class FileTypeParser {
394
754
 
395
755
  if (this.check([0xEF, 0xBB, 0xBF])) { // UTF-8-BOM
396
756
  // Strip off UTF-8-BOM
397
- this.tokenizer.ignore(3);
757
+ await this.tokenizer.ignore(3);
398
758
  return this.detectConfident(tokenizer);
399
759
  }
400
760
 
@@ -414,27 +774,24 @@ export class FileTypeParser {
414
774
 
415
775
  if (this.check([0x1F, 0x8B, 0x8])) {
416
776
  const gzipHandler = new GzipHandler(tokenizer);
417
-
418
- const stream = gzipHandler.inflate();
419
- let shouldCancelStream = true;
777
+ const limitedInflatedStream = createByteLimitedReadableStream(gzipHandler.inflate(), maximumNestedGzipDetectionSizeInBytes);
778
+ let compressedFileType;
420
779
  try {
421
- let compressedFileType;
422
- try {
423
- compressedFileType = await this.fromStream(stream);
424
- } catch {
425
- shouldCancelStream = false;
780
+ compressedFileType = await this.fromStream(limitedInflatedStream);
781
+ } catch (error) {
782
+ if (error?.name === 'AbortError') {
783
+ throw error;
426
784
  }
427
785
 
428
- if (compressedFileType && compressedFileType.ext === 'tar') {
429
- return {
430
- ext: 'tar.gz',
431
- mime: 'application/gzip',
432
- };
433
- }
434
- } finally {
435
- if (shouldCancelStream) {
436
- await stream.cancel();
437
- }
786
+ // Decompression or inner-detection failures are expected for non-tar gzip files.
787
+ }
788
+
789
+ // We only need enough inflated bytes to confidently decide whether this is tar.gz.
790
+ if (compressedFileType?.ext === 'tar') {
791
+ return {
792
+ ext: 'tar.gz',
793
+ mime: 'application/gzip',
794
+ };
438
795
  }
439
796
 
440
797
  return {
@@ -451,17 +808,48 @@ export class FileTypeParser {
451
808
  }
452
809
 
453
810
  if (this.checkString('ID3')) {
454
- await tokenizer.ignore(6); // Skip ID3 header until the header size
811
+ await safeIgnore(tokenizer, 6, {
812
+ maximumLength: 6,
813
+ reason: 'ID3 header prefix',
814
+ }); // Skip ID3 header until the header size
455
815
  const id3HeaderLength = await tokenizer.readToken(uint32SyncSafeToken);
816
+ const isUnknownFileSize = hasUnknownFileSize(tokenizer);
817
+ if (
818
+ !Number.isFinite(id3HeaderLength)
819
+ || id3HeaderLength < 0
820
+ // Keep ID3 probing bounded for unknown-size streams to avoid attacker-controlled large skips.
821
+ || (
822
+ isUnknownFileSize
823
+ && id3HeaderLength > maximumId3HeaderSizeInBytes
824
+ )
825
+ ) {
826
+ return;
827
+ }
828
+
456
829
  if (tokenizer.position + id3HeaderLength > tokenizer.fileInfo.size) {
457
- // Guess file type based on ID3 header for backward compatibility
830
+ if (isUnknownFileSize) {
831
+ return;
832
+ }
833
+
458
834
  return {
459
835
  ext: 'mp3',
460
836
  mime: 'audio/mpeg',
461
837
  };
462
838
  }
463
839
 
464
- await tokenizer.ignore(id3HeaderLength);
840
+ try {
841
+ await safeIgnore(tokenizer, id3HeaderLength, {
842
+ maximumLength: isUnknownFileSize ? maximumId3HeaderSizeInBytes : tokenizer.fileInfo.size,
843
+ reason: 'ID3 payload',
844
+ });
845
+ } catch (error) {
846
+ if (error instanceof strtok3.EndOfStreamError) {
847
+ return;
848
+ }
849
+
850
+ throw error;
851
+ }
852
+
465
853
  return this.fromTokenizer(tokenizer); // Skip ID3 header, recursion
466
854
  }
467
855
 
@@ -547,72 +935,105 @@ export class FileTypeParser {
547
935
  // Need to be before the `zip` check
548
936
  if (this.check([0x50, 0x4B, 0x3, 0x4])) { // Local file header signature
549
937
  let fileType;
550
- await new ZipHandler(tokenizer).unzip(zipHeader => {
551
- switch (zipHeader.filename) {
552
- case 'META-INF/mozilla.rsa':
553
- fileType = {
554
- ext: 'xpi',
555
- mime: 'application/x-xpinstall',
556
- };
557
- return {
558
- stop: true,
559
- };
560
- case 'META-INF/MANIFEST.MF':
561
- fileType = {
562
- ext: 'jar',
563
- mime: 'application/java-archive',
564
- };
565
- return {
566
- stop: true,
567
- };
568
- case 'mimetype':
938
+ const openXmlState = createOpenXmlZipDetectionState();
939
+
940
+ try {
941
+ await new ZipHandler(tokenizer).unzip(zipHeader => {
942
+ updateOpenXmlZipDetectionStateFromFilename(openXmlState, zipHeader.filename);
943
+
944
+ const isOpenXmlContentTypesEntry = zipHeader.filename === '[Content_Types].xml';
945
+ const openXmlFileTypeFromEntries = getOpenXmlFileTypeFromZipEntries(openXmlState);
946
+ if (
947
+ !isOpenXmlContentTypesEntry
948
+ && openXmlFileTypeFromEntries
949
+ ) {
950
+ fileType = openXmlFileTypeFromEntries;
569
951
  return {
570
- async handler(fileData) {
571
- // Use TextDecoder to decode the UTF-8 encoded data
572
- const mimeType = new TextDecoder('utf-8').decode(fileData).trim();
573
- fileType = getFileTypeFromMimeType(mimeType);
574
- },
575
952
  stop: true,
576
953
  };
954
+ }
577
955
 
578
- case '[Content_Types].xml':
579
- return {
580
- async handler(fileData) {
581
- // Use TextDecoder to decode the UTF-8 encoded data
582
- let xmlContent = new TextDecoder('utf-8').decode(fileData);
583
- const endPos = xmlContent.indexOf('.main+xml"');
584
- if (endPos === -1) {
585
- const mimeType = 'application/vnd.ms-package.3dmanufacturing-3dmodel+xml';
586
- if (xmlContent.includes(`ContentType="${mimeType}"`)) {
956
+ switch (zipHeader.filename) {
957
+ case 'META-INF/mozilla.rsa':
958
+ fileType = {
959
+ ext: 'xpi',
960
+ mime: 'application/x-xpinstall',
961
+ };
962
+ return {
963
+ stop: true,
964
+ };
965
+ case 'META-INF/MANIFEST.MF':
966
+ fileType = {
967
+ ext: 'jar',
968
+ mime: 'application/java-archive',
969
+ };
970
+ return {
971
+ stop: true,
972
+ };
973
+ case 'mimetype':
974
+ if (!canReadZipEntryForDetection(zipHeader)) {
975
+ return {};
976
+ }
977
+
978
+ return {
979
+ async handler(fileData) {
980
+ // Use TextDecoder to decode the UTF-8 encoded data
981
+ const mimeType = new TextDecoder('utf-8').decode(fileData).trim();
982
+ fileType = getFileTypeFromMimeType(mimeType);
983
+ },
984
+ stop: true,
985
+ };
986
+
987
+ case '[Content_Types].xml': {
988
+ openXmlState.hasContentTypesEntry = true;
989
+
990
+ const maximumContentTypesEntrySize = hasUnknownFileSize(tokenizer) ? maximumZipEntrySizeInBytes : Number.MAX_SAFE_INTEGER;
991
+ if (!canReadZipEntryForDetection(zipHeader, maximumContentTypesEntrySize)) {
992
+ openXmlState.hasUnparseableContentTypes = true;
993
+ return {};
994
+ }
995
+
996
+ openXmlState.isParsingContentTypes = true;
997
+ return {
998
+ async handler(fileData) {
999
+ // Use TextDecoder to decode the UTF-8 encoded data
1000
+ const xmlContent = new TextDecoder('utf-8').decode(fileData);
1001
+ const mimeType = getOpenXmlMimeTypeFromContentTypesXml(xmlContent);
1002
+ if (mimeType) {
587
1003
  fileType = getFileTypeFromMimeType(mimeType);
588
1004
  }
589
- } else {
590
- xmlContent = xmlContent.slice(0, Math.max(0, endPos));
591
- const firstPos = xmlContent.lastIndexOf('"');
592
- const mimeType = xmlContent.slice(Math.max(0, firstPos + 1));
593
- fileType = getFileTypeFromMimeType(mimeType);
594
- }
595
- },
596
- stop: true,
597
- };
598
- default:
599
- if (/classes\d*\.dex/.test(zipHeader.filename)) {
600
- fileType = {
601
- ext: 'apk',
602
- mime: 'application/vnd.android.package-archive',
1005
+
1006
+ openXmlState.hasParsedContentTypesEntry = true;
1007
+ openXmlState.isParsingContentTypes = false;
1008
+ },
1009
+ stop: true,
603
1010
  };
604
- return {stop: true};
605
1011
  }
606
1012
 
607
- return {};
1013
+ default:
1014
+ if (/classes\d*\.dex/.test(zipHeader.filename)) {
1015
+ fileType = {
1016
+ ext: 'apk',
1017
+ mime: 'application/vnd.android.package-archive',
1018
+ };
1019
+ return {stop: true};
1020
+ }
1021
+
1022
+ return {};
1023
+ }
1024
+ });
1025
+ } catch (error) {
1026
+ if (!isRecoverableZipError(error)) {
1027
+ throw error;
608
1028
  }
609
- }).catch(error => {
610
- if (!(error instanceof strtok3.EndOfStreamError)) {
611
- throw error; // Re-throw non-EndOfStreamError
1029
+
1030
+ if (openXmlState.isParsingContentTypes) {
1031
+ openXmlState.isParsingContentTypes = false;
1032
+ openXmlState.hasUnparseableContentTypes = true;
612
1033
  }
613
- });
1034
+ }
614
1035
 
615
- return fileType ?? {
1036
+ return fileType ?? getOpenXmlFileTypeFromZipEntries(openXmlState) ?? {
616
1037
  ext: 'zip',
617
1038
  mime: 'application/zip',
618
1039
  };
@@ -817,7 +1238,10 @@ export class FileTypeParser {
817
1238
  }
818
1239
 
819
1240
  const id = new Uint8Array(ic + 1);
820
- await tokenizer.readBuffer(id);
1241
+ await safeReadBuffer(tokenizer, id, undefined, {
1242
+ maximumLength: id.length,
1243
+ reason: 'EBML field',
1244
+ });
821
1245
  return id;
822
1246
  }
823
1247
 
@@ -838,20 +1262,47 @@ export class FileTypeParser {
838
1262
  }
839
1263
 
840
1264
  async function readChildren(children) {
1265
+ let ebmlElementCount = 0;
841
1266
  while (children > 0) {
1267
+ ebmlElementCount++;
1268
+ if (ebmlElementCount > maximumEbmlElementCount) {
1269
+ return;
1270
+ }
1271
+
842
1272
  const element = await readElement();
1273
+
843
1274
  if (element.id === 0x42_82) {
844
- const rawValue = await tokenizer.readToken(new Token.StringType(element.len));
1275
+ // `DocType` is a short string ("webm", "matroska", ...), reject implausible lengths to avoid large allocations.
1276
+ if (element.len > maximumEbmlDocumentTypeSizeInBytes) {
1277
+ return;
1278
+ }
1279
+
1280
+ const documentTypeLength = getSafeBound(element.len, maximumEbmlDocumentTypeSizeInBytes, 'EBML DocType');
1281
+ const rawValue = await tokenizer.readToken(new Token.StringType(documentTypeLength));
845
1282
  return rawValue.replaceAll(/\00.*$/g, ''); // Return DocType
846
1283
  }
847
1284
 
848
- await tokenizer.ignore(element.len); // ignore payload
1285
+ if (
1286
+ hasUnknownFileSize(tokenizer)
1287
+ && (
1288
+ !Number.isFinite(element.len)
1289
+ || element.len < 0
1290
+ || element.len > maximumEbmlElementPayloadSizeInBytes
1291
+ )
1292
+ ) {
1293
+ return;
1294
+ }
1295
+
1296
+ await safeIgnore(tokenizer, element.len, {
1297
+ maximumLength: hasUnknownFileSize(tokenizer) ? maximumEbmlElementPayloadSizeInBytes : tokenizer.fileInfo.size,
1298
+ reason: 'EBML payload',
1299
+ }); // ignore payload
849
1300
  --children;
850
1301
  }
851
1302
  }
852
1303
 
853
- const re = await readElement();
854
- const documentType = await readChildren(re.len);
1304
+ const rootElement = await readElement();
1305
+ const documentType = await readChildren(rootElement.len);
855
1306
 
856
1307
  switch (documentType) {
857
1308
  case 'webm':
@@ -1203,6 +1654,16 @@ export class FileTypeParser {
1203
1654
  // -- 8-byte signatures --
1204
1655
 
1205
1656
  if (this.check([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A])) {
1657
+ const pngFileType = {
1658
+ ext: 'png',
1659
+ mime: 'image/png',
1660
+ };
1661
+
1662
+ const apngFileType = {
1663
+ ext: 'apng',
1664
+ mime: 'image/apng',
1665
+ };
1666
+
1206
1667
  // APNG format (https://wiki.mozilla.org/APNG_Specification)
1207
1668
  // 1. Find the first IDAT (image data) chunk (49 44 41 54)
1208
1669
  // 2. Check if there is an "acTL" chunk before the IDAT one (61 63 54 4C)
@@ -1220,7 +1681,13 @@ export class FileTypeParser {
1220
1681
  };
1221
1682
  }
1222
1683
 
1684
+ const isUnknownPngStream = hasUnknownFileSize(tokenizer);
1685
+ const pngScanStart = tokenizer.position;
1223
1686
  do {
1687
+ if (hasExceededUnknownSizeScanBudget(tokenizer, pngScanStart, maximumPngChunkSizeInBytes)) {
1688
+ break;
1689
+ }
1690
+
1224
1691
  const chunk = await readChunkHeader();
1225
1692
  if (chunk.length < 0) {
1226
1693
  return; // Invalid chunk length
@@ -1228,24 +1695,40 @@ export class FileTypeParser {
1228
1695
 
1229
1696
  switch (chunk.type) {
1230
1697
  case 'IDAT':
1231
- return {
1232
- ext: 'png',
1233
- mime: 'image/png',
1234
- };
1698
+ return pngFileType;
1235
1699
  case 'acTL':
1236
- return {
1237
- ext: 'apng',
1238
- mime: 'image/apng',
1239
- };
1700
+ return apngFileType;
1240
1701
  default:
1241
- await tokenizer.ignore(chunk.length + 4); // Ignore chunk-data + CRC
1702
+ if (
1703
+ isUnknownPngStream
1704
+ && chunk.length > maximumPngChunkSizeInBytes
1705
+ ) {
1706
+ // Avoid huge attacker-controlled skips when probing unknown-size streams.
1707
+ return;
1708
+ }
1709
+
1710
+ try {
1711
+ await safeIgnore(tokenizer, chunk.length + 4, {
1712
+ maximumLength: isUnknownPngStream ? maximumPngChunkSizeInBytes + 4 : tokenizer.fileInfo.size,
1713
+ reason: 'PNG chunk payload',
1714
+ }); // Ignore chunk-data + CRC
1715
+ } catch (error) {
1716
+ if (
1717
+ !isUnknownPngStream
1718
+ && (
1719
+ error instanceof ParserHardLimitError
1720
+ || error instanceof strtok3.EndOfStreamError
1721
+ )
1722
+ ) {
1723
+ return pngFileType;
1724
+ }
1725
+
1726
+ throw error;
1727
+ }
1242
1728
  }
1243
1729
  } while (tokenizer.position + 8 < tokenizer.fileInfo.size);
1244
1730
 
1245
- return {
1246
- ext: 'png',
1247
- mime: 'image/png',
1248
- };
1731
+ return pngFileType;
1249
1732
  }
1250
1733
 
1251
1734
  if (this.check([0x41, 0x52, 0x52, 0x4F, 0x57, 0x31, 0x00, 0x00])) {
@@ -1403,45 +1886,95 @@ export class FileTypeParser {
1403
1886
 
1404
1887
  // ASF_Header_Object first 80 bytes
1405
1888
  if (this.check([0x30, 0x26, 0xB2, 0x75, 0x8E, 0x66, 0xCF, 0x11, 0xA6, 0xD9])) {
1406
- async function readHeader() {
1407
- const guid = new Uint8Array(16);
1408
- await tokenizer.readBuffer(guid);
1409
- return {
1410
- id: guid,
1411
- size: Number(await tokenizer.readToken(Token.UINT64_LE)),
1412
- };
1413
- }
1889
+ let isMalformedAsf = false;
1890
+ try {
1891
+ async function readHeader() {
1892
+ const guid = new Uint8Array(16);
1893
+ await safeReadBuffer(tokenizer, guid, undefined, {
1894
+ maximumLength: guid.length,
1895
+ reason: 'ASF header GUID',
1896
+ });
1897
+ return {
1898
+ id: guid,
1899
+ size: Number(await tokenizer.readToken(Token.UINT64_LE)),
1900
+ };
1901
+ }
1414
1902
 
1415
- await tokenizer.ignore(30);
1416
- // Search for header should be in first 1KB of file.
1417
- while (tokenizer.position + 24 < tokenizer.fileInfo.size) {
1418
- const header = await readHeader();
1419
- let payload = header.size - 24;
1420
- if (_check(header.id, [0x91, 0x07, 0xDC, 0xB7, 0xB7, 0xA9, 0xCF, 0x11, 0x8E, 0xE6, 0x00, 0xC0, 0x0C, 0x20, 0x53, 0x65])) {
1421
- // Sync on Stream-Properties-Object (B7DC0791-A9B7-11CF-8EE6-00C00C205365)
1422
- const typeId = new Uint8Array(16);
1423
- payload -= await tokenizer.readBuffer(typeId);
1424
-
1425
- if (_check(typeId, [0x40, 0x9E, 0x69, 0xF8, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
1426
- // Found audio:
1427
- return {
1428
- ext: 'asf',
1429
- mime: 'audio/x-ms-asf',
1430
- };
1903
+ await safeIgnore(tokenizer, 30, {
1904
+ maximumLength: 30,
1905
+ reason: 'ASF header prelude',
1906
+ });
1907
+ const isUnknownFileSize = hasUnknownFileSize(tokenizer);
1908
+ const asfHeaderScanStart = tokenizer.position;
1909
+ while (tokenizer.position + 24 < tokenizer.fileInfo.size) {
1910
+ if (hasExceededUnknownSizeScanBudget(tokenizer, asfHeaderScanStart, maximumUntrustedSkipSizeInBytes)) {
1911
+ break;
1431
1912
  }
1432
1913
 
1433
- if (_check(typeId, [0xC0, 0xEF, 0x19, 0xBC, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
1434
- // Found video:
1435
- return {
1436
- ext: 'asf',
1437
- mime: 'video/x-ms-asf',
1438
- };
1914
+ const previousPosition = tokenizer.position;
1915
+ const header = await readHeader();
1916
+ let payload = header.size - 24;
1917
+ if (
1918
+ !Number.isFinite(payload)
1919
+ || payload < 0
1920
+ ) {
1921
+ isMalformedAsf = true;
1922
+ break;
1439
1923
  }
1440
1924
 
1441
- break;
1925
+ if (_check(header.id, [0x91, 0x07, 0xDC, 0xB7, 0xB7, 0xA9, 0xCF, 0x11, 0x8E, 0xE6, 0x00, 0xC0, 0x0C, 0x20, 0x53, 0x65])) {
1926
+ // Sync on Stream-Properties-Object (B7DC0791-A9B7-11CF-8EE6-00C00C205365)
1927
+ const typeId = new Uint8Array(16);
1928
+ payload -= await safeReadBuffer(tokenizer, typeId, undefined, {
1929
+ maximumLength: typeId.length,
1930
+ reason: 'ASF stream type GUID',
1931
+ });
1932
+
1933
+ if (_check(typeId, [0x40, 0x9E, 0x69, 0xF8, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
1934
+ // Found audio:
1935
+ return {
1936
+ ext: 'asf',
1937
+ mime: 'audio/x-ms-asf',
1938
+ };
1939
+ }
1940
+
1941
+ if (_check(typeId, [0xC0, 0xEF, 0x19, 0xBC, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
1942
+ // Found video:
1943
+ return {
1944
+ ext: 'asf',
1945
+ mime: 'video/x-ms-asf',
1946
+ };
1947
+ }
1948
+
1949
+ break;
1950
+ }
1951
+
1952
+ await safeIgnore(tokenizer, payload, {
1953
+ maximumLength: isUnknownFileSize ? maximumUntrustedSkipSizeInBytes : tokenizer.fileInfo.size,
1954
+ reason: 'ASF header payload',
1955
+ });
1956
+
1957
+ // Safeguard against malformed files: break if the position did not advance.
1958
+ if (tokenizer.position <= previousPosition) {
1959
+ isMalformedAsf = true;
1960
+ break;
1961
+ }
1962
+ }
1963
+ } catch (error) {
1964
+ if (
1965
+ error instanceof strtok3.EndOfStreamError
1966
+ || error instanceof ParserHardLimitError
1967
+ ) {
1968
+ if (hasUnknownFileSize(tokenizer)) {
1969
+ isMalformedAsf = true;
1970
+ }
1971
+ } else {
1972
+ throw error;
1442
1973
  }
1974
+ }
1443
1975
 
1444
- await tokenizer.ignore(payload);
1976
+ if (isMalformedAsf) {
1977
+ return;
1445
1978
  }
1446
1979
 
1447
1980
  // Default to ASF generic extension
@@ -1760,9 +2293,10 @@ export class FileTypeParser {
1760
2293
  // Detections with limited supporting data, resulting in a higher likelihood of false positives
1761
2294
  detectImprecise = async tokenizer => {
1762
2295
  this.buffer = new Uint8Array(reasonableDetectionSizeInBytes);
2296
+ const fileSize = getKnownFileSizeOrMaximum(tokenizer.fileInfo.size);
1763
2297
 
1764
2298
  // Read initial sample size of 8 bytes
1765
- await tokenizer.peekBuffer(this.buffer, {length: Math.min(8, tokenizer.fileInfo.size), mayBeLess: true});
2299
+ await tokenizer.peekBuffer(this.buffer, {length: Math.min(8, fileSize), mayBeLess: true});
1766
2300
 
1767
2301
  if (
1768
2302
  this.check([0x0, 0x0, 0x1, 0xBA])
@@ -1796,7 +2330,7 @@ export class FileTypeParser {
1796
2330
  }
1797
2331
 
1798
2332
  // Adjust buffer to `mpegOffsetTolerance`
1799
- await tokenizer.peekBuffer(this.buffer, {length: Math.min(2 + this.options.mpegOffsetTolerance, tokenizer.fileInfo.size), mayBeLess: true});
2333
+ await tokenizer.peekBuffer(this.buffer, {length: Math.min(2 + this.options.mpegOffsetTolerance, fileSize), mayBeLess: true});
1800
2334
 
1801
2335
  // Check MPEG 1 or 2 Layer 3 header, or 'layer 0' for ADTS (MPEG sync-word 0xFFE)
1802
2336
  if (this.buffer.length >= (2 + this.options.mpegOffsetTolerance)) {
@@ -1811,7 +2345,7 @@ export class FileTypeParser {
1811
2345
 
1812
2346
  async readTiffTag(bigEndian) {
1813
2347
  const tagId = await this.tokenizer.readToken(bigEndian ? Token.UINT16_BE : Token.UINT16_LE);
1814
- this.tokenizer.ignore(10);
2348
+ await this.tokenizer.ignore(10);
1815
2349
  switch (tagId) {
1816
2350
  case 50_341:
1817
2351
  return {
@@ -1829,6 +2363,13 @@ export class FileTypeParser {
1829
2363
 
1830
2364
  async readTiffIFD(bigEndian) {
1831
2365
  const numberOfTags = await this.tokenizer.readToken(bigEndian ? Token.UINT16_BE : Token.UINT16_LE);
2366
+ if (
2367
+ hasUnknownFileSize(this.tokenizer)
2368
+ && (2 + (numberOfTags * 12)) > maximumTiffIfdOffsetInBytes
2369
+ ) {
2370
+ return;
2371
+ }
2372
+
1832
2373
  for (let n = 0; n < numberOfTags; ++n) {
1833
2374
  const fileType = await this.readTiffTag(bigEndian);
1834
2375
  if (fileType) {
@@ -1838,6 +2379,11 @@ export class FileTypeParser {
1838
2379
  }
1839
2380
 
1840
2381
  async readTiffHeader(bigEndian) {
2382
+ const tiffFileType = {
2383
+ ext: 'tif',
2384
+ mime: 'image/tiff',
2385
+ };
2386
+
1841
2387
  const version = (bigEndian ? Token.UINT16_BE : Token.UINT16_LE).get(this.buffer, 2);
1842
2388
  const ifdOffset = (bigEndian ? Token.UINT32_BE : Token.UINT32_LE).get(this.buffer, 4);
1843
2389
 
@@ -1866,19 +2412,37 @@ export class FileTypeParser {
1866
2412
  }
1867
2413
  }
1868
2414
 
1869
- await this.tokenizer.ignore(ifdOffset);
1870
- const fileType = await this.readTiffIFD(bigEndian);
1871
- return fileType ?? {
1872
- ext: 'tif',
1873
- mime: 'image/tiff',
1874
- };
2415
+ const maximumTiffOffset = hasUnknownFileSize(this.tokenizer) ? maximumTiffIfdOffsetInBytes : this.tokenizer.fileInfo.size;
2416
+
2417
+ try {
2418
+ await safeIgnore(this.tokenizer, ifdOffset, {
2419
+ maximumLength: maximumTiffOffset,
2420
+ reason: 'TIFF IFD offset',
2421
+ });
2422
+ } catch (error) {
2423
+ if (error instanceof strtok3.EndOfStreamError) {
2424
+ return;
2425
+ }
2426
+
2427
+ throw error;
2428
+ }
2429
+
2430
+ let fileType;
2431
+ try {
2432
+ fileType = await this.readTiffIFD(bigEndian);
2433
+ } catch (error) {
2434
+ if (error instanceof strtok3.EndOfStreamError) {
2435
+ return;
2436
+ }
2437
+
2438
+ throw error;
2439
+ }
2440
+
2441
+ return fileType ?? tiffFileType;
1875
2442
  }
1876
2443
 
1877
2444
  if (version === 43) { // Big TIFF file header
1878
- return {
1879
- ext: 'tif',
1880
- mime: 'image/tiff',
1881
- };
2445
+ return tiffFileType;
1882
2446
  }
1883
2447
  }
1884
2448
 
package/index.js CHANGED
@@ -5,13 +5,37 @@ Node.js specific entry point.
5
5
  import {ReadableStream as WebReadableStream} from 'node:stream/web';
6
6
  import {pipeline, PassThrough, Readable} from 'node:stream';
7
7
  import * as strtok3 from 'strtok3';
8
- import {FileTypeParser as DefaultFileTypeParser, reasonableDetectionSizeInBytes} from './core.js';
8
+ import {
9
+ FileTypeParser as DefaultFileTypeParser,
10
+ reasonableDetectionSizeInBytes,
11
+ normalizeSampleSize,
12
+ } from './core.js';
13
+
14
+ function isTokenizerStreamBoundsError(error) {
15
+ if (
16
+ !(error instanceof RangeError)
17
+ || error.message !== 'offset is out of bounds'
18
+ || typeof error.stack !== 'string'
19
+ ) {
20
+ return false;
21
+ }
22
+
23
+ // Some malformed or non-byte Node.js streams can surface this tokenizer-internal range error.
24
+ // Note: This stack-trace check is fragile and may break if strtok3 restructures its internals.
25
+ return /strtok3[/\\]lib[/\\]stream[/\\]/.test(error.stack);
26
+ }
9
27
 
10
28
  export class FileTypeParser extends DefaultFileTypeParser {
11
29
  async fromStream(stream) {
12
30
  const tokenizer = await (stream instanceof WebReadableStream ? strtok3.fromWebStream(stream, this.tokenizerOptions) : strtok3.fromStream(stream, this.tokenizerOptions));
13
31
  try {
14
32
  return await super.fromTokenizer(tokenizer);
33
+ } catch (error) {
34
+ if (isTokenizerStreamBoundsError(error)) {
35
+ return;
36
+ }
37
+
38
+ throw error;
15
39
  } finally {
16
40
  await tokenizer.close();
17
41
  }
@@ -31,7 +55,7 @@ export class FileTypeParser extends DefaultFileTypeParser {
31
55
  return super.toDetectionStream(readableStream, options);
32
56
  }
33
57
 
34
- const {sampleSize = reasonableDetectionSizeInBytes} = options;
58
+ const sampleSize = normalizeSampleSize(options.sampleSize ?? reasonableDetectionSizeInBytes);
35
59
 
36
60
  return new Promise((resolve, reject) => {
37
61
  readableStream.on('error', reject);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "file-type",
3
- "version": "21.3.0",
3
+ "version": "21.3.1",
4
4
  "description": "Detect the file type of a file, stream, or data",
5
5
  "license": "MIT",
6
6
  "repository": "sindresorhus/file-type",
@@ -258,12 +258,12 @@
258
258
  },
259
259
  "devDependencies": {
260
260
  "@tokenizer/token": "^0.3.0",
261
- "@types/node": "^22.15.21",
262
- "ava": "^6.3.0",
261
+ "@types/node": "^25.3.3",
262
+ "ava": "^7.0.0",
263
263
  "commonmark": "^0.31.2",
264
264
  "get-stream": "^9.0.1",
265
265
  "noop-stream": "^1.0.0",
266
- "tsd": "^0.32.0",
266
+ "tsd": "^0.33.0",
267
267
  "xo": "^0.60.0"
268
268
  },
269
269
  "xo": {
package/readme.md CHANGED
@@ -380,6 +380,7 @@ console.log(fileType);
380
380
  ### Available third-party file-type detectors
381
381
 
382
382
  - [@file-type/av](https://github.com/Borewit/file-type-av): Improves detection of audio and video file formats, with accurate differentiation between the two
383
+ - [@file-type/cfbf](https://github.com/Borewit/file-type-cfbf): Detects Compound File Binary Format (CFBF)-based formats, such as Office 97–2003 documents and `.msi`.
383
384
  - [@file-type/pdf](https://github.com/Borewit/file-type-pdf): Detects PDF based file types, such as Adobe Illustrator
384
385
  - [@file-type/xml](https://github.com/Borewit/file-type-xml): Detects common XML file types, such as GLM, KML, MusicXML, RSS, SVG, and XHTML
385
386
 
@@ -628,14 +629,14 @@ abortController.abort(); // Abort file-type reading from the Blob stream.
628
629
 
629
630
  *[Pull requests](.github/pull_request_template.md) are welcome for additional commonly used file types.*
630
631
 
631
- The following file types will not be accepted:
632
- - [MS-CFB: Microsoft Compound File Binary File Format based formats](https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-cfb/53989ce4-7b05-4f8d-829b-d08d6148375b), too old and difficult to parse:
632
+ The following file types will not be accepted, but most of them are supported by a [third-party detector](#available-third-party-file-type-detectors):
633
+ - [MS-CFB: Microsoft Compound File Binary File Format based formats](https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-cfb/53989ce4-7b05-4f8d-829b-d08d6148375b)
633
634
  - `.doc` - Microsoft Word 97-2003 Document
634
635
  - `.xls` - Microsoft Excel 97-2003 Document
635
636
  - `.ppt` - Microsoft PowerPoint97-2003 Document
636
637
  - `.msi` - Microsoft Windows Installer
637
638
  - `.csv` - [Reason.](https://github.com/sindresorhus/file-type/issues/264#issuecomment-568439196)
638
- - `.svg` - Supported by [third-party detector](#available-third-party-file-type-detectors).
639
+ - `.svg`
639
640
 
640
641
  #### tokenizer
641
642