file-type 21.3.2 → 21.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core.js +243 -59
- package/index.js +71 -18
- package/package.json +1 -1
- package/readme.md +4 -1
package/core.js
CHANGED
|
@@ -20,18 +20,23 @@ const maximumZipEntrySizeInBytes = 1024 * 1024;
|
|
|
20
20
|
const maximumZipEntryCount = 1024;
|
|
21
21
|
const maximumZipBufferedReadSizeInBytes = (2 ** 31) - 1;
|
|
22
22
|
const maximumUntrustedSkipSizeInBytes = 16 * 1024 * 1024;
|
|
23
|
+
const maximumUnknownSizePayloadProbeSizeInBytes = maximumZipEntrySizeInBytes;
|
|
23
24
|
const maximumZipTextEntrySizeInBytes = maximumZipEntrySizeInBytes;
|
|
24
25
|
const maximumNestedGzipDetectionSizeInBytes = maximumUntrustedSkipSizeInBytes;
|
|
25
26
|
const maximumNestedGzipProbeDepth = 1;
|
|
27
|
+
const unknownSizeGzipProbeTimeoutInMilliseconds = 100;
|
|
26
28
|
const maximumId3HeaderSizeInBytes = maximumUntrustedSkipSizeInBytes;
|
|
27
29
|
const maximumEbmlDocumentTypeSizeInBytes = 64;
|
|
28
|
-
const maximumEbmlElementPayloadSizeInBytes =
|
|
30
|
+
const maximumEbmlElementPayloadSizeInBytes = maximumUnknownSizePayloadProbeSizeInBytes;
|
|
29
31
|
const maximumEbmlElementCount = 256;
|
|
30
32
|
const maximumPngChunkCount = 512;
|
|
33
|
+
const maximumPngStreamScanBudgetInBytes = maximumUntrustedSkipSizeInBytes;
|
|
31
34
|
const maximumAsfHeaderObjectCount = 512;
|
|
32
35
|
const maximumTiffTagCount = 512;
|
|
33
36
|
const maximumDetectionReentryCount = 256;
|
|
34
|
-
const maximumPngChunkSizeInBytes =
|
|
37
|
+
const maximumPngChunkSizeInBytes = maximumUnknownSizePayloadProbeSizeInBytes;
|
|
38
|
+
const maximumAsfHeaderPayloadSizeInBytes = maximumUnknownSizePayloadProbeSizeInBytes;
|
|
39
|
+
const maximumTiffStreamIfdOffsetInBytes = maximumUnknownSizePayloadProbeSizeInBytes;
|
|
35
40
|
const maximumTiffIfdOffsetInBytes = maximumUntrustedSkipSizeInBytes;
|
|
36
41
|
const recoverableZipErrorMessages = new Set([
|
|
37
42
|
'Unexpected signature',
|
|
@@ -43,6 +48,7 @@ const recoverableZipErrorMessagePrefixes = [
|
|
|
43
48
|
'Unsupported ZIP compression method:',
|
|
44
49
|
'ZIP entry compressed data exceeds ',
|
|
45
50
|
'ZIP entry decompressed data exceeds ',
|
|
51
|
+
'Expected data-descriptor-signature at position ',
|
|
46
52
|
];
|
|
47
53
|
const recoverableZipErrorCodes = new Set([
|
|
48
54
|
'Z_BUF_ERROR',
|
|
@@ -52,6 +58,27 @@ const recoverableZipErrorCodes = new Set([
|
|
|
52
58
|
|
|
53
59
|
class ParserHardLimitError extends Error {}
|
|
54
60
|
|
|
61
|
+
/**
Patch the `close()` and `abort()` methods of a tokenizer's web BYOB stream reader so that they cancel the underlying reader and release its lock.

Tokenizers whose `streamReader` is not a `WebStreamByobReader` (matched by constructor name) are returned untouched.

@param {object} [tokenizer] - Tokenizer that may expose a `streamReader` property.
@returns {object} The same tokenizer instance that was passed in.
*/
function patchWebByobTokenizerClose(tokenizer) {
	const streamReader = tokenizer?.streamReader;
	if (streamReader?.constructor?.name !== 'WebStreamByobReader') {
		return tokenizer;
	}

	const {reader} = streamReader;
	const cancelAndRelease = async () => {
		try {
			await reader.cancel();
		} finally {
			// Release the lock even when cancellation rejects, so the stream is not left locked.
			reader.releaseLock();
		}
	};

	streamReader.close = cancelAndRelease;
	streamReader.abort = async () => {
		// Flag the reader as interrupted before tearing it down.
		streamReader.interrupted = true;
		await cancelAndRelease();
	};

	return tokenizer;
}
|
|
81
|
+
|
|
55
82
|
function getSafeBound(value, maximum, reason) {
|
|
56
83
|
if (
|
|
57
84
|
!Number.isFinite(value)
|
|
@@ -141,6 +168,10 @@ function findZipDataDescriptorOffset(buffer, bytesConsumed) {
|
|
|
141
168
|
return -1;
|
|
142
169
|
}
|
|
143
170
|
|
|
171
|
+
/**
Tell whether a PNG chunk type is ancillary, i.e. whether bit 5 (`0x20`) of its first character is set (the first letter is lowercase).

@param {string} type - Chunk type name, such as `'IHDR'` or `'tEXt'`.
@returns {boolean} `true` when the bit is set, otherwise `false` (including for an empty string).
*/
function isPngAncillaryChunk(type) {
	const ancillaryBitMask = 0x20;
	const firstCodePoint = type.codePointAt(0);
	return (firstCodePoint & ancillaryBitMask) !== 0;
}
|
|
174
|
+
|
|
144
175
|
function mergeByteChunks(chunks, totalLength) {
|
|
145
176
|
const merged = new Uint8Array(totalLength);
|
|
146
177
|
let offset = 0;
|
|
@@ -193,6 +224,10 @@ async function readZipDataDescriptorEntryWithLimit(zipHandler, {shouldBuffer, ma
|
|
|
193
224
|
}
|
|
194
225
|
}
|
|
195
226
|
|
|
227
|
+
if (!hasUnknownFileSize(zipHandler.tokenizer)) {
|
|
228
|
+
zipHandler.knownSizeDescriptorScannedBytes += bytesConsumed;
|
|
229
|
+
}
|
|
230
|
+
|
|
196
231
|
if (!shouldBuffer) {
|
|
197
232
|
return;
|
|
198
233
|
}
|
|
@@ -200,16 +235,30 @@ async function readZipDataDescriptorEntryWithLimit(zipHandler, {shouldBuffer, ma
|
|
|
200
235
|
return mergeByteChunks(chunks, bytesConsumed);
|
|
201
236
|
}
|
|
202
237
|
|
|
203
|
-
|
|
238
|
+
/**
Compute how many more bytes the ZIP scan may consume, never going below zero.

For tokenizers of unknown size the budget is `maximumUntrustedSkipSizeInBytes` minus the bytes consumed since `startOffset`.
Otherwise it is `maximumZipEntrySizeInBytes` minus `zipHandler.knownSizeDescriptorScannedBytes`.

@param {object} zipHandler - ZIP handler exposing `tokenizer` and `knownSizeDescriptorScannedBytes`.
@param {number} startOffset - Tokenizer position at which the ZIP scan started.
@returns {number} Remaining budget in bytes.
*/
function getRemainingZipScanBudget(zipHandler, startOffset) {
	const {tokenizer} = zipHandler;
	const remainingBytes = hasUnknownFileSize(tokenizer)
		? maximumUntrustedSkipSizeInBytes - (tokenizer.position - startOffset)
		: maximumZipEntrySizeInBytes - zipHandler.knownSizeDescriptorScannedBytes;

	return Math.max(0, remainingBytes);
}
|
|
245
|
+
|
|
246
|
+
async function readZipEntryData(zipHandler, zipHeader, {shouldBuffer, maximumDescriptorLength = maximumZipEntrySizeInBytes} = {}) {
|
|
204
247
|
if (
|
|
205
248
|
zipHeader.dataDescriptor
|
|
206
249
|
&& zipHeader.compressedSize === 0
|
|
207
250
|
) {
|
|
208
|
-
return readZipDataDescriptorEntryWithLimit(zipHandler, {
|
|
251
|
+
return readZipDataDescriptorEntryWithLimit(zipHandler, {
|
|
252
|
+
shouldBuffer,
|
|
253
|
+
maximumLength: maximumDescriptorLength,
|
|
254
|
+
});
|
|
209
255
|
}
|
|
210
256
|
|
|
211
257
|
if (!shouldBuffer) {
|
|
212
|
-
await zipHandler.tokenizer
|
|
258
|
+
await safeIgnore(zipHandler.tokenizer, zipHeader.compressedSize, {
|
|
259
|
+
maximumLength: hasUnknownFileSize(zipHandler.tokenizer) ? maximumZipEntrySizeInBytes : zipHandler.tokenizer.fileInfo.size,
|
|
260
|
+
reason: 'ZIP entry compressed data',
|
|
261
|
+
});
|
|
213
262
|
return;
|
|
214
263
|
}
|
|
215
264
|
|
|
@@ -244,7 +293,13 @@ ZipHandler.prototype.inflate = async function (zipHeader, fileData, callback) {
|
|
|
244
293
|
ZipHandler.prototype.unzip = async function (fileCallback) {
|
|
245
294
|
let stop = false;
|
|
246
295
|
let zipEntryCount = 0;
|
|
296
|
+
const zipScanStart = this.tokenizer.position;
|
|
297
|
+
this.knownSizeDescriptorScannedBytes = 0;
|
|
247
298
|
do {
|
|
299
|
+
if (hasExceededUnknownSizeScanBudget(this.tokenizer, zipScanStart, maximumUntrustedSkipSizeInBytes)) {
|
|
300
|
+
throw new ParserHardLimitError(`ZIP stream probing exceeds ${maximumUntrustedSkipSizeInBytes} bytes`);
|
|
301
|
+
}
|
|
302
|
+
|
|
248
303
|
const zipHeader = await this.readLocalFileHeader();
|
|
249
304
|
if (!zipHeader) {
|
|
250
305
|
break;
|
|
@@ -260,6 +315,7 @@ ZipHandler.prototype.unzip = async function (fileCallback) {
|
|
|
260
315
|
await this.tokenizer.ignore(zipHeader.extraFieldLength);
|
|
261
316
|
const fileData = await readZipEntryData(this, zipHeader, {
|
|
262
317
|
shouldBuffer: Boolean(next.handler),
|
|
318
|
+
maximumDescriptorLength: Math.min(maximumZipEntrySizeInBytes, getRemainingZipScanBudget(this, zipScanStart)),
|
|
263
319
|
});
|
|
264
320
|
|
|
265
321
|
if (next.handler) {
|
|
@@ -273,6 +329,10 @@ ZipHandler.prototype.unzip = async function (fileCallback) {
|
|
|
273
329
|
throw new Error(`Expected data-descriptor-signature at position ${this.tokenizer.position - dataDescriptor.length}`);
|
|
274
330
|
}
|
|
275
331
|
}
|
|
332
|
+
|
|
333
|
+
if (hasExceededUnknownSizeScanBudget(this.tokenizer, zipScanStart, maximumUntrustedSkipSizeInBytes)) {
|
|
334
|
+
throw new ParserHardLimitError(`ZIP stream probing exceeds ${maximumUntrustedSkipSizeInBytes} bytes`);
|
|
335
|
+
}
|
|
276
336
|
} while (!stop);
|
|
277
337
|
};
|
|
278
338
|
|
|
@@ -496,7 +556,8 @@ function _check(buffer, headers, options) {
|
|
|
496
556
|
}
|
|
497
557
|
|
|
498
558
|
export function normalizeSampleSize(sampleSize) {
|
|
499
|
-
//
|
|
559
|
+
// `sampleSize` is an explicit caller-controlled tuning knob, not untrusted file input.
|
|
560
|
+
// Preserve valid caller-requested probe depth here; applications must bound attacker-derived option values themselves.
|
|
500
561
|
if (!Number.isFinite(sampleSize)) {
|
|
501
562
|
return reasonableDetectionSizeInBytes;
|
|
502
563
|
}
|
|
@@ -504,6 +565,45 @@ export function normalizeSampleSize(sampleSize) {
|
|
|
504
565
|
return Math.max(1, Math.trunc(sampleSize));
|
|
505
566
|
}
|
|
506
567
|
|
|
568
|
+
/**
Read from a BYOB reader, optionally racing the read against an abort signal.

Without a signal this is a plain `reader.read(buffer)`.
With a signal, an already-aborted signal throws synchronously; an abort during the read cancels the reader (cancellation errors are ignored) and rejects with the signal's reason.

@param {object} reader - Reader exposing `read(buffer)` and `cancel(reason)`.
@param {Uint8Array} buffer - View passed through to `reader.read()`.
@param {AbortSignal} [signal] - Optional abort signal.
@returns {Promise<object>} Whatever `reader.read(buffer)` resolves to.
*/
function readByobReaderWithSignal(reader, buffer, signal) {
	if (signal === undefined) {
		return reader.read(buffer);
	}

	signal.throwIfAborted();

	return new Promise((resolve, reject) => {
		function detachAbortListener() {
			signal.removeEventListener('abort', handleAbort);
		}

		function handleAbort() {
			const {reason} = signal;
			detachAbortListener();

			// Best-effort cancellation: a failing cancel must not hide the abort reason.
			const cancelQuietly = async () => {
				try {
					await reader.cancel(reason);
				} catch {}
			};

			void cancelQuietly();
			reject(reason);
		}

		signal.addEventListener('abort', handleAbort, {once: true});

		const forwardRead = async () => {
			let readResult;
			try {
				readResult = await reader.read(buffer);
			} catch (error) {
				detachAbortListener();
				reject(error);
				return;
			}

			detachAbortListener();
			resolve(readResult);
		};

		void forwardRead();
	});
}
|
|
606
|
+
|
|
507
607
|
function normalizeMpegOffsetTolerance(mpegOffsetTolerance) {
|
|
508
608
|
// This value controls scan depth and therefore worst-case CPU work.
|
|
509
609
|
if (!Number.isFinite(mpegOffsetTolerance)) {
|
|
@@ -715,7 +815,11 @@ export class FileTypeParser {
|
|
|
715
815
|
};
|
|
716
816
|
}
|
|
717
817
|
|
|
718
|
-
|
|
818
|
+
createTokenizerFromWebStream(stream) {
|
|
819
|
+
return patchWebByobTokenizerClose(strtok3.fromWebStream(stream, this.getTokenizerOptions()));
|
|
820
|
+
}
|
|
821
|
+
|
|
822
|
+
async parseTokenizer(tokenizer, detectionReentryCount = 0) {
|
|
719
823
|
this.detectionReentryCount = detectionReentryCount;
|
|
720
824
|
const initialPosition = tokenizer.position;
|
|
721
825
|
// Iterate through all file-type detectors
|
|
@@ -745,6 +849,14 @@ export class FileTypeParser {
|
|
|
745
849
|
}
|
|
746
850
|
}
|
|
747
851
|
|
|
852
|
+
async fromTokenizer(tokenizer) {
|
|
853
|
+
try {
|
|
854
|
+
return await this.parseTokenizer(tokenizer);
|
|
855
|
+
} finally {
|
|
856
|
+
await tokenizer.close();
|
|
857
|
+
}
|
|
858
|
+
}
|
|
859
|
+
|
|
748
860
|
async fromBuffer(input) {
|
|
749
861
|
if (!(input instanceof Uint8Array || input instanceof ArrayBuffer)) {
|
|
750
862
|
throw new TypeError(`Expected the \`input\` argument to be of type \`Uint8Array\` or \`ArrayBuffer\`, got \`${typeof input}\``);
|
|
@@ -760,21 +872,15 @@ export class FileTypeParser {
|
|
|
760
872
|
}
|
|
761
873
|
|
|
762
874
|
async fromBlob(blob) {
|
|
875
|
+
this.options.signal?.throwIfAborted();
|
|
763
876
|
const tokenizer = strtok3.fromBlob(blob, this.getTokenizerOptions());
|
|
764
|
-
|
|
765
|
-
return await this.fromTokenizer(tokenizer);
|
|
766
|
-
} finally {
|
|
767
|
-
await tokenizer.close();
|
|
768
|
-
}
|
|
877
|
+
return this.fromTokenizer(tokenizer);
|
|
769
878
|
}
|
|
770
879
|
|
|
771
880
|
async fromStream(stream) {
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
} finally {
|
|
776
|
-
await tokenizer.close();
|
|
777
|
-
}
|
|
881
|
+
this.options.signal?.throwIfAborted();
|
|
882
|
+
const tokenizer = this.createTokenizerFromWebStream(stream);
|
|
883
|
+
return this.fromTokenizer(tokenizer);
|
|
778
884
|
}
|
|
779
885
|
|
|
780
886
|
async toDetectionStream(stream, options) {
|
|
@@ -785,7 +891,7 @@ export class FileTypeParser {
|
|
|
785
891
|
const reader = stream.getReader({mode: 'byob'});
|
|
786
892
|
try {
|
|
787
893
|
// Read the first chunk from the stream
|
|
788
|
-
const {value: chunk, done} = await reader
|
|
894
|
+
const {value: chunk, done} = await readByobReaderWithSignal(reader, new Uint8Array(sampleSize), this.options.signal);
|
|
789
895
|
firstChunk = chunk;
|
|
790
896
|
if (!done && chunk) {
|
|
791
897
|
try {
|
|
@@ -822,6 +928,71 @@ export class FileTypeParser {
|
|
|
822
928
|
return newStream;
|
|
823
929
|
}
|
|
824
930
|
|
|
931
|
+
async detectGzip(tokenizer) {
|
|
932
|
+
if (this.gzipProbeDepth >= maximumNestedGzipProbeDepth) {
|
|
933
|
+
return {
|
|
934
|
+
ext: 'gz',
|
|
935
|
+
mime: 'application/gzip',
|
|
936
|
+
};
|
|
937
|
+
}
|
|
938
|
+
|
|
939
|
+
const gzipHandler = new GzipHandler(tokenizer);
|
|
940
|
+
const limitedInflatedStream = createByteLimitedReadableStream(gzipHandler.inflate(), maximumNestedGzipDetectionSizeInBytes);
|
|
941
|
+
const hasUnknownSize = hasUnknownFileSize(tokenizer);
|
|
942
|
+
let timeout;
|
|
943
|
+
let probeSignal;
|
|
944
|
+
let probeParser;
|
|
945
|
+
let compressedFileType;
|
|
946
|
+
|
|
947
|
+
if (hasUnknownSize) {
|
|
948
|
+
const timeoutController = new AbortController();
|
|
949
|
+
timeout = setTimeout(() => {
|
|
950
|
+
timeoutController.abort(new DOMException(`Operation timed out after ${unknownSizeGzipProbeTimeoutInMilliseconds} ms`, 'TimeoutError'));
|
|
951
|
+
}, unknownSizeGzipProbeTimeoutInMilliseconds);
|
|
952
|
+
probeSignal = this.options.signal === undefined
|
|
953
|
+
? timeoutController.signal
|
|
954
|
+
// eslint-disable-next-line n/no-unsupported-features/node-builtins
|
|
955
|
+
: AbortSignal.any([this.options.signal, timeoutController.signal]);
|
|
956
|
+
probeParser = new FileTypeParser({
|
|
957
|
+
...this.options,
|
|
958
|
+
signal: probeSignal,
|
|
959
|
+
});
|
|
960
|
+
probeParser.gzipProbeDepth = this.gzipProbeDepth + 1;
|
|
961
|
+
} else {
|
|
962
|
+
this.gzipProbeDepth++;
|
|
963
|
+
}
|
|
964
|
+
|
|
965
|
+
try {
|
|
966
|
+
compressedFileType = await (probeParser ?? this).fromStream(limitedInflatedStream);
|
|
967
|
+
} catch (error) {
|
|
968
|
+
if (
|
|
969
|
+
error?.name === 'AbortError'
|
|
970
|
+
&& probeSignal?.reason?.name !== 'TimeoutError'
|
|
971
|
+
) {
|
|
972
|
+
throw error;
|
|
973
|
+
}
|
|
974
|
+
|
|
975
|
+
// Timeout, decompression, or inner-detection failures are expected for non-tar gzip files.
|
|
976
|
+
} finally {
|
|
977
|
+
clearTimeout(timeout);
|
|
978
|
+
if (!hasUnknownSize) {
|
|
979
|
+
this.gzipProbeDepth--;
|
|
980
|
+
}
|
|
981
|
+
}
|
|
982
|
+
|
|
983
|
+
if (compressedFileType?.ext === 'tar') {
|
|
984
|
+
return {
|
|
985
|
+
ext: 'tar.gz',
|
|
986
|
+
mime: 'application/gzip',
|
|
987
|
+
};
|
|
988
|
+
}
|
|
989
|
+
|
|
990
|
+
return {
|
|
991
|
+
ext: 'gz',
|
|
992
|
+
mime: 'application/gzip',
|
|
993
|
+
};
|
|
994
|
+
}
|
|
995
|
+
|
|
825
996
|
check(header, options) {
|
|
826
997
|
return _check(this.buffer, header, options);
|
|
827
998
|
}
|
|
@@ -841,6 +1012,13 @@ export class FileTypeParser {
|
|
|
841
1012
|
|
|
842
1013
|
this.tokenizer = tokenizer;
|
|
843
1014
|
|
|
1015
|
+
if (hasUnknownFileSize(tokenizer)) {
|
|
1016
|
+
await tokenizer.peekBuffer(this.buffer, {length: 3, mayBeLess: true});
|
|
1017
|
+
if (this.check([0x1F, 0x8B, 0x8])) {
|
|
1018
|
+
return this.detectGzip(tokenizer);
|
|
1019
|
+
}
|
|
1020
|
+
}
|
|
1021
|
+
|
|
844
1022
|
await tokenizer.peekBuffer(this.buffer, {length: 32, mayBeLess: true});
|
|
845
1023
|
|
|
846
1024
|
// -- 2-byte signatures --
|
|
@@ -944,41 +1122,7 @@ export class FileTypeParser {
|
|
|
944
1122
|
}
|
|
945
1123
|
|
|
946
1124
|
if (this.check([0x1F, 0x8B, 0x8])) {
|
|
947
|
-
|
|
948
|
-
return {
|
|
949
|
-
ext: 'gz',
|
|
950
|
-
mime: 'application/gzip',
|
|
951
|
-
};
|
|
952
|
-
}
|
|
953
|
-
|
|
954
|
-
const gzipHandler = new GzipHandler(tokenizer);
|
|
955
|
-
const limitedInflatedStream = createByteLimitedReadableStream(gzipHandler.inflate(), maximumNestedGzipDetectionSizeInBytes);
|
|
956
|
-
let compressedFileType;
|
|
957
|
-
try {
|
|
958
|
-
this.gzipProbeDepth++;
|
|
959
|
-
compressedFileType = await this.fromStream(limitedInflatedStream);
|
|
960
|
-
} catch (error) {
|
|
961
|
-
if (error?.name === 'AbortError') {
|
|
962
|
-
throw error;
|
|
963
|
-
}
|
|
964
|
-
|
|
965
|
-
// Decompression or inner-detection failures are expected for non-tar gzip files.
|
|
966
|
-
} finally {
|
|
967
|
-
this.gzipProbeDepth--;
|
|
968
|
-
}
|
|
969
|
-
|
|
970
|
-
// We only need enough inflated bytes to confidently decide whether this is tar.gz.
|
|
971
|
-
if (compressedFileType?.ext === 'tar') {
|
|
972
|
-
return {
|
|
973
|
-
ext: 'tar.gz',
|
|
974
|
-
mime: 'application/gzip',
|
|
975
|
-
};
|
|
976
|
-
}
|
|
977
|
-
|
|
978
|
-
return {
|
|
979
|
-
ext: 'gz',
|
|
980
|
-
mime: 'application/gzip',
|
|
981
|
-
};
|
|
1125
|
+
return this.detectGzip(tokenizer);
|
|
982
1126
|
}
|
|
983
1127
|
|
|
984
1128
|
if (this.check([0x42, 0x5A, 0x68])) {
|
|
@@ -1001,7 +1145,10 @@ export class FileTypeParser {
|
|
|
1001
1145
|
// Keep ID3 probing bounded for unknown-size streams to avoid attacker-controlled large skips.
|
|
1002
1146
|
|| (
|
|
1003
1147
|
isUnknownFileSize
|
|
1004
|
-
&&
|
|
1148
|
+
&& (
|
|
1149
|
+
id3HeaderLength > maximumId3HeaderSizeInBytes
|
|
1150
|
+
|| (tokenizer.position + id3HeaderLength) > maximumId3HeaderSizeInBytes
|
|
1151
|
+
)
|
|
1005
1152
|
)
|
|
1006
1153
|
) {
|
|
1007
1154
|
return;
|
|
@@ -1036,7 +1183,7 @@ export class FileTypeParser {
|
|
|
1036
1183
|
}
|
|
1037
1184
|
|
|
1038
1185
|
this.detectionReentryCount++;
|
|
1039
|
-
return this.
|
|
1186
|
+
return this.parseTokenizer(tokenizer, this.detectionReentryCount); // Skip ID3 header, recursion
|
|
1040
1187
|
}
|
|
1041
1188
|
|
|
1042
1189
|
// Musepack, SV7
|
|
@@ -1454,6 +1601,10 @@ export class FileTypeParser {
|
|
|
1454
1601
|
return;
|
|
1455
1602
|
}
|
|
1456
1603
|
|
|
1604
|
+
if (hasExceededUnknownSizeScanBudget(tokenizer, ebmlScanStart, maximumUntrustedSkipSizeInBytes)) {
|
|
1605
|
+
return;
|
|
1606
|
+
}
|
|
1607
|
+
|
|
1457
1608
|
const previousPosition = tokenizer.position;
|
|
1458
1609
|
const element = await readElement();
|
|
1459
1610
|
|
|
@@ -1493,6 +1644,7 @@ export class FileTypeParser {
|
|
|
1493
1644
|
}
|
|
1494
1645
|
|
|
1495
1646
|
const rootElement = await readElement();
|
|
1647
|
+
const ebmlScanStart = tokenizer.position;
|
|
1496
1648
|
const documentType = await readChildren(rootElement.len);
|
|
1497
1649
|
|
|
1498
1650
|
switch (documentType) {
|
|
@@ -1875,13 +2027,14 @@ export class FileTypeParser {
|
|
|
1875
2027
|
const isUnknownPngStream = hasUnknownFileSize(tokenizer);
|
|
1876
2028
|
const pngScanStart = tokenizer.position;
|
|
1877
2029
|
let pngChunkCount = 0;
|
|
2030
|
+
let hasSeenImageHeader = false;
|
|
1878
2031
|
do {
|
|
1879
2032
|
pngChunkCount++;
|
|
1880
2033
|
if (pngChunkCount > maximumPngChunkCount) {
|
|
1881
2034
|
break;
|
|
1882
2035
|
}
|
|
1883
2036
|
|
|
1884
|
-
if (hasExceededUnknownSizeScanBudget(tokenizer, pngScanStart,
|
|
2037
|
+
if (hasExceededUnknownSizeScanBudget(tokenizer, pngScanStart, maximumPngStreamScanBudgetInBytes)) {
|
|
1885
2038
|
break;
|
|
1886
2039
|
}
|
|
1887
2040
|
|
|
@@ -1891,18 +2044,34 @@ export class FileTypeParser {
|
|
|
1891
2044
|
return; // Invalid chunk length
|
|
1892
2045
|
}
|
|
1893
2046
|
|
|
2047
|
+
if (chunk.type === 'IHDR') {
|
|
2048
|
+
// PNG requires the first real image header to be a 13-byte IHDR chunk.
|
|
2049
|
+
if (chunk.length !== 13) {
|
|
2050
|
+
return;
|
|
2051
|
+
}
|
|
2052
|
+
|
|
2053
|
+
hasSeenImageHeader = true;
|
|
2054
|
+
}
|
|
2055
|
+
|
|
1894
2056
|
switch (chunk.type) {
|
|
1895
2057
|
case 'IDAT':
|
|
1896
2058
|
return pngFileType;
|
|
1897
2059
|
case 'acTL':
|
|
1898
2060
|
return apngFileType;
|
|
1899
2061
|
default:
|
|
2062
|
+
if (
|
|
2063
|
+
!hasSeenImageHeader
|
|
2064
|
+
&& chunk.type !== 'CgBI'
|
|
2065
|
+
) {
|
|
2066
|
+
return;
|
|
2067
|
+
}
|
|
2068
|
+
|
|
1900
2069
|
if (
|
|
1901
2070
|
isUnknownPngStream
|
|
1902
2071
|
&& chunk.length > maximumPngChunkSizeInBytes
|
|
1903
2072
|
) {
|
|
1904
2073
|
// Avoid huge attacker-controlled skips when probing unknown-size streams.
|
|
1905
|
-
return;
|
|
2074
|
+
return hasSeenImageHeader && isPngAncillaryChunk(chunk.type) ? pngFileType : undefined;
|
|
1906
2075
|
}
|
|
1907
2076
|
|
|
1908
2077
|
try {
|
|
@@ -2158,8 +2327,16 @@ export class FileTypeParser {
|
|
|
2158
2327
|
break;
|
|
2159
2328
|
}
|
|
2160
2329
|
|
|
2330
|
+
if (
|
|
2331
|
+
isUnknownFileSize
|
|
2332
|
+
&& payload > maximumAsfHeaderPayloadSizeInBytes
|
|
2333
|
+
) {
|
|
2334
|
+
isMalformedAsf = true;
|
|
2335
|
+
break;
|
|
2336
|
+
}
|
|
2337
|
+
|
|
2161
2338
|
await safeIgnore(tokenizer, payload, {
|
|
2162
|
-
maximumLength: isUnknownFileSize ?
|
|
2339
|
+
maximumLength: isUnknownFileSize ? maximumAsfHeaderPayloadSizeInBytes : tokenizer.fileInfo.size,
|
|
2163
2340
|
reason: 'ASF header payload',
|
|
2164
2341
|
});
|
|
2165
2342
|
|
|
@@ -2625,6 +2802,13 @@ export class FileTypeParser {
|
|
|
2625
2802
|
}
|
|
2626
2803
|
}
|
|
2627
2804
|
|
|
2805
|
+
if (
|
|
2806
|
+
hasUnknownFileSize(this.tokenizer)
|
|
2807
|
+
&& ifdOffset > maximumTiffStreamIfdOffsetInBytes
|
|
2808
|
+
) {
|
|
2809
|
+
return tiffFileType;
|
|
2810
|
+
}
|
|
2811
|
+
|
|
2628
2812
|
const maximumTiffOffset = hasUnknownFileSize(this.tokenizer) ? maximumTiffIfdOffsetInBytes : this.tokenizer.fileInfo.size;
|
|
2629
2813
|
|
|
2630
2814
|
try {
|
package/index.js
CHANGED
|
@@ -4,6 +4,8 @@ Node.js specific entry point.
|
|
|
4
4
|
|
|
5
5
|
import {ReadableStream as WebReadableStream} from 'node:stream/web';
|
|
6
6
|
import {pipeline, PassThrough, Readable} from 'node:stream';
|
|
7
|
+
import fs from 'node:fs/promises';
|
|
8
|
+
import {constants as fileSystemConstants} from 'node:fs';
|
|
7
9
|
import * as strtok3 from 'strtok3';
|
|
8
10
|
import {
|
|
9
11
|
FileTypeParser as DefaultFileTypeParser,
|
|
@@ -27,7 +29,8 @@ function isTokenizerStreamBoundsError(error) {
|
|
|
27
29
|
|
|
28
30
|
export class FileTypeParser extends DefaultFileTypeParser {
|
|
29
31
|
async fromStream(stream) {
|
|
30
|
-
|
|
32
|
+
this.options.signal?.throwIfAborted();
|
|
33
|
+
const tokenizer = await (stream instanceof WebReadableStream ? this.createTokenizerFromWebStream(stream) : strtok3.fromStream(stream, this.getTokenizerOptions()));
|
|
31
34
|
try {
|
|
32
35
|
return await super.fromTokenizer(tokenizer);
|
|
33
36
|
} catch (error) {
|
|
@@ -37,17 +40,34 @@ export class FileTypeParser extends DefaultFileTypeParser {
|
|
|
37
40
|
|
|
38
41
|
throw error;
|
|
39
42
|
} finally {
|
|
40
|
-
|
|
43
|
+
// TODO: Remove this when `strtok3.fromStream()` closes the underlying Readable instead of only aborting tokenizer reads.
|
|
44
|
+
if (
|
|
45
|
+
stream instanceof Readable
|
|
46
|
+
&& !stream.destroyed
|
|
47
|
+
) {
|
|
48
|
+
stream.destroy();
|
|
49
|
+
}
|
|
41
50
|
}
|
|
42
51
|
}
|
|
43
52
|
|
|
44
53
|
async fromFile(path) {
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
54
|
+
this.options.signal?.throwIfAborted();
|
|
55
|
+
// TODO: Remove this when `strtok3.fromFile()` safely rejects non-regular filesystem objects without a pathname race.
|
|
56
|
+
const fileHandle = await fs.open(path, fileSystemConstants.O_RDONLY | fileSystemConstants.O_NONBLOCK);
|
|
57
|
+
const fileStat = await fileHandle.stat();
|
|
58
|
+
if (!fileStat.isFile()) {
|
|
59
|
+
await fileHandle.close();
|
|
60
|
+
return;
|
|
50
61
|
}
|
|
62
|
+
|
|
63
|
+
const tokenizer = new strtok3.FileTokenizer(fileHandle, {
|
|
64
|
+
...this.getTokenizerOptions(),
|
|
65
|
+
fileInfo: {
|
|
66
|
+
path,
|
|
67
|
+
size: fileStat.size,
|
|
68
|
+
},
|
|
69
|
+
});
|
|
70
|
+
return super.fromTokenizer(tokenizer);
|
|
51
71
|
}
|
|
52
72
|
|
|
53
73
|
async toDetectionStream(readableStream, options = {}) {
|
|
@@ -55,36 +75,69 @@ export class FileTypeParser extends DefaultFileTypeParser {
|
|
|
55
75
|
return super.toDetectionStream(readableStream, options);
|
|
56
76
|
}
|
|
57
77
|
|
|
58
|
-
const sampleSize =
|
|
78
|
+
const {sampleSize = reasonableDetectionSizeInBytes} = options;
|
|
79
|
+
const {signal} = this.options;
|
|
80
|
+
const normalizedSampleSize = normalizeSampleSize(sampleSize);
|
|
81
|
+
|
|
82
|
+
signal?.throwIfAborted();
|
|
59
83
|
|
|
60
84
|
return new Promise((resolve, reject) => {
|
|
61
|
-
|
|
85
|
+
let isSettled = false;
|
|
86
|
+
|
|
87
|
+
const cleanup = () => {
|
|
88
|
+
readableStream.off('error', onError);
|
|
89
|
+
readableStream.off('readable', onReadable);
|
|
90
|
+
signal?.removeEventListener('abort', onAbort);
|
|
91
|
+
};
|
|
62
92
|
|
|
63
|
-
|
|
93
|
+
const settle = (callback, value) => {
|
|
94
|
+
if (isSettled) {
|
|
95
|
+
return;
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
isSettled = true;
|
|
99
|
+
cleanup();
|
|
100
|
+
callback(value);
|
|
101
|
+
};
|
|
102
|
+
|
|
103
|
+
const onError = error => {
|
|
104
|
+
settle(reject, error);
|
|
105
|
+
};
|
|
106
|
+
|
|
107
|
+
const onAbort = () => {
|
|
108
|
+
if (!readableStream.destroyed) {
|
|
109
|
+
readableStream.destroy();
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
settle(reject, signal.reason);
|
|
113
|
+
};
|
|
114
|
+
|
|
115
|
+
const onReadable = () => {
|
|
64
116
|
(async () => {
|
|
65
117
|
try {
|
|
66
|
-
// Set up output stream
|
|
67
118
|
const pass = new PassThrough();
|
|
68
119
|
const outputStream = pipeline ? pipeline(readableStream, pass, () => {}) : readableStream.pipe(pass);
|
|
69
|
-
|
|
70
|
-
// Read the input stream and detect the filetype
|
|
71
|
-
const chunk = readableStream.read(sampleSize) ?? readableStream.read() ?? new Uint8Array(0);
|
|
120
|
+
const chunk = readableStream.read(normalizedSampleSize) ?? readableStream.read() ?? new Uint8Array(0);
|
|
72
121
|
try {
|
|
73
122
|
pass.fileType = await this.fromBuffer(chunk);
|
|
74
123
|
} catch (error) {
|
|
75
124
|
if (error instanceof strtok3.EndOfStreamError) {
|
|
76
125
|
pass.fileType = undefined;
|
|
77
126
|
} else {
|
|
78
|
-
reject
|
|
127
|
+
settle(reject, error);
|
|
79
128
|
}
|
|
80
129
|
}
|
|
81
130
|
|
|
82
|
-
resolve
|
|
131
|
+
settle(resolve, outputStream);
|
|
83
132
|
} catch (error) {
|
|
84
|
-
reject
|
|
133
|
+
settle(reject, error);
|
|
85
134
|
}
|
|
86
135
|
})();
|
|
87
|
-
}
|
|
136
|
+
};
|
|
137
|
+
|
|
138
|
+
readableStream.on('error', onError);
|
|
139
|
+
readableStream.once('readable', onReadable);
|
|
140
|
+
signal?.addEventListener('abort', onAbort, {once: true});
|
|
88
141
|
});
|
|
89
142
|
}
|
|
90
143
|
}
|
package/package.json
CHANGED
package/readme.md
CHANGED
|
@@ -10,6 +10,9 @@ This package is for detecting binary-based file formats, not text-based formats
|
|
|
10
10
|
|
|
11
11
|
We accept contributions for commonly used modern file formats, not historical or obscure ones. Open an issue first for discussion.
|
|
12
12
|
|
|
13
|
+
> [!IMPORTANT]
|
|
14
|
+
> NO SECURITY REPORTS WILL BE ACCEPTED RIGHT NOW. I'm currently hardening the parser and all the low-quality AI-generated security reports is just a huge waste of time.
|
|
15
|
+
|
|
13
16
|
## Install
|
|
14
17
|
|
|
15
18
|
```sh
|
|
@@ -438,7 +441,7 @@ import {FileTypeParser} from 'file-type';
|
|
|
438
441
|
|
|
439
442
|
const abortController = new AbortController()
|
|
440
443
|
|
|
441
|
-
const parser = new FileTypeParser({
|
|
444
|
+
const parser = new FileTypeParser({signal: abortController.signal});
|
|
442
445
|
|
|
443
446
|
const promise = parser.fromStream(blob.stream());
|
|
444
447
|
|