file-type 21.3.1 → 21.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core.js +310 -20
- package/index.js +18 -2
- package/package.json +1 -1
- package/readme.md +8 -4
package/core.js
CHANGED
|
@@ -17,13 +17,25 @@ export const reasonableDetectionSizeInBytes = 4100; // A fair amount of file-typ
|
|
|
17
17
|
// Keep defensive limits small enough to avoid accidental memory spikes from untrusted inputs.
|
|
18
18
|
const maximumMpegOffsetTolerance = reasonableDetectionSizeInBytes - 2;
|
|
19
19
|
const maximumZipEntrySizeInBytes = 1024 * 1024;
|
|
20
|
+
const maximumZipEntryCount = 1024;
|
|
21
|
+
const maximumZipBufferedReadSizeInBytes = (2 ** 31) - 1;
|
|
20
22
|
const maximumUntrustedSkipSizeInBytes = 16 * 1024 * 1024;
|
|
23
|
+
const maximumUnknownSizePayloadProbeSizeInBytes = maximumZipEntrySizeInBytes;
|
|
24
|
+
const maximumZipTextEntrySizeInBytes = maximumZipEntrySizeInBytes;
|
|
21
25
|
const maximumNestedGzipDetectionSizeInBytes = maximumUntrustedSkipSizeInBytes;
|
|
26
|
+
const maximumNestedGzipProbeDepth = 1;
|
|
22
27
|
const maximumId3HeaderSizeInBytes = maximumUntrustedSkipSizeInBytes;
|
|
23
28
|
const maximumEbmlDocumentTypeSizeInBytes = 64;
|
|
24
|
-
const maximumEbmlElementPayloadSizeInBytes =
|
|
29
|
+
const maximumEbmlElementPayloadSizeInBytes = maximumUnknownSizePayloadProbeSizeInBytes;
|
|
25
30
|
const maximumEbmlElementCount = 256;
|
|
26
|
-
const
|
|
31
|
+
const maximumPngChunkCount = 512;
|
|
32
|
+
const maximumPngStreamScanBudgetInBytes = maximumUntrustedSkipSizeInBytes;
|
|
33
|
+
const maximumAsfHeaderObjectCount = 512;
|
|
34
|
+
const maximumTiffTagCount = 512;
|
|
35
|
+
const maximumDetectionReentryCount = 256;
|
|
36
|
+
const maximumPngChunkSizeInBytes = maximumUnknownSizePayloadProbeSizeInBytes;
|
|
37
|
+
const maximumAsfHeaderPayloadSizeInBytes = maximumUnknownSizePayloadProbeSizeInBytes;
|
|
38
|
+
const maximumTiffStreamIfdOffsetInBytes = maximumUnknownSizePayloadProbeSizeInBytes;
|
|
27
39
|
const maximumTiffIfdOffsetInBytes = maximumUntrustedSkipSizeInBytes;
|
|
28
40
|
const recoverableZipErrorMessages = new Set([
|
|
29
41
|
'Unexpected signature',
|
|
@@ -31,7 +43,9 @@ const recoverableZipErrorMessages = new Set([
|
|
|
31
43
|
'Expected Central-File-Header signature',
|
|
32
44
|
]);
|
|
33
45
|
const recoverableZipErrorMessagePrefixes = [
|
|
46
|
+
'ZIP entry count exceeds ',
|
|
34
47
|
'Unsupported ZIP compression method:',
|
|
48
|
+
'ZIP entry compressed data exceeds ',
|
|
35
49
|
'ZIP entry decompressed data exceeds ',
|
|
36
50
|
];
|
|
37
51
|
const recoverableZipErrorCodes = new Set([
|
|
@@ -109,6 +123,136 @@ async function decompressDeflateRawWithLimit(data, {maximumLength = maximumZipEn
|
|
|
109
123
|
return uncompressedData;
|
|
110
124
|
}
|
|
111
125
|
|
|
126
|
+
const zipDataDescriptorSignature = 0x08_07_4B_50;
|
|
127
|
+
const zipDataDescriptorLengthInBytes = 16;
|
|
128
|
+
const zipDataDescriptorOverlapLengthInBytes = zipDataDescriptorLengthInBytes - 1;
|
|
129
|
+
|
|
130
|
+
// Search a peeked window for the data descriptor that terminates the ZIP entry being read.
// A position only matches when the descriptor signature is there AND the little-endian
// 32-bit field 8 bytes after it (the compressed-size slot in the ZIP data descriptor layout)
// equals the number of entry bytes preceding that position (`bytesConsumed` + offset).
// Returns the offset inside `buffer`, or -1 when no position qualifies.
function findZipDataDescriptorOffset(buffer, bytesConsumed) {
	// Number of start positions where a whole descriptor still fits; zero or negative for short windows.
	const candidateCount = buffer.length - zipDataDescriptorLengthInBytes + 1;

	for (let offset = 0; offset < candidateCount; offset++) {
		if (Token.UINT32_LE.get(buffer, offset) !== zipDataDescriptorSignature) {
			continue;
		}

		// The signature bytes alone could also occur inside entry data, so cross-check the size field.
		const declaredSize = Token.UINT32_LE.get(buffer, offset + 8);
		if (declaredSize === bytesConsumed + offset) {
			return offset;
		}
	}

	return -1;
}
|
|
147
|
+
|
|
148
|
+
// Tell whether a PNG chunk type is ancillary, i.e. whether bit 0x20 is set in the code point
// of its first character (a lowercase first letter in ASCII chunk names).
// An empty string yields `false`, because `undefined & 0x20` evaluates to 0.
function isPngAncillaryChunk(type) {
	const ancillaryBit = 0x20;
	const firstCodePoint = type.codePointAt(0);
	return Boolean(firstCodePoint & ancillaryBit);
}
|
|
151
|
+
|
|
152
|
+
// Concatenate `chunks`, in iteration order, into one newly allocated Uint8Array of `totalLength` bytes.
// The caller supplies the total; chunks that overflow it make `TypedArray.prototype.set` throw a RangeError,
// and any unfilled tail stays zeroed.
function mergeByteChunks(chunks, totalLength) {
	const combined = new Uint8Array(totalLength);

	let writePosition = 0;
	for (const part of chunks) {
		combined.set(part, writePosition);
		writePosition += part.length;
	}

	return combined;
}
|
|
163
|
+
|
|
164
|
+
// Consume the payload of a ZIP entry whose size is only given by a trailing data descriptor.
// The stream is peeked one `syncBuffer`-sized window at a time until the descriptor is located
// (or no more bytes can be consumed); the tokenizer is left positioned at the descriptor.
// Throws once more than `maximumLength` bytes would be consumed.
// Resolves to the collected bytes when `shouldBuffer` is truthy, otherwise to `undefined`.
async function readZipDataDescriptorEntryWithLimit(zipHandler, {shouldBuffer, maximumLength = maximumZipEntrySizeInBytes} = {}) {
	const {syncBuffer, tokenizer} = zipHandler;
	const windowCapacity = syncBuffer.length;
	const bufferedChunks = [];
	let bytesConsumed = 0;
	let isDescriptorFound = false;

	while (!isDescriptorFound) {
		const peekedLength = await tokenizer.peekBuffer(syncBuffer, {mayBeLess: true});
		const descriptorOffset = findZipDataDescriptorOffset(syncBuffer.subarray(0, peekedLength), bytesConsumed);
		isDescriptorFound = descriptorOffset >= 0;

		let chunkLength;
		if (isDescriptorFound) {
			// Everything in front of the descriptor belongs to the entry.
			chunkLength = descriptorOffset;
		} else if (peekedLength === windowCapacity) {
			// A full window may end in the middle of a descriptor: hold back the tail so the next peek sees it whole.
			chunkLength = peekedLength - Math.min(zipDataDescriptorOverlapLengthInBytes, peekedLength - 1);
		} else {
			// A short peek means the input ran out, so nothing needs to be held back.
			chunkLength = peekedLength;
		}

		if (chunkLength === 0) {
			break;
		}

		bytesConsumed += chunkLength;
		if (bytesConsumed > maximumLength) {
			throw new Error(`ZIP entry compressed data exceeds ${maximumLength} bytes`);
		}

		if (shouldBuffer) {
			const chunk = new Uint8Array(chunkLength);
			await tokenizer.readBuffer(chunk);
			bufferedChunks.push(chunk);
		} else {
			await tokenizer.ignore(chunkLength);
		}
	}

	// For inputs of known size, the scanned byte count is accumulated on the handler for later budget checks.
	if (!hasUnknownFileSize(tokenizer)) {
		zipHandler.knownSizeDescriptorScannedBytes += bytesConsumed;
	}

	return shouldBuffer ? mergeByteChunks(bufferedChunks, bytesConsumed) : undefined;
}
|
|
214
|
+
|
|
215
|
+
// Compute how many more bytes ZIP probing may scan, never returning less than 0.
// Unknown-size inputs are measured by tokenizer travel since `startOffset` against `maximumUntrustedSkipSizeInBytes`;
// known-size inputs are measured by the handler's accumulated descriptor-scan counter against `maximumZipEntrySizeInBytes`.
function getRemainingZipScanBudget(zipHandler, startOffset) {
	let remainingBytes;

	if (hasUnknownFileSize(zipHandler.tokenizer)) {
		const scannedBytes = zipHandler.tokenizer.position - startOffset;
		remainingBytes = maximumUntrustedSkipSizeInBytes - scannedBytes;
	} else {
		remainingBytes = maximumZipEntrySizeInBytes - zipHandler.knownSizeDescriptorScannedBytes;
	}

	return Math.max(0, remainingBytes);
}
|
|
222
|
+
|
|
223
|
+
// Read or skip the payload of one ZIP entry, depending on `shouldBuffer`.
// - Data-descriptor entries with a zero header size are delegated to the descriptor scanner,
//   bounded by `maximumDescriptorLength`.
// - Buffered reads validate the declared compressed size before allocating and resolve to the bytes.
// - Otherwise the payload is skipped through `safeIgnore` and the promise resolves to `undefined`.
async function readZipEntryData(zipHandler, zipHeader, {shouldBuffer, maximumDescriptorLength = maximumZipEntrySizeInBytes} = {}) {
	const {tokenizer} = zipHandler;
	const {compressedSize} = zipHeader;

	const isSizeDeferredToDescriptor = zipHeader.dataDescriptor && compressedSize === 0;
	if (isSizeDeferredToDescriptor) {
		return readZipDataDescriptorEntryWithLimit(zipHandler, {
			shouldBuffer,
			maximumLength: maximumDescriptorLength,
		});
	}

	if (shouldBuffer) {
		const maximumLength = getMaximumZipBufferedReadLength(tokenizer);
		// The size comes from the archive itself, so reject anything that is not a sane allocation length.
		const isSizeRejected = !Number.isFinite(compressedSize)
			|| compressedSize < 0
			|| compressedSize > maximumLength;
		if (isSizeRejected) {
			throw new Error(`ZIP entry compressed data exceeds ${maximumLength} bytes`);
		}

		const fileData = new Uint8Array(compressedSize);
		await tokenizer.readBuffer(fileData);
		return fileData;
	}

	// Nobody wants the bytes: skip them, capped by the entry limit for unknown-size inputs and by the file size otherwise.
	await safeIgnore(tokenizer, compressedSize, {
		maximumLength: hasUnknownFileSize(tokenizer) ? maximumZipEntrySizeInBytes : tokenizer.fileInfo.size,
		reason: 'ZIP entry compressed data',
	});
}
|
|
255
|
+
|
|
112
256
|
// Override the default inflate to enforce decompression size limits, since @tokenizer/inflate does not expose a configuration hook for this.
|
|
113
257
|
ZipHandler.prototype.inflate = async function (zipHeader, fileData, callback) {
|
|
114
258
|
if (zipHeader.compressedMethod === 0) {
|
|
@@ -119,11 +263,56 @@ ZipHandler.prototype.inflate = async function (zipHeader, fileData, callback) {
|
|
|
119
263
|
throw new Error(`Unsupported ZIP compression method: ${zipHeader.compressedMethod}`);
|
|
120
264
|
}
|
|
121
265
|
|
|
122
|
-
const
|
|
123
|
-
const uncompressedData = await decompressDeflateRawWithLimit(fileData, {maximumLength});
|
|
266
|
+
const uncompressedData = await decompressDeflateRawWithLimit(fileData, {maximumLength: maximumZipEntrySizeInBytes});
|
|
124
267
|
return callback(uncompressedData);
|
|
125
268
|
};
|
|
126
269
|
|
|
270
|
+
// Replacement for ZipHandler#unzip that walks local file headers under defensive limits.
// For every entry, `fileCallback(zipHeader)` returns an object whose `handler` (optional) receives the
// inflated entry data and whose `stop` flag ends the walk after the current entry.
// Throws ParserHardLimitError when the scan budget is exceeded and a plain Error when the entry count
// exceeds `maximumZipEntryCount` or a data descriptor signature is missing.
ZipHandler.prototype.unzip = async function (fileCallback) {
	const scanStartPosition = this.tokenizer.position;

	// The same budget check guards both the start and the end of every iteration.
	const assertWithinScanBudget = () => {
		if (hasExceededUnknownSizeScanBudget(this.tokenizer, scanStartPosition, maximumUntrustedSkipSizeInBytes)) {
			throw new ParserHardLimitError(`ZIP stream probing exceeds ${maximumUntrustedSkipSizeInBytes} bytes`);
		}
	};

	this.knownSizeDescriptorScannedBytes = 0;
	let entriesSeen = 0;
	let shouldStop = false;

	while (!shouldStop) {
		assertWithinScanBudget();

		const zipHeader = await this.readLocalFileHeader();
		if (!zipHeader) {
			break;
		}

		entriesSeen += 1;
		if (entriesSeen > maximumZipEntryCount) {
			throw new Error(`ZIP entry count exceeds ${maximumZipEntryCount}`);
		}

		const {stop, handler} = fileCallback(zipHeader);
		shouldStop = Boolean(stop);

		await this.tokenizer.ignore(zipHeader.extraFieldLength);
		const fileData = await readZipEntryData(this, zipHeader, {
			shouldBuffer: Boolean(handler),
			maximumDescriptorLength: Math.min(maximumZipEntrySizeInBytes, getRemainingZipScanBudget(this, scanStartPosition)),
		});

		if (handler) {
			await this.inflate(zipHeader, fileData, handler);
		}

		if (zipHeader.dataDescriptor) {
			// The payload is followed by a fixed-size descriptor record; consume it and verify its signature.
			const descriptorBytes = new Uint8Array(zipDataDescriptorLengthInBytes);
			await this.tokenizer.readBuffer(descriptorBytes);
			const signature = Token.UINT32_LE.get(descriptorBytes, 0);
			if (signature !== zipDataDescriptorSignature) {
				throw new Error(`Expected data-descriptor-signature at position ${this.tokenizer.position - descriptorBytes.length}`);
			}
		}

		assertWithinScanBudget();
	}
};
|
|
315
|
+
|
|
127
316
|
function createByteLimitedReadableStream(stream, maximumBytes) {
|
|
128
317
|
const reader = stream.getReader();
|
|
129
318
|
let emittedBytes = 0;
|
|
@@ -384,6 +573,15 @@ function hasExceededUnknownSizeScanBudget(tokenizer, startOffset, maximumBytes)
|
|
|
384
573
|
);
|
|
385
574
|
}
|
|
386
575
|
|
|
576
|
+
// Upper bound for a single buffered ZIP entry read: the bytes left in the input when its size is a
// finite number (never negative), always capped by `maximumZipBufferedReadSizeInBytes`.
function getMaximumZipBufferedReadLength(tokenizer) {
	const {size} = tokenizer.fileInfo;

	if (!Number.isFinite(size)) {
		// Size is not a finite number, so only the fixed cap can limit the read.
		return Math.min(Number.MAX_SAFE_INTEGER, maximumZipBufferedReadSizeInBytes);
	}

	const bytesLeft = Math.max(0, size - tokenizer.position);
	return Math.min(bytesLeft, maximumZipBufferedReadSizeInBytes);
}
|
|
584
|
+
|
|
387
585
|
function isRecoverableZipError(error) {
|
|
388
586
|
if (error instanceof strtok3.EndOfStreamError) {
|
|
389
587
|
return true;
|
|
@@ -401,10 +599,7 @@ function isRecoverableZipError(error) {
|
|
|
401
599
|
return true;
|
|
402
600
|
}
|
|
403
601
|
|
|
404
|
-
if (
|
|
405
|
-
error instanceof TypeError
|
|
406
|
-
&& recoverableZipErrorCodes.has(error.code)
|
|
407
|
-
) {
|
|
602
|
+
if (recoverableZipErrorCodes.has(error.code)) {
|
|
408
603
|
return true;
|
|
409
604
|
}
|
|
410
605
|
|
|
@@ -548,9 +743,17 @@ export class FileTypeParser {
|
|
|
548
743
|
this.tokenizerOptions = {
|
|
549
744
|
abortSignal: this.options.signal,
|
|
550
745
|
};
|
|
746
|
+
this.gzipProbeDepth = 0;
|
|
747
|
+
}
|
|
748
|
+
|
|
749
|
+
getTokenizerOptions() {
|
|
750
|
+
return {
|
|
751
|
+
...this.tokenizerOptions,
|
|
752
|
+
};
|
|
551
753
|
}
|
|
552
754
|
|
|
553
|
-
async fromTokenizer(tokenizer) {
|
|
755
|
+
async fromTokenizer(tokenizer, detectionReentryCount = 0) {
|
|
756
|
+
this.detectionReentryCount = detectionReentryCount;
|
|
554
757
|
const initialPosition = tokenizer.position;
|
|
555
758
|
// Iterate through all file-type detectors
|
|
556
759
|
for (const detector of this.detectors) {
|
|
@@ -590,11 +793,11 @@ export class FileTypeParser {
|
|
|
590
793
|
return;
|
|
591
794
|
}
|
|
592
795
|
|
|
593
|
-
return this.fromTokenizer(strtok3.fromBuffer(buffer, this.
|
|
796
|
+
return this.fromTokenizer(strtok3.fromBuffer(buffer, this.getTokenizerOptions()));
|
|
594
797
|
}
|
|
595
798
|
|
|
596
799
|
async fromBlob(blob) {
|
|
597
|
-
const tokenizer = strtok3.fromBlob(blob, this.
|
|
800
|
+
const tokenizer = strtok3.fromBlob(blob, this.getTokenizerOptions());
|
|
598
801
|
try {
|
|
599
802
|
return await this.fromTokenizer(tokenizer);
|
|
600
803
|
} finally {
|
|
@@ -603,7 +806,7 @@ export class FileTypeParser {
|
|
|
603
806
|
}
|
|
604
807
|
|
|
605
808
|
async fromStream(stream) {
|
|
606
|
-
const tokenizer = strtok3.fromWebStream(stream, this.
|
|
809
|
+
const tokenizer = strtok3.fromWebStream(stream, this.getTokenizerOptions());
|
|
607
810
|
try {
|
|
608
811
|
return await this.fromTokenizer(tokenizer);
|
|
609
812
|
} finally {
|
|
@@ -753,6 +956,11 @@ export class FileTypeParser {
|
|
|
753
956
|
// -- 3-byte signatures --
|
|
754
957
|
|
|
755
958
|
if (this.check([0xEF, 0xBB, 0xBF])) { // UTF-8-BOM
|
|
959
|
+
if (this.detectionReentryCount >= maximumDetectionReentryCount) {
|
|
960
|
+
return;
|
|
961
|
+
}
|
|
962
|
+
|
|
963
|
+
this.detectionReentryCount++;
|
|
756
964
|
// Strip off UTF-8-BOM
|
|
757
965
|
await this.tokenizer.ignore(3);
|
|
758
966
|
return this.detectConfident(tokenizer);
|
|
@@ -773,10 +981,18 @@ export class FileTypeParser {
|
|
|
773
981
|
}
|
|
774
982
|
|
|
775
983
|
if (this.check([0x1F, 0x8B, 0x8])) {
|
|
984
|
+
if (this.gzipProbeDepth >= maximumNestedGzipProbeDepth) {
|
|
985
|
+
return {
|
|
986
|
+
ext: 'gz',
|
|
987
|
+
mime: 'application/gzip',
|
|
988
|
+
};
|
|
989
|
+
}
|
|
990
|
+
|
|
776
991
|
const gzipHandler = new GzipHandler(tokenizer);
|
|
777
992
|
const limitedInflatedStream = createByteLimitedReadableStream(gzipHandler.inflate(), maximumNestedGzipDetectionSizeInBytes);
|
|
778
993
|
let compressedFileType;
|
|
779
994
|
try {
|
|
995
|
+
this.gzipProbeDepth++;
|
|
780
996
|
compressedFileType = await this.fromStream(limitedInflatedStream);
|
|
781
997
|
} catch (error) {
|
|
782
998
|
if (error?.name === 'AbortError') {
|
|
@@ -784,6 +1000,8 @@ export class FileTypeParser {
|
|
|
784
1000
|
}
|
|
785
1001
|
|
|
786
1002
|
// Decompression or inner-detection failures are expected for non-tar gzip files.
|
|
1003
|
+
} finally {
|
|
1004
|
+
this.gzipProbeDepth--;
|
|
787
1005
|
}
|
|
788
1006
|
|
|
789
1007
|
// We only need enough inflated bytes to confidently decide whether this is tar.gz.
|
|
@@ -820,7 +1038,10 @@ export class FileTypeParser {
|
|
|
820
1038
|
// Keep ID3 probing bounded for unknown-size streams to avoid attacker-controlled large skips.
|
|
821
1039
|
|| (
|
|
822
1040
|
isUnknownFileSize
|
|
823
|
-
&&
|
|
1041
|
+
&& (
|
|
1042
|
+
id3HeaderLength > maximumId3HeaderSizeInBytes
|
|
1043
|
+
|| (tokenizer.position + id3HeaderLength) > maximumId3HeaderSizeInBytes
|
|
1044
|
+
)
|
|
824
1045
|
)
|
|
825
1046
|
) {
|
|
826
1047
|
return;
|
|
@@ -850,7 +1071,12 @@ export class FileTypeParser {
|
|
|
850
1071
|
throw error;
|
|
851
1072
|
}
|
|
852
1073
|
|
|
853
|
-
|
|
1074
|
+
if (this.detectionReentryCount >= maximumDetectionReentryCount) {
|
|
1075
|
+
return;
|
|
1076
|
+
}
|
|
1077
|
+
|
|
1078
|
+
this.detectionReentryCount++;
|
|
1079
|
+
return this.fromTokenizer(tokenizer, this.detectionReentryCount); // Skip ID3 header, recursion
|
|
854
1080
|
}
|
|
855
1081
|
|
|
856
1082
|
// Musepack, SV7
|
|
@@ -971,7 +1197,7 @@ export class FileTypeParser {
|
|
|
971
1197
|
stop: true,
|
|
972
1198
|
};
|
|
973
1199
|
case 'mimetype':
|
|
974
|
-
if (!canReadZipEntryForDetection(zipHeader)) {
|
|
1200
|
+
if (!canReadZipEntryForDetection(zipHeader, maximumZipTextEntrySizeInBytes)) {
|
|
975
1201
|
return {};
|
|
976
1202
|
}
|
|
977
1203
|
|
|
@@ -987,8 +1213,7 @@ export class FileTypeParser {
|
|
|
987
1213
|
case '[Content_Types].xml': {
|
|
988
1214
|
openXmlState.hasContentTypesEntry = true;
|
|
989
1215
|
|
|
990
|
-
|
|
991
|
-
if (!canReadZipEntryForDetection(zipHeader, maximumContentTypesEntrySize)) {
|
|
1216
|
+
if (!canReadZipEntryForDetection(zipHeader, maximumZipTextEntrySizeInBytes)) {
|
|
992
1217
|
openXmlState.hasUnparseableContentTypes = true;
|
|
993
1218
|
return {};
|
|
994
1219
|
}
|
|
@@ -1269,6 +1494,11 @@ export class FileTypeParser {
|
|
|
1269
1494
|
return;
|
|
1270
1495
|
}
|
|
1271
1496
|
|
|
1497
|
+
if (hasExceededUnknownSizeScanBudget(tokenizer, ebmlScanStart, maximumUntrustedSkipSizeInBytes)) {
|
|
1498
|
+
return;
|
|
1499
|
+
}
|
|
1500
|
+
|
|
1501
|
+
const previousPosition = tokenizer.position;
|
|
1272
1502
|
const element = await readElement();
|
|
1273
1503
|
|
|
1274
1504
|
if (element.id === 0x42_82) {
|
|
@@ -1298,10 +1528,16 @@ export class FileTypeParser {
|
|
|
1298
1528
|
reason: 'EBML payload',
|
|
1299
1529
|
}); // ignore payload
|
|
1300
1530
|
--children;
|
|
1531
|
+
|
|
1532
|
+
// Safeguard against malformed files: bail if the position did not advance.
|
|
1533
|
+
if (tokenizer.position <= previousPosition) {
|
|
1534
|
+
return;
|
|
1535
|
+
}
|
|
1301
1536
|
}
|
|
1302
1537
|
}
|
|
1303
1538
|
|
|
1304
1539
|
const rootElement = await readElement();
|
|
1540
|
+
const ebmlScanStart = tokenizer.position;
|
|
1305
1541
|
const documentType = await readChildren(rootElement.len);
|
|
1306
1542
|
|
|
1307
1543
|
switch (documentType) {
|
|
@@ -1683,28 +1919,52 @@ export class FileTypeParser {
|
|
|
1683
1919
|
|
|
1684
1920
|
const isUnknownPngStream = hasUnknownFileSize(tokenizer);
|
|
1685
1921
|
const pngScanStart = tokenizer.position;
|
|
1922
|
+
let pngChunkCount = 0;
|
|
1923
|
+
let hasSeenImageHeader = false;
|
|
1686
1924
|
do {
|
|
1687
|
-
|
|
1925
|
+
pngChunkCount++;
|
|
1926
|
+
if (pngChunkCount > maximumPngChunkCount) {
|
|
1688
1927
|
break;
|
|
1689
1928
|
}
|
|
1690
1929
|
|
|
1930
|
+
if (hasExceededUnknownSizeScanBudget(tokenizer, pngScanStart, maximumPngStreamScanBudgetInBytes)) {
|
|
1931
|
+
break;
|
|
1932
|
+
}
|
|
1933
|
+
|
|
1934
|
+
const previousPosition = tokenizer.position;
|
|
1691
1935
|
const chunk = await readChunkHeader();
|
|
1692
1936
|
if (chunk.length < 0) {
|
|
1693
1937
|
return; // Invalid chunk length
|
|
1694
1938
|
}
|
|
1695
1939
|
|
|
1940
|
+
if (chunk.type === 'IHDR') {
|
|
1941
|
+
// PNG requires the first real image header to be a 13-byte IHDR chunk.
|
|
1942
|
+
if (chunk.length !== 13) {
|
|
1943
|
+
return;
|
|
1944
|
+
}
|
|
1945
|
+
|
|
1946
|
+
hasSeenImageHeader = true;
|
|
1947
|
+
}
|
|
1948
|
+
|
|
1696
1949
|
switch (chunk.type) {
|
|
1697
1950
|
case 'IDAT':
|
|
1698
1951
|
return pngFileType;
|
|
1699
1952
|
case 'acTL':
|
|
1700
1953
|
return apngFileType;
|
|
1701
1954
|
default:
|
|
1955
|
+
if (
|
|
1956
|
+
!hasSeenImageHeader
|
|
1957
|
+
&& chunk.type !== 'CgBI'
|
|
1958
|
+
) {
|
|
1959
|
+
return;
|
|
1960
|
+
}
|
|
1961
|
+
|
|
1702
1962
|
if (
|
|
1703
1963
|
isUnknownPngStream
|
|
1704
1964
|
&& chunk.length > maximumPngChunkSizeInBytes
|
|
1705
1965
|
) {
|
|
1706
1966
|
// Avoid huge attacker-controlled skips when probing unknown-size streams.
|
|
1707
|
-
return;
|
|
1967
|
+
return hasSeenImageHeader && isPngAncillaryChunk(chunk.type) ? pngFileType : undefined;
|
|
1708
1968
|
}
|
|
1709
1969
|
|
|
1710
1970
|
try {
|
|
@@ -1726,6 +1986,11 @@ export class FileTypeParser {
|
|
|
1726
1986
|
throw error;
|
|
1727
1987
|
}
|
|
1728
1988
|
}
|
|
1989
|
+
|
|
1990
|
+
// Safeguard against malformed files: bail if the position did not advance.
|
|
1991
|
+
if (tokenizer.position <= previousPosition) {
|
|
1992
|
+
break;
|
|
1993
|
+
}
|
|
1729
1994
|
} while (tokenizer.position + 8 < tokenizer.fileInfo.size);
|
|
1730
1995
|
|
|
1731
1996
|
return pngFileType;
|
|
@@ -1906,7 +2171,13 @@ export class FileTypeParser {
|
|
|
1906
2171
|
});
|
|
1907
2172
|
const isUnknownFileSize = hasUnknownFileSize(tokenizer);
|
|
1908
2173
|
const asfHeaderScanStart = tokenizer.position;
|
|
2174
|
+
let asfHeaderObjectCount = 0;
|
|
1909
2175
|
while (tokenizer.position + 24 < tokenizer.fileInfo.size) {
|
|
2176
|
+
asfHeaderObjectCount++;
|
|
2177
|
+
if (asfHeaderObjectCount > maximumAsfHeaderObjectCount) {
|
|
2178
|
+
break;
|
|
2179
|
+
}
|
|
2180
|
+
|
|
1910
2181
|
if (hasExceededUnknownSizeScanBudget(tokenizer, asfHeaderScanStart, maximumUntrustedSkipSizeInBytes)) {
|
|
1911
2182
|
break;
|
|
1912
2183
|
}
|
|
@@ -1949,8 +2220,16 @@ export class FileTypeParser {
|
|
|
1949
2220
|
break;
|
|
1950
2221
|
}
|
|
1951
2222
|
|
|
2223
|
+
if (
|
|
2224
|
+
isUnknownFileSize
|
|
2225
|
+
&& payload > maximumAsfHeaderPayloadSizeInBytes
|
|
2226
|
+
) {
|
|
2227
|
+
isMalformedAsf = true;
|
|
2228
|
+
break;
|
|
2229
|
+
}
|
|
2230
|
+
|
|
1952
2231
|
await safeIgnore(tokenizer, payload, {
|
|
1953
|
-
maximumLength: isUnknownFileSize ?
|
|
2232
|
+
maximumLength: isUnknownFileSize ? maximumAsfHeaderPayloadSizeInBytes : tokenizer.fileInfo.size,
|
|
1954
2233
|
reason: 'ASF header payload',
|
|
1955
2234
|
});
|
|
1956
2235
|
|
|
@@ -2363,6 +2642,10 @@ export class FileTypeParser {
|
|
|
2363
2642
|
|
|
2364
2643
|
async readTiffIFD(bigEndian) {
|
|
2365
2644
|
const numberOfTags = await this.tokenizer.readToken(bigEndian ? Token.UINT16_BE : Token.UINT16_LE);
|
|
2645
|
+
if (numberOfTags > maximumTiffTagCount) {
|
|
2646
|
+
return;
|
|
2647
|
+
}
|
|
2648
|
+
|
|
2366
2649
|
if (
|
|
2367
2650
|
hasUnknownFileSize(this.tokenizer)
|
|
2368
2651
|
&& (2 + (numberOfTags * 12)) > maximumTiffIfdOffsetInBytes
|
|
@@ -2412,6 +2695,13 @@ export class FileTypeParser {
|
|
|
2412
2695
|
}
|
|
2413
2696
|
}
|
|
2414
2697
|
|
|
2698
|
+
if (
|
|
2699
|
+
hasUnknownFileSize(this.tokenizer)
|
|
2700
|
+
&& ifdOffset > maximumTiffStreamIfdOffsetInBytes
|
|
2701
|
+
) {
|
|
2702
|
+
return tiffFileType;
|
|
2703
|
+
}
|
|
2704
|
+
|
|
2415
2705
|
const maximumTiffOffset = hasUnknownFileSize(this.tokenizer) ? maximumTiffIfdOffsetInBytes : this.tokenizer.fileInfo.size;
|
|
2416
2706
|
|
|
2417
2707
|
try {
|
package/index.js
CHANGED
|
@@ -4,6 +4,8 @@ Node.js specific entry point.
|
|
|
4
4
|
|
|
5
5
|
import {ReadableStream as WebReadableStream} from 'node:stream/web';
|
|
6
6
|
import {pipeline, PassThrough, Readable} from 'node:stream';
|
|
7
|
+
import fs from 'node:fs/promises';
|
|
8
|
+
import {constants as fileSystemConstants} from 'node:fs';
|
|
7
9
|
import * as strtok3 from 'strtok3';
|
|
8
10
|
import {
|
|
9
11
|
FileTypeParser as DefaultFileTypeParser,
|
|
@@ -27,7 +29,7 @@ function isTokenizerStreamBoundsError(error) {
|
|
|
27
29
|
|
|
28
30
|
export class FileTypeParser extends DefaultFileTypeParser {
|
|
29
31
|
async fromStream(stream) {
|
|
30
|
-
const tokenizer = await (stream instanceof WebReadableStream ? strtok3.fromWebStream(stream, this.
|
|
32
|
+
const tokenizer = await (stream instanceof WebReadableStream ? strtok3.fromWebStream(stream, this.getTokenizerOptions()) : strtok3.fromStream(stream, this.getTokenizerOptions()));
|
|
31
33
|
try {
|
|
32
34
|
return await super.fromTokenizer(tokenizer);
|
|
33
35
|
} catch (error) {
|
|
@@ -42,7 +44,21 @@ export class FileTypeParser extends DefaultFileTypeParser {
|
|
|
42
44
|
}
|
|
43
45
|
|
|
44
46
|
async fromFile(path) {
|
|
45
|
-
|
|
47
|
+
// TODO: Remove this when `strtok3.fromFile()` safely rejects non-regular filesystem objects without a pathname race.
|
|
48
|
+
const fileHandle = await fs.open(path, fileSystemConstants.O_RDONLY | fileSystemConstants.O_NONBLOCK);
|
|
49
|
+
const fileStat = await fileHandle.stat();
|
|
50
|
+
if (!fileStat.isFile()) {
|
|
51
|
+
await fileHandle.close();
|
|
52
|
+
return;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
const tokenizer = new strtok3.FileTokenizer(fileHandle, {
|
|
56
|
+
...this.getTokenizerOptions(),
|
|
57
|
+
fileInfo: {
|
|
58
|
+
path,
|
|
59
|
+
size: fileStat.size,
|
|
60
|
+
},
|
|
61
|
+
});
|
|
46
62
|
try {
|
|
47
63
|
return await super.fromTokenizer(tokenizer);
|
|
48
64
|
} finally {
|
package/package.json
CHANGED
package/readme.md
CHANGED
|
@@ -10,17 +10,21 @@ This package is for detecting binary-based file formats, not text-based formats
|
|
|
10
10
|
|
|
11
11
|
We accept contributions for commonly used modern file formats, not historical or obscure ones. Open an issue first for discussion.
|
|
12
12
|
|
|
13
|
+
> [!IMPORTANT]
|
|
14
|
+
> NO SECURITY REPORTS WILL BE ACCEPTED RIGHT NOW. I'm currently hardening the parser, and all the low-quality AI-generated security reports are just a huge waste of time.
|
|
15
|
+
|
|
13
16
|
## Install
|
|
14
17
|
|
|
15
18
|
```sh
|
|
16
19
|
npm install file-type
|
|
17
20
|
```
|
|
18
21
|
|
|
19
|
-
**This package is an ESM package. Your project needs to be ESM too. [Read more](https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c). For TypeScript + CommonJS, see [`load-esm`](https://github.com/Borewit/load-esm).**
|
|
20
|
-
|
|
21
|
-
If you use it with Webpack, you need the latest Webpack version and ensure you configure it correctly for ESM.
|
|
22
|
+
**This package is an ESM package. Your project needs to be ESM too. [Read more](https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c). For TypeScript + CommonJS, see [`load-esm`](https://github.com/Borewit/load-esm).** If you use it with Webpack, you need the latest Webpack version and ensure you configure it correctly for ESM.
|
|
22
23
|
|
|
23
|
-
|
|
24
|
+
> [!IMPORTANT]
|
|
25
|
+
> File type detection is based on binary signatures (magic numbers) and is a best-effort hint. It does not guarantee the file is actually of that type or that the file is valid/not malformed.
|
|
26
|
+
>
|
|
27
|
+
> Robustness against malformed input is best-effort. When processing untrusted files on a server, enforce a reasonable file size limit and use a worker thread with a timeout (e.g., [`make-asynchronous`](https://github.com/sindresorhus/make-asynchronous)). These are not considered security issues in this package.
|
|
24
28
|
|
|
25
29
|
## Usage
|
|
26
30
|
|