file-type 21.3.0 → 21.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core.js +925 -148
- package/index.js +27 -3
- package/package.json +4 -4
- package/readme.md +9 -7
package/core.js
CHANGED
|
@@ -14,6 +14,321 @@ import {
|
|
|
14
14
|
import {extensions, mimeTypes} from './supported.js';
|
|
15
15
|
|
|
16
16
|
export const reasonableDetectionSizeInBytes = 4100; // A fair amount of file-types are detectable within this range.

// Keep defensive limits small enough to avoid accidental memory spikes from untrusted inputs.

// Upper clamp applied to the caller-supplied `mpegOffsetTolerance` option.
const maximumMpegOffsetTolerance = reasonableDetectionSizeInBytes - 2;

// ZIP limits: per-entry size (1 MiB), number of local file headers walked, and largest single buffered read.
const maximumZipEntrySizeInBytes = 2 ** 20;
const maximumZipEntryCount = 2 ** 10;
const maximumZipBufferedReadSizeInBytes = 0x7F_FF_FF_FF; // (2 ** 31) - 1

// Default ceiling (16 MiB) for any skip whose length comes from the input itself.
const maximumUntrustedSkipSizeInBytes = 16 * (2 ** 20);

const maximumZipTextEntrySizeInBytes = maximumZipEntrySizeInBytes;

// Gzip probing: how many inflated bytes are inspected and how deep nested gzip detection may go.
const maximumNestedGzipDetectionSizeInBytes = maximumUntrustedSkipSizeInBytes;
const maximumNestedGzipProbeDepth = 1;

const maximumId3HeaderSizeInBytes = maximumUntrustedSkipSizeInBytes;

// EBML limits: DocType string length, skippable payload size, and elements visited per level.
const maximumEbmlDocumentTypeSizeInBytes = 64;
const maximumEbmlElementPayloadSizeInBytes = maximumUntrustedSkipSizeInBytes;
const maximumEbmlElementCount = 256;

// Iteration caps for container walkers and for re-entering detection.
const maximumPngChunkCount = 512;
const maximumAsfHeaderObjectCount = 512;
const maximumTiffTagCount = 512;
const maximumDetectionReentryCount = 256;

const maximumPngChunkSizeInBytes = maximumUntrustedSkipSizeInBytes;
const maximumTiffIfdOffsetInBytes = maximumUntrustedSkipSizeInBytes;

// ZIP failures that `isRecoverableZipError` accepts: exact messages, message prefixes, and error codes.
const recoverableZipErrorMessages = new Set([
	'Unexpected signature',
	'Encrypted ZIP',
	'Expected Central-File-Header signature',
]);

const recoverableZipErrorMessagePrefixes = [
	'ZIP entry count exceeds ',
	'Unsupported ZIP compression method:',
	'ZIP entry compressed data exceeds ',
	'ZIP entry decompressed data exceeds ',
];

const recoverableZipErrorCodes = new Set([
	'Z_BUF_ERROR',
	'Z_DATA_ERROR',
	'ERR_INVALID_STATE',
]);
|
|
52
|
+
|
|
53
|
+
// Raised by `getSafeBound` when a size taken from the input is non-finite, negative, or above its allowed maximum.
// `fromTokenizer` and `isRecoverableZipError` check for it with `instanceof` and treat it as a non-fatal condition.
class ParserHardLimitError extends Error {
	constructor(message, options) {
		super(message, options);
		// Without this the error would report itself as a generic `Error` in logs and stack traces.
		this.name = 'ParserHardLimitError';
	}
}
|
|
54
|
+
|
|
55
|
+
// Return `value` unchanged when it is a finite number within `[0, maximum]`.
// Otherwise throw a `ParserHardLimitError` whose message starts with `reason`.
function getSafeBound(value, maximum, reason) {
	const isAcceptable = Number.isFinite(value)
		&& !(value < 0)
		&& !(value > maximum);

	if (isAcceptable) {
		return value;
	}

	throw new ParserHardLimitError(`${reason} has invalid size ${value} (maximum ${maximum} bytes)`);
}
|
|
66
|
+
|
|
67
|
+
// Skip `length` bytes on the tokenizer, but only after `getSafeBound` has accepted the length against `maximumLength`.
// An out-of-range length rejects before the tokenizer is touched.
async function safeIgnore(tokenizer, length, {maximumLength = maximumUntrustedSkipSizeInBytes, reason = 'skip'} = {}) {
	await tokenizer.ignore(getSafeBound(length, maximumLength, reason));
}
|
|
71
|
+
|
|
72
|
+
// Read into `buffer` through the tokenizer after bounding the requested length.
// The length is `options.length` when given, else `buffer.length`, and must pass `getSafeBound` against `maximumLength`.
// Resolves with whatever `tokenizer.readBuffer` resolves with.
async function safeReadBuffer(tokenizer, buffer, options, {maximumLength = buffer.length, reason = 'read'} = {}) {
	const boundedLength = getSafeBound(options?.length ?? buffer.length, maximumLength, reason);
	const readOptions = {...options, length: boundedLength};
	return tokenizer.readBuffer(buffer, readOptions);
}
|
|
80
|
+
|
|
81
|
+
async function decompressDeflateRawWithLimit(data, {maximumLength = maximumZipEntrySizeInBytes} = {}) {
|
|
82
|
+
const input = new ReadableStream({
|
|
83
|
+
start(controller) {
|
|
84
|
+
controller.enqueue(data);
|
|
85
|
+
controller.close();
|
|
86
|
+
},
|
|
87
|
+
});
|
|
88
|
+
const output = input.pipeThrough(new DecompressionStream('deflate-raw'));
|
|
89
|
+
const reader = output.getReader();
|
|
90
|
+
const chunks = [];
|
|
91
|
+
let totalLength = 0;
|
|
92
|
+
|
|
93
|
+
try {
|
|
94
|
+
for (;;) {
|
|
95
|
+
const {done, value} = await reader.read();
|
|
96
|
+
if (done) {
|
|
97
|
+
break;
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
totalLength += value.length;
|
|
101
|
+
if (totalLength > maximumLength) {
|
|
102
|
+
await reader.cancel();
|
|
103
|
+
throw new Error(`ZIP entry decompressed data exceeds ${maximumLength} bytes`);
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
chunks.push(value);
|
|
107
|
+
}
|
|
108
|
+
} finally {
|
|
109
|
+
reader.releaseLock();
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
const uncompressedData = new Uint8Array(totalLength);
|
|
113
|
+
let offset = 0;
|
|
114
|
+
for (const chunk of chunks) {
|
|
115
|
+
uncompressedData.set(chunk, offset);
|
|
116
|
+
offset += chunk.length;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
return uncompressedData;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
// Little-endian 32-bit marker expected at the start of a ZIP data descriptor.
const zipDataDescriptorSignature = 0x08_07_4B_50;

// Number of bytes read for one data descriptor (signature included).
const zipDataDescriptorLengthInBytes = 16;

// Bytes kept back between scan windows so a descriptor split across two windows is still found.
const zipDataDescriptorOverlapLengthInBytes = zipDataDescriptorLengthInBytes - 1;
|
|
125
|
+
|
|
126
|
+
// Scan `buffer` for a ZIP data descriptor and return its offset, or -1 when none is found.
// A candidate at `offset` must start with the descriptor signature, and the 32-bit value 8 bytes in
// must equal `bytesConsumed + offset`, the number of entry bytes that precede the candidate.
function findZipDataDescriptorOffset(buffer, bytesConsumed) {
	// Offsets at which a full descriptor still fits; zero or negative for short buffers.
	const candidateCount = buffer.length - zipDataDescriptorLengthInBytes + 1;

	for (let offset = 0; offset < candidateCount; offset++) {
		const hasSignature = Token.UINT32_LE.get(buffer, offset) === zipDataDescriptorSignature;
		if (hasSignature && Token.UINT32_LE.get(buffer, offset + 8) === bytesConsumed + offset) {
			return offset;
		}
	}

	return -1;
}
|
|
143
|
+
|
|
144
|
+
// Copy `chunks` back-to-back, in order, into a new `Uint8Array` of `totalLength` bytes and return it.
function mergeByteChunks(chunks, totalLength) {
	const merged = new Uint8Array(totalLength);

	let writeOffset = 0;
	for (const bytes of chunks) {
		merged.set(bytes, writeOffset);
		writeOffset += bytes.length;
	}

	return merged;
}
|
|
155
|
+
|
|
156
|
+
// Consume a ZIP entry whose end is only marked by a trailing data descriptor.
// The tokenizer is peeked one `syncBuffer`-sized window at a time; bytes ahead of the descriptor are
// read (when `shouldBuffer`) or skipped, and the tokenizer is left positioned at the descriptor.
// Rejects once more than `maximumLength` entry bytes would be consumed.
// Resolves with the entry bytes when `shouldBuffer` is truthy, otherwise with `undefined`.
async function readZipDataDescriptorEntryWithLimit(zipHandler, {shouldBuffer, maximumLength = maximumZipEntrySizeInBytes} = {}) {
	const {syncBuffer} = zipHandler;
	const windowLength = syncBuffer.length;
	const chunks = [];
	let bytesConsumed = 0;
	let isDescriptorFound = false;

	while (!isDescriptorFound) {
		const peekedLength = await zipHandler.tokenizer.peekBuffer(syncBuffer, {mayBeLess: true});
		const descriptorOffset = findZipDataDescriptorOffset(syncBuffer.subarray(0, peekedLength), bytesConsumed);
		isDescriptorFound = descriptorOffset >= 0;

		let chunkLength;
		if (isDescriptorFound) {
			// Everything in front of the descriptor belongs to the entry.
			chunkLength = descriptorOffset;
		} else if (peekedLength === windowLength) {
			// Full window without a match: hold back a tail so a descriptor straddling two windows is still seen.
			chunkLength = peekedLength - Math.min(zipDataDescriptorOverlapLengthInBytes, peekedLength - 1);
		} else {
			// Short peek: nothing is held back.
			chunkLength = peekedLength;
		}

		if (chunkLength === 0) {
			break;
		}

		bytesConsumed += chunkLength;
		if (bytesConsumed > maximumLength) {
			throw new Error(`ZIP entry compressed data exceeds ${maximumLength} bytes`);
		}

		if (shouldBuffer) {
			const data = new Uint8Array(chunkLength);
			await zipHandler.tokenizer.readBuffer(data);
			chunks.push(data);
		} else {
			await zipHandler.tokenizer.ignore(chunkLength);
		}
	}

	return shouldBuffer ? mergeByteChunks(chunks, bytesConsumed) : undefined;
}
|
|
202
|
+
|
|
203
|
+
// Consume the data of one ZIP entry.
// - Entries flagged with a data descriptor and a zero compressed size are delegated to `readZipDataDescriptorEntryWithLimit`.
// - When `shouldBuffer` is falsy the data is skipped and `undefined` is returned.
// - Otherwise the compressed bytes are read into a new `Uint8Array`, after checking the declared size
//   against `getMaximumZipBufferedReadLength`.
async function readZipEntryData(zipHandler, zipHeader, {shouldBuffer} = {}) {
	const hasDeferredSize = zipHeader.dataDescriptor && zipHeader.compressedSize === 0;
	if (hasDeferredSize) {
		return readZipDataDescriptorEntryWithLimit(zipHandler, {shouldBuffer});
	}

	const {compressedSize} = zipHeader;

	if (!shouldBuffer) {
		await zipHandler.tokenizer.ignore(compressedSize);
		return;
	}

	const maximumLength = getMaximumZipBufferedReadLength(zipHandler.tokenizer);
	const isReadableSize = Number.isFinite(compressedSize)
		&& !(compressedSize < 0)
		&& !(compressedSize > maximumLength);

	if (!isReadableSize) {
		throw new Error(`ZIP entry compressed data exceeds ${maximumLength} bytes`);
	}

	const fileData = new Uint8Array(compressedSize);
	await zipHandler.tokenizer.readBuffer(fileData);
	return fileData;
}
|
|
229
|
+
|
|
230
|
+
// Override the default inflate to enforce decompression size limits, since @tokenizer/inflate does not expose a configuration hook for this.
// Method 0 hands `fileData` to `callback` untouched, method 8 inflates it under the entry size limit first,
// and any other method is rejected.
ZipHandler.prototype.inflate = async function (zipHeader, fileData, callback) {
	switch (zipHeader.compressedMethod) {
		case 0: {
			return callback(fileData);
		}

		case 8: {
			const uncompressedData = await decompressDeflateRawWithLimit(fileData, {maximumLength: maximumZipEntrySizeInBytes});
			return callback(uncompressedData);
		}

		default: {
			throw new Error(`Unsupported ZIP compression method: ${zipHeader.compressedMethod}`);
		}
	}
};
|
|
243
|
+
|
|
244
|
+
// Walk the archive's local file headers in order, asking `fileCallback` what to do with each entry.
// The callback's result may carry `handler` (receives the entry's inflated data) and `stop` (end the walk after this entry).
// Throws once more than `maximumZipEntryCount` entries have been seen.
ZipHandler.prototype.unzip = async function (fileCallback) {
	for (let zipEntryCount = 1; ; zipEntryCount++) {
		const zipHeader = await this.readLocalFileHeader();
		if (!zipHeader) {
			return;
		}

		if (zipEntryCount > maximumZipEntryCount) {
			throw new Error(`ZIP entry count exceeds ${maximumZipEntryCount}`);
		}

		const next = fileCallback(zipHeader);
		const shouldStop = Boolean(next.stop);

		await this.tokenizer.ignore(zipHeader.extraFieldLength);
		const fileData = await readZipEntryData(this, zipHeader, {
			shouldBuffer: Boolean(next.handler),
		});

		if (next.handler) {
			await this.inflate(zipHeader, fileData, next.handler);
		}

		if (zipHeader.dataDescriptor) {
			// The entry is followed by a data descriptor; read it and verify its signature.
			const dataDescriptor = new Uint8Array(zipDataDescriptorLengthInBytes);
			await this.tokenizer.readBuffer(dataDescriptor);
			if (Token.UINT32_LE.get(dataDescriptor, 0) !== zipDataDescriptorSignature) {
				throw new Error(`Expected data-descriptor-signature at position ${this.tokenizer.position - dataDescriptor.length}`);
			}
		}

		if (shouldStop) {
			return;
		}
	}
};
|
|
278
|
+
|
|
279
|
+
// Wrap `stream` in a new `ReadableStream` that emits at most `maximumBytes` bytes.
// The chunk that crosses the limit is truncated, the wrapper is closed, and the source is canceled.
// Canceling the wrapper cancels the source as well.
function createByteLimitedReadableStream(stream, maximumBytes) {
	const reader = stream.getReader();
	let emittedBytes = 0;
	// True once the source has reported completion or has been canceled; either way it must not be canceled again.
	let isSourceSettled = false;

	const cancelSource = async reason => {
		if (isSourceSettled) {
			return;
		}

		isSourceSettled = true;
		await reader.cancel(reason);
	};

	const closeAndCancel = async controller => {
		controller.close();
		await cancelSource();
	};

	return new ReadableStream({
		async pull(controller) {
			if (emittedBytes >= maximumBytes) {
				await closeAndCancel(controller);
				return;
			}

			const {done, value} = await reader.read();
			if (done || !value) {
				isSourceSettled = true;
				controller.close();
				return;
			}

			const remainingBytes = maximumBytes - emittedBytes;
			if (value.length <= remainingBytes) {
				controller.enqueue(value);
				emittedBytes += value.length;
				return;
			}

			// This chunk crosses the limit: emit only the part that still fits, then stop.
			controller.enqueue(value.subarray(0, remainingBytes));
			emittedBytes += remainingBytes;
			await closeAndCancel(controller);
		},
		async cancel(reason) {
			await cancelSource(reason);
		},
	});
}
|
|
17
332
|
|
|
18
333
|
export async function fileTypeFromStream(stream, options) {
|
|
19
334
|
return new FileTypeParser(options).fromStream(stream);
|
|
@@ -180,6 +495,195 @@ function _check(buffer, headers, options) {
|
|
|
180
495
|
return true;
|
|
181
496
|
}
|
|
182
497
|
|
|
498
|
+
// Turn a caller-supplied sample size into a usable integer.
// Accept odd caller input, but preserve valid caller-requested probe depth:
// non-finite input falls back to `reasonableDetectionSizeInBytes`; anything else is truncated and raised to at least 1.
export function normalizeSampleSize(sampleSize) {
	return Number.isFinite(sampleSize)
		? Math.max(1, Math.trunc(sampleSize))
		: reasonableDetectionSizeInBytes;
}
|
|
506
|
+
|
|
507
|
+
// Clamp the `mpegOffsetTolerance` option to an integer in `[0, maximumMpegOffsetTolerance]`.
// This value controls scan depth and therefore worst-case CPU work; non-finite input yields 0.
function normalizeMpegOffsetTolerance(mpegOffsetTolerance) {
	if (Number.isFinite(mpegOffsetTolerance)) {
		const cappedTolerance = Math.min(maximumMpegOffsetTolerance, Math.trunc(mpegOffsetTolerance));
		return Math.max(0, cappedTolerance);
	}

	return 0;
}
|
|
515
|
+
|
|
516
|
+
// Return `fileSize` (floored at 0) when it is a finite number, otherwise `Number.MAX_SAFE_INTEGER`.
function getKnownFileSizeOrMaximum(fileSize) {
	return Number.isFinite(fileSize)
		? Math.max(0, fileSize)
		: Number.MAX_SAFE_INTEGER;
}
|
|
523
|
+
|
|
524
|
+
// Report whether the tokenizer's `fileInfo.size` is unusable as a real size:
// either not a finite number, or equal to the `Number.MAX_SAFE_INTEGER` placeholder.
function hasUnknownFileSize(tokenizer) {
	const {size} = tokenizer.fileInfo;
	const isKnownSize = Number.isFinite(size) && size !== Number.MAX_SAFE_INTEGER;
	return !isKnownSize;
}
|
|
531
|
+
|
|
532
|
+
// True only when the file size is unknown and the tokenizer has advanced more than `maximumBytes` past `startOffset`.
function hasExceededUnknownSizeScanBudget(tokenizer, startOffset, maximumBytes) {
	if (!hasUnknownFileSize(tokenizer)) {
		return false;
	}

	const scannedBytes = tokenizer.position - startOffset;
	return scannedBytes > maximumBytes;
}
|
|
538
|
+
|
|
539
|
+
// Largest ZIP entry payload that may be buffered right now: the bytes left in the file when its size is finite,
// capped by `maximumZipBufferedReadSizeInBytes`.
function getMaximumZipBufferedReadLength(tokenizer) {
	const {size} = tokenizer.fileInfo;

	let remainingBytes = Number.MAX_SAFE_INTEGER;
	if (Number.isFinite(size)) {
		remainingBytes = Math.max(0, size - tokenizer.position);
	}

	return Math.min(remainingBytes, maximumZipBufferedReadSizeInBytes);
}
|
|
547
|
+
|
|
548
|
+
// Decide whether a failure raised while walking a ZIP archive is one of the recognised recoverable kinds
// rather than an unexpected error.
// Recoverable: end-of-stream, parser hard limits, and `Error`s whose message, message prefix, or `code`
// appears in the recoverable tables. Values that are not `Error` instances are never recoverable.
function isRecoverableZipError(error) {
	const isExpectedErrorType = error instanceof strtok3.EndOfStreamError
		|| error instanceof ParserHardLimitError;
	if (isExpectedErrorType) {
		return true;
	}

	if (!(error instanceof Error)) {
		return false;
	}

	return recoverableZipErrorMessages.has(error.message)
		|| recoverableZipErrorCodes.has(error.code)
		|| recoverableZipErrorMessagePrefixes.some(prefix => error.message.startsWith(prefix));
}
|
|
577
|
+
|
|
578
|
+
// An entry is readable for detection only when both its compressed and uncompressed sizes
// are finite, non-negative, and no larger than `maximumSize`.
function canReadZipEntryForDetection(zipHeader, maximumSize = maximumZipEntrySizeInBytes) {
	const isAcceptableSize = size => Number.isFinite(size)
		&& !(size < 0)
		&& !(size > maximumSize);

	return [zipHeader.compressedSize, zipHeader.uncompressedSize].every(size => isAcceptableSize(size));
}
|
|
592
|
+
|
|
593
|
+
// Build the mutable flag record used while scanning a ZIP for Open XML hints. Every flag starts out `false`.
function createOpenXmlZipDetectionState() {
	const flagNames = [
		// Progress of `[Content_Types].xml` handling.
		'hasContentTypesEntry',
		'hasParsedContentTypesEntry',
		'isParsingContentTypes',
		'hasUnparseableContentTypes',
		// Entry-name hints collected from filenames.
		'hasWordDirectory',
		'hasPresentationDirectory',
		'hasSpreadsheetDirectory',
		'hasThreeDimensionalModelEntry',
	];

	return Object.fromEntries(flagNames.map(flagName => [flagName, false]));
}
|
|
605
|
+
|
|
606
|
+
// Record on `openXmlState` which Open XML hints the entry `filename` provides.
// Flags are only ever switched on here, never cleared.
function updateOpenXmlZipDetectionStateFromFilename(openXmlState, filename) {
	const directoryFlags = [
		['word/', 'hasWordDirectory'],
		['ppt/', 'hasPresentationDirectory'],
		['xl/', 'hasSpreadsheetDirectory'],
	];

	for (const [directoryPrefix, flagName] of directoryFlags) {
		if (filename.startsWith(directoryPrefix)) {
			openXmlState[flagName] = true;
		}
	}

	const isThreeDimensionalModel = filename.startsWith('3D/') && filename.endsWith('.model');
	if (isThreeDimensionalModel) {
		openXmlState.hasThreeDimensionalModelEntry = true;
	}
}
|
|
626
|
+
|
|
627
|
+
// Guess an Open XML file type from the entry names seen so far, or return `undefined`.
// Only use the directory-name heuristic when [Content_Types].xml was present in the archive
// but its handler was skipped (not invoked, not currently running, and not already resolved).
// This avoids guessing from directory names when content-type parsing already gave a definitive answer or failed.
function getOpenXmlFileTypeFromZipEntries(openXmlState) {
	const canUseEntryNameHeuristic = openXmlState.hasContentTypesEntry
		&& !openXmlState.hasUnparseableContentTypes
		&& !openXmlState.isParsingContentTypes
		&& !openXmlState.hasParsedContentTypesEntry;

	if (!canUseEntryNameHeuristic) {
		return;
	}

	// Checked in order; the first hint that is set wins.
	const candidates = [
		['hasWordDirectory', 'docx', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
		['hasPresentationDirectory', 'pptx', 'application/vnd.openxmlformats-officedocument.presentationml.presentation'],
		['hasSpreadsheetDirectory', 'xlsx', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'],
		['hasThreeDimensionalModelEntry', '3mf', 'model/3mf'],
	];

	for (const [flagName, ext, mime] of candidates) {
		if (openXmlState[flagName]) {
			return {ext, mime};
		}
	}
}
|
|
668
|
+
|
|
669
|
+
// Pull the package MIME type out of the text of `[Content_Types].xml`.
// We only need the `ContentType="...main+xml"` value, so a small string scan is enough and avoids full XML parsing.
// Returns the text between the opening quote and `.main+xml"`; failing that, the 3MF model content type
// when declared; otherwise `undefined`.
function getOpenXmlMimeTypeFromContentTypesXml(xmlContent) {
	const suffixPosition = xmlContent.indexOf('.main+xml"');

	if (suffixPosition !== -1) {
		const valuePrefix = xmlContent.slice(0, suffixPosition);
		// When no quote precedes the suffix, `lastIndexOf` gives -1 and the whole prefix is returned on purpose.
		const openingQuotePosition = valuePrefix.lastIndexOf('"');
		return valuePrefix.slice(openingQuotePosition + 1);
	}

	const mimeType = 'application/vnd.ms-package.3dmanufacturing-3dmodel+xml';
	return xmlContent.includes(`ContentType="${mimeType}"`) ? mimeType : undefined;
}
|
|
686
|
+
|
|
183
687
|
// Detect the file type of the data behind `tokenizer` with a `FileTypeParser` created from `options`.
export async function fileTypeFromTokenizer(tokenizer, options) {
	const parser = new FileTypeParser(options);
	return parser.fromTokenizer(tokenizer);
}
|
|
@@ -190,25 +694,47 @@ export async function fileTypeStream(webStream, options) {
|
|
|
190
694
|
|
|
191
695
|
export class FileTypeParser {
|
|
192
696
|
constructor(options) {
|
|
697
|
+
const normalizedMpegOffsetTolerance = normalizeMpegOffsetTolerance(options?.mpegOffsetTolerance);
|
|
193
698
|
this.options = {
|
|
194
|
-
mpegOffsetTolerance: 0,
|
|
195
699
|
...options,
|
|
700
|
+
mpegOffsetTolerance: normalizedMpegOffsetTolerance,
|
|
196
701
|
};
|
|
197
702
|
|
|
198
|
-
this.detectors = [...(options
|
|
703
|
+
this.detectors = [...(this.options.customDetectors ?? []),
|
|
199
704
|
{id: 'core', detect: this.detectConfident},
|
|
200
705
|
{id: 'core.imprecise', detect: this.detectImprecise}];
|
|
201
706
|
this.tokenizerOptions = {
|
|
202
|
-
abortSignal: options
|
|
707
|
+
abortSignal: this.options.signal,
|
|
203
708
|
};
|
|
709
|
+
this.gzipProbeDepth = 0;
|
|
204
710
|
}
|
|
205
711
|
|
|
206
|
-
|
|
207
|
-
|
|
712
|
+
getTokenizerOptions() {
|
|
713
|
+
return {
|
|
714
|
+
...this.tokenizerOptions,
|
|
715
|
+
};
|
|
716
|
+
}
|
|
208
717
|
|
|
718
|
+
async fromTokenizer(tokenizer, detectionReentryCount = 0) {
|
|
719
|
+
this.detectionReentryCount = detectionReentryCount;
|
|
720
|
+
const initialPosition = tokenizer.position;
|
|
209
721
|
// Iterate through all file-type detectors
|
|
210
722
|
for (const detector of this.detectors) {
|
|
211
|
-
|
|
723
|
+
let fileType;
|
|
724
|
+
try {
|
|
725
|
+
fileType = await detector.detect(tokenizer);
|
|
726
|
+
} catch (error) {
|
|
727
|
+
if (error instanceof strtok3.EndOfStreamError) {
|
|
728
|
+
return;
|
|
729
|
+
}
|
|
730
|
+
|
|
731
|
+
if (error instanceof ParserHardLimitError) {
|
|
732
|
+
return;
|
|
733
|
+
}
|
|
734
|
+
|
|
735
|
+
throw error;
|
|
736
|
+
}
|
|
737
|
+
|
|
212
738
|
if (fileType) {
|
|
213
739
|
return fileType;
|
|
214
740
|
}
|
|
@@ -230,11 +756,11 @@ export class FileTypeParser {
|
|
|
230
756
|
return;
|
|
231
757
|
}
|
|
232
758
|
|
|
233
|
-
return this.fromTokenizer(strtok3.fromBuffer(buffer, this.
|
|
759
|
+
return this.fromTokenizer(strtok3.fromBuffer(buffer, this.getTokenizerOptions()));
|
|
234
760
|
}
|
|
235
761
|
|
|
236
762
|
async fromBlob(blob) {
|
|
237
|
-
const tokenizer = strtok3.fromBlob(blob, this.
|
|
763
|
+
const tokenizer = strtok3.fromBlob(blob, this.getTokenizerOptions());
|
|
238
764
|
try {
|
|
239
765
|
return await this.fromTokenizer(tokenizer);
|
|
240
766
|
} finally {
|
|
@@ -243,7 +769,7 @@ export class FileTypeParser {
|
|
|
243
769
|
}
|
|
244
770
|
|
|
245
771
|
async fromStream(stream) {
|
|
246
|
-
const tokenizer = strtok3.fromWebStream(stream, this.
|
|
772
|
+
const tokenizer = strtok3.fromWebStream(stream, this.getTokenizerOptions());
|
|
247
773
|
try {
|
|
248
774
|
return await this.fromTokenizer(tokenizer);
|
|
249
775
|
} finally {
|
|
@@ -252,7 +778,7 @@ export class FileTypeParser {
|
|
|
252
778
|
}
|
|
253
779
|
|
|
254
780
|
async toDetectionStream(stream, options) {
|
|
255
|
-
const
|
|
781
|
+
const sampleSize = normalizeSampleSize(options?.sampleSize ?? reasonableDetectionSizeInBytes);
|
|
256
782
|
let detectedFileType;
|
|
257
783
|
let firstChunk;
|
|
258
784
|
|
|
@@ -393,8 +919,13 @@ export class FileTypeParser {
|
|
|
393
919
|
// -- 3-byte signatures --
|
|
394
920
|
|
|
395
921
|
if (this.check([0xEF, 0xBB, 0xBF])) { // UTF-8-BOM
|
|
922
|
+
if (this.detectionReentryCount >= maximumDetectionReentryCount) {
|
|
923
|
+
return;
|
|
924
|
+
}
|
|
925
|
+
|
|
926
|
+
this.detectionReentryCount++;
|
|
396
927
|
// Strip off UTF-8-BOM
|
|
397
|
-
this.tokenizer.ignore(3);
|
|
928
|
+
await this.tokenizer.ignore(3);
|
|
398
929
|
return this.detectConfident(tokenizer);
|
|
399
930
|
}
|
|
400
931
|
|
|
@@ -413,28 +944,35 @@ export class FileTypeParser {
|
|
|
413
944
|
}
|
|
414
945
|
|
|
415
946
|
if (this.check([0x1F, 0x8B, 0x8])) {
|
|
416
|
-
|
|
947
|
+
if (this.gzipProbeDepth >= maximumNestedGzipProbeDepth) {
|
|
948
|
+
return {
|
|
949
|
+
ext: 'gz',
|
|
950
|
+
mime: 'application/gzip',
|
|
951
|
+
};
|
|
952
|
+
}
|
|
417
953
|
|
|
418
|
-
const
|
|
419
|
-
|
|
954
|
+
const gzipHandler = new GzipHandler(tokenizer);
|
|
955
|
+
const limitedInflatedStream = createByteLimitedReadableStream(gzipHandler.inflate(), maximumNestedGzipDetectionSizeInBytes);
|
|
956
|
+
let compressedFileType;
|
|
420
957
|
try {
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
958
|
+
this.gzipProbeDepth++;
|
|
959
|
+
compressedFileType = await this.fromStream(limitedInflatedStream);
|
|
960
|
+
} catch (error) {
|
|
961
|
+
if (error?.name === 'AbortError') {
|
|
962
|
+
throw error;
|
|
426
963
|
}
|
|
427
964
|
|
|
428
|
-
|
|
429
|
-
return {
|
|
430
|
-
ext: 'tar.gz',
|
|
431
|
-
mime: 'application/gzip',
|
|
432
|
-
};
|
|
433
|
-
}
|
|
965
|
+
// Decompression or inner-detection failures are expected for non-tar gzip files.
|
|
434
966
|
} finally {
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
967
|
+
this.gzipProbeDepth--;
|
|
968
|
+
}
|
|
969
|
+
|
|
970
|
+
// We only need enough inflated bytes to confidently decide whether this is tar.gz.
|
|
971
|
+
if (compressedFileType?.ext === 'tar') {
|
|
972
|
+
return {
|
|
973
|
+
ext: 'tar.gz',
|
|
974
|
+
mime: 'application/gzip',
|
|
975
|
+
};
|
|
438
976
|
}
|
|
439
977
|
|
|
440
978
|
return {
|
|
@@ -451,18 +989,54 @@ export class FileTypeParser {
|
|
|
451
989
|
}
|
|
452
990
|
|
|
453
991
|
if (this.checkString('ID3')) {
|
|
454
|
-
await tokenizer
|
|
992
|
+
await safeIgnore(tokenizer, 6, {
|
|
993
|
+
maximumLength: 6,
|
|
994
|
+
reason: 'ID3 header prefix',
|
|
995
|
+
}); // Skip ID3 header until the header size
|
|
455
996
|
const id3HeaderLength = await tokenizer.readToken(uint32SyncSafeToken);
|
|
997
|
+
const isUnknownFileSize = hasUnknownFileSize(tokenizer);
|
|
998
|
+
if (
|
|
999
|
+
!Number.isFinite(id3HeaderLength)
|
|
1000
|
+
|| id3HeaderLength < 0
|
|
1001
|
+
// Keep ID3 probing bounded for unknown-size streams to avoid attacker-controlled large skips.
|
|
1002
|
+
|| (
|
|
1003
|
+
isUnknownFileSize
|
|
1004
|
+
&& id3HeaderLength > maximumId3HeaderSizeInBytes
|
|
1005
|
+
)
|
|
1006
|
+
) {
|
|
1007
|
+
return;
|
|
1008
|
+
}
|
|
1009
|
+
|
|
456
1010
|
if (tokenizer.position + id3HeaderLength > tokenizer.fileInfo.size) {
|
|
457
|
-
|
|
1011
|
+
if (isUnknownFileSize) {
|
|
1012
|
+
return;
|
|
1013
|
+
}
|
|
1014
|
+
|
|
458
1015
|
return {
|
|
459
1016
|
ext: 'mp3',
|
|
460
1017
|
mime: 'audio/mpeg',
|
|
461
1018
|
};
|
|
462
1019
|
}
|
|
463
1020
|
|
|
464
|
-
|
|
465
|
-
|
|
1021
|
+
try {
|
|
1022
|
+
await safeIgnore(tokenizer, id3HeaderLength, {
|
|
1023
|
+
maximumLength: isUnknownFileSize ? maximumId3HeaderSizeInBytes : tokenizer.fileInfo.size,
|
|
1024
|
+
reason: 'ID3 payload',
|
|
1025
|
+
});
|
|
1026
|
+
} catch (error) {
|
|
1027
|
+
if (error instanceof strtok3.EndOfStreamError) {
|
|
1028
|
+
return;
|
|
1029
|
+
}
|
|
1030
|
+
|
|
1031
|
+
throw error;
|
|
1032
|
+
}
|
|
1033
|
+
|
|
1034
|
+
if (this.detectionReentryCount >= maximumDetectionReentryCount) {
|
|
1035
|
+
return;
|
|
1036
|
+
}
|
|
1037
|
+
|
|
1038
|
+
this.detectionReentryCount++;
|
|
1039
|
+
return this.fromTokenizer(tokenizer, this.detectionReentryCount); // Skip ID3 header, recursion
|
|
466
1040
|
}
|
|
467
1041
|
|
|
468
1042
|
// Musepack, SV7
|
|
@@ -547,72 +1121,104 @@ export class FileTypeParser {
|
|
|
547
1121
|
// Need to be before the `zip` check
|
|
548
1122
|
if (this.check([0x50, 0x4B, 0x3, 0x4])) { // Local file header signature
|
|
549
1123
|
let fileType;
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
mime: 'application/java-archive',
|
|
564
|
-
};
|
|
565
|
-
return {
|
|
566
|
-
stop: true,
|
|
567
|
-
};
|
|
568
|
-
case 'mimetype':
|
|
1124
|
+
const openXmlState = createOpenXmlZipDetectionState();
|
|
1125
|
+
|
|
1126
|
+
try {
|
|
1127
|
+
await new ZipHandler(tokenizer).unzip(zipHeader => {
|
|
1128
|
+
updateOpenXmlZipDetectionStateFromFilename(openXmlState, zipHeader.filename);
|
|
1129
|
+
|
|
1130
|
+
const isOpenXmlContentTypesEntry = zipHeader.filename === '[Content_Types].xml';
|
|
1131
|
+
const openXmlFileTypeFromEntries = getOpenXmlFileTypeFromZipEntries(openXmlState);
|
|
1132
|
+
if (
|
|
1133
|
+
!isOpenXmlContentTypesEntry
|
|
1134
|
+
&& openXmlFileTypeFromEntries
|
|
1135
|
+
) {
|
|
1136
|
+
fileType = openXmlFileTypeFromEntries;
|
|
569
1137
|
return {
|
|
570
|
-
async handler(fileData) {
|
|
571
|
-
// Use TextDecoder to decode the UTF-8 encoded data
|
|
572
|
-
const mimeType = new TextDecoder('utf-8').decode(fileData).trim();
|
|
573
|
-
fileType = getFileTypeFromMimeType(mimeType);
|
|
574
|
-
},
|
|
575
1138
|
stop: true,
|
|
576
1139
|
};
|
|
1140
|
+
}
|
|
577
1141
|
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
1142
|
+
switch (zipHeader.filename) {
|
|
1143
|
+
case 'META-INF/mozilla.rsa':
|
|
1144
|
+
fileType = {
|
|
1145
|
+
ext: 'xpi',
|
|
1146
|
+
mime: 'application/x-xpinstall',
|
|
1147
|
+
};
|
|
1148
|
+
return {
|
|
1149
|
+
stop: true,
|
|
1150
|
+
};
|
|
1151
|
+
case 'META-INF/MANIFEST.MF':
|
|
1152
|
+
fileType = {
|
|
1153
|
+
ext: 'jar',
|
|
1154
|
+
mime: 'application/java-archive',
|
|
1155
|
+
};
|
|
1156
|
+
return {
|
|
1157
|
+
stop: true,
|
|
1158
|
+
};
|
|
1159
|
+
case 'mimetype':
|
|
1160
|
+
if (!canReadZipEntryForDetection(zipHeader, maximumZipTextEntrySizeInBytes)) {
|
|
1161
|
+
return {};
|
|
1162
|
+
}
|
|
1163
|
+
|
|
1164
|
+
return {
|
|
1165
|
+
async handler(fileData) {
|
|
1166
|
+
// Use TextDecoder to decode the UTF-8 encoded data
|
|
1167
|
+
const mimeType = new TextDecoder('utf-8').decode(fileData).trim();
|
|
1168
|
+
fileType = getFileTypeFromMimeType(mimeType);
|
|
1169
|
+
},
|
|
1170
|
+
stop: true,
|
|
1171
|
+
};
|
|
1172
|
+
|
|
1173
|
+
case '[Content_Types].xml': {
|
|
1174
|
+
openXmlState.hasContentTypesEntry = true;
|
|
1175
|
+
|
|
1176
|
+
if (!canReadZipEntryForDetection(zipHeader, maximumZipTextEntrySizeInBytes)) {
|
|
1177
|
+
openXmlState.hasUnparseableContentTypes = true;
|
|
1178
|
+
return {};
|
|
1179
|
+
}
|
|
1180
|
+
|
|
1181
|
+
openXmlState.isParsingContentTypes = true;
|
|
1182
|
+
return {
|
|
1183
|
+
async handler(fileData) {
|
|
1184
|
+
// Use TextDecoder to decode the UTF-8 encoded data
|
|
1185
|
+
const xmlContent = new TextDecoder('utf-8').decode(fileData);
|
|
1186
|
+
const mimeType = getOpenXmlMimeTypeFromContentTypesXml(xmlContent);
|
|
1187
|
+
if (mimeType) {
|
|
587
1188
|
fileType = getFileTypeFromMimeType(mimeType);
|
|
588
1189
|
}
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
}
|
|
595
|
-
},
|
|
596
|
-
stop: true,
|
|
597
|
-
};
|
|
598
|
-
default:
|
|
599
|
-
if (/classes\d*\.dex/.test(zipHeader.filename)) {
|
|
600
|
-
fileType = {
|
|
601
|
-
ext: 'apk',
|
|
602
|
-
mime: 'application/vnd.android.package-archive',
|
|
1190
|
+
|
|
1191
|
+
openXmlState.hasParsedContentTypesEntry = true;
|
|
1192
|
+
openXmlState.isParsingContentTypes = false;
|
|
1193
|
+
},
|
|
1194
|
+
stop: true,
|
|
603
1195
|
};
|
|
604
|
-
return {stop: true};
|
|
605
1196
|
}
|
|
606
1197
|
|
|
607
|
-
|
|
1198
|
+
default:
|
|
1199
|
+
if (/classes\d*\.dex/.test(zipHeader.filename)) {
|
|
1200
|
+
fileType = {
|
|
1201
|
+
ext: 'apk',
|
|
1202
|
+
mime: 'application/vnd.android.package-archive',
|
|
1203
|
+
};
|
|
1204
|
+
return {stop: true};
|
|
1205
|
+
}
|
|
1206
|
+
|
|
1207
|
+
return {};
|
|
1208
|
+
}
|
|
1209
|
+
});
|
|
1210
|
+
} catch (error) {
|
|
1211
|
+
if (!isRecoverableZipError(error)) {
|
|
1212
|
+
throw error;
|
|
608
1213
|
}
|
|
609
|
-
|
|
610
|
-
if (
|
|
611
|
-
|
|
1214
|
+
|
|
1215
|
+
if (openXmlState.isParsingContentTypes) {
|
|
1216
|
+
openXmlState.isParsingContentTypes = false;
|
|
1217
|
+
openXmlState.hasUnparseableContentTypes = true;
|
|
612
1218
|
}
|
|
613
|
-
}
|
|
1219
|
+
}
|
|
614
1220
|
|
|
615
|
-
return fileType ?? {
|
|
1221
|
+
return fileType ?? getOpenXmlFileTypeFromZipEntries(openXmlState) ?? {
|
|
616
1222
|
ext: 'zip',
|
|
617
1223
|
mime: 'application/zip',
|
|
618
1224
|
};
|
|
@@ -817,7 +1423,10 @@ export class FileTypeParser {
|
|
|
817
1423
|
}
|
|
818
1424
|
|
|
819
1425
|
const id = new Uint8Array(ic + 1);
|
|
820
|
-
await tokenizer
|
|
1426
|
+
await safeReadBuffer(tokenizer, id, undefined, {
|
|
1427
|
+
maximumLength: id.length,
|
|
1428
|
+
reason: 'EBML field',
|
|
1429
|
+
});
|
|
821
1430
|
return id;
|
|
822
1431
|
}
|
|
823
1432
|
|
|
@@ -838,20 +1447,53 @@ export class FileTypeParser {
|
|
|
838
1447
|
}
|
|
839
1448
|
|
|
840
1449
|
async function readChildren(children) {
|
|
1450
|
+
let ebmlElementCount = 0;
|
|
841
1451
|
while (children > 0) {
|
|
1452
|
+
ebmlElementCount++;
|
|
1453
|
+
if (ebmlElementCount > maximumEbmlElementCount) {
|
|
1454
|
+
return;
|
|
1455
|
+
}
|
|
1456
|
+
|
|
1457
|
+
const previousPosition = tokenizer.position;
|
|
842
1458
|
const element = await readElement();
|
|
1459
|
+
|
|
843
1460
|
if (element.id === 0x42_82) {
|
|
844
|
-
|
|
1461
|
+
// `DocType` is a short string ("webm", "matroska", ...), reject implausible lengths to avoid large allocations.
|
|
1462
|
+
if (element.len > maximumEbmlDocumentTypeSizeInBytes) {
|
|
1463
|
+
return;
|
|
1464
|
+
}
|
|
1465
|
+
|
|
1466
|
+
const documentTypeLength = getSafeBound(element.len, maximumEbmlDocumentTypeSizeInBytes, 'EBML DocType');
|
|
1467
|
+
const rawValue = await tokenizer.readToken(new Token.StringType(documentTypeLength));
|
|
845
1468
|
return rawValue.replaceAll(/\00.*$/g, ''); // Return DocType
|
|
846
1469
|
}
|
|
847
1470
|
|
|
848
|
-
|
|
1471
|
+
if (
|
|
1472
|
+
hasUnknownFileSize(tokenizer)
|
|
1473
|
+
&& (
|
|
1474
|
+
!Number.isFinite(element.len)
|
|
1475
|
+
|| element.len < 0
|
|
1476
|
+
|| element.len > maximumEbmlElementPayloadSizeInBytes
|
|
1477
|
+
)
|
|
1478
|
+
) {
|
|
1479
|
+
return;
|
|
1480
|
+
}
|
|
1481
|
+
|
|
1482
|
+
await safeIgnore(tokenizer, element.len, {
|
|
1483
|
+
maximumLength: hasUnknownFileSize(tokenizer) ? maximumEbmlElementPayloadSizeInBytes : tokenizer.fileInfo.size,
|
|
1484
|
+
reason: 'EBML payload',
|
|
1485
|
+
}); // ignore payload
|
|
849
1486
|
--children;
|
|
1487
|
+
|
|
1488
|
+
// Safeguard against malformed files: bail if the position did not advance.
|
|
1489
|
+
if (tokenizer.position <= previousPosition) {
|
|
1490
|
+
return;
|
|
1491
|
+
}
|
|
850
1492
|
}
|
|
851
1493
|
}
|
|
852
1494
|
|
|
853
|
-
const
|
|
854
|
-
const documentType = await readChildren(
|
|
1495
|
+
const rootElement = await readElement();
|
|
1496
|
+
const documentType = await readChildren(rootElement.len);
|
|
855
1497
|
|
|
856
1498
|
switch (documentType) {
|
|
857
1499
|
case 'webm':
|
|
@@ -1203,6 +1845,16 @@ export class FileTypeParser {
|
|
|
1203
1845
|
// -- 8-byte signatures --
|
|
1204
1846
|
|
|
1205
1847
|
if (this.check([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A])) {
|
|
1848
|
+
const pngFileType = {
|
|
1849
|
+
ext: 'png',
|
|
1850
|
+
mime: 'image/png',
|
|
1851
|
+
};
|
|
1852
|
+
|
|
1853
|
+
const apngFileType = {
|
|
1854
|
+
ext: 'apng',
|
|
1855
|
+
mime: 'image/apng',
|
|
1856
|
+
};
|
|
1857
|
+
|
|
1206
1858
|
// APNG format (https://wiki.mozilla.org/APNG_Specification)
|
|
1207
1859
|
// 1. Find the first IDAT (image data) chunk (49 44 41 54)
|
|
1208
1860
|
// 2. Check if there is an "acTL" chunk before the IDAT one (61 63 54 4C)
|
|
@@ -1220,7 +1872,20 @@ export class FileTypeParser {
|
|
|
1220
1872
|
};
|
|
1221
1873
|
}
|
|
1222
1874
|
|
|
1875
|
+
const isUnknownPngStream = hasUnknownFileSize(tokenizer);
|
|
1876
|
+
const pngScanStart = tokenizer.position;
|
|
1877
|
+
let pngChunkCount = 0;
|
|
1223
1878
|
do {
|
|
1879
|
+
pngChunkCount++;
|
|
1880
|
+
if (pngChunkCount > maximumPngChunkCount) {
|
|
1881
|
+
break;
|
|
1882
|
+
}
|
|
1883
|
+
|
|
1884
|
+
if (hasExceededUnknownSizeScanBudget(tokenizer, pngScanStart, maximumPngChunkSizeInBytes)) {
|
|
1885
|
+
break;
|
|
1886
|
+
}
|
|
1887
|
+
|
|
1888
|
+
const previousPosition = tokenizer.position;
|
|
1224
1889
|
const chunk = await readChunkHeader();
|
|
1225
1890
|
if (chunk.length < 0) {
|
|
1226
1891
|
return; // Invalid chunk length
|
|
@@ -1228,24 +1893,45 @@ export class FileTypeParser {
|
|
|
1228
1893
|
|
|
1229
1894
|
switch (chunk.type) {
|
|
1230
1895
|
case 'IDAT':
|
|
1231
|
-
return
|
|
1232
|
-
ext: 'png',
|
|
1233
|
-
mime: 'image/png',
|
|
1234
|
-
};
|
|
1896
|
+
return pngFileType;
|
|
1235
1897
|
case 'acTL':
|
|
1236
|
-
return
|
|
1237
|
-
ext: 'apng',
|
|
1238
|
-
mime: 'image/apng',
|
|
1239
|
-
};
|
|
1898
|
+
return apngFileType;
|
|
1240
1899
|
default:
|
|
1241
|
-
|
|
1900
|
+
if (
|
|
1901
|
+
isUnknownPngStream
|
|
1902
|
+
&& chunk.length > maximumPngChunkSizeInBytes
|
|
1903
|
+
) {
|
|
1904
|
+
// Avoid huge attacker-controlled skips when probing unknown-size streams.
|
|
1905
|
+
return;
|
|
1906
|
+
}
|
|
1907
|
+
|
|
1908
|
+
try {
|
|
1909
|
+
await safeIgnore(tokenizer, chunk.length + 4, {
|
|
1910
|
+
maximumLength: isUnknownPngStream ? maximumPngChunkSizeInBytes + 4 : tokenizer.fileInfo.size,
|
|
1911
|
+
reason: 'PNG chunk payload',
|
|
1912
|
+
}); // Ignore chunk-data + CRC
|
|
1913
|
+
} catch (error) {
|
|
1914
|
+
if (
|
|
1915
|
+
!isUnknownPngStream
|
|
1916
|
+
&& (
|
|
1917
|
+
error instanceof ParserHardLimitError
|
|
1918
|
+
|| error instanceof strtok3.EndOfStreamError
|
|
1919
|
+
)
|
|
1920
|
+
) {
|
|
1921
|
+
return pngFileType;
|
|
1922
|
+
}
|
|
1923
|
+
|
|
1924
|
+
throw error;
|
|
1925
|
+
}
|
|
1926
|
+
}
|
|
1927
|
+
|
|
1928
|
+
// Safeguard against malformed files: bail if the position did not advance.
|
|
1929
|
+
if (tokenizer.position <= previousPosition) {
|
|
1930
|
+
break;
|
|
1242
1931
|
}
|
|
1243
1932
|
} while (tokenizer.position + 8 < tokenizer.fileInfo.size);
|
|
1244
1933
|
|
|
1245
|
-
return
|
|
1246
|
-
ext: 'png',
|
|
1247
|
-
mime: 'image/png',
|
|
1248
|
-
};
|
|
1934
|
+
return pngFileType;
|
|
1249
1935
|
}
|
|
1250
1936
|
|
|
1251
1937
|
if (this.check([0x41, 0x52, 0x52, 0x4F, 0x57, 0x31, 0x00, 0x00])) {
|
|
@@ -1403,45 +2089,101 @@ export class FileTypeParser {
|
|
|
1403
2089
|
|
|
1404
2090
|
// ASF_Header_Object first 80 bytes
|
|
1405
2091
|
if (this.check([0x30, 0x26, 0xB2, 0x75, 0x8E, 0x66, 0xCF, 0x11, 0xA6, 0xD9])) {
|
|
1406
|
-
|
|
1407
|
-
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
2092
|
+
let isMalformedAsf = false;
|
|
2093
|
+
try {
|
|
2094
|
+
async function readHeader() {
|
|
2095
|
+
const guid = new Uint8Array(16);
|
|
2096
|
+
await safeReadBuffer(tokenizer, guid, undefined, {
|
|
2097
|
+
maximumLength: guid.length,
|
|
2098
|
+
reason: 'ASF header GUID',
|
|
2099
|
+
});
|
|
2100
|
+
return {
|
|
2101
|
+
id: guid,
|
|
2102
|
+
size: Number(await tokenizer.readToken(Token.UINT64_LE)),
|
|
2103
|
+
};
|
|
2104
|
+
}
|
|
1414
2105
|
|
|
1415
|
-
|
|
1416
|
-
|
|
1417
|
-
|
|
1418
|
-
|
|
1419
|
-
|
|
1420
|
-
|
|
1421
|
-
|
|
1422
|
-
|
|
1423
|
-
|
|
2106
|
+
await safeIgnore(tokenizer, 30, {
|
|
2107
|
+
maximumLength: 30,
|
|
2108
|
+
reason: 'ASF header prelude',
|
|
2109
|
+
});
|
|
2110
|
+
const isUnknownFileSize = hasUnknownFileSize(tokenizer);
|
|
2111
|
+
const asfHeaderScanStart = tokenizer.position;
|
|
2112
|
+
let asfHeaderObjectCount = 0;
|
|
2113
|
+
while (tokenizer.position + 24 < tokenizer.fileInfo.size) {
|
|
2114
|
+
asfHeaderObjectCount++;
|
|
2115
|
+
if (asfHeaderObjectCount > maximumAsfHeaderObjectCount) {
|
|
2116
|
+
break;
|
|
2117
|
+
}
|
|
1424
2118
|
|
|
1425
|
-
if (
|
|
1426
|
-
|
|
1427
|
-
return {
|
|
1428
|
-
ext: 'asf',
|
|
1429
|
-
mime: 'audio/x-ms-asf',
|
|
1430
|
-
};
|
|
2119
|
+
if (hasExceededUnknownSizeScanBudget(tokenizer, asfHeaderScanStart, maximumUntrustedSkipSizeInBytes)) {
|
|
2120
|
+
break;
|
|
1431
2121
|
}
|
|
1432
2122
|
|
|
1433
|
-
|
|
1434
|
-
|
|
1435
|
-
|
|
1436
|
-
|
|
1437
|
-
|
|
1438
|
-
|
|
2123
|
+
const previousPosition = tokenizer.position;
|
|
2124
|
+
const header = await readHeader();
|
|
2125
|
+
let payload = header.size - 24;
|
|
2126
|
+
if (
|
|
2127
|
+
!Number.isFinite(payload)
|
|
2128
|
+
|| payload < 0
|
|
2129
|
+
) {
|
|
2130
|
+
isMalformedAsf = true;
|
|
2131
|
+
break;
|
|
1439
2132
|
}
|
|
1440
2133
|
|
|
1441
|
-
|
|
2134
|
+
if (_check(header.id, [0x91, 0x07, 0xDC, 0xB7, 0xB7, 0xA9, 0xCF, 0x11, 0x8E, 0xE6, 0x00, 0xC0, 0x0C, 0x20, 0x53, 0x65])) {
|
|
2135
|
+
// Sync on Stream-Properties-Object (B7DC0791-A9B7-11CF-8EE6-00C00C205365)
|
|
2136
|
+
const typeId = new Uint8Array(16);
|
|
2137
|
+
payload -= await safeReadBuffer(tokenizer, typeId, undefined, {
|
|
2138
|
+
maximumLength: typeId.length,
|
|
2139
|
+
reason: 'ASF stream type GUID',
|
|
2140
|
+
});
|
|
2141
|
+
|
|
2142
|
+
if (_check(typeId, [0x40, 0x9E, 0x69, 0xF8, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
|
|
2143
|
+
// Found audio:
|
|
2144
|
+
return {
|
|
2145
|
+
ext: 'asf',
|
|
2146
|
+
mime: 'audio/x-ms-asf',
|
|
2147
|
+
};
|
|
2148
|
+
}
|
|
2149
|
+
|
|
2150
|
+
if (_check(typeId, [0xC0, 0xEF, 0x19, 0xBC, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
|
|
2151
|
+
// Found video:
|
|
2152
|
+
return {
|
|
2153
|
+
ext: 'asf',
|
|
2154
|
+
mime: 'video/x-ms-asf',
|
|
2155
|
+
};
|
|
2156
|
+
}
|
|
2157
|
+
|
|
2158
|
+
break;
|
|
2159
|
+
}
|
|
2160
|
+
|
|
2161
|
+
await safeIgnore(tokenizer, payload, {
|
|
2162
|
+
maximumLength: isUnknownFileSize ? maximumUntrustedSkipSizeInBytes : tokenizer.fileInfo.size,
|
|
2163
|
+
reason: 'ASF header payload',
|
|
2164
|
+
});
|
|
2165
|
+
|
|
2166
|
+
// Safeguard against malformed files: break if the position did not advance.
|
|
2167
|
+
if (tokenizer.position <= previousPosition) {
|
|
2168
|
+
isMalformedAsf = true;
|
|
2169
|
+
break;
|
|
2170
|
+
}
|
|
1442
2171
|
}
|
|
2172
|
+
} catch (error) {
|
|
2173
|
+
if (
|
|
2174
|
+
error instanceof strtok3.EndOfStreamError
|
|
2175
|
+
|| error instanceof ParserHardLimitError
|
|
2176
|
+
) {
|
|
2177
|
+
if (hasUnknownFileSize(tokenizer)) {
|
|
2178
|
+
isMalformedAsf = true;
|
|
2179
|
+
}
|
|
2180
|
+
} else {
|
|
2181
|
+
throw error;
|
|
2182
|
+
}
|
|
2183
|
+
}
|
|
1443
2184
|
|
|
1444
|
-
|
|
2185
|
+
if (isMalformedAsf) {
|
|
2186
|
+
return;
|
|
1445
2187
|
}
|
|
1446
2188
|
|
|
1447
2189
|
// Default to ASF generic extension
|
|
@@ -1760,9 +2502,10 @@ export class FileTypeParser {
|
|
|
1760
2502
|
// Detections with limited supporting data, resulting in a higher likelihood of false positives
|
|
1761
2503
|
detectImprecise = async tokenizer => {
|
|
1762
2504
|
this.buffer = new Uint8Array(reasonableDetectionSizeInBytes);
|
|
2505
|
+
const fileSize = getKnownFileSizeOrMaximum(tokenizer.fileInfo.size);
|
|
1763
2506
|
|
|
1764
2507
|
// Read initial sample size of 8 bytes
|
|
1765
|
-
await tokenizer.peekBuffer(this.buffer, {length: Math.min(8,
|
|
2508
|
+
await tokenizer.peekBuffer(this.buffer, {length: Math.min(8, fileSize), mayBeLess: true});
|
|
1766
2509
|
|
|
1767
2510
|
if (
|
|
1768
2511
|
this.check([0x0, 0x0, 0x1, 0xBA])
|
|
@@ -1796,7 +2539,7 @@ export class FileTypeParser {
|
|
|
1796
2539
|
}
|
|
1797
2540
|
|
|
1798
2541
|
// Adjust buffer to `mpegOffsetTolerance`
|
|
1799
|
-
await tokenizer.peekBuffer(this.buffer, {length: Math.min(2 + this.options.mpegOffsetTolerance,
|
|
2542
|
+
await tokenizer.peekBuffer(this.buffer, {length: Math.min(2 + this.options.mpegOffsetTolerance, fileSize), mayBeLess: true});
|
|
1800
2543
|
|
|
1801
2544
|
// Check MPEG 1 or 2 Layer 3 header, or 'layer 0' for ADTS (MPEG sync-word 0xFFE)
|
|
1802
2545
|
if (this.buffer.length >= (2 + this.options.mpegOffsetTolerance)) {
|
|
@@ -1811,7 +2554,7 @@ export class FileTypeParser {
|
|
|
1811
2554
|
|
|
1812
2555
|
async readTiffTag(bigEndian) {
|
|
1813
2556
|
const tagId = await this.tokenizer.readToken(bigEndian ? Token.UINT16_BE : Token.UINT16_LE);
|
|
1814
|
-
this.tokenizer.ignore(10);
|
|
2557
|
+
await this.tokenizer.ignore(10);
|
|
1815
2558
|
switch (tagId) {
|
|
1816
2559
|
case 50_341:
|
|
1817
2560
|
return {
|
|
@@ -1829,6 +2572,17 @@ export class FileTypeParser {
|
|
|
1829
2572
|
|
|
1830
2573
|
async readTiffIFD(bigEndian) {
|
|
1831
2574
|
const numberOfTags = await this.tokenizer.readToken(bigEndian ? Token.UINT16_BE : Token.UINT16_LE);
|
|
2575
|
+
if (numberOfTags > maximumTiffTagCount) {
|
|
2576
|
+
return;
|
|
2577
|
+
}
|
|
2578
|
+
|
|
2579
|
+
if (
|
|
2580
|
+
hasUnknownFileSize(this.tokenizer)
|
|
2581
|
+
&& (2 + (numberOfTags * 12)) > maximumTiffIfdOffsetInBytes
|
|
2582
|
+
) {
|
|
2583
|
+
return;
|
|
2584
|
+
}
|
|
2585
|
+
|
|
1832
2586
|
for (let n = 0; n < numberOfTags; ++n) {
|
|
1833
2587
|
const fileType = await this.readTiffTag(bigEndian);
|
|
1834
2588
|
if (fileType) {
|
|
@@ -1838,6 +2592,11 @@ export class FileTypeParser {
|
|
|
1838
2592
|
}
|
|
1839
2593
|
|
|
1840
2594
|
async readTiffHeader(bigEndian) {
|
|
2595
|
+
const tiffFileType = {
|
|
2596
|
+
ext: 'tif',
|
|
2597
|
+
mime: 'image/tiff',
|
|
2598
|
+
};
|
|
2599
|
+
|
|
1841
2600
|
const version = (bigEndian ? Token.UINT16_BE : Token.UINT16_LE).get(this.buffer, 2);
|
|
1842
2601
|
const ifdOffset = (bigEndian ? Token.UINT32_BE : Token.UINT32_LE).get(this.buffer, 4);
|
|
1843
2602
|
|
|
@@ -1866,19 +2625,37 @@ export class FileTypeParser {
|
|
|
1866
2625
|
}
|
|
1867
2626
|
}
|
|
1868
2627
|
|
|
1869
|
-
|
|
1870
|
-
|
|
1871
|
-
|
|
1872
|
-
|
|
1873
|
-
|
|
1874
|
-
|
|
2628
|
+
const maximumTiffOffset = hasUnknownFileSize(this.tokenizer) ? maximumTiffIfdOffsetInBytes : this.tokenizer.fileInfo.size;
|
|
2629
|
+
|
|
2630
|
+
try {
|
|
2631
|
+
await safeIgnore(this.tokenizer, ifdOffset, {
|
|
2632
|
+
maximumLength: maximumTiffOffset,
|
|
2633
|
+
reason: 'TIFF IFD offset',
|
|
2634
|
+
});
|
|
2635
|
+
} catch (error) {
|
|
2636
|
+
if (error instanceof strtok3.EndOfStreamError) {
|
|
2637
|
+
return;
|
|
2638
|
+
}
|
|
2639
|
+
|
|
2640
|
+
throw error;
|
|
2641
|
+
}
|
|
2642
|
+
|
|
2643
|
+
let fileType;
|
|
2644
|
+
try {
|
|
2645
|
+
fileType = await this.readTiffIFD(bigEndian);
|
|
2646
|
+
} catch (error) {
|
|
2647
|
+
if (error instanceof strtok3.EndOfStreamError) {
|
|
2648
|
+
return;
|
|
2649
|
+
}
|
|
2650
|
+
|
|
2651
|
+
throw error;
|
|
2652
|
+
}
|
|
2653
|
+
|
|
2654
|
+
return fileType ?? tiffFileType;
|
|
1875
2655
|
}
|
|
1876
2656
|
|
|
1877
2657
|
if (version === 43) { // Big TIFF file header
|
|
1878
|
-
return
|
|
1879
|
-
ext: 'tif',
|
|
1880
|
-
mime: 'image/tiff',
|
|
1881
|
-
};
|
|
2658
|
+
return tiffFileType;
|
|
1882
2659
|
}
|
|
1883
2660
|
}
|
|
1884
2661
|
|