file-type 21.3.3 → 22.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +36 -53
- package/readme.md +35 -105
- package/source/detectors/asf.js +127 -0
- package/source/detectors/ebml.js +120 -0
- package/source/detectors/png.js +123 -0
- package/source/detectors/zip.js +643 -0
- package/{core.d.ts → source/index.d.ts} +49 -22
- package/{core.js → source/index.js} +242 -1122
- package/source/index.test-d.ts +53 -0
- package/source/parser.js +65 -0
- package/{supported.js → source/supported.js} +14 -6
- package/{util.js → source/tokens.js} +2 -2
- package/index.d.ts +0 -98
- package/index.js +0 -126
|
@@ -4,315 +4,91 @@ Primary entry point, Node.js specific entry point is index.js
|
|
|
4
4
|
|
|
5
5
|
import * as Token from 'token-types';
|
|
6
6
|
import * as strtok3 from 'strtok3/core';
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
7
|
+
import {GzipHandler} from '@tokenizer/inflate';
|
|
8
|
+
import {concatUint8Arrays} from 'uint8array-extras';
|
|
9
9
|
import {
|
|
10
10
|
stringToBytes,
|
|
11
11
|
tarHeaderChecksumMatches,
|
|
12
12
|
uint32SyncSafeToken,
|
|
13
|
-
} from './
|
|
13
|
+
} from './tokens.js';
|
|
14
14
|
import {extensions, mimeTypes} from './supported.js';
|
|
15
|
+
import {
|
|
16
|
+
maximumUntrustedSkipSizeInBytes,
|
|
17
|
+
ParserHardLimitError,
|
|
18
|
+
safeIgnore,
|
|
19
|
+
checkBytes,
|
|
20
|
+
hasUnknownFileSize,
|
|
21
|
+
} from './parser.js';
|
|
22
|
+
import {detectZip} from './detectors/zip.js';
|
|
23
|
+
import {detectEbml} from './detectors/ebml.js';
|
|
24
|
+
import {detectPng} from './detectors/png.js';
|
|
25
|
+
import {detectAsf} from './detectors/asf.js';
|
|
15
26
|
|
|
16
27
|
export const reasonableDetectionSizeInBytes = 4100; // A fair amount of file-types are detectable within this range.
|
|
17
|
-
// Keep defensive limits small enough to avoid accidental memory spikes from untrusted inputs.
|
|
18
28
|
const maximumMpegOffsetTolerance = reasonableDetectionSizeInBytes - 2;
|
|
19
|
-
const maximumZipEntrySizeInBytes = 1024 * 1024;
|
|
20
|
-
const maximumZipEntryCount = 1024;
|
|
21
|
-
const maximumZipBufferedReadSizeInBytes = (2 ** 31) - 1;
|
|
22
|
-
const maximumUntrustedSkipSizeInBytes = 16 * 1024 * 1024;
|
|
23
|
-
const maximumUnknownSizePayloadProbeSizeInBytes = maximumZipEntrySizeInBytes;
|
|
24
|
-
const maximumZipTextEntrySizeInBytes = maximumZipEntrySizeInBytes;
|
|
25
29
|
const maximumNestedGzipDetectionSizeInBytes = maximumUntrustedSkipSizeInBytes;
|
|
26
30
|
const maximumNestedGzipProbeDepth = 1;
|
|
31
|
+
const unknownSizeGzipProbeTimeoutInMilliseconds = 100;
|
|
27
32
|
const maximumId3HeaderSizeInBytes = maximumUntrustedSkipSizeInBytes;
|
|
28
|
-
const maximumEbmlDocumentTypeSizeInBytes = 64;
|
|
29
|
-
const maximumEbmlElementPayloadSizeInBytes = maximumUnknownSizePayloadProbeSizeInBytes;
|
|
30
|
-
const maximumEbmlElementCount = 256;
|
|
31
|
-
const maximumPngChunkCount = 512;
|
|
32
|
-
const maximumPngStreamScanBudgetInBytes = maximumUntrustedSkipSizeInBytes;
|
|
33
|
-
const maximumAsfHeaderObjectCount = 512;
|
|
34
33
|
const maximumTiffTagCount = 512;
|
|
35
34
|
const maximumDetectionReentryCount = 256;
|
|
36
|
-
const
|
|
37
|
-
const maximumAsfHeaderPayloadSizeInBytes = maximumUnknownSizePayloadProbeSizeInBytes;
|
|
38
|
-
const maximumTiffStreamIfdOffsetInBytes = maximumUnknownSizePayloadProbeSizeInBytes;
|
|
35
|
+
const maximumTiffStreamIfdOffsetInBytes = 1024 * 1024;
|
|
39
36
|
const maximumTiffIfdOffsetInBytes = maximumUntrustedSkipSizeInBytes;
|
|
40
|
-
const recoverableZipErrorMessages = new Set([
|
|
41
|
-
'Unexpected signature',
|
|
42
|
-
'Encrypted ZIP',
|
|
43
|
-
'Expected Central-File-Header signature',
|
|
44
|
-
]);
|
|
45
|
-
const recoverableZipErrorMessagePrefixes = [
|
|
46
|
-
'ZIP entry count exceeds ',
|
|
47
|
-
'Unsupported ZIP compression method:',
|
|
48
|
-
'ZIP entry compressed data exceeds ',
|
|
49
|
-
'ZIP entry decompressed data exceeds ',
|
|
50
|
-
];
|
|
51
|
-
const recoverableZipErrorCodes = new Set([
|
|
52
|
-
'Z_BUF_ERROR',
|
|
53
|
-
'Z_DATA_ERROR',
|
|
54
|
-
'ERR_INVALID_STATE',
|
|
55
|
-
]);
|
|
56
|
-
|
|
57
|
-
class ParserHardLimitError extends Error {}
|
|
58
|
-
|
|
59
|
-
function getSafeBound(value, maximum, reason) {
|
|
60
|
-
if (
|
|
61
|
-
!Number.isFinite(value)
|
|
62
|
-
|| value < 0
|
|
63
|
-
|| value > maximum
|
|
64
|
-
) {
|
|
65
|
-
throw new ParserHardLimitError(`${reason} has invalid size ${value} (maximum ${maximum} bytes)`);
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
return value;
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
async function safeIgnore(tokenizer, length, {maximumLength = maximumUntrustedSkipSizeInBytes, reason = 'skip'} = {}) {
|
|
72
|
-
const safeLength = getSafeBound(length, maximumLength, reason);
|
|
73
|
-
await tokenizer.ignore(safeLength);
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
async function safeReadBuffer(tokenizer, buffer, options, {maximumLength = buffer.length, reason = 'read'} = {}) {
|
|
77
|
-
const length = options?.length ?? buffer.length;
|
|
78
|
-
const safeLength = getSafeBound(length, maximumLength, reason);
|
|
79
|
-
return tokenizer.readBuffer(buffer, {
|
|
80
|
-
...options,
|
|
81
|
-
length: safeLength,
|
|
82
|
-
});
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
async function decompressDeflateRawWithLimit(data, {maximumLength = maximumZipEntrySizeInBytes} = {}) {
|
|
86
|
-
const input = new ReadableStream({
|
|
87
|
-
start(controller) {
|
|
88
|
-
controller.enqueue(data);
|
|
89
|
-
controller.close();
|
|
90
|
-
},
|
|
91
|
-
});
|
|
92
|
-
const output = input.pipeThrough(new DecompressionStream('deflate-raw'));
|
|
93
|
-
const reader = output.getReader();
|
|
94
|
-
const chunks = [];
|
|
95
|
-
let totalLength = 0;
|
|
96
|
-
|
|
97
|
-
try {
|
|
98
|
-
for (;;) {
|
|
99
|
-
const {done, value} = await reader.read();
|
|
100
|
-
if (done) {
|
|
101
|
-
break;
|
|
102
|
-
}
|
|
103
37
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
chunks.push(value);
|
|
111
|
-
}
|
|
112
|
-
} finally {
|
|
113
|
-
reader.releaseLock();
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
const uncompressedData = new Uint8Array(totalLength);
|
|
117
|
-
let offset = 0;
|
|
118
|
-
for (const chunk of chunks) {
|
|
119
|
-
uncompressedData.set(chunk, offset);
|
|
120
|
-
offset += chunk.length;
|
|
38
|
+
export function normalizeSampleSize(sampleSize) {
|
|
39
|
+
// `sampleSize` is an explicit caller-controlled tuning knob, not untrusted file input.
|
|
40
|
+
// Preserve valid caller-requested probe depth here; applications must bound attacker-derived option values themselves.
|
|
41
|
+
if (!Number.isFinite(sampleSize)) {
|
|
42
|
+
return reasonableDetectionSizeInBytes;
|
|
121
43
|
}
|
|
122
44
|
|
|
123
|
-
return
|
|
45
|
+
return Math.max(1, Math.trunc(sampleSize));
|
|
124
46
|
}
|
|
125
47
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
function findZipDataDescriptorOffset(buffer, bytesConsumed) {
|
|
131
|
-
if (buffer.length < zipDataDescriptorLengthInBytes) {
|
|
132
|
-
return -1;
|
|
133
|
-
}
|
|
134
|
-
|
|
135
|
-
const lastPossibleDescriptorOffset = buffer.length - zipDataDescriptorLengthInBytes;
|
|
136
|
-
for (let index = 0; index <= lastPossibleDescriptorOffset; index++) {
|
|
137
|
-
if (
|
|
138
|
-
Token.UINT32_LE.get(buffer, index) === zipDataDescriptorSignature
|
|
139
|
-
&& Token.UINT32_LE.get(buffer, index + 8) === bytesConsumed + index
|
|
140
|
-
) {
|
|
141
|
-
return index;
|
|
142
|
-
}
|
|
48
|
+
function normalizeMpegOffsetTolerance(mpegOffsetTolerance) {
|
|
49
|
+
// This value controls scan depth and therefore worst-case CPU work.
|
|
50
|
+
if (!Number.isFinite(mpegOffsetTolerance)) {
|
|
51
|
+
return 0;
|
|
143
52
|
}
|
|
144
53
|
|
|
145
|
-
return
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
function isPngAncillaryChunk(type) {
|
|
149
|
-
return (type.codePointAt(0) & 0x20) !== 0;
|
|
54
|
+
return Math.max(0, Math.min(maximumMpegOffsetTolerance, Math.trunc(mpegOffsetTolerance)));
|
|
150
55
|
}
|
|
151
56
|
|
|
152
|
-
function
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
for (const chunk of chunks) {
|
|
157
|
-
merged.set(chunk, offset);
|
|
158
|
-
offset += chunk.length;
|
|
57
|
+
function getKnownFileSizeOrMaximum(fileSize) {
|
|
58
|
+
if (!Number.isFinite(fileSize)) {
|
|
59
|
+
return Number.MAX_SAFE_INTEGER;
|
|
159
60
|
}
|
|
160
61
|
|
|
161
|
-
return
|
|
62
|
+
return Math.max(0, fileSize);
|
|
162
63
|
}
|
|
163
64
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
const length = await zipHandler.tokenizer.peekBuffer(syncBuffer, {mayBeLess: true});
|
|
172
|
-
const dataDescriptorOffset = findZipDataDescriptorOffset(syncBuffer.subarray(0, length), bytesConsumed);
|
|
173
|
-
const retainedLength = dataDescriptorOffset >= 0
|
|
174
|
-
? 0
|
|
175
|
-
: (
|
|
176
|
-
length === syncBufferLength
|
|
177
|
-
? Math.min(zipDataDescriptorOverlapLengthInBytes, length - 1)
|
|
178
|
-
: 0
|
|
179
|
-
);
|
|
180
|
-
const chunkLength = dataDescriptorOffset >= 0 ? dataDescriptorOffset : length - retainedLength;
|
|
181
|
-
|
|
182
|
-
if (chunkLength === 0) {
|
|
183
|
-
break;
|
|
184
|
-
}
|
|
185
|
-
|
|
186
|
-
bytesConsumed += chunkLength;
|
|
187
|
-
if (bytesConsumed > maximumLength) {
|
|
188
|
-
throw new Error(`ZIP entry compressed data exceeds ${maximumLength} bytes`);
|
|
189
|
-
}
|
|
190
|
-
|
|
191
|
-
if (shouldBuffer) {
|
|
192
|
-
const data = new Uint8Array(chunkLength);
|
|
193
|
-
await zipHandler.tokenizer.readBuffer(data);
|
|
194
|
-
chunks.push(data);
|
|
195
|
-
} else {
|
|
196
|
-
await zipHandler.tokenizer.ignore(chunkLength);
|
|
197
|
-
}
|
|
198
|
-
|
|
199
|
-
if (dataDescriptorOffset >= 0) {
|
|
200
|
-
break;
|
|
201
|
-
}
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
if (!hasUnknownFileSize(zipHandler.tokenizer)) {
|
|
205
|
-
zipHandler.knownSizeDescriptorScannedBytes += bytesConsumed;
|
|
206
|
-
}
|
|
207
|
-
|
|
208
|
-
if (!shouldBuffer) {
|
|
209
|
-
return;
|
|
210
|
-
}
|
|
211
|
-
|
|
212
|
-
return mergeByteChunks(chunks, bytesConsumed);
|
|
65
|
+
// Wrap stream in an identity TransformStream to avoid BYOB readers.
|
|
66
|
+
// Node.js has a bug where calling controller.close() inside a BYOB stream's
|
|
67
|
+
// pull() callback does not resolve pending reader.read() calls, causing
|
|
68
|
+
// permanent hangs on streams shorter than the requested read size.
|
|
69
|
+
// Using a default (non-BYOB) reader via TransformStream avoids this.
|
|
70
|
+
function toDefaultStream(stream) {
|
|
71
|
+
return stream.pipeThrough(new TransformStream());
|
|
213
72
|
}
|
|
214
73
|
|
|
215
|
-
function
|
|
216
|
-
if (
|
|
217
|
-
return
|
|
74
|
+
function readWithSignal(reader, signal) {
|
|
75
|
+
if (signal === undefined) {
|
|
76
|
+
return reader.read();
|
|
218
77
|
}
|
|
219
78
|
|
|
220
|
-
|
|
79
|
+
signal.throwIfAborted();
|
|
80
|
+
|
|
81
|
+
return Promise.race([
|
|
82
|
+
reader.read(),
|
|
83
|
+
new Promise((_resolve, reject) => {
|
|
84
|
+
signal.addEventListener('abort', () => {
|
|
85
|
+
reject(signal.reason);
|
|
86
|
+
reader.cancel(signal.reason).catch(() => {});
|
|
87
|
+
}, {once: true});
|
|
88
|
+
}),
|
|
89
|
+
]);
|
|
221
90
|
}
|
|
222
91
|
|
|
223
|
-
async function readZipEntryData(zipHandler, zipHeader, {shouldBuffer, maximumDescriptorLength = maximumZipEntrySizeInBytes} = {}) {
|
|
224
|
-
if (
|
|
225
|
-
zipHeader.dataDescriptor
|
|
226
|
-
&& zipHeader.compressedSize === 0
|
|
227
|
-
) {
|
|
228
|
-
return readZipDataDescriptorEntryWithLimit(zipHandler, {
|
|
229
|
-
shouldBuffer,
|
|
230
|
-
maximumLength: maximumDescriptorLength,
|
|
231
|
-
});
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
if (!shouldBuffer) {
|
|
235
|
-
await safeIgnore(zipHandler.tokenizer, zipHeader.compressedSize, {
|
|
236
|
-
maximumLength: hasUnknownFileSize(zipHandler.tokenizer) ? maximumZipEntrySizeInBytes : zipHandler.tokenizer.fileInfo.size,
|
|
237
|
-
reason: 'ZIP entry compressed data',
|
|
238
|
-
});
|
|
239
|
-
return;
|
|
240
|
-
}
|
|
241
|
-
|
|
242
|
-
const maximumLength = getMaximumZipBufferedReadLength(zipHandler.tokenizer);
|
|
243
|
-
if (
|
|
244
|
-
!Number.isFinite(zipHeader.compressedSize)
|
|
245
|
-
|| zipHeader.compressedSize < 0
|
|
246
|
-
|| zipHeader.compressedSize > maximumLength
|
|
247
|
-
) {
|
|
248
|
-
throw new Error(`ZIP entry compressed data exceeds ${maximumLength} bytes`);
|
|
249
|
-
}
|
|
250
|
-
|
|
251
|
-
const fileData = new Uint8Array(zipHeader.compressedSize);
|
|
252
|
-
await zipHandler.tokenizer.readBuffer(fileData);
|
|
253
|
-
return fileData;
|
|
254
|
-
}
|
|
255
|
-
|
|
256
|
-
// Override the default inflate to enforce decompression size limits, since @tokenizer/inflate does not expose a configuration hook for this.
|
|
257
|
-
ZipHandler.prototype.inflate = async function (zipHeader, fileData, callback) {
|
|
258
|
-
if (zipHeader.compressedMethod === 0) {
|
|
259
|
-
return callback(fileData);
|
|
260
|
-
}
|
|
261
|
-
|
|
262
|
-
if (zipHeader.compressedMethod !== 8) {
|
|
263
|
-
throw new Error(`Unsupported ZIP compression method: ${zipHeader.compressedMethod}`);
|
|
264
|
-
}
|
|
265
|
-
|
|
266
|
-
const uncompressedData = await decompressDeflateRawWithLimit(fileData, {maximumLength: maximumZipEntrySizeInBytes});
|
|
267
|
-
return callback(uncompressedData);
|
|
268
|
-
};
|
|
269
|
-
|
|
270
|
-
ZipHandler.prototype.unzip = async function (fileCallback) {
|
|
271
|
-
let stop = false;
|
|
272
|
-
let zipEntryCount = 0;
|
|
273
|
-
const zipScanStart = this.tokenizer.position;
|
|
274
|
-
this.knownSizeDescriptorScannedBytes = 0;
|
|
275
|
-
do {
|
|
276
|
-
if (hasExceededUnknownSizeScanBudget(this.tokenizer, zipScanStart, maximumUntrustedSkipSizeInBytes)) {
|
|
277
|
-
throw new ParserHardLimitError(`ZIP stream probing exceeds ${maximumUntrustedSkipSizeInBytes} bytes`);
|
|
278
|
-
}
|
|
279
|
-
|
|
280
|
-
const zipHeader = await this.readLocalFileHeader();
|
|
281
|
-
if (!zipHeader) {
|
|
282
|
-
break;
|
|
283
|
-
}
|
|
284
|
-
|
|
285
|
-
zipEntryCount++;
|
|
286
|
-
if (zipEntryCount > maximumZipEntryCount) {
|
|
287
|
-
throw new Error(`ZIP entry count exceeds ${maximumZipEntryCount}`);
|
|
288
|
-
}
|
|
289
|
-
|
|
290
|
-
const next = fileCallback(zipHeader);
|
|
291
|
-
stop = Boolean(next.stop);
|
|
292
|
-
await this.tokenizer.ignore(zipHeader.extraFieldLength);
|
|
293
|
-
const fileData = await readZipEntryData(this, zipHeader, {
|
|
294
|
-
shouldBuffer: Boolean(next.handler),
|
|
295
|
-
maximumDescriptorLength: Math.min(maximumZipEntrySizeInBytes, getRemainingZipScanBudget(this, zipScanStart)),
|
|
296
|
-
});
|
|
297
|
-
|
|
298
|
-
if (next.handler) {
|
|
299
|
-
await this.inflate(zipHeader, fileData, next.handler);
|
|
300
|
-
}
|
|
301
|
-
|
|
302
|
-
if (zipHeader.dataDescriptor) {
|
|
303
|
-
const dataDescriptor = new Uint8Array(zipDataDescriptorLengthInBytes);
|
|
304
|
-
await this.tokenizer.readBuffer(dataDescriptor);
|
|
305
|
-
if (Token.UINT32_LE.get(dataDescriptor, 0) !== zipDataDescriptorSignature) {
|
|
306
|
-
throw new Error(`Expected data-descriptor-signature at position ${this.tokenizer.position - dataDescriptor.length}`);
|
|
307
|
-
}
|
|
308
|
-
}
|
|
309
|
-
|
|
310
|
-
if (hasExceededUnknownSizeScanBudget(this.tokenizer, zipScanStart, maximumUntrustedSkipSizeInBytes)) {
|
|
311
|
-
throw new ParserHardLimitError(`ZIP stream probing exceeds ${maximumUntrustedSkipSizeInBytes} bytes`);
|
|
312
|
-
}
|
|
313
|
-
} while (!stop);
|
|
314
|
-
};
|
|
315
|
-
|
|
316
92
|
function createByteLimitedReadableStream(stream, maximumBytes) {
|
|
317
93
|
const reader = stream.getReader();
|
|
318
94
|
let emittedBytes = 0;
|
|
@@ -379,348 +155,6 @@ export async function fileTypeFromBlob(blob, options) {
|
|
|
379
155
|
return new FileTypeParser(options).fromBlob(blob);
|
|
380
156
|
}
|
|
381
157
|
|
|
382
|
-
function getFileTypeFromMimeType(mimeType) {
|
|
383
|
-
mimeType = mimeType.toLowerCase();
|
|
384
|
-
switch (mimeType) {
|
|
385
|
-
case 'application/epub+zip':
|
|
386
|
-
return {
|
|
387
|
-
ext: 'epub',
|
|
388
|
-
mime: mimeType,
|
|
389
|
-
};
|
|
390
|
-
case 'application/vnd.oasis.opendocument.text':
|
|
391
|
-
return {
|
|
392
|
-
ext: 'odt',
|
|
393
|
-
mime: mimeType,
|
|
394
|
-
};
|
|
395
|
-
case 'application/vnd.oasis.opendocument.text-template':
|
|
396
|
-
return {
|
|
397
|
-
ext: 'ott',
|
|
398
|
-
mime: mimeType,
|
|
399
|
-
};
|
|
400
|
-
case 'application/vnd.oasis.opendocument.spreadsheet':
|
|
401
|
-
return {
|
|
402
|
-
ext: 'ods',
|
|
403
|
-
mime: mimeType,
|
|
404
|
-
};
|
|
405
|
-
case 'application/vnd.oasis.opendocument.spreadsheet-template':
|
|
406
|
-
return {
|
|
407
|
-
ext: 'ots',
|
|
408
|
-
mime: mimeType,
|
|
409
|
-
};
|
|
410
|
-
case 'application/vnd.oasis.opendocument.presentation':
|
|
411
|
-
return {
|
|
412
|
-
ext: 'odp',
|
|
413
|
-
mime: mimeType,
|
|
414
|
-
};
|
|
415
|
-
case 'application/vnd.oasis.opendocument.presentation-template':
|
|
416
|
-
return {
|
|
417
|
-
ext: 'otp',
|
|
418
|
-
mime: mimeType,
|
|
419
|
-
};
|
|
420
|
-
case 'application/vnd.oasis.opendocument.graphics':
|
|
421
|
-
return {
|
|
422
|
-
ext: 'odg',
|
|
423
|
-
mime: mimeType,
|
|
424
|
-
};
|
|
425
|
-
case 'application/vnd.oasis.opendocument.graphics-template':
|
|
426
|
-
return {
|
|
427
|
-
ext: 'otg',
|
|
428
|
-
mime: mimeType,
|
|
429
|
-
};
|
|
430
|
-
case 'application/vnd.openxmlformats-officedocument.presentationml.slideshow':
|
|
431
|
-
return {
|
|
432
|
-
ext: 'ppsx',
|
|
433
|
-
mime: mimeType,
|
|
434
|
-
};
|
|
435
|
-
case 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':
|
|
436
|
-
return {
|
|
437
|
-
ext: 'xlsx',
|
|
438
|
-
mime: mimeType,
|
|
439
|
-
};
|
|
440
|
-
case 'application/vnd.ms-excel.sheet.macroenabled':
|
|
441
|
-
return {
|
|
442
|
-
ext: 'xlsm',
|
|
443
|
-
mime: 'application/vnd.ms-excel.sheet.macroenabled.12',
|
|
444
|
-
};
|
|
445
|
-
case 'application/vnd.openxmlformats-officedocument.spreadsheetml.template':
|
|
446
|
-
return {
|
|
447
|
-
ext: 'xltx',
|
|
448
|
-
mime: mimeType,
|
|
449
|
-
};
|
|
450
|
-
case 'application/vnd.ms-excel.template.macroenabled':
|
|
451
|
-
return {
|
|
452
|
-
ext: 'xltm',
|
|
453
|
-
mime: 'application/vnd.ms-excel.template.macroenabled.12',
|
|
454
|
-
};
|
|
455
|
-
case 'application/vnd.ms-powerpoint.slideshow.macroenabled':
|
|
456
|
-
return {
|
|
457
|
-
ext: 'ppsm',
|
|
458
|
-
mime: 'application/vnd.ms-powerpoint.slideshow.macroenabled.12',
|
|
459
|
-
};
|
|
460
|
-
case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
|
|
461
|
-
return {
|
|
462
|
-
ext: 'docx',
|
|
463
|
-
mime: mimeType,
|
|
464
|
-
};
|
|
465
|
-
case 'application/vnd.ms-word.document.macroenabled':
|
|
466
|
-
return {
|
|
467
|
-
ext: 'docm',
|
|
468
|
-
mime: 'application/vnd.ms-word.document.macroenabled.12',
|
|
469
|
-
};
|
|
470
|
-
case 'application/vnd.openxmlformats-officedocument.wordprocessingml.template':
|
|
471
|
-
return {
|
|
472
|
-
ext: 'dotx',
|
|
473
|
-
mime: mimeType,
|
|
474
|
-
};
|
|
475
|
-
case 'application/vnd.ms-word.template.macroenabledtemplate':
|
|
476
|
-
return {
|
|
477
|
-
ext: 'dotm',
|
|
478
|
-
mime: 'application/vnd.ms-word.template.macroenabled.12',
|
|
479
|
-
};
|
|
480
|
-
case 'application/vnd.openxmlformats-officedocument.presentationml.template':
|
|
481
|
-
return {
|
|
482
|
-
ext: 'potx',
|
|
483
|
-
mime: mimeType,
|
|
484
|
-
};
|
|
485
|
-
case 'application/vnd.ms-powerpoint.template.macroenabled':
|
|
486
|
-
return {
|
|
487
|
-
ext: 'potm',
|
|
488
|
-
mime: 'application/vnd.ms-powerpoint.template.macroenabled.12',
|
|
489
|
-
};
|
|
490
|
-
case 'application/vnd.openxmlformats-officedocument.presentationml.presentation':
|
|
491
|
-
return {
|
|
492
|
-
ext: 'pptx',
|
|
493
|
-
mime: mimeType,
|
|
494
|
-
};
|
|
495
|
-
case 'application/vnd.ms-powerpoint.presentation.macroenabled':
|
|
496
|
-
return {
|
|
497
|
-
ext: 'pptm',
|
|
498
|
-
mime: 'application/vnd.ms-powerpoint.presentation.macroenabled.12',
|
|
499
|
-
};
|
|
500
|
-
case 'application/vnd.ms-visio.drawing':
|
|
501
|
-
return {
|
|
502
|
-
ext: 'vsdx',
|
|
503
|
-
mime: 'application/vnd.visio',
|
|
504
|
-
};
|
|
505
|
-
case 'application/vnd.ms-package.3dmanufacturing-3dmodel+xml':
|
|
506
|
-
return {
|
|
507
|
-
ext: '3mf',
|
|
508
|
-
mime: 'model/3mf',
|
|
509
|
-
};
|
|
510
|
-
default:
|
|
511
|
-
}
|
|
512
|
-
}
|
|
513
|
-
|
|
514
|
-
function _check(buffer, headers, options) {
|
|
515
|
-
options = {
|
|
516
|
-
offset: 0,
|
|
517
|
-
...options,
|
|
518
|
-
};
|
|
519
|
-
|
|
520
|
-
for (const [index, header] of headers.entries()) {
|
|
521
|
-
// If a bitmask is set
|
|
522
|
-
if (options.mask) {
|
|
523
|
-
// If header doesn't equal `buf` with bits masked off
|
|
524
|
-
if (header !== (options.mask[index] & buffer[index + options.offset])) {
|
|
525
|
-
return false;
|
|
526
|
-
}
|
|
527
|
-
} else if (header !== buffer[index + options.offset]) {
|
|
528
|
-
return false;
|
|
529
|
-
}
|
|
530
|
-
}
|
|
531
|
-
|
|
532
|
-
return true;
|
|
533
|
-
}
|
|
534
|
-
|
|
535
|
-
export function normalizeSampleSize(sampleSize) {
|
|
536
|
-
// Accept odd caller input, but preserve valid caller-requested probe depth.
|
|
537
|
-
if (!Number.isFinite(sampleSize)) {
|
|
538
|
-
return reasonableDetectionSizeInBytes;
|
|
539
|
-
}
|
|
540
|
-
|
|
541
|
-
return Math.max(1, Math.trunc(sampleSize));
|
|
542
|
-
}
|
|
543
|
-
|
|
544
|
-
function normalizeMpegOffsetTolerance(mpegOffsetTolerance) {
|
|
545
|
-
// This value controls scan depth and therefore worst-case CPU work.
|
|
546
|
-
if (!Number.isFinite(mpegOffsetTolerance)) {
|
|
547
|
-
return 0;
|
|
548
|
-
}
|
|
549
|
-
|
|
550
|
-
return Math.max(0, Math.min(maximumMpegOffsetTolerance, Math.trunc(mpegOffsetTolerance)));
|
|
551
|
-
}
|
|
552
|
-
|
|
553
|
-
function getKnownFileSizeOrMaximum(fileSize) {
|
|
554
|
-
if (!Number.isFinite(fileSize)) {
|
|
555
|
-
return Number.MAX_SAFE_INTEGER;
|
|
556
|
-
}
|
|
557
|
-
|
|
558
|
-
return Math.max(0, fileSize);
|
|
559
|
-
}
|
|
560
|
-
|
|
561
|
-
function hasUnknownFileSize(tokenizer) {
|
|
562
|
-
const fileSize = tokenizer.fileInfo.size;
|
|
563
|
-
return (
|
|
564
|
-
!Number.isFinite(fileSize)
|
|
565
|
-
|| fileSize === Number.MAX_SAFE_INTEGER
|
|
566
|
-
);
|
|
567
|
-
}
|
|
568
|
-
|
|
569
|
-
function hasExceededUnknownSizeScanBudget(tokenizer, startOffset, maximumBytes) {
|
|
570
|
-
return (
|
|
571
|
-
hasUnknownFileSize(tokenizer)
|
|
572
|
-
&& tokenizer.position - startOffset > maximumBytes
|
|
573
|
-
);
|
|
574
|
-
}
|
|
575
|
-
|
|
576
|
-
function getMaximumZipBufferedReadLength(tokenizer) {
|
|
577
|
-
const fileSize = tokenizer.fileInfo.size;
|
|
578
|
-
const remainingBytes = Number.isFinite(fileSize)
|
|
579
|
-
? Math.max(0, fileSize - tokenizer.position)
|
|
580
|
-
: Number.MAX_SAFE_INTEGER;
|
|
581
|
-
|
|
582
|
-
return Math.min(remainingBytes, maximumZipBufferedReadSizeInBytes);
|
|
583
|
-
}
|
|
584
|
-
|
|
585
|
-
function isRecoverableZipError(error) {
|
|
586
|
-
if (error instanceof strtok3.EndOfStreamError) {
|
|
587
|
-
return true;
|
|
588
|
-
}
|
|
589
|
-
|
|
590
|
-
if (error instanceof ParserHardLimitError) {
|
|
591
|
-
return true;
|
|
592
|
-
}
|
|
593
|
-
|
|
594
|
-
if (!(error instanceof Error)) {
|
|
595
|
-
return false;
|
|
596
|
-
}
|
|
597
|
-
|
|
598
|
-
if (recoverableZipErrorMessages.has(error.message)) {
|
|
599
|
-
return true;
|
|
600
|
-
}
|
|
601
|
-
|
|
602
|
-
if (recoverableZipErrorCodes.has(error.code)) {
|
|
603
|
-
return true;
|
|
604
|
-
}
|
|
605
|
-
|
|
606
|
-
for (const prefix of recoverableZipErrorMessagePrefixes) {
|
|
607
|
-
if (error.message.startsWith(prefix)) {
|
|
608
|
-
return true;
|
|
609
|
-
}
|
|
610
|
-
}
|
|
611
|
-
|
|
612
|
-
return false;
|
|
613
|
-
}
|
|
614
|
-
|
|
615
|
-
function canReadZipEntryForDetection(zipHeader, maximumSize = maximumZipEntrySizeInBytes) {
|
|
616
|
-
const sizes = [zipHeader.compressedSize, zipHeader.uncompressedSize];
|
|
617
|
-
for (const size of sizes) {
|
|
618
|
-
if (
|
|
619
|
-
!Number.isFinite(size)
|
|
620
|
-
|| size < 0
|
|
621
|
-
|| size > maximumSize
|
|
622
|
-
) {
|
|
623
|
-
return false;
|
|
624
|
-
}
|
|
625
|
-
}
|
|
626
|
-
|
|
627
|
-
return true;
|
|
628
|
-
}
|
|
629
|
-
|
|
630
|
-
function createOpenXmlZipDetectionState() {
|
|
631
|
-
return {
|
|
632
|
-
hasContentTypesEntry: false,
|
|
633
|
-
hasParsedContentTypesEntry: false,
|
|
634
|
-
isParsingContentTypes: false,
|
|
635
|
-
hasUnparseableContentTypes: false,
|
|
636
|
-
hasWordDirectory: false,
|
|
637
|
-
hasPresentationDirectory: false,
|
|
638
|
-
hasSpreadsheetDirectory: false,
|
|
639
|
-
hasThreeDimensionalModelEntry: false,
|
|
640
|
-
};
|
|
641
|
-
}
|
|
642
|
-
|
|
643
|
-
function updateOpenXmlZipDetectionStateFromFilename(openXmlState, filename) {
|
|
644
|
-
if (filename.startsWith('word/')) {
|
|
645
|
-
openXmlState.hasWordDirectory = true;
|
|
646
|
-
}
|
|
647
|
-
|
|
648
|
-
if (filename.startsWith('ppt/')) {
|
|
649
|
-
openXmlState.hasPresentationDirectory = true;
|
|
650
|
-
}
|
|
651
|
-
|
|
652
|
-
if (filename.startsWith('xl/')) {
|
|
653
|
-
openXmlState.hasSpreadsheetDirectory = true;
|
|
654
|
-
}
|
|
655
|
-
|
|
656
|
-
if (
|
|
657
|
-
filename.startsWith('3D/')
|
|
658
|
-
&& filename.endsWith('.model')
|
|
659
|
-
) {
|
|
660
|
-
openXmlState.hasThreeDimensionalModelEntry = true;
|
|
661
|
-
}
|
|
662
|
-
}
|
|
663
|
-
|
|
664
|
-
function getOpenXmlFileTypeFromZipEntries(openXmlState) {
|
|
665
|
-
// Only use directory-name heuristic when [Content_Types].xml was present in the archive
|
|
666
|
-
// but its handler was skipped (not invoked, not currently running, and not already resolved).
|
|
667
|
-
// This avoids guessing from directory names when content-type parsing already gave a definitive answer or failed.
|
|
668
|
-
if (
|
|
669
|
-
!openXmlState.hasContentTypesEntry
|
|
670
|
-
|| openXmlState.hasUnparseableContentTypes
|
|
671
|
-
|| openXmlState.isParsingContentTypes
|
|
672
|
-
|| openXmlState.hasParsedContentTypesEntry
|
|
673
|
-
) {
|
|
674
|
-
return;
|
|
675
|
-
}
|
|
676
|
-
|
|
677
|
-
if (openXmlState.hasWordDirectory) {
|
|
678
|
-
return {
|
|
679
|
-
ext: 'docx',
|
|
680
|
-
mime: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
|
681
|
-
};
|
|
682
|
-
}
|
|
683
|
-
|
|
684
|
-
if (openXmlState.hasPresentationDirectory) {
|
|
685
|
-
return {
|
|
686
|
-
ext: 'pptx',
|
|
687
|
-
mime: 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
|
688
|
-
};
|
|
689
|
-
}
|
|
690
|
-
|
|
691
|
-
if (openXmlState.hasSpreadsheetDirectory) {
|
|
692
|
-
return {
|
|
693
|
-
ext: 'xlsx',
|
|
694
|
-
mime: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
|
695
|
-
};
|
|
696
|
-
}
|
|
697
|
-
|
|
698
|
-
if (openXmlState.hasThreeDimensionalModelEntry) {
|
|
699
|
-
return {
|
|
700
|
-
ext: '3mf',
|
|
701
|
-
mime: 'model/3mf',
|
|
702
|
-
};
|
|
703
|
-
}
|
|
704
|
-
}
|
|
705
|
-
|
|
706
|
-
function getOpenXmlMimeTypeFromContentTypesXml(xmlContent) {
|
|
707
|
-
// We only need the `ContentType="...main+xml"` value, so a small string scan is enough and avoids full XML parsing.
|
|
708
|
-
const endPosition = xmlContent.indexOf('.main+xml"');
|
|
709
|
-
if (endPosition === -1) {
|
|
710
|
-
const mimeType = 'application/vnd.ms-package.3dmanufacturing-3dmodel+xml';
|
|
711
|
-
if (xmlContent.includes(`ContentType="${mimeType}"`)) {
|
|
712
|
-
return mimeType;
|
|
713
|
-
}
|
|
714
|
-
|
|
715
|
-
return;
|
|
716
|
-
}
|
|
717
|
-
|
|
718
|
-
const truncatedContent = xmlContent.slice(0, endPosition);
|
|
719
|
-
const firstQuotePosition = truncatedContent.lastIndexOf('"');
|
|
720
|
-
// If no quote is found, `lastIndexOf` returns -1 and this intentionally falls back to the full truncated prefix.
|
|
721
|
-
return truncatedContent.slice(firstQuotePosition + 1);
|
|
722
|
-
}
|
|
723
|
-
|
|
724
158
|
export async function fileTypeFromTokenizer(tokenizer, options) {
|
|
725
159
|
return new FileTypeParser(options).fromTokenizer(tokenizer);
|
|
726
160
|
}
|
|
@@ -752,7 +186,11 @@ export class FileTypeParser {
|
|
|
752
186
|
};
|
|
753
187
|
}
|
|
754
188
|
|
|
755
|
-
|
|
189
|
+
createTokenizerFromWebStream(stream) {
|
|
190
|
+
return strtok3.fromWebStream(toDefaultStream(stream), this.getTokenizerOptions());
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
async parseTokenizer(tokenizer, detectionReentryCount = 0) {
|
|
756
194
|
this.detectionReentryCount = detectionReentryCount;
|
|
757
195
|
const initialPosition = tokenizer.position;
|
|
758
196
|
// Iterate through all file-type detectors
|
|
@@ -782,6 +220,14 @@ export class FileTypeParser {
|
|
|
782
220
|
}
|
|
783
221
|
}
|
|
784
222
|
|
|
223
|
+
async fromTokenizer(tokenizer) {
|
|
224
|
+
try {
|
|
225
|
+
return await this.parseTokenizer(tokenizer);
|
|
226
|
+
} finally {
|
|
227
|
+
await tokenizer.close();
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
|
|
785
231
|
async fromBuffer(input) {
|
|
786
232
|
if (!(input instanceof Uint8Array || input instanceof ArrayBuffer)) {
|
|
787
233
|
throw new TypeError(`Expected the \`input\` argument to be of type \`Uint8Array\` or \`ArrayBuffer\`, got \`${typeof input}\``);
|
|
@@ -797,58 +243,107 @@ export class FileTypeParser {
|
|
|
797
243
|
}
|
|
798
244
|
|
|
799
245
|
async fromBlob(blob) {
|
|
246
|
+
this.options.signal?.throwIfAborted();
|
|
800
247
|
const tokenizer = strtok3.fromBlob(blob, this.getTokenizerOptions());
|
|
801
|
-
|
|
802
|
-
return await this.fromTokenizer(tokenizer);
|
|
803
|
-
} finally {
|
|
804
|
-
await tokenizer.close();
|
|
805
|
-
}
|
|
248
|
+
return this.fromTokenizer(tokenizer);
|
|
806
249
|
}
|
|
807
250
|
|
|
808
251
|
async fromStream(stream) {
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
252
|
+
this.options.signal?.throwIfAborted();
|
|
253
|
+
const tokenizer = this.createTokenizerFromWebStream(stream);
|
|
254
|
+
return this.fromTokenizer(tokenizer);
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
async fromFile(path) {
|
|
258
|
+
this.options.signal?.throwIfAborted();
|
|
259
|
+
// TODO: Remove this when `strtok3.fromFile()` safely rejects non-regular filesystem objects without a pathname race.
|
|
260
|
+
const [{default: fsPromises}, {FileTokenizer}] = await Promise.all([
|
|
261
|
+
import('node:fs/promises'),
|
|
262
|
+
import('strtok3'),
|
|
263
|
+
]);
|
|
264
|
+
const fileHandle = await fsPromises.open(path, fsPromises.constants.O_RDONLY | fsPromises.constants.O_NONBLOCK);
|
|
265
|
+
const fileStat = await fileHandle.stat();
|
|
266
|
+
if (!fileStat.isFile()) {
|
|
267
|
+
await fileHandle.close();
|
|
268
|
+
return;
|
|
814
269
|
}
|
|
270
|
+
|
|
271
|
+
const tokenizer = new FileTokenizer(fileHandle, {
|
|
272
|
+
...this.getTokenizerOptions(),
|
|
273
|
+
fileInfo: {path, size: fileStat.size},
|
|
274
|
+
});
|
|
275
|
+
return this.fromTokenizer(tokenizer);
|
|
815
276
|
}
|
|
816
277
|
|
|
817
278
|
async toDetectionStream(stream, options) {
|
|
279
|
+
this.options.signal?.throwIfAborted();
|
|
818
280
|
const sampleSize = normalizeSampleSize(options?.sampleSize ?? reasonableDetectionSizeInBytes);
|
|
819
281
|
let detectedFileType;
|
|
820
|
-
let
|
|
282
|
+
let streamEnded = false;
|
|
821
283
|
|
|
822
|
-
const reader = stream.getReader(
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
const {value: chunk, done} = await reader.read(new Uint8Array(sampleSize));
|
|
826
|
-
firstChunk = chunk;
|
|
827
|
-
if (!done && chunk) {
|
|
828
|
-
try {
|
|
829
|
-
// Attempt to detect the file type from the chunk
|
|
830
|
-
detectedFileType = await this.fromBuffer(chunk.subarray(0, sampleSize));
|
|
831
|
-
} catch (error) {
|
|
832
|
-
if (!(error instanceof strtok3.EndOfStreamError)) {
|
|
833
|
-
throw error; // Re-throw non-EndOfStreamError
|
|
834
|
-
}
|
|
284
|
+
const reader = stream.getReader();
|
|
285
|
+
const chunks = [];
|
|
286
|
+
let totalSize = 0;
|
|
835
287
|
|
|
836
|
-
|
|
288
|
+
try {
|
|
289
|
+
while (totalSize < sampleSize) {
|
|
290
|
+
const {value, done} = await readWithSignal(reader, this.options.signal);
|
|
291
|
+
if (done || !value) {
|
|
292
|
+
streamEnded = true;
|
|
293
|
+
break;
|
|
837
294
|
}
|
|
295
|
+
|
|
296
|
+
chunks.push(value);
|
|
297
|
+
totalSize += value.length;
|
|
838
298
|
}
|
|
839
299
|
|
|
840
|
-
|
|
300
|
+
if (
|
|
301
|
+
!streamEnded
|
|
302
|
+
&& totalSize === sampleSize
|
|
303
|
+
) {
|
|
304
|
+
const {value, done} = await readWithSignal(reader, this.options.signal);
|
|
305
|
+
if (done || !value) {
|
|
306
|
+
streamEnded = true;
|
|
307
|
+
} else {
|
|
308
|
+
chunks.push(value);
|
|
309
|
+
totalSize += value.length;
|
|
310
|
+
}
|
|
311
|
+
}
|
|
841
312
|
} finally {
|
|
842
|
-
reader.releaseLock();
|
|
313
|
+
reader.releaseLock();
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
if (totalSize > 0) {
|
|
317
|
+
const sample = chunks.length === 1 ? chunks[0] : concatUint8Arrays(chunks);
|
|
318
|
+
try {
|
|
319
|
+
detectedFileType = await this.fromBuffer(sample.subarray(0, sampleSize));
|
|
320
|
+
} catch (error) {
|
|
321
|
+
if (!(error instanceof strtok3.EndOfStreamError)) {
|
|
322
|
+
throw error;
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
detectedFileType = undefined;
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
if (
|
|
329
|
+
!streamEnded
|
|
330
|
+
&& detectedFileType?.ext === 'pages'
|
|
331
|
+
) {
|
|
332
|
+
detectedFileType = {
|
|
333
|
+
ext: 'zip',
|
|
334
|
+
mime: 'application/zip',
|
|
335
|
+
};
|
|
336
|
+
}
|
|
843
337
|
}
|
|
844
338
|
|
|
845
|
-
//
|
|
339
|
+
// Prepend collected chunks and pipe the rest through
|
|
846
340
|
const transformStream = new TransformStream({
|
|
847
|
-
|
|
848
|
-
|
|
341
|
+
start(controller) {
|
|
342
|
+
for (const chunk of chunks) {
|
|
343
|
+
controller.enqueue(chunk);
|
|
344
|
+
}
|
|
849
345
|
},
|
|
850
346
|
transform(chunk, controller) {
|
|
851
|
-
// Pass through the chunks without modification
|
|
852
347
|
controller.enqueue(chunk);
|
|
853
348
|
},
|
|
854
349
|
});
|
|
@@ -859,8 +354,72 @@ export class FileTypeParser {
|
|
|
859
354
|
return newStream;
|
|
860
355
|
}
|
|
861
356
|
|
|
357
|
+
async detectGzip(tokenizer) {
|
|
358
|
+
if (this.gzipProbeDepth >= maximumNestedGzipProbeDepth) {
|
|
359
|
+
return {
|
|
360
|
+
ext: 'gz',
|
|
361
|
+
mime: 'application/gzip',
|
|
362
|
+
};
|
|
363
|
+
}
|
|
364
|
+
|
|
365
|
+
const gzipHandler = new GzipHandler(tokenizer);
|
|
366
|
+
const limitedInflatedStream = createByteLimitedReadableStream(gzipHandler.inflate(), maximumNestedGzipDetectionSizeInBytes);
|
|
367
|
+
const hasUnknownSize = hasUnknownFileSize(tokenizer);
|
|
368
|
+
let timeout;
|
|
369
|
+
let probeSignal;
|
|
370
|
+
let probeParser;
|
|
371
|
+
let compressedFileType;
|
|
372
|
+
|
|
373
|
+
if (hasUnknownSize) {
|
|
374
|
+
const timeoutController = new AbortController();
|
|
375
|
+
timeout = setTimeout(() => {
|
|
376
|
+
timeoutController.abort(new DOMException(`Operation timed out after ${unknownSizeGzipProbeTimeoutInMilliseconds} ms`, 'TimeoutError'));
|
|
377
|
+
}, unknownSizeGzipProbeTimeoutInMilliseconds);
|
|
378
|
+
probeSignal = this.options.signal === undefined
|
|
379
|
+
? timeoutController.signal
|
|
380
|
+
: AbortSignal.any([this.options.signal, timeoutController.signal]);
|
|
381
|
+
probeParser = new FileTypeParser({
|
|
382
|
+
...this.options,
|
|
383
|
+
signal: probeSignal,
|
|
384
|
+
});
|
|
385
|
+
probeParser.gzipProbeDepth = this.gzipProbeDepth + 1;
|
|
386
|
+
} else {
|
|
387
|
+
this.gzipProbeDepth++;
|
|
388
|
+
}
|
|
389
|
+
|
|
390
|
+
try {
|
|
391
|
+
compressedFileType = await (probeParser ?? this).fromStream(limitedInflatedStream);
|
|
392
|
+
} catch (error) {
|
|
393
|
+
if (
|
|
394
|
+
error?.name === 'AbortError'
|
|
395
|
+
&& probeSignal?.reason?.name !== 'TimeoutError'
|
|
396
|
+
) {
|
|
397
|
+
throw error;
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
// Timeout, decompression, or inner-detection failures are expected for non-tar gzip files.
|
|
401
|
+
} finally {
|
|
402
|
+
clearTimeout(timeout);
|
|
403
|
+
if (!hasUnknownSize) {
|
|
404
|
+
this.gzipProbeDepth--;
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
if (compressedFileType?.ext === 'tar') {
|
|
409
|
+
return {
|
|
410
|
+
ext: 'tar.gz',
|
|
411
|
+
mime: 'application/gzip',
|
|
412
|
+
};
|
|
413
|
+
}
|
|
414
|
+
|
|
415
|
+
return {
|
|
416
|
+
ext: 'gz',
|
|
417
|
+
mime: 'application/gzip',
|
|
418
|
+
};
|
|
419
|
+
}
|
|
420
|
+
|
|
862
421
|
check(header, options) {
|
|
863
|
-
return
|
|
422
|
+
return checkBytes(this.buffer, header, options);
|
|
864
423
|
}
|
|
865
424
|
|
|
866
425
|
checkString(header, options) {
|
|
@@ -878,6 +437,13 @@ export class FileTypeParser {
|
|
|
878
437
|
|
|
879
438
|
this.tokenizer = tokenizer;
|
|
880
439
|
|
|
440
|
+
if (hasUnknownFileSize(tokenizer)) {
|
|
441
|
+
await tokenizer.peekBuffer(this.buffer, {length: 3, mayBeLess: true});
|
|
442
|
+
if (this.check([0x1F, 0x8B, 0x8])) {
|
|
443
|
+
return this.detectGzip(tokenizer);
|
|
444
|
+
}
|
|
445
|
+
}
|
|
446
|
+
|
|
881
447
|
await tokenizer.peekBuffer(this.buffer, {length: 32, mayBeLess: true});
|
|
882
448
|
|
|
883
449
|
// -- 2-byte signatures --
|
|
@@ -981,41 +547,7 @@ export class FileTypeParser {
|
|
|
981
547
|
}
|
|
982
548
|
|
|
983
549
|
if (this.check([0x1F, 0x8B, 0x8])) {
|
|
984
|
-
|
|
985
|
-
return {
|
|
986
|
-
ext: 'gz',
|
|
987
|
-
mime: 'application/gzip',
|
|
988
|
-
};
|
|
989
|
-
}
|
|
990
|
-
|
|
991
|
-
const gzipHandler = new GzipHandler(tokenizer);
|
|
992
|
-
const limitedInflatedStream = createByteLimitedReadableStream(gzipHandler.inflate(), maximumNestedGzipDetectionSizeInBytes);
|
|
993
|
-
let compressedFileType;
|
|
994
|
-
try {
|
|
995
|
-
this.gzipProbeDepth++;
|
|
996
|
-
compressedFileType = await this.fromStream(limitedInflatedStream);
|
|
997
|
-
} catch (error) {
|
|
998
|
-
if (error?.name === 'AbortError') {
|
|
999
|
-
throw error;
|
|
1000
|
-
}
|
|
1001
|
-
|
|
1002
|
-
// Decompression or inner-detection failures are expected for non-tar gzip files.
|
|
1003
|
-
} finally {
|
|
1004
|
-
this.gzipProbeDepth--;
|
|
1005
|
-
}
|
|
1006
|
-
|
|
1007
|
-
// We only need enough inflated bytes to confidently decide whether this is tar.gz.
|
|
1008
|
-
if (compressedFileType?.ext === 'tar') {
|
|
1009
|
-
return {
|
|
1010
|
-
ext: 'tar.gz',
|
|
1011
|
-
mime: 'application/gzip',
|
|
1012
|
-
};
|
|
1013
|
-
}
|
|
1014
|
-
|
|
1015
|
-
return {
|
|
1016
|
-
ext: 'gz',
|
|
1017
|
-
mime: 'application/gzip',
|
|
1018
|
-
};
|
|
550
|
+
return this.detectGzip(tokenizer);
|
|
1019
551
|
}
|
|
1020
552
|
|
|
1021
553
|
if (this.check([0x42, 0x5A, 0x68])) {
|
|
@@ -1034,7 +566,7 @@ export class FileTypeParser {
|
|
|
1034
566
|
const isUnknownFileSize = hasUnknownFileSize(tokenizer);
|
|
1035
567
|
if (
|
|
1036
568
|
!Number.isFinite(id3HeaderLength)
|
|
1037
|
-
|
|
569
|
+
|| id3HeaderLength < 0
|
|
1038
570
|
// Keep ID3 probing bounded for unknown-size streams to avoid attacker-controlled large skips.
|
|
1039
571
|
|| (
|
|
1040
572
|
isUnknownFileSize
|
|
@@ -1076,7 +608,7 @@ export class FileTypeParser {
|
|
|
1076
608
|
}
|
|
1077
609
|
|
|
1078
610
|
this.detectionReentryCount++;
|
|
1079
|
-
return this.
|
|
611
|
+
return this.parseTokenizer(tokenizer, this.detectionReentryCount); // Skip ID3 header, recursion
|
|
1080
612
|
}
|
|
1081
613
|
|
|
1082
614
|
// Musepack, SV7
|
|
@@ -1160,108 +692,7 @@ export class FileTypeParser {
|
|
|
1160
692
|
// Zip-based file formats
|
|
1161
693
|
// Need to be before the `zip` check
|
|
1162
694
|
if (this.check([0x50, 0x4B, 0x3, 0x4])) { // Local file header signature
|
|
1163
|
-
|
|
1164
|
-
const openXmlState = createOpenXmlZipDetectionState();
|
|
1165
|
-
|
|
1166
|
-
try {
|
|
1167
|
-
await new ZipHandler(tokenizer).unzip(zipHeader => {
|
|
1168
|
-
updateOpenXmlZipDetectionStateFromFilename(openXmlState, zipHeader.filename);
|
|
1169
|
-
|
|
1170
|
-
const isOpenXmlContentTypesEntry = zipHeader.filename === '[Content_Types].xml';
|
|
1171
|
-
const openXmlFileTypeFromEntries = getOpenXmlFileTypeFromZipEntries(openXmlState);
|
|
1172
|
-
if (
|
|
1173
|
-
!isOpenXmlContentTypesEntry
|
|
1174
|
-
&& openXmlFileTypeFromEntries
|
|
1175
|
-
) {
|
|
1176
|
-
fileType = openXmlFileTypeFromEntries;
|
|
1177
|
-
return {
|
|
1178
|
-
stop: true,
|
|
1179
|
-
};
|
|
1180
|
-
}
|
|
1181
|
-
|
|
1182
|
-
switch (zipHeader.filename) {
|
|
1183
|
-
case 'META-INF/mozilla.rsa':
|
|
1184
|
-
fileType = {
|
|
1185
|
-
ext: 'xpi',
|
|
1186
|
-
mime: 'application/x-xpinstall',
|
|
1187
|
-
};
|
|
1188
|
-
return {
|
|
1189
|
-
stop: true,
|
|
1190
|
-
};
|
|
1191
|
-
case 'META-INF/MANIFEST.MF':
|
|
1192
|
-
fileType = {
|
|
1193
|
-
ext: 'jar',
|
|
1194
|
-
mime: 'application/java-archive',
|
|
1195
|
-
};
|
|
1196
|
-
return {
|
|
1197
|
-
stop: true,
|
|
1198
|
-
};
|
|
1199
|
-
case 'mimetype':
|
|
1200
|
-
if (!canReadZipEntryForDetection(zipHeader, maximumZipTextEntrySizeInBytes)) {
|
|
1201
|
-
return {};
|
|
1202
|
-
}
|
|
1203
|
-
|
|
1204
|
-
return {
|
|
1205
|
-
async handler(fileData) {
|
|
1206
|
-
// Use TextDecoder to decode the UTF-8 encoded data
|
|
1207
|
-
const mimeType = new TextDecoder('utf-8').decode(fileData).trim();
|
|
1208
|
-
fileType = getFileTypeFromMimeType(mimeType);
|
|
1209
|
-
},
|
|
1210
|
-
stop: true,
|
|
1211
|
-
};
|
|
1212
|
-
|
|
1213
|
-
case '[Content_Types].xml': {
|
|
1214
|
-
openXmlState.hasContentTypesEntry = true;
|
|
1215
|
-
|
|
1216
|
-
if (!canReadZipEntryForDetection(zipHeader, maximumZipTextEntrySizeInBytes)) {
|
|
1217
|
-
openXmlState.hasUnparseableContentTypes = true;
|
|
1218
|
-
return {};
|
|
1219
|
-
}
|
|
1220
|
-
|
|
1221
|
-
openXmlState.isParsingContentTypes = true;
|
|
1222
|
-
return {
|
|
1223
|
-
async handler(fileData) {
|
|
1224
|
-
// Use TextDecoder to decode the UTF-8 encoded data
|
|
1225
|
-
const xmlContent = new TextDecoder('utf-8').decode(fileData);
|
|
1226
|
-
const mimeType = getOpenXmlMimeTypeFromContentTypesXml(xmlContent);
|
|
1227
|
-
if (mimeType) {
|
|
1228
|
-
fileType = getFileTypeFromMimeType(mimeType);
|
|
1229
|
-
}
|
|
1230
|
-
|
|
1231
|
-
openXmlState.hasParsedContentTypesEntry = true;
|
|
1232
|
-
openXmlState.isParsingContentTypes = false;
|
|
1233
|
-
},
|
|
1234
|
-
stop: true,
|
|
1235
|
-
};
|
|
1236
|
-
}
|
|
1237
|
-
|
|
1238
|
-
default:
|
|
1239
|
-
if (/classes\d*\.dex/.test(zipHeader.filename)) {
|
|
1240
|
-
fileType = {
|
|
1241
|
-
ext: 'apk',
|
|
1242
|
-
mime: 'application/vnd.android.package-archive',
|
|
1243
|
-
};
|
|
1244
|
-
return {stop: true};
|
|
1245
|
-
}
|
|
1246
|
-
|
|
1247
|
-
return {};
|
|
1248
|
-
}
|
|
1249
|
-
});
|
|
1250
|
-
} catch (error) {
|
|
1251
|
-
if (!isRecoverableZipError(error)) {
|
|
1252
|
-
throw error;
|
|
1253
|
-
}
|
|
1254
|
-
|
|
1255
|
-
if (openXmlState.isParsingContentTypes) {
|
|
1256
|
-
openXmlState.isParsingContentTypes = false;
|
|
1257
|
-
openXmlState.hasUnparseableContentTypes = true;
|
|
1258
|
-
}
|
|
1259
|
-
}
|
|
1260
|
-
|
|
1261
|
-
return fileType ?? getOpenXmlFileTypeFromZipEntries(openXmlState) ?? {
|
|
1262
|
-
ext: 'zip',
|
|
1263
|
-
mime: 'application/zip',
|
|
1264
|
-
};
|
|
695
|
+
return detectZip(tokenizer);
|
|
1265
696
|
}
|
|
1266
697
|
|
|
1267
698
|
if (this.checkString('OggS')) {
|
|
@@ -1271,7 +702,7 @@ export class FileTypeParser {
|
|
|
1271
702
|
await tokenizer.readBuffer(type);
|
|
1272
703
|
|
|
1273
704
|
// Needs to be before `ogg` check
|
|
1274
|
-
if (
|
|
705
|
+
if (checkBytes(type, [0x4F, 0x70, 0x75, 0x73, 0x48, 0x65, 0x61, 0x64])) {
|
|
1275
706
|
return {
|
|
1276
707
|
ext: 'opus',
|
|
1277
708
|
mime: 'audio/ogg; codecs=opus',
|
|
@@ -1279,7 +710,7 @@ export class FileTypeParser {
|
|
|
1279
710
|
}
|
|
1280
711
|
|
|
1281
712
|
// If ' theora' in header.
|
|
1282
|
-
if (
|
|
713
|
+
if (checkBytes(type, [0x80, 0x74, 0x68, 0x65, 0x6F, 0x72, 0x61])) {
|
|
1283
714
|
return {
|
|
1284
715
|
ext: 'ogv',
|
|
1285
716
|
mime: 'video/ogg',
|
|
@@ -1287,7 +718,7 @@ export class FileTypeParser {
|
|
|
1287
718
|
}
|
|
1288
719
|
|
|
1289
720
|
// If '\x01video' in header.
|
|
1290
|
-
if (
|
|
721
|
+
if (checkBytes(type, [0x01, 0x76, 0x69, 0x64, 0x65, 0x6F, 0x00])) {
|
|
1291
722
|
return {
|
|
1292
723
|
ext: 'ogm',
|
|
1293
724
|
mime: 'video/ogg',
|
|
@@ -1295,7 +726,7 @@ export class FileTypeParser {
|
|
|
1295
726
|
}
|
|
1296
727
|
|
|
1297
728
|
// If ' FLAC' in header https://xiph.org/flac/faq.html
|
|
1298
|
-
if (
|
|
729
|
+
if (checkBytes(type, [0x7F, 0x46, 0x4C, 0x41, 0x43])) {
|
|
1299
730
|
return {
|
|
1300
731
|
ext: 'oga',
|
|
1301
732
|
mime: 'audio/ogg',
|
|
@@ -1303,7 +734,7 @@ export class FileTypeParser {
|
|
|
1303
734
|
}
|
|
1304
735
|
|
|
1305
736
|
// 'Speex ' in header https://en.wikipedia.org/wiki/Speex
|
|
1306
|
-
if (
|
|
737
|
+
if (checkBytes(type, [0x53, 0x70, 0x65, 0x65, 0x78, 0x20, 0x20])) {
|
|
1307
738
|
return {
|
|
1308
739
|
ext: 'spx',
|
|
1309
740
|
mime: 'audio/ogg',
|
|
@@ -1311,7 +742,7 @@ export class FileTypeParser {
|
|
|
1311
742
|
}
|
|
1312
743
|
|
|
1313
744
|
// If '\x01vorbis' in header
|
|
1314
|
-
if (
|
|
745
|
+
if (checkBytes(type, [0x01, 0x76, 0x6F, 0x72, 0x62, 0x69, 0x73])) {
|
|
1315
746
|
return {
|
|
1316
747
|
ext: 'ogg',
|
|
1317
748
|
mime: 'audio/ogg',
|
|
@@ -1387,7 +818,7 @@ export class FileTypeParser {
|
|
|
1387
818
|
if (this.checkString('LZIP')) {
|
|
1388
819
|
return {
|
|
1389
820
|
ext: 'lz',
|
|
1390
|
-
mime: 'application/
|
|
821
|
+
mime: 'application/lzip',
|
|
1391
822
|
};
|
|
1392
823
|
}
|
|
1393
824
|
|
|
@@ -1452,110 +883,7 @@ export class FileTypeParser {
|
|
|
1452
883
|
|
|
1453
884
|
// https://github.com/file/file/blob/master/magic/Magdir/matroska
|
|
1454
885
|
if (this.check([0x1A, 0x45, 0xDF, 0xA3])) { // Root element: EBML
|
|
1455
|
-
|
|
1456
|
-
const msb = await tokenizer.peekNumber(Token.UINT8);
|
|
1457
|
-
let mask = 0x80;
|
|
1458
|
-
let ic = 0; // 0 = A, 1 = B, 2 = C, 3 = D
|
|
1459
|
-
|
|
1460
|
-
while ((msb & mask) === 0 && mask !== 0) {
|
|
1461
|
-
++ic;
|
|
1462
|
-
mask >>= 1;
|
|
1463
|
-
}
|
|
1464
|
-
|
|
1465
|
-
const id = new Uint8Array(ic + 1);
|
|
1466
|
-
await safeReadBuffer(tokenizer, id, undefined, {
|
|
1467
|
-
maximumLength: id.length,
|
|
1468
|
-
reason: 'EBML field',
|
|
1469
|
-
});
|
|
1470
|
-
return id;
|
|
1471
|
-
}
|
|
1472
|
-
|
|
1473
|
-
async function readElement() {
|
|
1474
|
-
const idField = await readField();
|
|
1475
|
-
const lengthField = await readField();
|
|
1476
|
-
|
|
1477
|
-
lengthField[0] ^= 0x80 >> (lengthField.length - 1);
|
|
1478
|
-
const nrLength = Math.min(6, lengthField.length); // JavaScript can max read 6 bytes integer
|
|
1479
|
-
|
|
1480
|
-
const idView = new DataView(idField.buffer);
|
|
1481
|
-
const lengthView = new DataView(lengthField.buffer, lengthField.length - nrLength, nrLength);
|
|
1482
|
-
|
|
1483
|
-
return {
|
|
1484
|
-
id: getUintBE(idView),
|
|
1485
|
-
len: getUintBE(lengthView),
|
|
1486
|
-
};
|
|
1487
|
-
}
|
|
1488
|
-
|
|
1489
|
-
async function readChildren(children) {
|
|
1490
|
-
let ebmlElementCount = 0;
|
|
1491
|
-
while (children > 0) {
|
|
1492
|
-
ebmlElementCount++;
|
|
1493
|
-
if (ebmlElementCount > maximumEbmlElementCount) {
|
|
1494
|
-
return;
|
|
1495
|
-
}
|
|
1496
|
-
|
|
1497
|
-
if (hasExceededUnknownSizeScanBudget(tokenizer, ebmlScanStart, maximumUntrustedSkipSizeInBytes)) {
|
|
1498
|
-
return;
|
|
1499
|
-
}
|
|
1500
|
-
|
|
1501
|
-
const previousPosition = tokenizer.position;
|
|
1502
|
-
const element = await readElement();
|
|
1503
|
-
|
|
1504
|
-
if (element.id === 0x42_82) {
|
|
1505
|
-
// `DocType` is a short string ("webm", "matroska", ...), reject implausible lengths to avoid large allocations.
|
|
1506
|
-
if (element.len > maximumEbmlDocumentTypeSizeInBytes) {
|
|
1507
|
-
return;
|
|
1508
|
-
}
|
|
1509
|
-
|
|
1510
|
-
const documentTypeLength = getSafeBound(element.len, maximumEbmlDocumentTypeSizeInBytes, 'EBML DocType');
|
|
1511
|
-
const rawValue = await tokenizer.readToken(new Token.StringType(documentTypeLength));
|
|
1512
|
-
return rawValue.replaceAll(/\00.*$/g, ''); // Return DocType
|
|
1513
|
-
}
|
|
1514
|
-
|
|
1515
|
-
if (
|
|
1516
|
-
hasUnknownFileSize(tokenizer)
|
|
1517
|
-
&& (
|
|
1518
|
-
!Number.isFinite(element.len)
|
|
1519
|
-
|| element.len < 0
|
|
1520
|
-
|| element.len > maximumEbmlElementPayloadSizeInBytes
|
|
1521
|
-
)
|
|
1522
|
-
) {
|
|
1523
|
-
return;
|
|
1524
|
-
}
|
|
1525
|
-
|
|
1526
|
-
await safeIgnore(tokenizer, element.len, {
|
|
1527
|
-
maximumLength: hasUnknownFileSize(tokenizer) ? maximumEbmlElementPayloadSizeInBytes : tokenizer.fileInfo.size,
|
|
1528
|
-
reason: 'EBML payload',
|
|
1529
|
-
}); // ignore payload
|
|
1530
|
-
--children;
|
|
1531
|
-
|
|
1532
|
-
// Safeguard against malformed files: bail if the position did not advance.
|
|
1533
|
-
if (tokenizer.position <= previousPosition) {
|
|
1534
|
-
return;
|
|
1535
|
-
}
|
|
1536
|
-
}
|
|
1537
|
-
}
|
|
1538
|
-
|
|
1539
|
-
const rootElement = await readElement();
|
|
1540
|
-
const ebmlScanStart = tokenizer.position;
|
|
1541
|
-
const documentType = await readChildren(rootElement.len);
|
|
1542
|
-
|
|
1543
|
-
switch (documentType) {
|
|
1544
|
-
case 'webm':
|
|
1545
|
-
return {
|
|
1546
|
-
ext: 'webm',
|
|
1547
|
-
mime: 'video/webm',
|
|
1548
|
-
};
|
|
1549
|
-
|
|
1550
|
-
case 'matroska':
|
|
1551
|
-
return {
|
|
1552
|
-
ext: 'mkv',
|
|
1553
|
-
mime: 'video/matroska',
|
|
1554
|
-
};
|
|
1555
|
-
|
|
1556
|
-
default:
|
|
1557
|
-
return;
|
|
1558
|
-
}
|
|
886
|
+
return detectEbml(tokenizer);
|
|
1559
887
|
}
|
|
1560
888
|
|
|
1561
889
|
if (this.checkString('SQLi')) {
|
|
@@ -1653,7 +981,7 @@ export class FileTypeParser {
|
|
|
1653
981
|
if (this.check([0x04, 0x22, 0x4D, 0x18])) {
|
|
1654
982
|
return {
|
|
1655
983
|
ext: 'lz4',
|
|
1656
|
-
mime: 'application/x-lz4', //
|
|
984
|
+
mime: 'application/x-lz4', // Informal, used by freedesktop.org shared-mime-info
|
|
1657
985
|
};
|
|
1658
986
|
}
|
|
1659
987
|
|
|
@@ -1688,7 +1016,7 @@ export class FileTypeParser {
|
|
|
1688
1016
|
};
|
|
1689
1017
|
}
|
|
1690
1018
|
|
|
1691
|
-
if (this.checkString(
|
|
1019
|
+
if (this.checkString(String.raw`{\rtf`)) {
|
|
1692
1020
|
return {
|
|
1693
1021
|
ext: 'rtf',
|
|
1694
1022
|
mime: 'application/rtf',
|
|
@@ -1789,7 +1117,7 @@ export class FileTypeParser {
|
|
|
1789
1117
|
if (this.checkString('DRACO')) {
|
|
1790
1118
|
return {
|
|
1791
1119
|
ext: 'drc',
|
|
1792
|
-
mime: 'application/
|
|
1120
|
+
mime: 'application/x-ft-draco',
|
|
1793
1121
|
};
|
|
1794
1122
|
}
|
|
1795
1123
|
|
|
@@ -1835,7 +1163,7 @@ export class FileTypeParser {
|
|
|
1835
1163
|
|
|
1836
1164
|
if (this.checkString('AC')) {
|
|
1837
1165
|
const version = new Token.StringType(4, 'latin1').get(this.buffer, 2);
|
|
1838
|
-
if (
|
|
1166
|
+
if (/^\d+$/v.test(version) && version >= 1000 && version <= 1050) {
|
|
1839
1167
|
return {
|
|
1840
1168
|
ext: 'dwg',
|
|
1841
1169
|
mime: 'image/vnd.dwg',
|
|
@@ -1890,110 +1218,7 @@ export class FileTypeParser {
|
|
|
1890
1218
|
// -- 8-byte signatures --
|
|
1891
1219
|
|
|
1892
1220
|
if (this.check([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A])) {
|
|
1893
|
-
|
|
1894
|
-
ext: 'png',
|
|
1895
|
-
mime: 'image/png',
|
|
1896
|
-
};
|
|
1897
|
-
|
|
1898
|
-
const apngFileType = {
|
|
1899
|
-
ext: 'apng',
|
|
1900
|
-
mime: 'image/apng',
|
|
1901
|
-
};
|
|
1902
|
-
|
|
1903
|
-
// APNG format (https://wiki.mozilla.org/APNG_Specification)
|
|
1904
|
-
// 1. Find the first IDAT (image data) chunk (49 44 41 54)
|
|
1905
|
-
// 2. Check if there is an "acTL" chunk before the IDAT one (61 63 54 4C)
|
|
1906
|
-
|
|
1907
|
-
// Offset calculated as follows:
|
|
1908
|
-
// - 8 bytes: PNG signature
|
|
1909
|
-
// - 4 (length) + 4 (chunk type) + 13 (chunk data) + 4 (CRC): IHDR chunk
|
|
1910
|
-
|
|
1911
|
-
await tokenizer.ignore(8); // ignore PNG signature
|
|
1912
|
-
|
|
1913
|
-
async function readChunkHeader() {
|
|
1914
|
-
return {
|
|
1915
|
-
length: await tokenizer.readToken(Token.INT32_BE),
|
|
1916
|
-
type: await tokenizer.readToken(new Token.StringType(4, 'latin1')),
|
|
1917
|
-
};
|
|
1918
|
-
}
|
|
1919
|
-
|
|
1920
|
-
const isUnknownPngStream = hasUnknownFileSize(tokenizer);
|
|
1921
|
-
const pngScanStart = tokenizer.position;
|
|
1922
|
-
let pngChunkCount = 0;
|
|
1923
|
-
let hasSeenImageHeader = false;
|
|
1924
|
-
do {
|
|
1925
|
-
pngChunkCount++;
|
|
1926
|
-
if (pngChunkCount > maximumPngChunkCount) {
|
|
1927
|
-
break;
|
|
1928
|
-
}
|
|
1929
|
-
|
|
1930
|
-
if (hasExceededUnknownSizeScanBudget(tokenizer, pngScanStart, maximumPngStreamScanBudgetInBytes)) {
|
|
1931
|
-
break;
|
|
1932
|
-
}
|
|
1933
|
-
|
|
1934
|
-
const previousPosition = tokenizer.position;
|
|
1935
|
-
const chunk = await readChunkHeader();
|
|
1936
|
-
if (chunk.length < 0) {
|
|
1937
|
-
return; // Invalid chunk length
|
|
1938
|
-
}
|
|
1939
|
-
|
|
1940
|
-
if (chunk.type === 'IHDR') {
|
|
1941
|
-
// PNG requires the first real image header to be a 13-byte IHDR chunk.
|
|
1942
|
-
if (chunk.length !== 13) {
|
|
1943
|
-
return;
|
|
1944
|
-
}
|
|
1945
|
-
|
|
1946
|
-
hasSeenImageHeader = true;
|
|
1947
|
-
}
|
|
1948
|
-
|
|
1949
|
-
switch (chunk.type) {
|
|
1950
|
-
case 'IDAT':
|
|
1951
|
-
return pngFileType;
|
|
1952
|
-
case 'acTL':
|
|
1953
|
-
return apngFileType;
|
|
1954
|
-
default:
|
|
1955
|
-
if (
|
|
1956
|
-
!hasSeenImageHeader
|
|
1957
|
-
&& chunk.type !== 'CgBI'
|
|
1958
|
-
) {
|
|
1959
|
-
return;
|
|
1960
|
-
}
|
|
1961
|
-
|
|
1962
|
-
if (
|
|
1963
|
-
isUnknownPngStream
|
|
1964
|
-
&& chunk.length > maximumPngChunkSizeInBytes
|
|
1965
|
-
) {
|
|
1966
|
-
// Avoid huge attacker-controlled skips when probing unknown-size streams.
|
|
1967
|
-
return hasSeenImageHeader && isPngAncillaryChunk(chunk.type) ? pngFileType : undefined;
|
|
1968
|
-
}
|
|
1969
|
-
|
|
1970
|
-
try {
|
|
1971
|
-
await safeIgnore(tokenizer, chunk.length + 4, {
|
|
1972
|
-
maximumLength: isUnknownPngStream ? maximumPngChunkSizeInBytes + 4 : tokenizer.fileInfo.size,
|
|
1973
|
-
reason: 'PNG chunk payload',
|
|
1974
|
-
}); // Ignore chunk-data + CRC
|
|
1975
|
-
} catch (error) {
|
|
1976
|
-
if (
|
|
1977
|
-
!isUnknownPngStream
|
|
1978
|
-
&& (
|
|
1979
|
-
error instanceof ParserHardLimitError
|
|
1980
|
-
|| error instanceof strtok3.EndOfStreamError
|
|
1981
|
-
)
|
|
1982
|
-
) {
|
|
1983
|
-
return pngFileType;
|
|
1984
|
-
}
|
|
1985
|
-
|
|
1986
|
-
throw error;
|
|
1987
|
-
}
|
|
1988
|
-
}
|
|
1989
|
-
|
|
1990
|
-
// Safeguard against malformed files: bail if the position did not advance.
|
|
1991
|
-
if (tokenizer.position <= previousPosition) {
|
|
1992
|
-
break;
|
|
1993
|
-
}
|
|
1994
|
-
} while (tokenizer.position + 8 < tokenizer.fileInfo.size);
|
|
1995
|
-
|
|
1996
|
-
return pngFileType;
|
|
1221
|
+
return detectPng(tokenizer);
|
|
1997
1222
|
}
|
|
1998
1223
|
|
|
1999
1224
|
if (this.check([0x41, 0x52, 0x52, 0x4F, 0x57, 0x31, 0x00, 0x00])) {
|
|
@@ -2151,116 +1376,7 @@ export class FileTypeParser {
|
|
|
2151
1376
|
|
|
2152
1377
|
// ASF_Header_Object first 80 bytes
|
|
2153
1378
|
if (this.check([0x30, 0x26, 0xB2, 0x75, 0x8E, 0x66, 0xCF, 0x11, 0xA6, 0xD9])) {
|
|
2154
|
-
|
|
2155
|
-
try {
|
|
2156
|
-
async function readHeader() {
|
|
2157
|
-
const guid = new Uint8Array(16);
|
|
2158
|
-
await safeReadBuffer(tokenizer, guid, undefined, {
|
|
2159
|
-
maximumLength: guid.length,
|
|
2160
|
-
reason: 'ASF header GUID',
|
|
2161
|
-
});
|
|
2162
|
-
return {
|
|
2163
|
-
id: guid,
|
|
2164
|
-
size: Number(await tokenizer.readToken(Token.UINT64_LE)),
|
|
2165
|
-
};
|
|
2166
|
-
}
|
|
2167
|
-
|
|
2168
|
-
await safeIgnore(tokenizer, 30, {
|
|
2169
|
-
maximumLength: 30,
|
|
2170
|
-
reason: 'ASF header prelude',
|
|
2171
|
-
});
|
|
2172
|
-
const isUnknownFileSize = hasUnknownFileSize(tokenizer);
|
|
2173
|
-
const asfHeaderScanStart = tokenizer.position;
|
|
2174
|
-
let asfHeaderObjectCount = 0;
|
|
2175
|
-
while (tokenizer.position + 24 < tokenizer.fileInfo.size) {
|
|
2176
|
-
asfHeaderObjectCount++;
|
|
2177
|
-
if (asfHeaderObjectCount > maximumAsfHeaderObjectCount) {
|
|
2178
|
-
break;
|
|
2179
|
-
}
|
|
2180
|
-
|
|
2181
|
-
if (hasExceededUnknownSizeScanBudget(tokenizer, asfHeaderScanStart, maximumUntrustedSkipSizeInBytes)) {
|
|
2182
|
-
break;
|
|
2183
|
-
}
|
|
2184
|
-
|
|
2185
|
-
const previousPosition = tokenizer.position;
|
|
2186
|
-
const header = await readHeader();
|
|
2187
|
-
let payload = header.size - 24;
|
|
2188
|
-
if (
|
|
2189
|
-
!Number.isFinite(payload)
|
|
2190
|
-
|| payload < 0
|
|
2191
|
-
) {
|
|
2192
|
-
isMalformedAsf = true;
|
|
2193
|
-
break;
|
|
2194
|
-
}
|
|
2195
|
-
|
|
2196
|
-
if (_check(header.id, [0x91, 0x07, 0xDC, 0xB7, 0xB7, 0xA9, 0xCF, 0x11, 0x8E, 0xE6, 0x00, 0xC0, 0x0C, 0x20, 0x53, 0x65])) {
|
|
2197
|
-
// Sync on Stream-Properties-Object (B7DC0791-A9B7-11CF-8EE6-00C00C205365)
|
|
2198
|
-
const typeId = new Uint8Array(16);
|
|
2199
|
-
payload -= await safeReadBuffer(tokenizer, typeId, undefined, {
|
|
2200
|
-
maximumLength: typeId.length,
|
|
2201
|
-
reason: 'ASF stream type GUID',
|
|
2202
|
-
});
|
|
2203
|
-
|
|
2204
|
-
if (_check(typeId, [0x40, 0x9E, 0x69, 0xF8, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
|
|
2205
|
-
// Found audio:
|
|
2206
|
-
return {
|
|
2207
|
-
ext: 'asf',
|
|
2208
|
-
mime: 'audio/x-ms-asf',
|
|
2209
|
-
};
|
|
2210
|
-
}
|
|
2211
|
-
|
|
2212
|
-
if (_check(typeId, [0xC0, 0xEF, 0x19, 0xBC, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
|
|
2213
|
-
// Found video:
|
|
2214
|
-
return {
|
|
2215
|
-
ext: 'asf',
|
|
2216
|
-
mime: 'video/x-ms-asf',
|
|
2217
|
-
};
|
|
2218
|
-
}
|
|
2219
|
-
|
|
2220
|
-
break;
|
|
2221
|
-
}
|
|
2222
|
-
|
|
2223
|
-
if (
|
|
2224
|
-
isUnknownFileSize
|
|
2225
|
-
&& payload > maximumAsfHeaderPayloadSizeInBytes
|
|
2226
|
-
) {
|
|
2227
|
-
isMalformedAsf = true;
|
|
2228
|
-
break;
|
|
2229
|
-
}
|
|
2230
|
-
|
|
2231
|
-
await safeIgnore(tokenizer, payload, {
|
|
2232
|
-
maximumLength: isUnknownFileSize ? maximumAsfHeaderPayloadSizeInBytes : tokenizer.fileInfo.size,
|
|
2233
|
-
reason: 'ASF header payload',
|
|
2234
|
-
});
|
|
2235
|
-
|
|
2236
|
-
// Safeguard against malformed files: break if the position did not advance.
|
|
2237
|
-
if (tokenizer.position <= previousPosition) {
|
|
2238
|
-
isMalformedAsf = true;
|
|
2239
|
-
break;
|
|
2240
|
-
}
|
|
2241
|
-
}
|
|
2242
|
-
} catch (error) {
|
|
2243
|
-
if (
|
|
2244
|
-
error instanceof strtok3.EndOfStreamError
|
|
2245
|
-
|| error instanceof ParserHardLimitError
|
|
2246
|
-
) {
|
|
2247
|
-
if (hasUnknownFileSize(tokenizer)) {
|
|
2248
|
-
isMalformedAsf = true;
|
|
2249
|
-
}
|
|
2250
|
-
} else {
|
|
2251
|
-
throw error;
|
|
2252
|
-
}
|
|
2253
|
-
}
|
|
2254
|
-
|
|
2255
|
-
if (isMalformedAsf) {
|
|
2256
|
-
return;
|
|
2257
|
-
}
|
|
2258
|
-
|
|
2259
|
-
// Default to ASF generic extension
|
|
2260
|
-
return {
|
|
2261
|
-
ext: 'asf',
|
|
2262
|
-
mime: 'application/vnd.ms-asf',
|
|
2263
|
-
};
|
|
1379
|
+
return detectAsf(tokenizer);
|
|
2264
1380
|
}
|
|
2265
1381
|
|
|
2266
1382
|
if (this.check([0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A])) {
|
|
@@ -2474,21 +1590,21 @@ export class FileTypeParser {
|
|
|
2474
1590
|
if (this.check([0x4C, 0x00, 0x00, 0x00, 0x01, 0x14, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46])) {
|
|
2475
1591
|
return {
|
|
2476
1592
|
ext: 'lnk',
|
|
2477
|
-
mime: 'application/x
|
|
1593
|
+
mime: 'application/x-ms-shortcut', // Informal, used by freedesktop.org shared-mime-info
|
|
2478
1594
|
};
|
|
2479
1595
|
}
|
|
2480
1596
|
|
|
2481
1597
|
if (this.check([0x62, 0x6F, 0x6F, 0x6B, 0x00, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x72, 0x6B, 0x00, 0x00, 0x00, 0x00])) {
|
|
2482
1598
|
return {
|
|
2483
1599
|
ext: 'alias',
|
|
2484
|
-
mime: 'application/x
|
|
1600
|
+
mime: 'application/x-ft-apple.alias',
|
|
2485
1601
|
};
|
|
2486
1602
|
}
|
|
2487
1603
|
|
|
2488
1604
|
if (this.checkString('Kaydara FBX Binary \u0000')) {
|
|
2489
1605
|
return {
|
|
2490
1606
|
ext: 'fbx',
|
|
2491
|
-
mime: 'application/x
|
|
1607
|
+
mime: 'application/x-ft-fbx',
|
|
2492
1608
|
};
|
|
2493
1609
|
}
|
|
2494
1610
|
|
|
@@ -2790,3 +1906,7 @@ export class FileTypeParser {
|
|
|
2790
1906
|
|
|
2791
1907
|
export const supportedExtensions = new Set(extensions);
|
|
2792
1908
|
export const supportedMimeTypes = new Set(mimeTypes);
|
|
1909
|
+
|
|
1910
|
+
export async function fileTypeFromFile(path, options) {
|
|
1911
|
+
return (new FileTypeParser(options)).fromFile(path);
|
|
1912
|
+
}
|