file-type 21.2.0 → 21.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core.js +734 -150
- package/index.js +26 -2
- package/package.json +4 -4
- package/readme.md +5 -18
package/core.js
CHANGED
|
@@ -14,6 +14,169 @@ import {
|
|
|
14
14
|
import {extensions, mimeTypes} from './supported.js';
|
|
15
15
|
|
|
16
16
|
export const reasonableDetectionSizeInBytes = 4100; // A fair amount of file-types are detectable within this range.

// Defensive ceilings for sizes taken from untrusted input. They are kept small so a hostile
// header cannot trigger accidental memory spikes or very long skips.
const maximumUntrustedSkipSizeInBytes = 16 * (2 ** 20); // 16 MiB, the generic skip ceiling.
const maximumZipEntrySizeInBytes = 2 ** 20; // 1 MiB.
const maximumMpegOffsetTolerance = reasonableDetectionSizeInBytes - 2;
const maximumEbmlDocumentTypeSizeInBytes = 64;
const maximumEbmlElementCount = 256;

// Format-specific limits that simply reuse the generic skip ceiling.
const maximumNestedGzipDetectionSizeInBytes = maximumUntrustedSkipSizeInBytes;
const maximumId3HeaderSizeInBytes = maximumUntrustedSkipSizeInBytes;
const maximumEbmlElementPayloadSizeInBytes = maximumUntrustedSkipSizeInBytes;
const maximumPngChunkSizeInBytes = maximumUntrustedSkipSizeInBytes;
const maximumTiffIfdOffsetInBytes = maximumUntrustedSkipSizeInBytes;

// ZIP failures that `isRecoverableZipError` accepts: exact messages, message prefixes, and error codes.
const recoverableZipErrorMessages = new Set(['Unexpected signature', 'Encrypted ZIP', 'Expected Central-File-Header signature']);
const recoverableZipErrorMessagePrefixes = ['Unsupported ZIP compression method:', 'ZIP entry decompressed data exceeds '];
const recoverableZipErrorCodes = new Set(['Z_BUF_ERROR', 'Z_DATA_ERROR', 'ERR_INVALID_STATE']);
|
|
42
|
+
|
|
43
|
+
/**
Error signalling that a size or offset read from the input is invalid or exceeds a parser limit.

`name` is set explicitly so stack traces and `String(error)` identify this class instead of the generic `Error`. Callers that match with `instanceof` are unaffected.
*/
class ParserHardLimitError extends Error {
	name = 'ParserHardLimitError';
}
|
|
44
|
+
|
|
45
|
+
/**
Validate a size taken from untrusted input.

@param value - Candidate size.
@param maximum - Largest accepted size (inclusive).
@param reason - Label used in the error message.
@returns `value` unchanged when it is a finite number within `0..maximum`.
@throws {ParserHardLimitError} When `value` is not finite, negative, or above `maximum`.
*/
function getSafeBound(value, maximum, reason) {
	const isOutOfRange = () => value < 0 || value > maximum;
	if (Number.isFinite(value) && !isOutOfRange()) {
		return value;
	}

	throw new ParserHardLimitError(`${reason} has invalid size ${value} (maximum ${maximum} bytes)`);
}
|
|
56
|
+
|
|
57
|
+
/**
Skip `length` bytes on the tokenizer after validating the length with `getSafeBound`.

@param tokenizer - Object exposing an async `ignore(length)` method.
@param length - Number of bytes to skip.
@param options.maximumLength - Largest accepted skip; defaults to `maximumUntrustedSkipSizeInBytes`.
@param options.reason - Label passed to `getSafeBound` for its error message; defaults to `'skip'`.
*/
async function safeIgnore(tokenizer, length, {maximumLength = maximumUntrustedSkipSizeInBytes, reason = 'skip'} = {}) {
	// Validation runs first, so an invalid length never reaches the tokenizer.
	await tokenizer.ignore(getSafeBound(length, maximumLength, reason));
}
|
|
61
|
+
|
|
62
|
+
/**
Read into `buffer` through the tokenizer after validating the requested length with `getSafeBound`.

@param tokenizer - Object exposing `readBuffer(buffer, options)`.
@param buffer - Destination buffer.
@param options - Optional read options; `options.length` defaults to `buffer.length`.
@param limits.maximumLength - Largest accepted read length; defaults to `buffer.length`.
@param limits.reason - Label passed to `getSafeBound` for its error message; defaults to `'read'`.
@returns Whatever `tokenizer.readBuffer` returns.
*/
async function safeReadBuffer(tokenizer, buffer, options, {maximumLength = buffer.length, reason = 'read'} = {}) {
	const boundedLength = getSafeBound(options?.length ?? buffer.length, maximumLength, reason);
	// The validated length overrides any `length` carried in the caller's options.
	const readOptions = {...options, length: boundedLength};
	return tokenizer.readBuffer(buffer, readOptions);
}
|
|
70
|
+
|
|
71
|
+
async function decompressDeflateRawWithLimit(data, {maximumLength = maximumZipEntrySizeInBytes} = {}) {
|
|
72
|
+
const input = new ReadableStream({
|
|
73
|
+
start(controller) {
|
|
74
|
+
controller.enqueue(data);
|
|
75
|
+
controller.close();
|
|
76
|
+
},
|
|
77
|
+
});
|
|
78
|
+
const output = input.pipeThrough(new DecompressionStream('deflate-raw'));
|
|
79
|
+
const reader = output.getReader();
|
|
80
|
+
const chunks = [];
|
|
81
|
+
let totalLength = 0;
|
|
82
|
+
|
|
83
|
+
try {
|
|
84
|
+
for (;;) {
|
|
85
|
+
const {done, value} = await reader.read();
|
|
86
|
+
if (done) {
|
|
87
|
+
break;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
totalLength += value.length;
|
|
91
|
+
if (totalLength > maximumLength) {
|
|
92
|
+
await reader.cancel();
|
|
93
|
+
throw new Error(`ZIP entry decompressed data exceeds ${maximumLength} bytes`);
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
chunks.push(value);
|
|
97
|
+
}
|
|
98
|
+
} finally {
|
|
99
|
+
reader.releaseLock();
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
const uncompressedData = new Uint8Array(totalLength);
|
|
103
|
+
let offset = 0;
|
|
104
|
+
for (const chunk of chunks) {
|
|
105
|
+
uncompressedData.set(chunk, offset);
|
|
106
|
+
offset += chunk.length;
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
return uncompressedData;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
// @tokenizer/inflate does not expose a configuration hook for capping inflated output,
// so the default `inflate` is overridden here to enforce a decompression size limit.
ZipHandler.prototype.inflate = async function (zipHeader, fileData, callback) {
	const {compressedMethod} = zipHeader;

	switch (compressedMethod) {
		case 0: {
			// Handled without decompression: the entry data is passed through unchanged.
			return callback(fileData);
		}

		case 8: {
			// Raw-deflate path: the 1 MiB cap applies only when the total file size is unknown.
			const maximumLength = hasUnknownFileSize(this.tokenizer) ? maximumZipEntrySizeInBytes : Number.MAX_SAFE_INTEGER;
			const uncompressedData = await decompressDeflateRawWithLimit(fileData, {maximumLength});
			return callback(uncompressedData);
		}

		default: {
			throw new Error(`Unsupported ZIP compression method: ${compressedMethod}`);
		}
	}
};
|
|
126
|
+
|
|
127
|
+
/**
Wrap `stream` so that at most `maximumBytes` bytes are forwarded.

The chunk that crosses the limit is truncated with `subarray`; the wrapper then closes and the source reader is cancelled. Cancelling the wrapper cancels the source too.

@param stream - Source `ReadableStream`; a reader is acquired on it immediately.
@param maximumBytes - Byte budget for the returned stream.
@returns A new `ReadableStream` emitting no more than `maximumBytes` bytes.
*/
function createByteLimitedReadableStream(stream, maximumBytes) {
	const sourceReader = stream.getReader();
	let bytesForwarded = 0;
	// True once the source has either ended or been cancelled; guards against cancelling twice.
	let isSourceSettled = false;

	const stopSource = async reason => {
		if (isSourceSettled) {
			return;
		}

		isSourceSettled = true;
		await sourceReader.cancel(reason);
	};

	const closeAndStopSource = async controller => {
		controller.close();
		await stopSource();
	};

	return new ReadableStream({
		async pull(controller) {
			if (bytesForwarded >= maximumBytes) {
				await closeAndStopSource(controller);
				return;
			}

			const {done, value} = await sourceReader.read();
			if (done || !value) {
				isSourceSettled = true;
				controller.close();
				return;
			}

			const budget = maximumBytes - bytesForwarded;
			if (value.length > budget) {
				// Forward only the part that still fits, then end the stream.
				controller.enqueue(value.subarray(0, budget));
				bytesForwarded += budget;
				await closeAndStopSource(controller);
				return;
			}

			controller.enqueue(value);
			bytesForwarded += value.length;
		},
		cancel: reason => stopSource(reason),
	});
}
|
|
17
180
|
|
|
18
181
|
export async function fileTypeFromStream(stream, options) {
|
|
19
182
|
return new FileTypeParser(options).fromStream(stream);
|
|
@@ -180,6 +343,189 @@ function _check(buffer, headers, options) {
|
|
|
180
343
|
return true;
|
|
181
344
|
}
|
|
182
345
|
|
|
346
|
+
/**
Normalize a caller-supplied sample size.

@param sampleSize - Requested probe depth in bytes.
@returns `reasonableDetectionSizeInBytes` for non-finite input; otherwise the value truncated to an integer and raised to at least 1, so valid caller-requested depths are preserved.
*/
export function normalizeSampleSize(sampleSize) {
	return Number.isFinite(sampleSize)
		? Math.max(1, Math.trunc(sampleSize))
		: reasonableDetectionSizeInBytes;
}
|
|
354
|
+
|
|
355
|
+
/**
Normalize the `mpegOffsetTolerance` option.

The value controls scan depth and therefore worst-case CPU work, so it is clamped.

@param mpegOffsetTolerance - Caller-supplied tolerance.
@returns `0` for non-finite input; otherwise the truncated value clamped to `0..maximumMpegOffsetTolerance`.
*/
function normalizeMpegOffsetTolerance(mpegOffsetTolerance) {
	if (Number.isFinite(mpegOffsetTolerance)) {
		const cappedTolerance = Math.min(maximumMpegOffsetTolerance, Math.trunc(mpegOffsetTolerance));
		return Math.max(0, cappedTolerance);
	}

	return 0;
}
|
|
363
|
+
|
|
364
|
+
/**
Turn a possibly-missing file size into a usable upper bound.

@param fileSize - Reported file size, if any.
@returns `Number.MAX_SAFE_INTEGER` for non-finite input; otherwise `fileSize` raised to at least 0.
*/
function getKnownFileSizeOrMaximum(fileSize) {
	return Number.isFinite(fileSize) ? Math.max(0, fileSize) : Number.MAX_SAFE_INTEGER;
}
|
|
371
|
+
|
|
372
|
+
/**
Report whether the tokenizer lacks a usable total size.

@param tokenizer - Object with a `fileInfo.size` property.
@returns `true` when `fileInfo.size` is not a finite number or equals `Number.MAX_SAFE_INTEGER`.
*/
function hasUnknownFileSize(tokenizer) {
	const {size} = tokenizer.fileInfo;
	if (size === Number.MAX_SAFE_INTEGER) {
		return true;
	}

	return !Number.isFinite(size);
}
|
|
379
|
+
|
|
380
|
+
/**
Check whether a scan over an unknown-size input has consumed more than its byte budget.

@param tokenizer - Tokenizer whose `position` marks the current read offset.
@param startOffset - Position at which the scan began.
@param maximumBytes - Budget in bytes.
@returns `true` only when the file size is unknown and more than `maximumBytes` bytes were consumed since `startOffset`.
*/
function hasExceededUnknownSizeScanBudget(tokenizer, startOffset, maximumBytes) {
	// Inputs with a known size are never limited by this budget.
	if (!hasUnknownFileSize(tokenizer)) {
		return false;
	}

	const scannedBytes = tokenizer.position - startOffset;
	return scannedBytes > maximumBytes;
}
|
|
386
|
+
|
|
387
|
+
/**
Classify an error raised during ZIP parsing as recoverable or not.

@param error - Value caught from the ZIP handler.
@returns `true` for end-of-stream and parser-limit errors, for `Error`s whose message is in `recoverableZipErrorMessages` or starts with one of `recoverableZipErrorMessagePrefixes`, and for `TypeError`s whose `code` is in `recoverableZipErrorCodes`; `false` otherwise, including for non-`Error` values.
*/
function isRecoverableZipError(error) {
	if (error instanceof strtok3.EndOfStreamError || error instanceof ParserHardLimitError) {
		return true;
	}

	if (!(error instanceof Error)) {
		return false;
	}

	const hasKnownMessage = recoverableZipErrorMessages.has(error.message);
	if (hasKnownMessage) {
		return true;
	}

	const hasKnownCode = error instanceof TypeError && recoverableZipErrorCodes.has(error.code);
	if (hasKnownCode) {
		return true;
	}

	return recoverableZipErrorMessagePrefixes.some(prefix => error.message.startsWith(prefix));
}
|
|
419
|
+
|
|
420
|
+
/**
Decide whether a ZIP entry is small enough to be read for detection.

@param zipHeader - Entry header providing `compressedSize` and `uncompressedSize`.
@param maximumSize - Largest accepted size for either field; defaults to `maximumZipEntrySizeInBytes`.
@returns `true` when both sizes are finite numbers within `0..maximumSize`.
*/
function canReadZipEntryForDetection(zipHeader, maximumSize = maximumZipEntrySizeInBytes) {
	const {compressedSize, uncompressedSize} = zipHeader;
	const isAcceptableSize = size => Number.isFinite(size) && !(size < 0) && !(size > maximumSize);
	return [compressedSize, uncompressedSize].every(size => isAcceptableSize(size));
}
|
|
434
|
+
|
|
435
|
+
/**
Create the bookkeeping object used while scanning ZIP entries for Open XML / 3MF hints.

@returns A fresh state object with every flag set to `false`.
*/
function createOpenXmlZipDetectionState() {
	const initialState = {
		// Status of the `[Content_Types].xml` entry.
		hasContentTypesEntry: false,
		hasParsedContentTypesEntry: false,
		isParsingContentTypes: false,
		hasUnparseableContentTypes: false,
		// Directory/entry names seen so far (`word/`, `ppt/`, `xl/`, `3D/*.model`).
		hasWordDirectory: false,
		hasPresentationDirectory: false,
		hasSpreadsheetDirectory: false,
		hasThreeDimensionalModelEntry: false,
	};

	return initialState;
}
|
|
447
|
+
|
|
448
|
+
/**
Record which Open XML / 3MF hints a ZIP entry name provides.

Flags are only ever switched to `true`; nothing is reset.

@param openXmlState - State object to mutate.
@param filename - ZIP entry name.
*/
function updateOpenXmlZipDetectionStateFromFilename(openXmlState, filename) {
	const directoryFlags = [
		['word/', 'hasWordDirectory'],
		['ppt/', 'hasPresentationDirectory'],
		['xl/', 'hasSpreadsheetDirectory'],
	];

	for (const [directoryPrefix, flagName] of directoryFlags) {
		if (filename.startsWith(directoryPrefix)) {
			openXmlState[flagName] = true;
		}
	}

	// 3MF models are identified by directory and extension together.
	const isThreeDimensionalModel = filename.startsWith('3D/') && filename.endsWith('.model');
	if (isThreeDimensionalModel) {
		openXmlState.hasThreeDimensionalModelEntry = true;
	}
}
|
|
468
|
+
|
|
469
|
+
/**
Guess an Open XML / 3MF file type from the entry names recorded in `openXmlState`.

The directory-name heuristic applies only when `[Content_Types].xml` was seen but its handler was skipped: not marked unparseable, not currently being parsed, and not already parsed. This avoids guessing when content-type parsing already gave an answer or failed.

@param openXmlState - State produced while scanning ZIP entries.
@returns `{ext, mime}` for docx, pptx, xlsx or 3mf (checked in that order), or `undefined`.
*/
function getOpenXmlFileTypeFromZipEntries(openXmlState) {
	const canUseDirectoryHeuristic = openXmlState.hasContentTypesEntry
		&& !openXmlState.hasUnparseableContentTypes
		&& !openXmlState.isParsingContentTypes
		&& !openXmlState.hasParsedContentTypesEntry;
	if (!canUseDirectoryHeuristic) {
		return;
	}

	// Ordered by priority: the first recorded hint wins.
	const candidates = [
		['hasWordDirectory', 'docx', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
		['hasPresentationDirectory', 'pptx', 'application/vnd.openxmlformats-officedocument.presentationml.presentation'],
		['hasSpreadsheetDirectory', 'xlsx', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'],
		['hasThreeDimensionalModelEntry', '3mf', 'model/3mf'],
	];

	for (const [flagName, ext, mime] of candidates) {
		if (openXmlState[flagName]) {
			return {ext, mime};
		}
	}
}
|
|
510
|
+
|
|
511
|
+
/**
Extract the main content type from the text of `[Content_Types].xml`.

Only the `ContentType="...main+xml"` value is needed, so a small string scan is used instead of full XML parsing.

@param xmlContent - Decoded XML text.
@returns The text between the nearest preceding `"` and the first `.main+xml"` (the suffix itself is excluded); the 3MF model content type when that attribute is present instead; otherwise `undefined`.
*/
function getOpenXmlMimeTypeFromContentTypesXml(xmlContent) {
	const suffixIndex = xmlContent.indexOf('.main+xml"');
	if (suffixIndex !== -1) {
		// Search backwards from the suffix for the quote that opens the attribute value.
		// With no quote, `lastIndexOf` gives -1 and the whole prefix is returned on purpose.
		const openingQuoteIndex = xmlContent.lastIndexOf('"', suffixIndex - 1);
		return xmlContent.slice(openingQuoteIndex + 1, suffixIndex);
	}

	const threeDimensionalModelMimeType = 'application/vnd.ms-package.3dmanufacturing-3dmodel+xml';
	return xmlContent.includes(`ContentType="${threeDimensionalModelMimeType}"`)
		? threeDimensionalModelMimeType
		: undefined;
}
|
|
528
|
+
|
|
183
529
|
/**
Build a `FileTypeParser` from `options` and delegate to its `fromTokenizer` method.

@param tokenizer - Tokenizer handed to the parser.
@param options - Options forwarded to the `FileTypeParser` constructor.
@returns Whatever `FileTypeParser#fromTokenizer` resolves to.
*/
export async function fileTypeFromTokenizer(tokenizer, options) {
	const parser = new FileTypeParser(options);
	return parser.fromTokenizer(tokenizer);
}
|
|
@@ -190,25 +536,39 @@ export async function fileTypeStream(webStream, options) {
|
|
|
190
536
|
|
|
191
537
|
export class FileTypeParser {
|
|
192
538
|
constructor(options) {
|
|
539
|
+
const normalizedMpegOffsetTolerance = normalizeMpegOffsetTolerance(options?.mpegOffsetTolerance);
|
|
193
540
|
this.options = {
|
|
194
|
-
mpegOffsetTolerance: 0,
|
|
195
541
|
...options,
|
|
542
|
+
mpegOffsetTolerance: normalizedMpegOffsetTolerance,
|
|
196
543
|
};
|
|
197
544
|
|
|
198
|
-
this.detectors = [...(options
|
|
545
|
+
this.detectors = [...(this.options.customDetectors ?? []),
|
|
199
546
|
{id: 'core', detect: this.detectConfident},
|
|
200
547
|
{id: 'core.imprecise', detect: this.detectImprecise}];
|
|
201
548
|
this.tokenizerOptions = {
|
|
202
|
-
abortSignal: options
|
|
549
|
+
abortSignal: this.options.signal,
|
|
203
550
|
};
|
|
204
551
|
}
|
|
205
552
|
|
|
206
553
|
async fromTokenizer(tokenizer) {
|
|
207
554
|
const initialPosition = tokenizer.position;
|
|
208
|
-
|
|
209
555
|
// Iterate through all file-type detectors
|
|
210
556
|
for (const detector of this.detectors) {
|
|
211
|
-
|
|
557
|
+
let fileType;
|
|
558
|
+
try {
|
|
559
|
+
fileType = await detector.detect(tokenizer);
|
|
560
|
+
} catch (error) {
|
|
561
|
+
if (error instanceof strtok3.EndOfStreamError) {
|
|
562
|
+
return;
|
|
563
|
+
}
|
|
564
|
+
|
|
565
|
+
if (error instanceof ParserHardLimitError) {
|
|
566
|
+
return;
|
|
567
|
+
}
|
|
568
|
+
|
|
569
|
+
throw error;
|
|
570
|
+
}
|
|
571
|
+
|
|
212
572
|
if (fileType) {
|
|
213
573
|
return fileType;
|
|
214
574
|
}
|
|
@@ -252,7 +612,7 @@ export class FileTypeParser {
|
|
|
252
612
|
}
|
|
253
613
|
|
|
254
614
|
async toDetectionStream(stream, options) {
|
|
255
|
-
const
|
|
615
|
+
const sampleSize = normalizeSampleSize(options?.sampleSize ?? reasonableDetectionSizeInBytes);
|
|
256
616
|
let detectedFileType;
|
|
257
617
|
let firstChunk;
|
|
258
618
|
|
|
@@ -394,7 +754,7 @@ export class FileTypeParser {
|
|
|
394
754
|
|
|
395
755
|
if (this.check([0xEF, 0xBB, 0xBF])) { // UTF-8-BOM
|
|
396
756
|
// Strip off UTF-8-BOM
|
|
397
|
-
this.tokenizer.ignore(3);
|
|
757
|
+
await this.tokenizer.ignore(3);
|
|
398
758
|
return this.detectConfident(tokenizer);
|
|
399
759
|
}
|
|
400
760
|
|
|
@@ -414,27 +774,24 @@ export class FileTypeParser {
|
|
|
414
774
|
|
|
415
775
|
if (this.check([0x1F, 0x8B, 0x8])) {
|
|
416
776
|
const gzipHandler = new GzipHandler(tokenizer);
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
let shouldCancelStream = true;
|
|
777
|
+
const limitedInflatedStream = createByteLimitedReadableStream(gzipHandler.inflate(), maximumNestedGzipDetectionSizeInBytes);
|
|
778
|
+
let compressedFileType;
|
|
420
779
|
try {
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
shouldCancelStream = false;
|
|
780
|
+
compressedFileType = await this.fromStream(limitedInflatedStream);
|
|
781
|
+
} catch (error) {
|
|
782
|
+
if (error?.name === 'AbortError') {
|
|
783
|
+
throw error;
|
|
426
784
|
}
|
|
427
785
|
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
}
|
|
786
|
+
// Decompression or inner-detection failures are expected for non-tar gzip files.
|
|
787
|
+
}
|
|
788
|
+
|
|
789
|
+
// We only need enough inflated bytes to confidently decide whether this is tar.gz.
|
|
790
|
+
if (compressedFileType?.ext === 'tar') {
|
|
791
|
+
return {
|
|
792
|
+
ext: 'tar.gz',
|
|
793
|
+
mime: 'application/gzip',
|
|
794
|
+
};
|
|
438
795
|
}
|
|
439
796
|
|
|
440
797
|
return {
|
|
@@ -451,17 +808,48 @@ export class FileTypeParser {
|
|
|
451
808
|
}
|
|
452
809
|
|
|
453
810
|
if (this.checkString('ID3')) {
|
|
454
|
-
await tokenizer
|
|
811
|
+
await safeIgnore(tokenizer, 6, {
|
|
812
|
+
maximumLength: 6,
|
|
813
|
+
reason: 'ID3 header prefix',
|
|
814
|
+
}); // Skip ID3 header until the header size
|
|
455
815
|
const id3HeaderLength = await tokenizer.readToken(uint32SyncSafeToken);
|
|
816
|
+
const isUnknownFileSize = hasUnknownFileSize(tokenizer);
|
|
817
|
+
if (
|
|
818
|
+
!Number.isFinite(id3HeaderLength)
|
|
819
|
+
|| id3HeaderLength < 0
|
|
820
|
+
// Keep ID3 probing bounded for unknown-size streams to avoid attacker-controlled large skips.
|
|
821
|
+
|| (
|
|
822
|
+
isUnknownFileSize
|
|
823
|
+
&& id3HeaderLength > maximumId3HeaderSizeInBytes
|
|
824
|
+
)
|
|
825
|
+
) {
|
|
826
|
+
return;
|
|
827
|
+
}
|
|
828
|
+
|
|
456
829
|
if (tokenizer.position + id3HeaderLength > tokenizer.fileInfo.size) {
|
|
457
|
-
|
|
830
|
+
if (isUnknownFileSize) {
|
|
831
|
+
return;
|
|
832
|
+
}
|
|
833
|
+
|
|
458
834
|
return {
|
|
459
835
|
ext: 'mp3',
|
|
460
836
|
mime: 'audio/mpeg',
|
|
461
837
|
};
|
|
462
838
|
}
|
|
463
839
|
|
|
464
|
-
|
|
840
|
+
try {
|
|
841
|
+
await safeIgnore(tokenizer, id3HeaderLength, {
|
|
842
|
+
maximumLength: isUnknownFileSize ? maximumId3HeaderSizeInBytes : tokenizer.fileInfo.size,
|
|
843
|
+
reason: 'ID3 payload',
|
|
844
|
+
});
|
|
845
|
+
} catch (error) {
|
|
846
|
+
if (error instanceof strtok3.EndOfStreamError) {
|
|
847
|
+
return;
|
|
848
|
+
}
|
|
849
|
+
|
|
850
|
+
throw error;
|
|
851
|
+
}
|
|
852
|
+
|
|
465
853
|
return this.fromTokenizer(tokenizer); // Skip ID3 header, recursion
|
|
466
854
|
}
|
|
467
855
|
|
|
@@ -547,72 +935,105 @@ export class FileTypeParser {
|
|
|
547
935
|
// Need to be before the `zip` check
|
|
548
936
|
if (this.check([0x50, 0x4B, 0x3, 0x4])) { // Local file header signature
|
|
549
937
|
let fileType;
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
mime: 'application/java-archive',
|
|
564
|
-
};
|
|
565
|
-
return {
|
|
566
|
-
stop: true,
|
|
567
|
-
};
|
|
568
|
-
case 'mimetype':
|
|
938
|
+
const openXmlState = createOpenXmlZipDetectionState();
|
|
939
|
+
|
|
940
|
+
try {
|
|
941
|
+
await new ZipHandler(tokenizer).unzip(zipHeader => {
|
|
942
|
+
updateOpenXmlZipDetectionStateFromFilename(openXmlState, zipHeader.filename);
|
|
943
|
+
|
|
944
|
+
const isOpenXmlContentTypesEntry = zipHeader.filename === '[Content_Types].xml';
|
|
945
|
+
const openXmlFileTypeFromEntries = getOpenXmlFileTypeFromZipEntries(openXmlState);
|
|
946
|
+
if (
|
|
947
|
+
!isOpenXmlContentTypesEntry
|
|
948
|
+
&& openXmlFileTypeFromEntries
|
|
949
|
+
) {
|
|
950
|
+
fileType = openXmlFileTypeFromEntries;
|
|
569
951
|
return {
|
|
570
|
-
async handler(fileData) {
|
|
571
|
-
// Use TextDecoder to decode the UTF-8 encoded data
|
|
572
|
-
const mimeType = new TextDecoder('utf-8').decode(fileData).trim();
|
|
573
|
-
fileType = getFileTypeFromMimeType(mimeType);
|
|
574
|
-
},
|
|
575
952
|
stop: true,
|
|
576
953
|
};
|
|
954
|
+
}
|
|
577
955
|
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
956
|
+
switch (zipHeader.filename) {
|
|
957
|
+
case 'META-INF/mozilla.rsa':
|
|
958
|
+
fileType = {
|
|
959
|
+
ext: 'xpi',
|
|
960
|
+
mime: 'application/x-xpinstall',
|
|
961
|
+
};
|
|
962
|
+
return {
|
|
963
|
+
stop: true,
|
|
964
|
+
};
|
|
965
|
+
case 'META-INF/MANIFEST.MF':
|
|
966
|
+
fileType = {
|
|
967
|
+
ext: 'jar',
|
|
968
|
+
mime: 'application/java-archive',
|
|
969
|
+
};
|
|
970
|
+
return {
|
|
971
|
+
stop: true,
|
|
972
|
+
};
|
|
973
|
+
case 'mimetype':
|
|
974
|
+
if (!canReadZipEntryForDetection(zipHeader)) {
|
|
975
|
+
return {};
|
|
976
|
+
}
|
|
977
|
+
|
|
978
|
+
return {
|
|
979
|
+
async handler(fileData) {
|
|
980
|
+
// Use TextDecoder to decode the UTF-8 encoded data
|
|
981
|
+
const mimeType = new TextDecoder('utf-8').decode(fileData).trim();
|
|
982
|
+
fileType = getFileTypeFromMimeType(mimeType);
|
|
983
|
+
},
|
|
984
|
+
stop: true,
|
|
985
|
+
};
|
|
986
|
+
|
|
987
|
+
case '[Content_Types].xml': {
|
|
988
|
+
openXmlState.hasContentTypesEntry = true;
|
|
989
|
+
|
|
990
|
+
const maximumContentTypesEntrySize = hasUnknownFileSize(tokenizer) ? maximumZipEntrySizeInBytes : Number.MAX_SAFE_INTEGER;
|
|
991
|
+
if (!canReadZipEntryForDetection(zipHeader, maximumContentTypesEntrySize)) {
|
|
992
|
+
openXmlState.hasUnparseableContentTypes = true;
|
|
993
|
+
return {};
|
|
994
|
+
}
|
|
995
|
+
|
|
996
|
+
openXmlState.isParsingContentTypes = true;
|
|
997
|
+
return {
|
|
998
|
+
async handler(fileData) {
|
|
999
|
+
// Use TextDecoder to decode the UTF-8 encoded data
|
|
1000
|
+
const xmlContent = new TextDecoder('utf-8').decode(fileData);
|
|
1001
|
+
const mimeType = getOpenXmlMimeTypeFromContentTypesXml(xmlContent);
|
|
1002
|
+
if (mimeType) {
|
|
587
1003
|
fileType = getFileTypeFromMimeType(mimeType);
|
|
588
1004
|
}
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
}
|
|
595
|
-
},
|
|
596
|
-
stop: true,
|
|
597
|
-
};
|
|
598
|
-
default:
|
|
599
|
-
if (/classes\d*\.dex/.test(zipHeader.filename)) {
|
|
600
|
-
fileType = {
|
|
601
|
-
ext: 'apk',
|
|
602
|
-
mime: 'application/vnd.android.package-archive',
|
|
1005
|
+
|
|
1006
|
+
openXmlState.hasParsedContentTypesEntry = true;
|
|
1007
|
+
openXmlState.isParsingContentTypes = false;
|
|
1008
|
+
},
|
|
1009
|
+
stop: true,
|
|
603
1010
|
};
|
|
604
|
-
return {stop: true};
|
|
605
1011
|
}
|
|
606
1012
|
|
|
607
|
-
|
|
1013
|
+
default:
|
|
1014
|
+
if (/classes\d*\.dex/.test(zipHeader.filename)) {
|
|
1015
|
+
fileType = {
|
|
1016
|
+
ext: 'apk',
|
|
1017
|
+
mime: 'application/vnd.android.package-archive',
|
|
1018
|
+
};
|
|
1019
|
+
return {stop: true};
|
|
1020
|
+
}
|
|
1021
|
+
|
|
1022
|
+
return {};
|
|
1023
|
+
}
|
|
1024
|
+
});
|
|
1025
|
+
} catch (error) {
|
|
1026
|
+
if (!isRecoverableZipError(error)) {
|
|
1027
|
+
throw error;
|
|
608
1028
|
}
|
|
609
|
-
|
|
610
|
-
if (
|
|
611
|
-
|
|
1029
|
+
|
|
1030
|
+
if (openXmlState.isParsingContentTypes) {
|
|
1031
|
+
openXmlState.isParsingContentTypes = false;
|
|
1032
|
+
openXmlState.hasUnparseableContentTypes = true;
|
|
612
1033
|
}
|
|
613
|
-
}
|
|
1034
|
+
}
|
|
614
1035
|
|
|
615
|
-
return fileType ?? {
|
|
1036
|
+
return fileType ?? getOpenXmlFileTypeFromZipEntries(openXmlState) ?? {
|
|
616
1037
|
ext: 'zip',
|
|
617
1038
|
mime: 'application/zip',
|
|
618
1039
|
};
|
|
@@ -817,7 +1238,10 @@ export class FileTypeParser {
|
|
|
817
1238
|
}
|
|
818
1239
|
|
|
819
1240
|
const id = new Uint8Array(ic + 1);
|
|
820
|
-
await tokenizer
|
|
1241
|
+
await safeReadBuffer(tokenizer, id, undefined, {
|
|
1242
|
+
maximumLength: id.length,
|
|
1243
|
+
reason: 'EBML field',
|
|
1244
|
+
});
|
|
821
1245
|
return id;
|
|
822
1246
|
}
|
|
823
1247
|
|
|
@@ -838,20 +1262,47 @@ export class FileTypeParser {
|
|
|
838
1262
|
}
|
|
839
1263
|
|
|
840
1264
|
async function readChildren(children) {
|
|
1265
|
+
let ebmlElementCount = 0;
|
|
841
1266
|
while (children > 0) {
|
|
1267
|
+
ebmlElementCount++;
|
|
1268
|
+
if (ebmlElementCount > maximumEbmlElementCount) {
|
|
1269
|
+
return;
|
|
1270
|
+
}
|
|
1271
|
+
|
|
842
1272
|
const element = await readElement();
|
|
1273
|
+
|
|
843
1274
|
if (element.id === 0x42_82) {
|
|
844
|
-
|
|
1275
|
+
// `DocType` is a short string ("webm", "matroska", ...), reject implausible lengths to avoid large allocations.
|
|
1276
|
+
if (element.len > maximumEbmlDocumentTypeSizeInBytes) {
|
|
1277
|
+
return;
|
|
1278
|
+
}
|
|
1279
|
+
|
|
1280
|
+
const documentTypeLength = getSafeBound(element.len, maximumEbmlDocumentTypeSizeInBytes, 'EBML DocType');
|
|
1281
|
+
const rawValue = await tokenizer.readToken(new Token.StringType(documentTypeLength));
|
|
845
1282
|
return rawValue.replaceAll(/\00.*$/g, ''); // Return DocType
|
|
846
1283
|
}
|
|
847
1284
|
|
|
848
|
-
|
|
1285
|
+
if (
|
|
1286
|
+
hasUnknownFileSize(tokenizer)
|
|
1287
|
+
&& (
|
|
1288
|
+
!Number.isFinite(element.len)
|
|
1289
|
+
|| element.len < 0
|
|
1290
|
+
|| element.len > maximumEbmlElementPayloadSizeInBytes
|
|
1291
|
+
)
|
|
1292
|
+
) {
|
|
1293
|
+
return;
|
|
1294
|
+
}
|
|
1295
|
+
|
|
1296
|
+
await safeIgnore(tokenizer, element.len, {
|
|
1297
|
+
maximumLength: hasUnknownFileSize(tokenizer) ? maximumEbmlElementPayloadSizeInBytes : tokenizer.fileInfo.size,
|
|
1298
|
+
reason: 'EBML payload',
|
|
1299
|
+
}); // ignore payload
|
|
849
1300
|
--children;
|
|
850
1301
|
}
|
|
851
1302
|
}
|
|
852
1303
|
|
|
853
|
-
const
|
|
854
|
-
const documentType = await readChildren(
|
|
1304
|
+
const rootElement = await readElement();
|
|
1305
|
+
const documentType = await readChildren(rootElement.len);
|
|
855
1306
|
|
|
856
1307
|
switch (documentType) {
|
|
857
1308
|
case 'webm':
|
|
@@ -951,7 +1402,12 @@ export class FileTypeParser {
|
|
|
951
1402
|
};
|
|
952
1403
|
}
|
|
953
1404
|
|
|
954
|
-
if (
|
|
1405
|
+
if (
|
|
1406
|
+
this.check([0xFE, 0xED, 0xFA, 0xCE]) // 32-bit, big-endian
|
|
1407
|
+
|| this.check([0xFE, 0xED, 0xFA, 0xCF]) // 64-bit, big-endian
|
|
1408
|
+
|| this.check([0xCE, 0xFA, 0xED, 0xFE]) // 32-bit, little-endian
|
|
1409
|
+
|| this.check([0xCF, 0xFA, 0xED, 0xFE]) // 64-bit, little-endian
|
|
1410
|
+
) {
|
|
955
1411
|
return {
|
|
956
1412
|
ext: 'macho',
|
|
957
1413
|
mime: 'application/x-mach-binary',
|
|
@@ -1064,10 +1520,25 @@ export class FileTypeParser {
|
|
|
1064
1520
|
}
|
|
1065
1521
|
|
|
1066
1522
|
if (this.check([0xCA, 0xFE, 0xBA, 0xBE])) {
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1523
|
+
// Java bytecode and Mach-O universal binaries have the same magic number.
|
|
1524
|
+
// We disambiguate based on the next 4 bytes, as done by `file`.
|
|
1525
|
+
// See https://github.com/file/file/blob/master/magic/Magdir/cafebabe
|
|
1526
|
+
const machOArchitectureCount = Token.UINT32_BE.get(this.buffer, 4);
|
|
1527
|
+
const javaClassFileMajorVersion = Token.UINT16_BE.get(this.buffer, 6);
|
|
1528
|
+
|
|
1529
|
+
if (machOArchitectureCount > 0 && machOArchitectureCount <= 30) {
|
|
1530
|
+
return {
|
|
1531
|
+
ext: 'macho',
|
|
1532
|
+
mime: 'application/x-mach-binary',
|
|
1533
|
+
};
|
|
1534
|
+
}
|
|
1535
|
+
|
|
1536
|
+
if (javaClassFileMajorVersion > 30) {
|
|
1537
|
+
return {
|
|
1538
|
+
ext: 'class',
|
|
1539
|
+
mime: 'application/java-vm',
|
|
1540
|
+
};
|
|
1541
|
+
}
|
|
1071
1542
|
}
|
|
1072
1543
|
|
|
1073
1544
|
if (this.checkString('.RMF')) {
|
|
@@ -1183,6 +1654,16 @@ export class FileTypeParser {
|
|
|
1183
1654
|
// -- 8-byte signatures --
|
|
1184
1655
|
|
|
1185
1656
|
if (this.check([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A])) {
|
|
1657
|
+
const pngFileType = {
|
|
1658
|
+
ext: 'png',
|
|
1659
|
+
mime: 'image/png',
|
|
1660
|
+
};
|
|
1661
|
+
|
|
1662
|
+
const apngFileType = {
|
|
1663
|
+
ext: 'apng',
|
|
1664
|
+
mime: 'image/apng',
|
|
1665
|
+
};
|
|
1666
|
+
|
|
1186
1667
|
// APNG format (https://wiki.mozilla.org/APNG_Specification)
|
|
1187
1668
|
// 1. Find the first IDAT (image data) chunk (49 44 41 54)
|
|
1188
1669
|
// 2. Check if there is an "acTL" chunk before the IDAT one (61 63 54 4C)
|
|
@@ -1200,7 +1681,13 @@ export class FileTypeParser {
|
|
|
1200
1681
|
};
|
|
1201
1682
|
}
|
|
1202
1683
|
|
|
1684
|
+
const isUnknownPngStream = hasUnknownFileSize(tokenizer);
|
|
1685
|
+
const pngScanStart = tokenizer.position;
|
|
1203
1686
|
do {
|
|
1687
|
+
if (hasExceededUnknownSizeScanBudget(tokenizer, pngScanStart, maximumPngChunkSizeInBytes)) {
|
|
1688
|
+
break;
|
|
1689
|
+
}
|
|
1690
|
+
|
|
1204
1691
|
const chunk = await readChunkHeader();
|
|
1205
1692
|
if (chunk.length < 0) {
|
|
1206
1693
|
return; // Invalid chunk length
|
|
@@ -1208,24 +1695,40 @@ export class FileTypeParser {
|
|
|
1208
1695
|
|
|
1209
1696
|
switch (chunk.type) {
|
|
1210
1697
|
case 'IDAT':
|
|
1211
|
-
return
|
|
1212
|
-
ext: 'png',
|
|
1213
|
-
mime: 'image/png',
|
|
1214
|
-
};
|
|
1698
|
+
return pngFileType;
|
|
1215
1699
|
case 'acTL':
|
|
1216
|
-
return
|
|
1217
|
-
ext: 'apng',
|
|
1218
|
-
mime: 'image/apng',
|
|
1219
|
-
};
|
|
1700
|
+
return apngFileType;
|
|
1220
1701
|
default:
|
|
1221
|
-
|
|
1702
|
+
if (
|
|
1703
|
+
isUnknownPngStream
|
|
1704
|
+
&& chunk.length > maximumPngChunkSizeInBytes
|
|
1705
|
+
) {
|
|
1706
|
+
// Avoid huge attacker-controlled skips when probing unknown-size streams.
|
|
1707
|
+
return;
|
|
1708
|
+
}
|
|
1709
|
+
|
|
1710
|
+
try {
|
|
1711
|
+
await safeIgnore(tokenizer, chunk.length + 4, {
|
|
1712
|
+
maximumLength: isUnknownPngStream ? maximumPngChunkSizeInBytes + 4 : tokenizer.fileInfo.size,
|
|
1713
|
+
reason: 'PNG chunk payload',
|
|
1714
|
+
}); // Ignore chunk-data + CRC
|
|
1715
|
+
} catch (error) {
|
|
1716
|
+
if (
|
|
1717
|
+
!isUnknownPngStream
|
|
1718
|
+
&& (
|
|
1719
|
+
error instanceof ParserHardLimitError
|
|
1720
|
+
|| error instanceof strtok3.EndOfStreamError
|
|
1721
|
+
)
|
|
1722
|
+
) {
|
|
1723
|
+
return pngFileType;
|
|
1724
|
+
}
|
|
1725
|
+
|
|
1726
|
+
throw error;
|
|
1727
|
+
}
|
|
1222
1728
|
}
|
|
1223
1729
|
} while (tokenizer.position + 8 < tokenizer.fileInfo.size);
|
|
1224
1730
|
|
|
1225
|
-
return
|
|
1226
|
-
ext: 'png',
|
|
1227
|
-
mime: 'image/png',
|
|
1228
|
-
};
|
|
1731
|
+
return pngFileType;
|
|
1229
1732
|
}
|
|
1230
1733
|
|
|
1231
1734
|
if (this.check([0x41, 0x52, 0x52, 0x4F, 0x57, 0x31, 0x00, 0x00])) {
|
|
@@ -1383,45 +1886,95 @@ export class FileTypeParser {
|
|
|
1383
1886
|
|
|
1384
1887
|
// ASF_Header_Object first 80 bytes
|
|
1385
1888
|
if (this.check([0x30, 0x26, 0xB2, 0x75, 0x8E, 0x66, 0xCF, 0x11, 0xA6, 0xD9])) {
|
|
1386
|
-
|
|
1387
|
-
|
|
1388
|
-
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
|
|
1889
|
+
let isMalformedAsf = false;
|
|
1890
|
+
try {
|
|
1891
|
+
async function readHeader() {
|
|
1892
|
+
const guid = new Uint8Array(16);
|
|
1893
|
+
await safeReadBuffer(tokenizer, guid, undefined, {
|
|
1894
|
+
maximumLength: guid.length,
|
|
1895
|
+
reason: 'ASF header GUID',
|
|
1896
|
+
});
|
|
1897
|
+
return {
|
|
1898
|
+
id: guid,
|
|
1899
|
+
size: Number(await tokenizer.readToken(Token.UINT64_LE)),
|
|
1900
|
+
};
|
|
1901
|
+
}
|
|
1394
1902
|
|
|
1395
|
-
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
|
|
1405
|
-
if (_check(typeId, [0x40, 0x9E, 0x69, 0xF8, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
|
|
1406
|
-
// Found audio:
|
|
1407
|
-
return {
|
|
1408
|
-
ext: 'asf',
|
|
1409
|
-
mime: 'audio/x-ms-asf',
|
|
1410
|
-
};
|
|
1903
|
+
await safeIgnore(tokenizer, 30, {
|
|
1904
|
+
maximumLength: 30,
|
|
1905
|
+
reason: 'ASF header prelude',
|
|
1906
|
+
});
|
|
1907
|
+
const isUnknownFileSize = hasUnknownFileSize(tokenizer);
|
|
1908
|
+
const asfHeaderScanStart = tokenizer.position;
|
|
1909
|
+
while (tokenizer.position + 24 < tokenizer.fileInfo.size) {
|
|
1910
|
+
if (hasExceededUnknownSizeScanBudget(tokenizer, asfHeaderScanStart, maximumUntrustedSkipSizeInBytes)) {
|
|
1911
|
+
break;
|
|
1411
1912
|
}
|
|
1412
1913
|
|
|
1413
|
-
|
|
1414
|
-
|
|
1415
|
-
|
|
1416
|
-
|
|
1417
|
-
|
|
1418
|
-
|
|
1914
|
+
const previousPosition = tokenizer.position;
|
|
1915
|
+
const header = await readHeader();
|
|
1916
|
+
let payload = header.size - 24;
|
|
1917
|
+
if (
|
|
1918
|
+
!Number.isFinite(payload)
|
|
1919
|
+
|| payload < 0
|
|
1920
|
+
) {
|
|
1921
|
+
isMalformedAsf = true;
|
|
1922
|
+
break;
|
|
1419
1923
|
}
|
|
1420
1924
|
|
|
1421
|
-
|
|
1925
|
+
if (_check(header.id, [0x91, 0x07, 0xDC, 0xB7, 0xB7, 0xA9, 0xCF, 0x11, 0x8E, 0xE6, 0x00, 0xC0, 0x0C, 0x20, 0x53, 0x65])) {
|
|
1926
|
+
// Sync on Stream-Properties-Object (B7DC0791-A9B7-11CF-8EE6-00C00C205365)
|
|
1927
|
+
const typeId = new Uint8Array(16);
|
|
1928
|
+
payload -= await safeReadBuffer(tokenizer, typeId, undefined, {
|
|
1929
|
+
maximumLength: typeId.length,
|
|
1930
|
+
reason: 'ASF stream type GUID',
|
|
1931
|
+
});
|
|
1932
|
+
|
|
1933
|
+
if (_check(typeId, [0x40, 0x9E, 0x69, 0xF8, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
|
|
1934
|
+
// Found audio:
|
|
1935
|
+
return {
|
|
1936
|
+
ext: 'asf',
|
|
1937
|
+
mime: 'audio/x-ms-asf',
|
|
1938
|
+
};
|
|
1939
|
+
}
|
|
1940
|
+
|
|
1941
|
+
if (_check(typeId, [0xC0, 0xEF, 0x19, 0xBC, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
|
|
1942
|
+
// Found video:
|
|
1943
|
+
return {
|
|
1944
|
+
ext: 'asf',
|
|
1945
|
+
mime: 'video/x-ms-asf',
|
|
1946
|
+
};
|
|
1947
|
+
}
|
|
1948
|
+
|
|
1949
|
+
break;
|
|
1950
|
+
}
|
|
1951
|
+
|
|
1952
|
+
await safeIgnore(tokenizer, payload, {
|
|
1953
|
+
maximumLength: isUnknownFileSize ? maximumUntrustedSkipSizeInBytes : tokenizer.fileInfo.size,
|
|
1954
|
+
reason: 'ASF header payload',
|
|
1955
|
+
});
|
|
1956
|
+
|
|
1957
|
+
// Safeguard against malformed files: break if the position did not advance.
|
|
1958
|
+
if (tokenizer.position <= previousPosition) {
|
|
1959
|
+
isMalformedAsf = true;
|
|
1960
|
+
break;
|
|
1961
|
+
}
|
|
1962
|
+
}
|
|
1963
|
+
} catch (error) {
|
|
1964
|
+
if (
|
|
1965
|
+
error instanceof strtok3.EndOfStreamError
|
|
1966
|
+
|| error instanceof ParserHardLimitError
|
|
1967
|
+
) {
|
|
1968
|
+
if (hasUnknownFileSize(tokenizer)) {
|
|
1969
|
+
isMalformedAsf = true;
|
|
1970
|
+
}
|
|
1971
|
+
} else {
|
|
1972
|
+
throw error;
|
|
1422
1973
|
}
|
|
1974
|
+
}
|
|
1423
1975
|
|
|
1424
|
-
|
|
1976
|
+
if (isMalformedAsf) {
|
|
1977
|
+
return;
|
|
1425
1978
|
}
|
|
1426
1979
|
|
|
1427
1980
|
// Default to ASF generic extension
|
|
@@ -1740,9 +2293,10 @@ export class FileTypeParser {
|
|
|
1740
2293
|
// Detections with limited supporting data, resulting in a higher likelihood of false positives
|
|
1741
2294
|
detectImprecise = async tokenizer => {
|
|
1742
2295
|
this.buffer = new Uint8Array(reasonableDetectionSizeInBytes);
|
|
2296
|
+
const fileSize = getKnownFileSizeOrMaximum(tokenizer.fileInfo.size);
|
|
1743
2297
|
|
|
1744
2298
|
// Read initial sample size of 8 bytes
|
|
1745
|
-
await tokenizer.peekBuffer(this.buffer, {length: Math.min(8,
|
|
2299
|
+
await tokenizer.peekBuffer(this.buffer, {length: Math.min(8, fileSize), mayBeLess: true});
|
|
1746
2300
|
|
|
1747
2301
|
if (
|
|
1748
2302
|
this.check([0x0, 0x0, 0x1, 0xBA])
|
|
@@ -1776,7 +2330,7 @@ export class FileTypeParser {
|
|
|
1776
2330
|
}
|
|
1777
2331
|
|
|
1778
2332
|
// Adjust buffer to `mpegOffsetTolerance`
|
|
1779
|
-
await tokenizer.peekBuffer(this.buffer, {length: Math.min(2 + this.options.mpegOffsetTolerance,
|
|
2333
|
+
await tokenizer.peekBuffer(this.buffer, {length: Math.min(2 + this.options.mpegOffsetTolerance, fileSize), mayBeLess: true});
|
|
1780
2334
|
|
|
1781
2335
|
// Check MPEG 1 or 2 Layer 3 header, or 'layer 0' for ADTS (MPEG sync-word 0xFFE)
|
|
1782
2336
|
if (this.buffer.length >= (2 + this.options.mpegOffsetTolerance)) {
|
|
@@ -1791,7 +2345,7 @@ export class FileTypeParser {
|
|
|
1791
2345
|
|
|
1792
2346
|
async readTiffTag(bigEndian) {
|
|
1793
2347
|
const tagId = await this.tokenizer.readToken(bigEndian ? Token.UINT16_BE : Token.UINT16_LE);
|
|
1794
|
-
this.tokenizer.ignore(10);
|
|
2348
|
+
await this.tokenizer.ignore(10);
|
|
1795
2349
|
switch (tagId) {
|
|
1796
2350
|
case 50_341:
|
|
1797
2351
|
return {
|
|
@@ -1809,6 +2363,13 @@ export class FileTypeParser {
|
|
|
1809
2363
|
|
|
1810
2364
|
async readTiffIFD(bigEndian) {
|
|
1811
2365
|
const numberOfTags = await this.tokenizer.readToken(bigEndian ? Token.UINT16_BE : Token.UINT16_LE);
|
|
2366
|
+
if (
|
|
2367
|
+
hasUnknownFileSize(this.tokenizer)
|
|
2368
|
+
&& (2 + (numberOfTags * 12)) > maximumTiffIfdOffsetInBytes
|
|
2369
|
+
) {
|
|
2370
|
+
return;
|
|
2371
|
+
}
|
|
2372
|
+
|
|
1812
2373
|
for (let n = 0; n < numberOfTags; ++n) {
|
|
1813
2374
|
const fileType = await this.readTiffTag(bigEndian);
|
|
1814
2375
|
if (fileType) {
|
|
@@ -1818,6 +2379,11 @@ export class FileTypeParser {
|
|
|
1818
2379
|
}
|
|
1819
2380
|
|
|
1820
2381
|
async readTiffHeader(bigEndian) {
|
|
2382
|
+
const tiffFileType = {
|
|
2383
|
+
ext: 'tif',
|
|
2384
|
+
mime: 'image/tiff',
|
|
2385
|
+
};
|
|
2386
|
+
|
|
1821
2387
|
const version = (bigEndian ? Token.UINT16_BE : Token.UINT16_LE).get(this.buffer, 2);
|
|
1822
2388
|
const ifdOffset = (bigEndian ? Token.UINT32_BE : Token.UINT32_LE).get(this.buffer, 4);
|
|
1823
2389
|
|
|
@@ -1846,19 +2412,37 @@ export class FileTypeParser {
|
|
|
1846
2412
|
}
|
|
1847
2413
|
}
|
|
1848
2414
|
|
|
1849
|
-
|
|
1850
|
-
|
|
1851
|
-
|
|
1852
|
-
|
|
1853
|
-
|
|
1854
|
-
|
|
2415
|
+
const maximumTiffOffset = hasUnknownFileSize(this.tokenizer) ? maximumTiffIfdOffsetInBytes : this.tokenizer.fileInfo.size;
|
|
2416
|
+
|
|
2417
|
+
try {
|
|
2418
|
+
await safeIgnore(this.tokenizer, ifdOffset, {
|
|
2419
|
+
maximumLength: maximumTiffOffset,
|
|
2420
|
+
reason: 'TIFF IFD offset',
|
|
2421
|
+
});
|
|
2422
|
+
} catch (error) {
|
|
2423
|
+
if (error instanceof strtok3.EndOfStreamError) {
|
|
2424
|
+
return;
|
|
2425
|
+
}
|
|
2426
|
+
|
|
2427
|
+
throw error;
|
|
2428
|
+
}
|
|
2429
|
+
|
|
2430
|
+
let fileType;
|
|
2431
|
+
try {
|
|
2432
|
+
fileType = await this.readTiffIFD(bigEndian);
|
|
2433
|
+
} catch (error) {
|
|
2434
|
+
if (error instanceof strtok3.EndOfStreamError) {
|
|
2435
|
+
return;
|
|
2436
|
+
}
|
|
2437
|
+
|
|
2438
|
+
throw error;
|
|
2439
|
+
}
|
|
2440
|
+
|
|
2441
|
+
return fileType ?? tiffFileType;
|
|
1855
2442
|
}
|
|
1856
2443
|
|
|
1857
2444
|
if (version === 43) { // Big TIFF file header
|
|
1858
|
-
return
|
|
1859
|
-
ext: 'tif',
|
|
1860
|
-
mime: 'image/tiff',
|
|
1861
|
-
};
|
|
2445
|
+
return tiffFileType;
|
|
1862
2446
|
}
|
|
1863
2447
|
}
|
|
1864
2448
|
|
package/index.js
CHANGED
|
@@ -5,13 +5,37 @@ Node.js specific entry point.
|
|
|
5
5
|
import {ReadableStream as WebReadableStream} from 'node:stream/web';
|
|
6
6
|
import {pipeline, PassThrough, Readable} from 'node:stream';
|
|
7
7
|
import * as strtok3 from 'strtok3';
|
|
8
|
-
import {
|
|
8
|
+
import {
|
|
9
|
+
FileTypeParser as DefaultFileTypeParser,
|
|
10
|
+
reasonableDetectionSizeInBytes,
|
|
11
|
+
normalizeSampleSize,
|
|
12
|
+
} from './core.js';
|
|
13
|
+
|
|
14
|
+
function isTokenizerStreamBoundsError(error) {
|
|
15
|
+
if (
|
|
16
|
+
!(error instanceof RangeError)
|
|
17
|
+
|| error.message !== 'offset is out of bounds'
|
|
18
|
+
|| typeof error.stack !== 'string'
|
|
19
|
+
) {
|
|
20
|
+
return false;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
// Some malformed or non-byte Node.js streams can surface this tokenizer-internal range error.
|
|
24
|
+
// Note: This stack-trace check is fragile and may break if strtok3 restructures its internals.
|
|
25
|
+
return /strtok3[/\\]lib[/\\]stream[/\\]/.test(error.stack);
|
|
26
|
+
}
|
|
9
27
|
|
|
10
28
|
export class FileTypeParser extends DefaultFileTypeParser {
|
|
11
29
|
async fromStream(stream) {
|
|
12
30
|
const tokenizer = await (stream instanceof WebReadableStream ? strtok3.fromWebStream(stream, this.tokenizerOptions) : strtok3.fromStream(stream, this.tokenizerOptions));
|
|
13
31
|
try {
|
|
14
32
|
return await super.fromTokenizer(tokenizer);
|
|
33
|
+
} catch (error) {
|
|
34
|
+
if (isTokenizerStreamBoundsError(error)) {
|
|
35
|
+
return;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
throw error;
|
|
15
39
|
} finally {
|
|
16
40
|
await tokenizer.close();
|
|
17
41
|
}
|
|
@@ -31,7 +55,7 @@ export class FileTypeParser extends DefaultFileTypeParser {
|
|
|
31
55
|
return super.toDetectionStream(readableStream, options);
|
|
32
56
|
}
|
|
33
57
|
|
|
34
|
-
const
|
|
58
|
+
const sampleSize = normalizeSampleSize(options.sampleSize ?? reasonableDetectionSizeInBytes);
|
|
35
59
|
|
|
36
60
|
return new Promise((resolve, reject) => {
|
|
37
61
|
readableStream.on('error', reject);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "file-type",
|
|
3
|
-
"version": "21.
|
|
3
|
+
"version": "21.3.1",
|
|
4
4
|
"description": "Detect the file type of a file, stream, or data",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": "sindresorhus/file-type",
|
|
@@ -258,12 +258,12 @@
|
|
|
258
258
|
},
|
|
259
259
|
"devDependencies": {
|
|
260
260
|
"@tokenizer/token": "^0.3.0",
|
|
261
|
-
"@types/node": "^
|
|
262
|
-
"ava": "^
|
|
261
|
+
"@types/node": "^25.3.3",
|
|
262
|
+
"ava": "^7.0.0",
|
|
263
263
|
"commonmark": "^0.31.2",
|
|
264
264
|
"get-stream": "^9.0.1",
|
|
265
265
|
"noop-stream": "^1.0.0",
|
|
266
|
-
"tsd": "^0.
|
|
266
|
+
"tsd": "^0.33.0",
|
|
267
267
|
"xo": "^0.60.0"
|
|
268
268
|
},
|
|
269
269
|
"xo": {
|
package/readme.md
CHANGED
|
@@ -206,21 +206,6 @@ console.log(await fileTypeFromBlob(blob));
|
|
|
206
206
|
//=> {ext: 'txt', mime: 'text/plain'}
|
|
207
207
|
```
|
|
208
208
|
|
|
209
|
-
> [!WARNING]
|
|
210
|
-
> This method depends on [ReadableStreamBYOBReader](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStreamBYOBReader) which **requires Node.js ≥ 20**
|
|
211
|
-
> and [may not be available in all modern browsers](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStreamBYOBReader#browser_compatibility).
|
|
212
|
-
|
|
213
|
-
To work around this limitation, you can use an alternative approach to read and process the `Blob` without relying on streaming:
|
|
214
|
-
|
|
215
|
-
```js
|
|
216
|
-
import {fileTypeFromBuffer} from 'file-type';
|
|
217
|
-
|
|
218
|
-
async function readFromBlobWithoutStreaming(blob) {
|
|
219
|
-
const buffer = await blob.arrayBuffer();
|
|
220
|
-
return fileTypeFromBuffer(buffer);
|
|
221
|
-
}
|
|
222
|
-
```
|
|
223
|
-
|
|
224
209
|
#### blob
|
|
225
210
|
|
|
226
211
|
Type: [`Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob)
|
|
@@ -395,6 +380,8 @@ console.log(fileType);
|
|
|
395
380
|
### Available third-party file-type detectors
|
|
396
381
|
|
|
397
382
|
- [@file-type/av](https://github.com/Borewit/file-type-av): Improves detection of audio and video file formats, with accurate differentiation between the two
|
|
383
|
+
- [@file-type/cfbf](https://github.com/Borewit/file-type-cfbf): Detects Compound File Binary Format (CFBF) based formats, such as Office 97–2003 documents and `.msi`.
|
|
384
|
+
- [@file-type/pdf](https://github.com/Borewit/file-type-pdf): Detects PDF based file types, such as Adobe Illustrator
|
|
398
385
|
- [@file-type/xml](https://github.com/Borewit/file-type-xml): Detects common XML file types, such as GLM, KML, MusicXML, RSS, SVG, and XHTML
|
|
399
386
|
|
|
400
387
|
### Detector execution flow
|
|
@@ -642,14 +629,14 @@ abortController.abort(); // Abort file-type reading from the Blob stream.
|
|
|
642
629
|
|
|
643
630
|
*[Pull requests](.github/pull_request_template.md) are welcome for additional commonly used file types.*
|
|
644
631
|
|
|
645
|
-
The following file types will not be accepted
|
|
646
|
-
- [MS-CFB: Microsoft Compound File Binary File Format based formats](https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-cfb/53989ce4-7b05-4f8d-829b-d08d6148375b)
|
|
632
|
+
The following file types will not be accepted, but most of them are supported by [third-party detector](#available-third-party-file-type-detectors)
|
|
633
|
+
- [MS-CFB: Microsoft Compound File Binary File Format based formats](https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-cfb/53989ce4-7b05-4f8d-829b-d08d6148375b)
|
|
647
634
|
- `.doc` - Microsoft Word 97-2003 Document
|
|
648
635
|
- `.xls` - Microsoft Excel 97-2003 Document
|
|
649
636
|
- `.ppt` - Microsoft PowerPoint97-2003 Document
|
|
650
637
|
- `.msi` - Microsoft Windows Installer
|
|
651
638
|
- `.csv` - [Reason.](https://github.com/sindresorhus/file-type/issues/264#issuecomment-568439196)
|
|
652
|
-
- `.svg`
|
|
639
|
+
- `.svg`
|
|
653
640
|
|
|
654
641
|
#### tokenizer
|
|
655
642
|
|