file-type 21.3.0 → 21.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core.js +709 -145
- package/index.js +26 -2
- package/package.json +4 -4
- package/readme.md +4 -3
package/core.js
CHANGED
|
@@ -14,6 +14,169 @@ import {
|
|
|
14
14
|
import {extensions, mimeTypes} from './supported.js';
|
|
15
15
|
|
|
16
16
|
export const reasonableDetectionSizeInBytes = 4100; // A fair amount of file-types are detectable within this range.
|
|
17
|
+
// Keep defensive limits small enough to avoid accidental memory spikes from untrusted inputs.

// Upper clamp applied to the `mpegOffsetTolerance` option by `normalizeMpegOffsetTolerance`.
const maximumMpegOffsetTolerance = reasonableDetectionSizeInBytes - 2;
// 1 MiB. Default cap used by `decompressDeflateRawWithLimit` and `canReadZipEntryForDetection`.
const maximumZipEntrySizeInBytes = 1024 * 1024;
// 16 MiB. Default cap for `safeIgnore`; most of the format-specific limits below alias it.
const maximumUntrustedSkipSizeInBytes = 16 * 1024 * 1024;
// Cap on the number of inflated bytes handed to nested detection when probing gzip content.
const maximumNestedGzipDetectionSizeInBytes = maximumUntrustedSkipSizeInBytes;
// Largest ID3 header length accepted when the input size is unknown.
const maximumId3HeaderSizeInBytes = maximumUntrustedSkipSizeInBytes;
// Largest accepted length of the EBML `DocType` element.
const maximumEbmlDocumentTypeSizeInBytes = 64;
// Largest EBML element payload that is skipped when the input size is unknown.
const maximumEbmlElementPayloadSizeInBytes = maximumUntrustedSkipSizeInBytes;
// Maximum number of EBML elements visited while searching for `DocType`.
const maximumEbmlElementCount = 256;
// Per-chunk skip cap and overall scan budget for PNG inputs of unknown size.
const maximumPngChunkSizeInBytes = maximumUntrustedSkipSizeInBytes;
// NOTE(review): presumably bounds the TIFF IFD offset; its use is not visible in this part of the file - confirm.
const maximumTiffIfdOffsetInBytes = maximumUntrustedSkipSizeInBytes;
// Error messages that `isRecoverableZipError` accepts on an exact match.
const recoverableZipErrorMessages = new Set([
	'Unexpected signature',
	'Encrypted ZIP',
	'Expected Central-File-Header signature',
]);
// Error message prefixes that `isRecoverableZipError` accepts; both correspond to errors thrown by the inflate override below.
const recoverableZipErrorMessagePrefixes = [
	'Unsupported ZIP compression method:',
	'ZIP entry decompressed data exceeds ',
];
// Error codes that `isRecoverableZipError` accepts, but only when the error is a `TypeError`.
const recoverableZipErrorCodes = new Set([
	'Z_BUF_ERROR',
	'Z_DATA_ERROR',
	'ERR_INVALID_STATE',
]);
|
|
42
|
+
|
|
43
|
+
// Thrown by `getSafeBound` when a size is non-finite, negative, or larger than its allowed maximum.
class ParserHardLimitError extends Error {}
|
|
44
|
+
|
|
45
|
+
/**
 * Validate a size before it is used for a read or a skip.
 *
 * @param {number} value - The candidate size.
 * @param {number} maximum - The largest size that is accepted.
 * @param {string} reason - Label placed at the start of the error message.
 * @returns {number} `value`, unchanged, when it is finite, not negative and not above `maximum`.
 * @throws {ParserHardLimitError} When `value` fails any of those checks.
 */
function getSafeBound(value, maximum, reason) {
	// Written as negated comparisons so that an unusual `maximum` (for example `NaN`) is treated exactly like before.
	const isAcceptable = Number.isFinite(value) && !(value < 0) && !(value > maximum);
	if (isAcceptable) {
		return value;
	}

	throw new ParserHardLimitError(`${reason} has invalid size ${value} (maximum ${maximum} bytes)`);
}
|
|
56
|
+
|
|
57
|
+
/**
 * Skip `length` bytes on the tokenizer, but only after the length passed `getSafeBound`.
 *
 * @param {object} tokenizer - Object exposing an async `ignore(length)` method.
 * @param {number} length - Number of bytes to skip.
 * @param {object} [options]
 * @param {number} [options.maximumLength] - Largest accepted `length`; defaults to `maximumUntrustedSkipSizeInBytes`.
 * @param {string} [options.reason] - Label used in the error message; defaults to `'skip'`.
 * @returns {Promise<void>}
 * @throws {ParserHardLimitError} When `length` is rejected by `getSafeBound`.
 */
async function safeIgnore(tokenizer, length, {maximumLength = maximumUntrustedSkipSizeInBytes, reason = 'skip'} = {}) {
	// Validation happens first, so the tokenizer is never touched with a rejected length.
	await tokenizer.ignore(getSafeBound(length, maximumLength, reason));
}
|
|
61
|
+
|
|
62
|
+
/**
 * Read into `buffer` through the tokenizer after validating the requested length.
 *
 * @param {object} tokenizer - Object exposing a `readBuffer(buffer, options)` method.
 * @param {Uint8Array} buffer - Destination buffer.
 * @param {object} [options] - Options forwarded to `tokenizer.readBuffer`; `length` falls back to `buffer.length`.
 * @param {object} [limits]
 * @param {number} [limits.maximumLength] - Largest accepted length; defaults to `buffer.length`.
 * @param {string} [limits.reason] - Label used in the error message; defaults to `'read'`.
 * @returns {Promise<*>} Whatever `tokenizer.readBuffer` resolves with.
 * @throws {ParserHardLimitError} When the requested length is rejected by `getSafeBound`.
 */
async function safeReadBuffer(tokenizer, buffer, options, {maximumLength = buffer.length, reason = 'read'} = {}) {
	const requestedLength = options?.length ?? buffer.length;
	const validatedLength = getSafeBound(requestedLength, maximumLength, reason);
	// The validated length is listed last so it always wins over `options.length`.
	const readOptions = {...options, length: validatedLength};
	return tokenizer.readBuffer(buffer, readOptions);
}
|
|
70
|
+
|
|
71
|
+
async function decompressDeflateRawWithLimit(data, {maximumLength = maximumZipEntrySizeInBytes} = {}) {
|
|
72
|
+
const input = new ReadableStream({
|
|
73
|
+
start(controller) {
|
|
74
|
+
controller.enqueue(data);
|
|
75
|
+
controller.close();
|
|
76
|
+
},
|
|
77
|
+
});
|
|
78
|
+
const output = input.pipeThrough(new DecompressionStream('deflate-raw'));
|
|
79
|
+
const reader = output.getReader();
|
|
80
|
+
const chunks = [];
|
|
81
|
+
let totalLength = 0;
|
|
82
|
+
|
|
83
|
+
try {
|
|
84
|
+
for (;;) {
|
|
85
|
+
const {done, value} = await reader.read();
|
|
86
|
+
if (done) {
|
|
87
|
+
break;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
totalLength += value.length;
|
|
91
|
+
if (totalLength > maximumLength) {
|
|
92
|
+
await reader.cancel();
|
|
93
|
+
throw new Error(`ZIP entry decompressed data exceeds ${maximumLength} bytes`);
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
chunks.push(value);
|
|
97
|
+
}
|
|
98
|
+
} finally {
|
|
99
|
+
reader.releaseLock();
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
const uncompressedData = new Uint8Array(totalLength);
|
|
103
|
+
let offset = 0;
|
|
104
|
+
for (const chunk of chunks) {
|
|
105
|
+
uncompressedData.set(chunk, offset);
|
|
106
|
+
offset += chunk.length;
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
return uncompressedData;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
// @tokenizer/inflate does not expose a configuration hook for limiting decompressed output,
// so the default inflate is replaced with one that enforces a size limit.
// Method 0 hands the entry data to the callback unchanged, method 8 is inflated as raw deflate,
// and every other method is rejected.
ZipHandler.prototype.inflate = async function (zipHeader, fileData, callback) {
	switch (zipHeader.compressedMethod) {
		case 0: {
			return callback(fileData);
		}

		case 8: {
			// Only inputs of unknown size get the small cap; known-size inputs are effectively unlimited here.
			const maximumLength = hasUnknownFileSize(this.tokenizer) ? maximumZipEntrySizeInBytes : Number.MAX_SAFE_INTEGER;
			const uncompressedData = await decompressDeflateRawWithLimit(fileData, {maximumLength});
			return callback(uncompressedData);
		}

		default: {
			throw new Error(`Unsupported ZIP compression method: ${zipHeader.compressedMethod}`);
		}
	}
};
|
|
126
|
+
|
|
127
|
+
/**
 * Wrap a readable stream so that at most `maximumBytes` bytes are emitted.
 *
 * The wrapper closes once the limit is reached (truncating the last chunk if needed) or once the
 * source ends, and cancels the source when it stops early or when the wrapper itself is canceled.
 *
 * @param {ReadableStream} stream - Source stream; a reader is acquired on it immediately.
 * @param {number} maximumBytes - Maximum number of bytes to emit.
 * @returns {ReadableStream} The limited stream.
 */
function createByteLimitedReadableStream(stream, maximumBytes) {
	const sourceReader = stream.getReader();
	let bytesEmitted = 0;
	// True once the source has ended or has been canceled; either way it must not be canceled again.
	let isSourceSettled = false;

	const cancelSource = async reason => {
		if (isSourceSettled) {
			return;
		}

		isSourceSettled = true;
		await sourceReader.cancel(reason);
	};

	const closeAndCancelSource = async controller => {
		controller.close();
		await cancelSource();
	};

	return new ReadableStream({
		async pull(controller) {
			if (bytesEmitted >= maximumBytes) {
				await closeAndCancelSource(controller);
				return;
			}

			const {done, value} = await sourceReader.read();
			if (done || !value) {
				isSourceSettled = true;
				controller.close();
				return;
			}

			const remainingBytes = maximumBytes - bytesEmitted;
			if (value.length > remainingBytes) {
				// Emit only the part of the chunk that still fits, then stop.
				controller.enqueue(value.subarray(0, remainingBytes));
				bytesEmitted += remainingBytes;
				await closeAndCancelSource(controller);
				return;
			}

			controller.enqueue(value);
			bytesEmitted += value.length;
		},
		async cancel(reason) {
			await cancelSource(reason);
		},
	});
}
|
|
17
180
|
|
|
18
181
|
export async function fileTypeFromStream(stream, options) {
|
|
19
182
|
return new FileTypeParser(options).fromStream(stream);
|
|
@@ -180,6 +343,189 @@ function _check(buffer, headers, options) {
|
|
|
180
343
|
return true;
|
|
181
344
|
}
|
|
182
345
|
|
|
346
|
+
/**
 * Turn a caller-supplied sample size into a usable whole number of bytes.
 *
 * Odd input is accepted, but a valid caller-requested probe depth is preserved.
 *
 * @param {*} sampleSize - The requested sample size.
 * @returns {number} `reasonableDetectionSizeInBytes` when the input is not a finite number; otherwise the input truncated to an integer and raised to at least 1.
 */
export function normalizeSampleSize(sampleSize) {
	return Number.isFinite(sampleSize)
		? Math.max(1, Math.trunc(sampleSize))
		: reasonableDetectionSizeInBytes;
}
|
|
354
|
+
|
|
355
|
+
/**
 * Sanitize the `mpegOffsetTolerance` option.
 *
 * This value controls scan depth and therefore worst-case CPU work, so it is clamped.
 *
 * @param {*} mpegOffsetTolerance - The caller-supplied tolerance.
 * @returns {number} 0 when the input is not a finite number; otherwise the truncated input clamped to `0..maximumMpegOffsetTolerance`.
 */
function normalizeMpegOffsetTolerance(mpegOffsetTolerance) {
	if (Number.isFinite(mpegOffsetTolerance)) {
		const wholeBytes = Math.trunc(mpegOffsetTolerance);
		const cappedBytes = Math.min(maximumMpegOffsetTolerance, wholeBytes);
		return Math.max(0, cappedBytes);
	}

	return 0;
}
|
|
363
|
+
|
|
364
|
+
/**
 * Resolve a file size that may be missing.
 *
 * @param {*} fileSize - The reported file size.
 * @returns {number} `Number.MAX_SAFE_INTEGER` when the input is not a finite number; otherwise the input raised to at least 0.
 */
function getKnownFileSizeOrMaximum(fileSize) {
	return Number.isFinite(fileSize)
		? Math.max(0, fileSize)
		: Number.MAX_SAFE_INTEGER;
}
|
|
371
|
+
|
|
372
|
+
/**
 * Tell whether the tokenizer's input size is unknown.
 *
 * @param {object} tokenizer - Object with a `fileInfo.size` property.
 * @returns {boolean} True when `fileInfo.size` is not a finite number or equals `Number.MAX_SAFE_INTEGER`.
 */
function hasUnknownFileSize(tokenizer) {
	const {size} = tokenizer.fileInfo;
	if (size === Number.MAX_SAFE_INTEGER) {
		return true;
	}

	return !Number.isFinite(size);
}
|
|
379
|
+
|
|
380
|
+
/**
 * Check whether a scan over an input of unknown size has used up its byte budget.
 *
 * @param {object} tokenizer - Object with `fileInfo.size` and `position`.
 * @param {number} startOffset - Tokenizer position at which the scan began.
 * @param {number} maximumBytes - Number of bytes the scan may advance.
 * @returns {boolean} True only when the size is unknown and the tokenizer moved more than `maximumBytes` past `startOffset`.
 */
function hasExceededUnknownSizeScanBudget(tokenizer, startOffset, maximumBytes) {
	// Inputs with a known size are never limited by this budget.
	if (!hasUnknownFileSize(tokenizer)) {
		return false;
	}

	const scannedBytes = tokenizer.position - startOffset;
	return scannedBytes > maximumBytes;
}
|
|
386
|
+
|
|
387
|
+
/**
 * Decide whether an error raised while reading a ZIP archive may be tolerated.
 *
 * An error is recoverable when it is an end-of-stream error, a `ParserHardLimitError`, an `Error`
 * whose message is listed in `recoverableZipErrorMessages`, a `TypeError` whose `code` is listed in
 * `recoverableZipErrorCodes`, or an `Error` whose message starts with one of
 * `recoverableZipErrorMessagePrefixes`.
 *
 * @param {*} error - The caught value.
 * @returns {boolean} True when the error matches one of the cases above.
 */
function isRecoverableZipError(error) {
	const isExpectedParserError = error instanceof strtok3.EndOfStreamError
		|| error instanceof ParserHardLimitError;
	if (isExpectedParserError) {
		return true;
	}

	// Anything that is not an `Error` carries no message or code to inspect.
	if (!(error instanceof Error)) {
		return false;
	}

	const {message} = error;
	if (recoverableZipErrorMessages.has(message)) {
		return true;
	}

	const hasRecoverableCode = error instanceof TypeError
		&& recoverableZipErrorCodes.has(error.code);
	if (hasRecoverableCode) {
		return true;
	}

	return recoverableZipErrorMessagePrefixes.some(prefix => message.startsWith(prefix));
}
|
|
419
|
+
|
|
420
|
+
/**
 * Check whether a ZIP entry is small enough to be read for detection.
 *
 * @param {object} zipHeader - Entry header with `compressedSize` and `uncompressedSize`.
 * @param {number} [maximumSize] - Largest accepted size; defaults to `maximumZipEntrySizeInBytes`.
 * @returns {boolean} True when both sizes are finite numbers, not negative and not above `maximumSize`.
 */
function canReadZipEntryForDetection(zipHeader, maximumSize = maximumZipEntrySizeInBytes) {
	const isAcceptableSize = size => Number.isFinite(size) && size >= 0 && !(size > maximumSize);
	const declaredSizes = [zipHeader.compressedSize, zipHeader.uncompressedSize];
	return declaredSizes.every(size => isAcceptableSize(size));
}
|
|
434
|
+
|
|
435
|
+
/**
 * Create the bookkeeping object used while walking the entries of a ZIP archive that may be an
 * Open XML package.
 *
 * @returns {object} A fresh state object with every flag set to `false`.
 */
function createOpenXmlZipDetectionState() {
	const flagNames = [
		// Progress of the `[Content_Types].xml` entry.
		'hasContentTypesEntry',
		'hasParsedContentTypesEntry',
		'isParsingContentTypes',
		'hasUnparseableContentTypes',
		// Entry names seen so far (`word/`, `ppt/`, `xl/`, `3D/….model`).
		'hasWordDirectory',
		'hasPresentationDirectory',
		'hasSpreadsheetDirectory',
		'hasThreeDimensionalModelEntry',
	];

	return Object.fromEntries(flagNames.map(flagName => [flagName, false]));
}
|
|
447
|
+
|
|
448
|
+
/**
 * Record which Open XML related paths have been seen, based on one ZIP entry name.
 *
 * Flags are only ever switched on; a non-matching name leaves the state untouched.
 *
 * @param {object} openXmlState - State object to update in place.
 * @param {string} filename - Name of the ZIP entry.
 * @returns {void}
 */
function updateOpenXmlZipDetectionStateFromFilename(openXmlState, filename) {
	const directoryFlags = [
		['word/', 'hasWordDirectory'],
		['ppt/', 'hasPresentationDirectory'],
		['xl/', 'hasSpreadsheetDirectory'],
	];

	for (const [directoryPrefix, flagName] of directoryFlags) {
		if (filename.startsWith(directoryPrefix)) {
			openXmlState[flagName] = true;
		}
	}

	// Both the `3D/` prefix and the `.model` suffix are required for a 3D model entry.
	const isThreeDimensionalModel = filename.startsWith('3D/') && filename.endsWith('.model');
	if (isThreeDimensionalModel) {
		openXmlState.hasThreeDimensionalModelEntry = true;
	}
}
|
|
468
|
+
|
|
469
|
+
/**
 * Guess an Open XML file type from the directory names seen in the archive.
 *
 * The guess is only made when `[Content_Types].xml` was present but its handler was skipped
 * (not invoked, not currently running, and not already resolved). This avoids guessing from
 * directory names when content-type parsing already gave a definitive answer or failed.
 *
 * @param {object} openXmlState - State produced by `createOpenXmlZipDetectionState`.
 * @returns {{ext: string, mime: string} | undefined} A new file type object, or `undefined` when no guess applies.
 */
function getOpenXmlFileTypeFromZipEntries(openXmlState) {
	const canUseDirectoryHeuristic = openXmlState.hasContentTypesEntry
		&& !openXmlState.hasUnparseableContentTypes
		&& !openXmlState.isParsingContentTypes
		&& !openXmlState.hasParsedContentTypesEntry;
	if (!canUseDirectoryHeuristic) {
		return;
	}

	// Ordered by priority: the first flag that is set decides the result.
	const candidates = [
		['hasWordDirectory', 'docx', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
		['hasPresentationDirectory', 'pptx', 'application/vnd.openxmlformats-officedocument.presentationml.presentation'],
		['hasSpreadsheetDirectory', 'xlsx', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'],
		['hasThreeDimensionalModelEntry', '3mf', 'model/3mf'],
	];

	for (const [flagName, ext, mime] of candidates) {
		if (openXmlState[flagName]) {
			return {ext, mime};
		}
	}
}
|
|
510
|
+
|
|
511
|
+
/**
 * Extract the main document MIME type from the text of `[Content_Types].xml`.
 *
 * Only the `ContentType="...main+xml"` value is needed, so a small string scan is used instead of
 * full XML parsing. When no such value exists, the 3D manufacturing model content type is looked
 * for instead.
 *
 * @param {string} xmlContent - Decoded content of `[Content_Types].xml`.
 * @returns {string | undefined} The text between the preceding quote and `.main+xml"`, the 3D model MIME type, or `undefined`.
 */
function getOpenXmlMimeTypeFromContentTypesXml(xmlContent) {
	const suffixPosition = xmlContent.indexOf('.main+xml"');
	if (suffixPosition !== -1) {
		// Search backwards from just before the suffix for the quote that opens the attribute value.
		// If no quote is found, `lastIndexOf` returns -1 and the whole prefix is returned on purpose.
		const openingQuotePosition = xmlContent.lastIndexOf('"', suffixPosition - 1);
		return xmlContent.slice(openingQuotePosition + 1, suffixPosition);
	}

	const threeDimensionalModelMimeType = 'application/vnd.ms-package.3dmanufacturing-3dmodel+xml';
	return xmlContent.includes(`ContentType="${threeDimensionalModelMimeType}"`)
		? threeDimensionalModelMimeType
		: undefined;
}
|
|
528
|
+
|
|
183
529
|
/**
 * Detect the file type of the data behind a tokenizer.
 *
 * @param {object} tokenizer - The tokenizer to read from.
 * @param {object} [options] - Options passed to the `FileTypeParser` constructor.
 * @returns {Promise<*>} Whatever `FileTypeParser#fromTokenizer` resolves with.
 */
export async function fileTypeFromTokenizer(tokenizer, options) {
	const parser = new FileTypeParser(options);
	return parser.fromTokenizer(tokenizer);
}
|
|
@@ -190,25 +536,39 @@ export async function fileTypeStream(webStream, options) {
|
|
|
190
536
|
|
|
191
537
|
export class FileTypeParser {
|
|
192
538
|
constructor(options) {
|
|
539
|
+
const normalizedMpegOffsetTolerance = normalizeMpegOffsetTolerance(options?.mpegOffsetTolerance);
|
|
193
540
|
this.options = {
|
|
194
|
-
mpegOffsetTolerance: 0,
|
|
195
541
|
...options,
|
|
542
|
+
mpegOffsetTolerance: normalizedMpegOffsetTolerance,
|
|
196
543
|
};
|
|
197
544
|
|
|
198
|
-
this.detectors = [...(options
|
|
545
|
+
this.detectors = [...(this.options.customDetectors ?? []),
|
|
199
546
|
{id: 'core', detect: this.detectConfident},
|
|
200
547
|
{id: 'core.imprecise', detect: this.detectImprecise}];
|
|
201
548
|
this.tokenizerOptions = {
|
|
202
|
-
abortSignal: options
|
|
549
|
+
abortSignal: this.options.signal,
|
|
203
550
|
};
|
|
204
551
|
}
|
|
205
552
|
|
|
206
553
|
async fromTokenizer(tokenizer) {
|
|
207
554
|
const initialPosition = tokenizer.position;
|
|
208
|
-
|
|
209
555
|
// Iterate through all file-type detectors
|
|
210
556
|
for (const detector of this.detectors) {
|
|
211
|
-
|
|
557
|
+
let fileType;
|
|
558
|
+
try {
|
|
559
|
+
fileType = await detector.detect(tokenizer);
|
|
560
|
+
} catch (error) {
|
|
561
|
+
if (error instanceof strtok3.EndOfStreamError) {
|
|
562
|
+
return;
|
|
563
|
+
}
|
|
564
|
+
|
|
565
|
+
if (error instanceof ParserHardLimitError) {
|
|
566
|
+
return;
|
|
567
|
+
}
|
|
568
|
+
|
|
569
|
+
throw error;
|
|
570
|
+
}
|
|
571
|
+
|
|
212
572
|
if (fileType) {
|
|
213
573
|
return fileType;
|
|
214
574
|
}
|
|
@@ -252,7 +612,7 @@ export class FileTypeParser {
|
|
|
252
612
|
}
|
|
253
613
|
|
|
254
614
|
async toDetectionStream(stream, options) {
|
|
255
|
-
const
|
|
615
|
+
const sampleSize = normalizeSampleSize(options?.sampleSize ?? reasonableDetectionSizeInBytes);
|
|
256
616
|
let detectedFileType;
|
|
257
617
|
let firstChunk;
|
|
258
618
|
|
|
@@ -394,7 +754,7 @@ export class FileTypeParser {
|
|
|
394
754
|
|
|
395
755
|
if (this.check([0xEF, 0xBB, 0xBF])) { // UTF-8-BOM
|
|
396
756
|
// Strip off UTF-8-BOM
|
|
397
|
-
this.tokenizer.ignore(3);
|
|
757
|
+
await this.tokenizer.ignore(3);
|
|
398
758
|
return this.detectConfident(tokenizer);
|
|
399
759
|
}
|
|
400
760
|
|
|
@@ -414,27 +774,24 @@ export class FileTypeParser {
|
|
|
414
774
|
|
|
415
775
|
if (this.check([0x1F, 0x8B, 0x8])) {
|
|
416
776
|
const gzipHandler = new GzipHandler(tokenizer);
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
let shouldCancelStream = true;
|
|
777
|
+
const limitedInflatedStream = createByteLimitedReadableStream(gzipHandler.inflate(), maximumNestedGzipDetectionSizeInBytes);
|
|
778
|
+
let compressedFileType;
|
|
420
779
|
try {
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
shouldCancelStream = false;
|
|
780
|
+
compressedFileType = await this.fromStream(limitedInflatedStream);
|
|
781
|
+
} catch (error) {
|
|
782
|
+
if (error?.name === 'AbortError') {
|
|
783
|
+
throw error;
|
|
426
784
|
}
|
|
427
785
|
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
}
|
|
786
|
+
// Decompression or inner-detection failures are expected for non-tar gzip files.
|
|
787
|
+
}
|
|
788
|
+
|
|
789
|
+
// We only need enough inflated bytes to confidently decide whether this is tar.gz.
|
|
790
|
+
if (compressedFileType?.ext === 'tar') {
|
|
791
|
+
return {
|
|
792
|
+
ext: 'tar.gz',
|
|
793
|
+
mime: 'application/gzip',
|
|
794
|
+
};
|
|
438
795
|
}
|
|
439
796
|
|
|
440
797
|
return {
|
|
@@ -451,17 +808,48 @@ export class FileTypeParser {
|
|
|
451
808
|
}
|
|
452
809
|
|
|
453
810
|
if (this.checkString('ID3')) {
|
|
454
|
-
await tokenizer
|
|
811
|
+
await safeIgnore(tokenizer, 6, {
|
|
812
|
+
maximumLength: 6,
|
|
813
|
+
reason: 'ID3 header prefix',
|
|
814
|
+
}); // Skip ID3 header until the header size
|
|
455
815
|
const id3HeaderLength = await tokenizer.readToken(uint32SyncSafeToken);
|
|
816
|
+
const isUnknownFileSize = hasUnknownFileSize(tokenizer);
|
|
817
|
+
if (
|
|
818
|
+
!Number.isFinite(id3HeaderLength)
|
|
819
|
+
|| id3HeaderLength < 0
|
|
820
|
+
// Keep ID3 probing bounded for unknown-size streams to avoid attacker-controlled large skips.
|
|
821
|
+
|| (
|
|
822
|
+
isUnknownFileSize
|
|
823
|
+
&& id3HeaderLength > maximumId3HeaderSizeInBytes
|
|
824
|
+
)
|
|
825
|
+
) {
|
|
826
|
+
return;
|
|
827
|
+
}
|
|
828
|
+
|
|
456
829
|
if (tokenizer.position + id3HeaderLength > tokenizer.fileInfo.size) {
|
|
457
|
-
|
|
830
|
+
if (isUnknownFileSize) {
|
|
831
|
+
return;
|
|
832
|
+
}
|
|
833
|
+
|
|
458
834
|
return {
|
|
459
835
|
ext: 'mp3',
|
|
460
836
|
mime: 'audio/mpeg',
|
|
461
837
|
};
|
|
462
838
|
}
|
|
463
839
|
|
|
464
|
-
|
|
840
|
+
try {
|
|
841
|
+
await safeIgnore(tokenizer, id3HeaderLength, {
|
|
842
|
+
maximumLength: isUnknownFileSize ? maximumId3HeaderSizeInBytes : tokenizer.fileInfo.size,
|
|
843
|
+
reason: 'ID3 payload',
|
|
844
|
+
});
|
|
845
|
+
} catch (error) {
|
|
846
|
+
if (error instanceof strtok3.EndOfStreamError) {
|
|
847
|
+
return;
|
|
848
|
+
}
|
|
849
|
+
|
|
850
|
+
throw error;
|
|
851
|
+
}
|
|
852
|
+
|
|
465
853
|
return this.fromTokenizer(tokenizer); // Skip ID3 header, recursion
|
|
466
854
|
}
|
|
467
855
|
|
|
@@ -547,72 +935,105 @@ export class FileTypeParser {
|
|
|
547
935
|
// Need to be before the `zip` check
|
|
548
936
|
if (this.check([0x50, 0x4B, 0x3, 0x4])) { // Local file header signature
|
|
549
937
|
let fileType;
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
mime: 'application/java-archive',
|
|
564
|
-
};
|
|
565
|
-
return {
|
|
566
|
-
stop: true,
|
|
567
|
-
};
|
|
568
|
-
case 'mimetype':
|
|
938
|
+
const openXmlState = createOpenXmlZipDetectionState();
|
|
939
|
+
|
|
940
|
+
try {
|
|
941
|
+
await new ZipHandler(tokenizer).unzip(zipHeader => {
|
|
942
|
+
updateOpenXmlZipDetectionStateFromFilename(openXmlState, zipHeader.filename);
|
|
943
|
+
|
|
944
|
+
const isOpenXmlContentTypesEntry = zipHeader.filename === '[Content_Types].xml';
|
|
945
|
+
const openXmlFileTypeFromEntries = getOpenXmlFileTypeFromZipEntries(openXmlState);
|
|
946
|
+
if (
|
|
947
|
+
!isOpenXmlContentTypesEntry
|
|
948
|
+
&& openXmlFileTypeFromEntries
|
|
949
|
+
) {
|
|
950
|
+
fileType = openXmlFileTypeFromEntries;
|
|
569
951
|
return {
|
|
570
|
-
async handler(fileData) {
|
|
571
|
-
// Use TextDecoder to decode the UTF-8 encoded data
|
|
572
|
-
const mimeType = new TextDecoder('utf-8').decode(fileData).trim();
|
|
573
|
-
fileType = getFileTypeFromMimeType(mimeType);
|
|
574
|
-
},
|
|
575
952
|
stop: true,
|
|
576
953
|
};
|
|
954
|
+
}
|
|
577
955
|
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
956
|
+
switch (zipHeader.filename) {
|
|
957
|
+
case 'META-INF/mozilla.rsa':
|
|
958
|
+
fileType = {
|
|
959
|
+
ext: 'xpi',
|
|
960
|
+
mime: 'application/x-xpinstall',
|
|
961
|
+
};
|
|
962
|
+
return {
|
|
963
|
+
stop: true,
|
|
964
|
+
};
|
|
965
|
+
case 'META-INF/MANIFEST.MF':
|
|
966
|
+
fileType = {
|
|
967
|
+
ext: 'jar',
|
|
968
|
+
mime: 'application/java-archive',
|
|
969
|
+
};
|
|
970
|
+
return {
|
|
971
|
+
stop: true,
|
|
972
|
+
};
|
|
973
|
+
case 'mimetype':
|
|
974
|
+
if (!canReadZipEntryForDetection(zipHeader)) {
|
|
975
|
+
return {};
|
|
976
|
+
}
|
|
977
|
+
|
|
978
|
+
return {
|
|
979
|
+
async handler(fileData) {
|
|
980
|
+
// Use TextDecoder to decode the UTF-8 encoded data
|
|
981
|
+
const mimeType = new TextDecoder('utf-8').decode(fileData).trim();
|
|
982
|
+
fileType = getFileTypeFromMimeType(mimeType);
|
|
983
|
+
},
|
|
984
|
+
stop: true,
|
|
985
|
+
};
|
|
986
|
+
|
|
987
|
+
case '[Content_Types].xml': {
|
|
988
|
+
openXmlState.hasContentTypesEntry = true;
|
|
989
|
+
|
|
990
|
+
const maximumContentTypesEntrySize = hasUnknownFileSize(tokenizer) ? maximumZipEntrySizeInBytes : Number.MAX_SAFE_INTEGER;
|
|
991
|
+
if (!canReadZipEntryForDetection(zipHeader, maximumContentTypesEntrySize)) {
|
|
992
|
+
openXmlState.hasUnparseableContentTypes = true;
|
|
993
|
+
return {};
|
|
994
|
+
}
|
|
995
|
+
|
|
996
|
+
openXmlState.isParsingContentTypes = true;
|
|
997
|
+
return {
|
|
998
|
+
async handler(fileData) {
|
|
999
|
+
// Use TextDecoder to decode the UTF-8 encoded data
|
|
1000
|
+
const xmlContent = new TextDecoder('utf-8').decode(fileData);
|
|
1001
|
+
const mimeType = getOpenXmlMimeTypeFromContentTypesXml(xmlContent);
|
|
1002
|
+
if (mimeType) {
|
|
587
1003
|
fileType = getFileTypeFromMimeType(mimeType);
|
|
588
1004
|
}
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
}
|
|
595
|
-
},
|
|
596
|
-
stop: true,
|
|
597
|
-
};
|
|
598
|
-
default:
|
|
599
|
-
if (/classes\d*\.dex/.test(zipHeader.filename)) {
|
|
600
|
-
fileType = {
|
|
601
|
-
ext: 'apk',
|
|
602
|
-
mime: 'application/vnd.android.package-archive',
|
|
1005
|
+
|
|
1006
|
+
openXmlState.hasParsedContentTypesEntry = true;
|
|
1007
|
+
openXmlState.isParsingContentTypes = false;
|
|
1008
|
+
},
|
|
1009
|
+
stop: true,
|
|
603
1010
|
};
|
|
604
|
-
return {stop: true};
|
|
605
1011
|
}
|
|
606
1012
|
|
|
607
|
-
|
|
1013
|
+
default:
|
|
1014
|
+
if (/classes\d*\.dex/.test(zipHeader.filename)) {
|
|
1015
|
+
fileType = {
|
|
1016
|
+
ext: 'apk',
|
|
1017
|
+
mime: 'application/vnd.android.package-archive',
|
|
1018
|
+
};
|
|
1019
|
+
return {stop: true};
|
|
1020
|
+
}
|
|
1021
|
+
|
|
1022
|
+
return {};
|
|
1023
|
+
}
|
|
1024
|
+
});
|
|
1025
|
+
} catch (error) {
|
|
1026
|
+
if (!isRecoverableZipError(error)) {
|
|
1027
|
+
throw error;
|
|
608
1028
|
}
|
|
609
|
-
|
|
610
|
-
if (
|
|
611
|
-
|
|
1029
|
+
|
|
1030
|
+
if (openXmlState.isParsingContentTypes) {
|
|
1031
|
+
openXmlState.isParsingContentTypes = false;
|
|
1032
|
+
openXmlState.hasUnparseableContentTypes = true;
|
|
612
1033
|
}
|
|
613
|
-
}
|
|
1034
|
+
}
|
|
614
1035
|
|
|
615
|
-
return fileType ?? {
|
|
1036
|
+
return fileType ?? getOpenXmlFileTypeFromZipEntries(openXmlState) ?? {
|
|
616
1037
|
ext: 'zip',
|
|
617
1038
|
mime: 'application/zip',
|
|
618
1039
|
};
|
|
@@ -817,7 +1238,10 @@ export class FileTypeParser {
|
|
|
817
1238
|
}
|
|
818
1239
|
|
|
819
1240
|
const id = new Uint8Array(ic + 1);
|
|
820
|
-
await tokenizer
|
|
1241
|
+
await safeReadBuffer(tokenizer, id, undefined, {
|
|
1242
|
+
maximumLength: id.length,
|
|
1243
|
+
reason: 'EBML field',
|
|
1244
|
+
});
|
|
821
1245
|
return id;
|
|
822
1246
|
}
|
|
823
1247
|
|
|
@@ -838,20 +1262,47 @@ export class FileTypeParser {
|
|
|
838
1262
|
}
|
|
839
1263
|
|
|
840
1264
|
async function readChildren(children) {
|
|
1265
|
+
let ebmlElementCount = 0;
|
|
841
1266
|
while (children > 0) {
|
|
1267
|
+
ebmlElementCount++;
|
|
1268
|
+
if (ebmlElementCount > maximumEbmlElementCount) {
|
|
1269
|
+
return;
|
|
1270
|
+
}
|
|
1271
|
+
|
|
842
1272
|
const element = await readElement();
|
|
1273
|
+
|
|
843
1274
|
if (element.id === 0x42_82) {
|
|
844
|
-
|
|
1275
|
+
// `DocType` is a short string ("webm", "matroska", ...), reject implausible lengths to avoid large allocations.
|
|
1276
|
+
if (element.len > maximumEbmlDocumentTypeSizeInBytes) {
|
|
1277
|
+
return;
|
|
1278
|
+
}
|
|
1279
|
+
|
|
1280
|
+
const documentTypeLength = getSafeBound(element.len, maximumEbmlDocumentTypeSizeInBytes, 'EBML DocType');
|
|
1281
|
+
const rawValue = await tokenizer.readToken(new Token.StringType(documentTypeLength));
|
|
845
1282
|
return rawValue.replaceAll(/\00.*$/g, ''); // Return DocType
|
|
846
1283
|
}
|
|
847
1284
|
|
|
848
|
-
|
|
1285
|
+
if (
|
|
1286
|
+
hasUnknownFileSize(tokenizer)
|
|
1287
|
+
&& (
|
|
1288
|
+
!Number.isFinite(element.len)
|
|
1289
|
+
|| element.len < 0
|
|
1290
|
+
|| element.len > maximumEbmlElementPayloadSizeInBytes
|
|
1291
|
+
)
|
|
1292
|
+
) {
|
|
1293
|
+
return;
|
|
1294
|
+
}
|
|
1295
|
+
|
|
1296
|
+
await safeIgnore(tokenizer, element.len, {
|
|
1297
|
+
maximumLength: hasUnknownFileSize(tokenizer) ? maximumEbmlElementPayloadSizeInBytes : tokenizer.fileInfo.size,
|
|
1298
|
+
reason: 'EBML payload',
|
|
1299
|
+
}); // ignore payload
|
|
849
1300
|
--children;
|
|
850
1301
|
}
|
|
851
1302
|
}
|
|
852
1303
|
|
|
853
|
-
const
|
|
854
|
-
const documentType = await readChildren(
|
|
1304
|
+
const rootElement = await readElement();
|
|
1305
|
+
const documentType = await readChildren(rootElement.len);
|
|
855
1306
|
|
|
856
1307
|
switch (documentType) {
|
|
857
1308
|
case 'webm':
|
|
@@ -1203,6 +1654,16 @@ export class FileTypeParser {
|
|
|
1203
1654
|
// -- 8-byte signatures --
|
|
1204
1655
|
|
|
1205
1656
|
if (this.check([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A])) {
|
|
1657
|
+
const pngFileType = {
|
|
1658
|
+
ext: 'png',
|
|
1659
|
+
mime: 'image/png',
|
|
1660
|
+
};
|
|
1661
|
+
|
|
1662
|
+
const apngFileType = {
|
|
1663
|
+
ext: 'apng',
|
|
1664
|
+
mime: 'image/apng',
|
|
1665
|
+
};
|
|
1666
|
+
|
|
1206
1667
|
// APNG format (https://wiki.mozilla.org/APNG_Specification)
|
|
1207
1668
|
// 1. Find the first IDAT (image data) chunk (49 44 41 54)
|
|
1208
1669
|
// 2. Check if there is an "acTL" chunk before the IDAT one (61 63 54 4C)
|
|
@@ -1220,7 +1681,13 @@ export class FileTypeParser {
|
|
|
1220
1681
|
};
|
|
1221
1682
|
}
|
|
1222
1683
|
|
|
1684
|
+
const isUnknownPngStream = hasUnknownFileSize(tokenizer);
|
|
1685
|
+
const pngScanStart = tokenizer.position;
|
|
1223
1686
|
do {
|
|
1687
|
+
if (hasExceededUnknownSizeScanBudget(tokenizer, pngScanStart, maximumPngChunkSizeInBytes)) {
|
|
1688
|
+
break;
|
|
1689
|
+
}
|
|
1690
|
+
|
|
1224
1691
|
const chunk = await readChunkHeader();
|
|
1225
1692
|
if (chunk.length < 0) {
|
|
1226
1693
|
return; // Invalid chunk length
|
|
@@ -1228,24 +1695,40 @@ export class FileTypeParser {
|
|
|
1228
1695
|
|
|
1229
1696
|
switch (chunk.type) {
|
|
1230
1697
|
case 'IDAT':
|
|
1231
|
-
return
|
|
1232
|
-
ext: 'png',
|
|
1233
|
-
mime: 'image/png',
|
|
1234
|
-
};
|
|
1698
|
+
return pngFileType;
|
|
1235
1699
|
case 'acTL':
|
|
1236
|
-
return
|
|
1237
|
-
ext: 'apng',
|
|
1238
|
-
mime: 'image/apng',
|
|
1239
|
-
};
|
|
1700
|
+
return apngFileType;
|
|
1240
1701
|
default:
|
|
1241
|
-
|
|
1702
|
+
if (
|
|
1703
|
+
isUnknownPngStream
|
|
1704
|
+
&& chunk.length > maximumPngChunkSizeInBytes
|
|
1705
|
+
) {
|
|
1706
|
+
// Avoid huge attacker-controlled skips when probing unknown-size streams.
|
|
1707
|
+
return;
|
|
1708
|
+
}
|
|
1709
|
+
|
|
1710
|
+
try {
|
|
1711
|
+
await safeIgnore(tokenizer, chunk.length + 4, {
|
|
1712
|
+
maximumLength: isUnknownPngStream ? maximumPngChunkSizeInBytes + 4 : tokenizer.fileInfo.size,
|
|
1713
|
+
reason: 'PNG chunk payload',
|
|
1714
|
+
}); // Ignore chunk-data + CRC
|
|
1715
|
+
} catch (error) {
|
|
1716
|
+
if (
|
|
1717
|
+
!isUnknownPngStream
|
|
1718
|
+
&& (
|
|
1719
|
+
error instanceof ParserHardLimitError
|
|
1720
|
+
|| error instanceof strtok3.EndOfStreamError
|
|
1721
|
+
)
|
|
1722
|
+
) {
|
|
1723
|
+
return pngFileType;
|
|
1724
|
+
}
|
|
1725
|
+
|
|
1726
|
+
throw error;
|
|
1727
|
+
}
|
|
1242
1728
|
}
|
|
1243
1729
|
} while (tokenizer.position + 8 < tokenizer.fileInfo.size);
|
|
1244
1730
|
|
|
1245
|
-
return
|
|
1246
|
-
ext: 'png',
|
|
1247
|
-
mime: 'image/png',
|
|
1248
|
-
};
|
|
1731
|
+
return pngFileType;
|
|
1249
1732
|
}
|
|
1250
1733
|
|
|
1251
1734
|
if (this.check([0x41, 0x52, 0x52, 0x4F, 0x57, 0x31, 0x00, 0x00])) {
|
|
@@ -1403,45 +1886,95 @@ export class FileTypeParser {
|
|
|
1403
1886
|
|
|
1404
1887
|
// ASF_Header_Object first 80 bytes
|
|
1405
1888
|
if (this.check([0x30, 0x26, 0xB2, 0x75, 0x8E, 0x66, 0xCF, 0x11, 0xA6, 0xD9])) {
|
|
1406
|
-
|
|
1407
|
-
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
1889
|
+
let isMalformedAsf = false;
|
|
1890
|
+
try {
|
|
1891
|
+
async function readHeader() {
|
|
1892
|
+
const guid = new Uint8Array(16);
|
|
1893
|
+
await safeReadBuffer(tokenizer, guid, undefined, {
|
|
1894
|
+
maximumLength: guid.length,
|
|
1895
|
+
reason: 'ASF header GUID',
|
|
1896
|
+
});
|
|
1897
|
+
return {
|
|
1898
|
+
id: guid,
|
|
1899
|
+
size: Number(await tokenizer.readToken(Token.UINT64_LE)),
|
|
1900
|
+
};
|
|
1901
|
+
}
|
|
1414
1902
|
|
|
1415
|
-
|
|
1416
|
-
|
|
1417
|
-
|
|
1418
|
-
|
|
1419
|
-
|
|
1420
|
-
|
|
1421
|
-
|
|
1422
|
-
|
|
1423
|
-
|
|
1424
|
-
|
|
1425
|
-
if (_check(typeId, [0x40, 0x9E, 0x69, 0xF8, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
|
|
1426
|
-
// Found audio:
|
|
1427
|
-
return {
|
|
1428
|
-
ext: 'asf',
|
|
1429
|
-
mime: 'audio/x-ms-asf',
|
|
1430
|
-
};
|
|
1903
|
+
await safeIgnore(tokenizer, 30, {
|
|
1904
|
+
maximumLength: 30,
|
|
1905
|
+
reason: 'ASF header prelude',
|
|
1906
|
+
});
|
|
1907
|
+
const isUnknownFileSize = hasUnknownFileSize(tokenizer);
|
|
1908
|
+
const asfHeaderScanStart = tokenizer.position;
|
|
1909
|
+
while (tokenizer.position + 24 < tokenizer.fileInfo.size) {
|
|
1910
|
+
if (hasExceededUnknownSizeScanBudget(tokenizer, asfHeaderScanStart, maximumUntrustedSkipSizeInBytes)) {
|
|
1911
|
+
break;
|
|
1431
1912
|
}
|
|
1432
1913
|
|
|
1433
|
-
|
|
1434
|
-
|
|
1435
|
-
|
|
1436
|
-
|
|
1437
|
-
|
|
1438
|
-
|
|
1914
|
+
const previousPosition = tokenizer.position;
|
|
1915
|
+
const header = await readHeader();
|
|
1916
|
+
let payload = header.size - 24;
|
|
1917
|
+
if (
|
|
1918
|
+
!Number.isFinite(payload)
|
|
1919
|
+
|| payload < 0
|
|
1920
|
+
) {
|
|
1921
|
+
isMalformedAsf = true;
|
|
1922
|
+
break;
|
|
1439
1923
|
}
|
|
1440
1924
|
|
|
1441
|
-
|
|
1925
|
+
if (_check(header.id, [0x91, 0x07, 0xDC, 0xB7, 0xB7, 0xA9, 0xCF, 0x11, 0x8E, 0xE6, 0x00, 0xC0, 0x0C, 0x20, 0x53, 0x65])) {
|
|
1926
|
+
// Sync on Stream-Properties-Object (B7DC0791-A9B7-11CF-8EE6-00C00C205365)
|
|
1927
|
+
const typeId = new Uint8Array(16);
|
|
1928
|
+
payload -= await safeReadBuffer(tokenizer, typeId, undefined, {
|
|
1929
|
+
maximumLength: typeId.length,
|
|
1930
|
+
reason: 'ASF stream type GUID',
|
|
1931
|
+
});
|
|
1932
|
+
|
|
1933
|
+
if (_check(typeId, [0x40, 0x9E, 0x69, 0xF8, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
|
|
1934
|
+
// Found audio:
|
|
1935
|
+
return {
|
|
1936
|
+
ext: 'asf',
|
|
1937
|
+
mime: 'audio/x-ms-asf',
|
|
1938
|
+
};
|
|
1939
|
+
}
|
|
1940
|
+
|
|
1941
|
+
if (_check(typeId, [0xC0, 0xEF, 0x19, 0xBC, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
|
|
1942
|
+
// Found video:
|
|
1943
|
+
return {
|
|
1944
|
+
ext: 'asf',
|
|
1945
|
+
mime: 'video/x-ms-asf',
|
|
1946
|
+
};
|
|
1947
|
+
}
|
|
1948
|
+
|
|
1949
|
+
break;
|
|
1950
|
+
}
|
|
1951
|
+
|
|
1952
|
+
await safeIgnore(tokenizer, payload, {
|
|
1953
|
+
maximumLength: isUnknownFileSize ? maximumUntrustedSkipSizeInBytes : tokenizer.fileInfo.size,
|
|
1954
|
+
reason: 'ASF header payload',
|
|
1955
|
+
});
|
|
1956
|
+
|
|
1957
|
+
// Safeguard against malformed files: break if the position did not advance.
|
|
1958
|
+
if (tokenizer.position <= previousPosition) {
|
|
1959
|
+
isMalformedAsf = true;
|
|
1960
|
+
break;
|
|
1961
|
+
}
|
|
1962
|
+
}
|
|
1963
|
+
} catch (error) {
|
|
1964
|
+
if (
|
|
1965
|
+
error instanceof strtok3.EndOfStreamError
|
|
1966
|
+
|| error instanceof ParserHardLimitError
|
|
1967
|
+
) {
|
|
1968
|
+
if (hasUnknownFileSize(tokenizer)) {
|
|
1969
|
+
isMalformedAsf = true;
|
|
1970
|
+
}
|
|
1971
|
+
} else {
|
|
1972
|
+
throw error;
|
|
1442
1973
|
}
|
|
1974
|
+
}
|
|
1443
1975
|
|
|
1444
|
-
|
|
1976
|
+
if (isMalformedAsf) {
|
|
1977
|
+
return;
|
|
1445
1978
|
}
|
|
1446
1979
|
|
|
1447
1980
|
// Default to ASF generic extension
|
|
@@ -1760,9 +2293,10 @@ export class FileTypeParser {
|
|
|
1760
2293
|
// Detections with limited supporting data, resulting in a higher likelihood of false positives
|
|
1761
2294
|
detectImprecise = async tokenizer => {
|
|
1762
2295
|
this.buffer = new Uint8Array(reasonableDetectionSizeInBytes);
|
|
2296
|
+
const fileSize = getKnownFileSizeOrMaximum(tokenizer.fileInfo.size);
|
|
1763
2297
|
|
|
1764
2298
|
// Read initial sample size of 8 bytes
|
|
1765
|
-
await tokenizer.peekBuffer(this.buffer, {length: Math.min(8,
|
|
2299
|
+
await tokenizer.peekBuffer(this.buffer, {length: Math.min(8, fileSize), mayBeLess: true});
|
|
1766
2300
|
|
|
1767
2301
|
if (
|
|
1768
2302
|
this.check([0x0, 0x0, 0x1, 0xBA])
|
|
@@ -1796,7 +2330,7 @@ export class FileTypeParser {
|
|
|
1796
2330
|
}
|
|
1797
2331
|
|
|
1798
2332
|
// Adjust buffer to `mpegOffsetTolerance`
|
|
1799
|
-
await tokenizer.peekBuffer(this.buffer, {length: Math.min(2 + this.options.mpegOffsetTolerance,
|
|
2333
|
+
await tokenizer.peekBuffer(this.buffer, {length: Math.min(2 + this.options.mpegOffsetTolerance, fileSize), mayBeLess: true});
|
|
1800
2334
|
|
|
1801
2335
|
// Check MPEG 1 or 2 Layer 3 header, or 'layer 0' for ADTS (MPEG sync-word 0xFFE)
|
|
1802
2336
|
if (this.buffer.length >= (2 + this.options.mpegOffsetTolerance)) {
|
|
@@ -1811,7 +2345,7 @@ export class FileTypeParser {
|
|
|
1811
2345
|
|
|
1812
2346
|
async readTiffTag(bigEndian) {
|
|
1813
2347
|
const tagId = await this.tokenizer.readToken(bigEndian ? Token.UINT16_BE : Token.UINT16_LE);
|
|
1814
|
-
this.tokenizer.ignore(10);
|
|
2348
|
+
await this.tokenizer.ignore(10);
|
|
1815
2349
|
switch (tagId) {
|
|
1816
2350
|
case 50_341:
|
|
1817
2351
|
return {
|
|
@@ -1829,6 +2363,13 @@ export class FileTypeParser {
|
|
|
1829
2363
|
|
|
1830
2364
|
async readTiffIFD(bigEndian) {
|
|
1831
2365
|
const numberOfTags = await this.tokenizer.readToken(bigEndian ? Token.UINT16_BE : Token.UINT16_LE);
|
|
2366
|
+
if (
|
|
2367
|
+
hasUnknownFileSize(this.tokenizer)
|
|
2368
|
+
&& (2 + (numberOfTags * 12)) > maximumTiffIfdOffsetInBytes
|
|
2369
|
+
) {
|
|
2370
|
+
return;
|
|
2371
|
+
}
|
|
2372
|
+
|
|
1832
2373
|
for (let n = 0; n < numberOfTags; ++n) {
|
|
1833
2374
|
const fileType = await this.readTiffTag(bigEndian);
|
|
1834
2375
|
if (fileType) {
|
|
@@ -1838,6 +2379,11 @@ export class FileTypeParser {
|
|
|
1838
2379
|
}
|
|
1839
2380
|
|
|
1840
2381
|
async readTiffHeader(bigEndian) {
|
|
2382
|
+
const tiffFileType = {
|
|
2383
|
+
ext: 'tif',
|
|
2384
|
+
mime: 'image/tiff',
|
|
2385
|
+
};
|
|
2386
|
+
|
|
1841
2387
|
const version = (bigEndian ? Token.UINT16_BE : Token.UINT16_LE).get(this.buffer, 2);
|
|
1842
2388
|
const ifdOffset = (bigEndian ? Token.UINT32_BE : Token.UINT32_LE).get(this.buffer, 4);
|
|
1843
2389
|
|
|
@@ -1866,19 +2412,37 @@ export class FileTypeParser {
|
|
|
1866
2412
|
}
|
|
1867
2413
|
}
|
|
1868
2414
|
|
|
1869
|
-
|
|
1870
|
-
|
|
1871
|
-
|
|
1872
|
-
|
|
1873
|
-
|
|
1874
|
-
|
|
2415
|
+
const maximumTiffOffset = hasUnknownFileSize(this.tokenizer) ? maximumTiffIfdOffsetInBytes : this.tokenizer.fileInfo.size;
|
|
2416
|
+
|
|
2417
|
+
try {
|
|
2418
|
+
await safeIgnore(this.tokenizer, ifdOffset, {
|
|
2419
|
+
maximumLength: maximumTiffOffset,
|
|
2420
|
+
reason: 'TIFF IFD offset',
|
|
2421
|
+
});
|
|
2422
|
+
} catch (error) {
|
|
2423
|
+
if (error instanceof strtok3.EndOfStreamError) {
|
|
2424
|
+
return;
|
|
2425
|
+
}
|
|
2426
|
+
|
|
2427
|
+
throw error;
|
|
2428
|
+
}
|
|
2429
|
+
|
|
2430
|
+
let fileType;
|
|
2431
|
+
try {
|
|
2432
|
+
fileType = await this.readTiffIFD(bigEndian);
|
|
2433
|
+
} catch (error) {
|
|
2434
|
+
if (error instanceof strtok3.EndOfStreamError) {
|
|
2435
|
+
return;
|
|
2436
|
+
}
|
|
2437
|
+
|
|
2438
|
+
throw error;
|
|
2439
|
+
}
|
|
2440
|
+
|
|
2441
|
+
return fileType ?? tiffFileType;
|
|
1875
2442
|
}
|
|
1876
2443
|
|
|
1877
2444
|
if (version === 43) { // Big TIFF file header
|
|
1878
|
-
return
|
|
1879
|
-
ext: 'tif',
|
|
1880
|
-
mime: 'image/tiff',
|
|
1881
|
-
};
|
|
2445
|
+
return tiffFileType;
|
|
1882
2446
|
}
|
|
1883
2447
|
}
|
|
1884
2448
|
|
package/index.js
CHANGED
|
@@ -5,13 +5,37 @@ Node.js specific entry point.
|
|
|
5
5
|
import {ReadableStream as WebReadableStream} from 'node:stream/web';
|
|
6
6
|
import {pipeline, PassThrough, Readable} from 'node:stream';
|
|
7
7
|
import * as strtok3 from 'strtok3';
|
|
8
|
-
import {
|
|
8
|
+
import {
|
|
9
|
+
FileTypeParser as DefaultFileTypeParser,
|
|
10
|
+
reasonableDetectionSizeInBytes,
|
|
11
|
+
normalizeSampleSize,
|
|
12
|
+
} from './core.js';
|
|
13
|
+
|
|
14
|
+
function isTokenizerStreamBoundsError(error) {
|
|
15
|
+
if (
|
|
16
|
+
!(error instanceof RangeError)
|
|
17
|
+
|| error.message !== 'offset is out of bounds'
|
|
18
|
+
|| typeof error.stack !== 'string'
|
|
19
|
+
) {
|
|
20
|
+
return false;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
// Some malformed or non-byte Node.js streams can surface this tokenizer-internal range error.
|
|
24
|
+
// Note: This stack-trace check is fragile and may break if strtok3 restructures its internals.
|
|
25
|
+
return /strtok3[/\\]lib[/\\]stream[/\\]/.test(error.stack);
|
|
26
|
+
}
|
|
9
27
|
|
|
10
28
|
export class FileTypeParser extends DefaultFileTypeParser {
|
|
11
29
|
async fromStream(stream) {
|
|
12
30
|
const tokenizer = await (stream instanceof WebReadableStream ? strtok3.fromWebStream(stream, this.tokenizerOptions) : strtok3.fromStream(stream, this.tokenizerOptions));
|
|
13
31
|
try {
|
|
14
32
|
return await super.fromTokenizer(tokenizer);
|
|
33
|
+
} catch (error) {
|
|
34
|
+
if (isTokenizerStreamBoundsError(error)) {
|
|
35
|
+
return;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
throw error;
|
|
15
39
|
} finally {
|
|
16
40
|
await tokenizer.close();
|
|
17
41
|
}
|
|
@@ -31,7 +55,7 @@ export class FileTypeParser extends DefaultFileTypeParser {
|
|
|
31
55
|
return super.toDetectionStream(readableStream, options);
|
|
32
56
|
}
|
|
33
57
|
|
|
34
|
-
const
|
|
58
|
+
const sampleSize = normalizeSampleSize(options.sampleSize ?? reasonableDetectionSizeInBytes);
|
|
35
59
|
|
|
36
60
|
return new Promise((resolve, reject) => {
|
|
37
61
|
readableStream.on('error', reject);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "file-type",
|
|
3
|
-
"version": "21.3.
|
|
3
|
+
"version": "21.3.1",
|
|
4
4
|
"description": "Detect the file type of a file, stream, or data",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": "sindresorhus/file-type",
|
|
@@ -258,12 +258,12 @@
|
|
|
258
258
|
},
|
|
259
259
|
"devDependencies": {
|
|
260
260
|
"@tokenizer/token": "^0.3.0",
|
|
261
|
-
"@types/node": "^
|
|
262
|
-
"ava": "^
|
|
261
|
+
"@types/node": "^25.3.3",
|
|
262
|
+
"ava": "^7.0.0",
|
|
263
263
|
"commonmark": "^0.31.2",
|
|
264
264
|
"get-stream": "^9.0.1",
|
|
265
265
|
"noop-stream": "^1.0.0",
|
|
266
|
-
"tsd": "^0.
|
|
266
|
+
"tsd": "^0.33.0",
|
|
267
267
|
"xo": "^0.60.0"
|
|
268
268
|
},
|
|
269
269
|
"xo": {
|
package/readme.md
CHANGED
|
@@ -380,6 +380,7 @@ console.log(fileType);
|
|
|
380
380
|
### Available third-party file-type detectors
|
|
381
381
|
|
|
382
382
|
- [@file-type/av](https://github.com/Borewit/file-type-av): Improves detection of audio and video file formats, with accurate differentiation between the two
|
|
383
|
+
- [@file-type/cfbf](https://github.com/Borewit/file-type-cfbf): Detects Compound File Binary Format (CFBF) based formats, such as Office 97–2003 documents and `.msi`.
|
|
383
384
|
- [@file-type/pdf](https://github.com/Borewit/file-type-pdf): Detects PDF based file types, such as Adobe Illustrator
|
|
384
385
|
- [@file-type/xml](https://github.com/Borewit/file-type-xml): Detects common XML file types, such as GLM, KML, MusicXML, RSS, SVG, and XHTML
|
|
385
386
|
|
|
@@ -628,14 +629,14 @@ abortController.abort(); // Abort file-type reading from the Blob stream.
|
|
|
628
629
|
|
|
629
630
|
*[Pull requests](.github/pull_request_template.md) are welcome for additional commonly used file types.*
|
|
630
631
|
|
|
631
|
-
The following file types will not be accepted
|
|
632
|
-
- [MS-CFB: Microsoft Compound File Binary File Format based formats](https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-cfb/53989ce4-7b05-4f8d-829b-d08d6148375b)
|
|
632
|
+
The following file types will not be accepted, but most of them are supported by [third-party detector](#available-third-party-file-type-detectors)
|
|
633
|
+
- [MS-CFB: Microsoft Compound File Binary File Format based formats](https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-cfb/53989ce4-7b05-4f8d-829b-d08d6148375b)
|
|
633
634
|
- `.doc` - Microsoft Word 97-2003 Document
|
|
634
635
|
- `.xls` - Microsoft Excel 97-2003 Document
|
|
635
636
|
- `.ppt` - Microsoft PowerPoint97-2003 Document
|
|
636
637
|
- `.msi` - Microsoft Windows Installer
|
|
637
638
|
- `.csv` - [Reason.](https://github.com/sindresorhus/file-type/issues/264#issuecomment-568439196)
|
|
638
|
-
- `.svg`
|
|
639
|
+
- `.svg`
|
|
639
640
|
|
|
640
641
|
#### tokenizer
|
|
641
642
|
|