file-type 21.3.3 → 22.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,315 +4,91 @@ Primary entry point, Node.js specific entry point is index.js
4
4
 
5
5
  import * as Token from 'token-types';
6
6
  import * as strtok3 from 'strtok3/core';
7
- import {ZipHandler, GzipHandler} from '@tokenizer/inflate';
8
- import {getUintBE} from 'uint8array-extras';
7
+ import {GzipHandler} from '@tokenizer/inflate';
8
+ import {concatUint8Arrays} from 'uint8array-extras';
9
9
  import {
10
10
  stringToBytes,
11
11
  tarHeaderChecksumMatches,
12
12
  uint32SyncSafeToken,
13
- } from './util.js';
13
+ } from './tokens.js';
14
14
  import {extensions, mimeTypes} from './supported.js';
15
+ import {
16
+ maximumUntrustedSkipSizeInBytes,
17
+ ParserHardLimitError,
18
+ safeIgnore,
19
+ checkBytes,
20
+ hasUnknownFileSize,
21
+ } from './parser.js';
22
+ import {detectZip} from './detectors/zip.js';
23
+ import {detectEbml} from './detectors/ebml.js';
24
+ import {detectPng} from './detectors/png.js';
25
+ import {detectAsf} from './detectors/asf.js';
15
26
 
16
27
  export const reasonableDetectionSizeInBytes = 4100; // A fair amount of file-types are detectable within this range.
17
- // Keep defensive limits small enough to avoid accidental memory spikes from untrusted inputs.
18
28
  const maximumMpegOffsetTolerance = reasonableDetectionSizeInBytes - 2;
19
- const maximumZipEntrySizeInBytes = 1024 * 1024;
20
- const maximumZipEntryCount = 1024;
21
- const maximumZipBufferedReadSizeInBytes = (2 ** 31) - 1;
22
- const maximumUntrustedSkipSizeInBytes = 16 * 1024 * 1024;
23
- const maximumUnknownSizePayloadProbeSizeInBytes = maximumZipEntrySizeInBytes;
24
- const maximumZipTextEntrySizeInBytes = maximumZipEntrySizeInBytes;
25
29
  const maximumNestedGzipDetectionSizeInBytes = maximumUntrustedSkipSizeInBytes;
26
30
  const maximumNestedGzipProbeDepth = 1;
31
+ const unknownSizeGzipProbeTimeoutInMilliseconds = 100;
27
32
  const maximumId3HeaderSizeInBytes = maximumUntrustedSkipSizeInBytes;
28
- const maximumEbmlDocumentTypeSizeInBytes = 64;
29
- const maximumEbmlElementPayloadSizeInBytes = maximumUnknownSizePayloadProbeSizeInBytes;
30
- const maximumEbmlElementCount = 256;
31
- const maximumPngChunkCount = 512;
32
- const maximumPngStreamScanBudgetInBytes = maximumUntrustedSkipSizeInBytes;
33
- const maximumAsfHeaderObjectCount = 512;
34
33
  const maximumTiffTagCount = 512;
35
34
  const maximumDetectionReentryCount = 256;
36
- const maximumPngChunkSizeInBytes = maximumUnknownSizePayloadProbeSizeInBytes;
37
- const maximumAsfHeaderPayloadSizeInBytes = maximumUnknownSizePayloadProbeSizeInBytes;
38
- const maximumTiffStreamIfdOffsetInBytes = maximumUnknownSizePayloadProbeSizeInBytes;
35
+ const maximumTiffStreamIfdOffsetInBytes = 1024 * 1024;
39
36
  const maximumTiffIfdOffsetInBytes = maximumUntrustedSkipSizeInBytes;
40
- const recoverableZipErrorMessages = new Set([
41
- 'Unexpected signature',
42
- 'Encrypted ZIP',
43
- 'Expected Central-File-Header signature',
44
- ]);
45
- const recoverableZipErrorMessagePrefixes = [
46
- 'ZIP entry count exceeds ',
47
- 'Unsupported ZIP compression method:',
48
- 'ZIP entry compressed data exceeds ',
49
- 'ZIP entry decompressed data exceeds ',
50
- ];
51
- const recoverableZipErrorCodes = new Set([
52
- 'Z_BUF_ERROR',
53
- 'Z_DATA_ERROR',
54
- 'ERR_INVALID_STATE',
55
- ]);
56
-
57
- class ParserHardLimitError extends Error {}
58
-
59
- function getSafeBound(value, maximum, reason) {
60
- if (
61
- !Number.isFinite(value)
62
- || value < 0
63
- || value > maximum
64
- ) {
65
- throw new ParserHardLimitError(`${reason} has invalid size ${value} (maximum ${maximum} bytes)`);
66
- }
67
-
68
- return value;
69
- }
70
-
71
- async function safeIgnore(tokenizer, length, {maximumLength = maximumUntrustedSkipSizeInBytes, reason = 'skip'} = {}) {
72
- const safeLength = getSafeBound(length, maximumLength, reason);
73
- await tokenizer.ignore(safeLength);
74
- }
75
-
76
- async function safeReadBuffer(tokenizer, buffer, options, {maximumLength = buffer.length, reason = 'read'} = {}) {
77
- const length = options?.length ?? buffer.length;
78
- const safeLength = getSafeBound(length, maximumLength, reason);
79
- return tokenizer.readBuffer(buffer, {
80
- ...options,
81
- length: safeLength,
82
- });
83
- }
84
-
85
- async function decompressDeflateRawWithLimit(data, {maximumLength = maximumZipEntrySizeInBytes} = {}) {
86
- const input = new ReadableStream({
87
- start(controller) {
88
- controller.enqueue(data);
89
- controller.close();
90
- },
91
- });
92
- const output = input.pipeThrough(new DecompressionStream('deflate-raw'));
93
- const reader = output.getReader();
94
- const chunks = [];
95
- let totalLength = 0;
96
-
97
- try {
98
- for (;;) {
99
- const {done, value} = await reader.read();
100
- if (done) {
101
- break;
102
- }
103
37
 
104
- totalLength += value.length;
105
- if (totalLength > maximumLength) {
106
- await reader.cancel();
107
- throw new Error(`ZIP entry decompressed data exceeds ${maximumLength} bytes`);
108
- }
109
-
110
- chunks.push(value);
111
- }
112
- } finally {
113
- reader.releaseLock();
114
- }
115
-
116
- const uncompressedData = new Uint8Array(totalLength);
117
- let offset = 0;
118
- for (const chunk of chunks) {
119
- uncompressedData.set(chunk, offset);
120
- offset += chunk.length;
38
+ export function normalizeSampleSize(sampleSize) {
39
+ // `sampleSize` is an explicit caller-controlled tuning knob, not untrusted file input.
40
+ // Preserve valid caller-requested probe depth here; applications must bound attacker-derived option values themselves.
41
+ if (!Number.isFinite(sampleSize)) {
42
+ return reasonableDetectionSizeInBytes;
121
43
  }
122
44
 
123
- return uncompressedData;
45
+ return Math.max(1, Math.trunc(sampleSize));
124
46
  }
125
47
 
126
- const zipDataDescriptorSignature = 0x08_07_4B_50;
127
- const zipDataDescriptorLengthInBytes = 16;
128
- const zipDataDescriptorOverlapLengthInBytes = zipDataDescriptorLengthInBytes - 1;
129
-
130
- function findZipDataDescriptorOffset(buffer, bytesConsumed) {
131
- if (buffer.length < zipDataDescriptorLengthInBytes) {
132
- return -1;
133
- }
134
-
135
- const lastPossibleDescriptorOffset = buffer.length - zipDataDescriptorLengthInBytes;
136
- for (let index = 0; index <= lastPossibleDescriptorOffset; index++) {
137
- if (
138
- Token.UINT32_LE.get(buffer, index) === zipDataDescriptorSignature
139
- && Token.UINT32_LE.get(buffer, index + 8) === bytesConsumed + index
140
- ) {
141
- return index;
142
- }
48
+ function normalizeMpegOffsetTolerance(mpegOffsetTolerance) {
49
+ // This value controls scan depth and therefore worst-case CPU work.
50
+ if (!Number.isFinite(mpegOffsetTolerance)) {
51
+ return 0;
143
52
  }
144
53
 
145
- return -1;
146
- }
147
-
148
- function isPngAncillaryChunk(type) {
149
- return (type.codePointAt(0) & 0x20) !== 0;
54
+ return Math.max(0, Math.min(maximumMpegOffsetTolerance, Math.trunc(mpegOffsetTolerance)));
150
55
  }
151
56
 
152
- function mergeByteChunks(chunks, totalLength) {
153
- const merged = new Uint8Array(totalLength);
154
- let offset = 0;
155
-
156
- for (const chunk of chunks) {
157
- merged.set(chunk, offset);
158
- offset += chunk.length;
57
+ function getKnownFileSizeOrMaximum(fileSize) {
58
+ if (!Number.isFinite(fileSize)) {
59
+ return Number.MAX_SAFE_INTEGER;
159
60
  }
160
61
 
161
- return merged;
62
+ return Math.max(0, fileSize);
162
63
  }
163
64
 
164
- async function readZipDataDescriptorEntryWithLimit(zipHandler, {shouldBuffer, maximumLength = maximumZipEntrySizeInBytes} = {}) {
165
- const {syncBuffer} = zipHandler;
166
- const {length: syncBufferLength} = syncBuffer;
167
- const chunks = [];
168
- let bytesConsumed = 0;
169
-
170
- for (;;) {
171
- const length = await zipHandler.tokenizer.peekBuffer(syncBuffer, {mayBeLess: true});
172
- const dataDescriptorOffset = findZipDataDescriptorOffset(syncBuffer.subarray(0, length), bytesConsumed);
173
- const retainedLength = dataDescriptorOffset >= 0
174
- ? 0
175
- : (
176
- length === syncBufferLength
177
- ? Math.min(zipDataDescriptorOverlapLengthInBytes, length - 1)
178
- : 0
179
- );
180
- const chunkLength = dataDescriptorOffset >= 0 ? dataDescriptorOffset : length - retainedLength;
181
-
182
- if (chunkLength === 0) {
183
- break;
184
- }
185
-
186
- bytesConsumed += chunkLength;
187
- if (bytesConsumed > maximumLength) {
188
- throw new Error(`ZIP entry compressed data exceeds ${maximumLength} bytes`);
189
- }
190
-
191
- if (shouldBuffer) {
192
- const data = new Uint8Array(chunkLength);
193
- await zipHandler.tokenizer.readBuffer(data);
194
- chunks.push(data);
195
- } else {
196
- await zipHandler.tokenizer.ignore(chunkLength);
197
- }
198
-
199
- if (dataDescriptorOffset >= 0) {
200
- break;
201
- }
202
- }
203
-
204
- if (!hasUnknownFileSize(zipHandler.tokenizer)) {
205
- zipHandler.knownSizeDescriptorScannedBytes += bytesConsumed;
206
- }
207
-
208
- if (!shouldBuffer) {
209
- return;
210
- }
211
-
212
- return mergeByteChunks(chunks, bytesConsumed);
65
+ // Wrap stream in an identity TransformStream to avoid BYOB readers.
66
+ // Node.js has a bug where calling controller.close() inside a BYOB stream's
67
+ // pull() callback does not resolve pending reader.read() calls, causing
68
+ // permanent hangs on streams shorter than the requested read size.
69
+ // Using a default (non-BYOB) reader via TransformStream avoids this.
70
+ function toDefaultStream(stream) {
71
+ return stream.pipeThrough(new TransformStream());
213
72
  }
214
73
 
215
- function getRemainingZipScanBudget(zipHandler, startOffset) {
216
- if (hasUnknownFileSize(zipHandler.tokenizer)) {
217
- return Math.max(0, maximumUntrustedSkipSizeInBytes - (zipHandler.tokenizer.position - startOffset));
74
+ function readWithSignal(reader, signal) {
75
+ if (signal === undefined) {
76
+ return reader.read();
218
77
  }
219
78
 
220
- return Math.max(0, maximumZipEntrySizeInBytes - zipHandler.knownSizeDescriptorScannedBytes);
79
+ signal.throwIfAborted();
80
+
81
+ return Promise.race([
82
+ reader.read(),
83
+ new Promise((_resolve, reject) => {
84
+ signal.addEventListener('abort', () => {
85
+ reject(signal.reason);
86
+ reader.cancel(signal.reason).catch(() => {});
87
+ }, {once: true});
88
+ }),
89
+ ]);
221
90
  }
222
91
 
223
- async function readZipEntryData(zipHandler, zipHeader, {shouldBuffer, maximumDescriptorLength = maximumZipEntrySizeInBytes} = {}) {
224
- if (
225
- zipHeader.dataDescriptor
226
- && zipHeader.compressedSize === 0
227
- ) {
228
- return readZipDataDescriptorEntryWithLimit(zipHandler, {
229
- shouldBuffer,
230
- maximumLength: maximumDescriptorLength,
231
- });
232
- }
233
-
234
- if (!shouldBuffer) {
235
- await safeIgnore(zipHandler.tokenizer, zipHeader.compressedSize, {
236
- maximumLength: hasUnknownFileSize(zipHandler.tokenizer) ? maximumZipEntrySizeInBytes : zipHandler.tokenizer.fileInfo.size,
237
- reason: 'ZIP entry compressed data',
238
- });
239
- return;
240
- }
241
-
242
- const maximumLength = getMaximumZipBufferedReadLength(zipHandler.tokenizer);
243
- if (
244
- !Number.isFinite(zipHeader.compressedSize)
245
- || zipHeader.compressedSize < 0
246
- || zipHeader.compressedSize > maximumLength
247
- ) {
248
- throw new Error(`ZIP entry compressed data exceeds ${maximumLength} bytes`);
249
- }
250
-
251
- const fileData = new Uint8Array(zipHeader.compressedSize);
252
- await zipHandler.tokenizer.readBuffer(fileData);
253
- return fileData;
254
- }
255
-
256
- // Override the default inflate to enforce decompression size limits, since @tokenizer/inflate does not expose a configuration hook for this.
257
- ZipHandler.prototype.inflate = async function (zipHeader, fileData, callback) {
258
- if (zipHeader.compressedMethod === 0) {
259
- return callback(fileData);
260
- }
261
-
262
- if (zipHeader.compressedMethod !== 8) {
263
- throw new Error(`Unsupported ZIP compression method: ${zipHeader.compressedMethod}`);
264
- }
265
-
266
- const uncompressedData = await decompressDeflateRawWithLimit(fileData, {maximumLength: maximumZipEntrySizeInBytes});
267
- return callback(uncompressedData);
268
- };
269
-
270
- ZipHandler.prototype.unzip = async function (fileCallback) {
271
- let stop = false;
272
- let zipEntryCount = 0;
273
- const zipScanStart = this.tokenizer.position;
274
- this.knownSizeDescriptorScannedBytes = 0;
275
- do {
276
- if (hasExceededUnknownSizeScanBudget(this.tokenizer, zipScanStart, maximumUntrustedSkipSizeInBytes)) {
277
- throw new ParserHardLimitError(`ZIP stream probing exceeds ${maximumUntrustedSkipSizeInBytes} bytes`);
278
- }
279
-
280
- const zipHeader = await this.readLocalFileHeader();
281
- if (!zipHeader) {
282
- break;
283
- }
284
-
285
- zipEntryCount++;
286
- if (zipEntryCount > maximumZipEntryCount) {
287
- throw new Error(`ZIP entry count exceeds ${maximumZipEntryCount}`);
288
- }
289
-
290
- const next = fileCallback(zipHeader);
291
- stop = Boolean(next.stop);
292
- await this.tokenizer.ignore(zipHeader.extraFieldLength);
293
- const fileData = await readZipEntryData(this, zipHeader, {
294
- shouldBuffer: Boolean(next.handler),
295
- maximumDescriptorLength: Math.min(maximumZipEntrySizeInBytes, getRemainingZipScanBudget(this, zipScanStart)),
296
- });
297
-
298
- if (next.handler) {
299
- await this.inflate(zipHeader, fileData, next.handler);
300
- }
301
-
302
- if (zipHeader.dataDescriptor) {
303
- const dataDescriptor = new Uint8Array(zipDataDescriptorLengthInBytes);
304
- await this.tokenizer.readBuffer(dataDescriptor);
305
- if (Token.UINT32_LE.get(dataDescriptor, 0) !== zipDataDescriptorSignature) {
306
- throw new Error(`Expected data-descriptor-signature at position ${this.tokenizer.position - dataDescriptor.length}`);
307
- }
308
- }
309
-
310
- if (hasExceededUnknownSizeScanBudget(this.tokenizer, zipScanStart, maximumUntrustedSkipSizeInBytes)) {
311
- throw new ParserHardLimitError(`ZIP stream probing exceeds ${maximumUntrustedSkipSizeInBytes} bytes`);
312
- }
313
- } while (!stop);
314
- };
315
-
316
92
  function createByteLimitedReadableStream(stream, maximumBytes) {
317
93
  const reader = stream.getReader();
318
94
  let emittedBytes = 0;
@@ -379,348 +155,6 @@ export async function fileTypeFromBlob(blob, options) {
379
155
  return new FileTypeParser(options).fromBlob(blob);
380
156
  }
381
157
 
382
- function getFileTypeFromMimeType(mimeType) {
383
- mimeType = mimeType.toLowerCase();
384
- switch (mimeType) {
385
- case 'application/epub+zip':
386
- return {
387
- ext: 'epub',
388
- mime: mimeType,
389
- };
390
- case 'application/vnd.oasis.opendocument.text':
391
- return {
392
- ext: 'odt',
393
- mime: mimeType,
394
- };
395
- case 'application/vnd.oasis.opendocument.text-template':
396
- return {
397
- ext: 'ott',
398
- mime: mimeType,
399
- };
400
- case 'application/vnd.oasis.opendocument.spreadsheet':
401
- return {
402
- ext: 'ods',
403
- mime: mimeType,
404
- };
405
- case 'application/vnd.oasis.opendocument.spreadsheet-template':
406
- return {
407
- ext: 'ots',
408
- mime: mimeType,
409
- };
410
- case 'application/vnd.oasis.opendocument.presentation':
411
- return {
412
- ext: 'odp',
413
- mime: mimeType,
414
- };
415
- case 'application/vnd.oasis.opendocument.presentation-template':
416
- return {
417
- ext: 'otp',
418
- mime: mimeType,
419
- };
420
- case 'application/vnd.oasis.opendocument.graphics':
421
- return {
422
- ext: 'odg',
423
- mime: mimeType,
424
- };
425
- case 'application/vnd.oasis.opendocument.graphics-template':
426
- return {
427
- ext: 'otg',
428
- mime: mimeType,
429
- };
430
- case 'application/vnd.openxmlformats-officedocument.presentationml.slideshow':
431
- return {
432
- ext: 'ppsx',
433
- mime: mimeType,
434
- };
435
- case 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':
436
- return {
437
- ext: 'xlsx',
438
- mime: mimeType,
439
- };
440
- case 'application/vnd.ms-excel.sheet.macroenabled':
441
- return {
442
- ext: 'xlsm',
443
- mime: 'application/vnd.ms-excel.sheet.macroenabled.12',
444
- };
445
- case 'application/vnd.openxmlformats-officedocument.spreadsheetml.template':
446
- return {
447
- ext: 'xltx',
448
- mime: mimeType,
449
- };
450
- case 'application/vnd.ms-excel.template.macroenabled':
451
- return {
452
- ext: 'xltm',
453
- mime: 'application/vnd.ms-excel.template.macroenabled.12',
454
- };
455
- case 'application/vnd.ms-powerpoint.slideshow.macroenabled':
456
- return {
457
- ext: 'ppsm',
458
- mime: 'application/vnd.ms-powerpoint.slideshow.macroenabled.12',
459
- };
460
- case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
461
- return {
462
- ext: 'docx',
463
- mime: mimeType,
464
- };
465
- case 'application/vnd.ms-word.document.macroenabled':
466
- return {
467
- ext: 'docm',
468
- mime: 'application/vnd.ms-word.document.macroenabled.12',
469
- };
470
- case 'application/vnd.openxmlformats-officedocument.wordprocessingml.template':
471
- return {
472
- ext: 'dotx',
473
- mime: mimeType,
474
- };
475
- case 'application/vnd.ms-word.template.macroenabledtemplate':
476
- return {
477
- ext: 'dotm',
478
- mime: 'application/vnd.ms-word.template.macroenabled.12',
479
- };
480
- case 'application/vnd.openxmlformats-officedocument.presentationml.template':
481
- return {
482
- ext: 'potx',
483
- mime: mimeType,
484
- };
485
- case 'application/vnd.ms-powerpoint.template.macroenabled':
486
- return {
487
- ext: 'potm',
488
- mime: 'application/vnd.ms-powerpoint.template.macroenabled.12',
489
- };
490
- case 'application/vnd.openxmlformats-officedocument.presentationml.presentation':
491
- return {
492
- ext: 'pptx',
493
- mime: mimeType,
494
- };
495
- case 'application/vnd.ms-powerpoint.presentation.macroenabled':
496
- return {
497
- ext: 'pptm',
498
- mime: 'application/vnd.ms-powerpoint.presentation.macroenabled.12',
499
- };
500
- case 'application/vnd.ms-visio.drawing':
501
- return {
502
- ext: 'vsdx',
503
- mime: 'application/vnd.visio',
504
- };
505
- case 'application/vnd.ms-package.3dmanufacturing-3dmodel+xml':
506
- return {
507
- ext: '3mf',
508
- mime: 'model/3mf',
509
- };
510
- default:
511
- }
512
- }
513
-
514
- function _check(buffer, headers, options) {
515
- options = {
516
- offset: 0,
517
- ...options,
518
- };
519
-
520
- for (const [index, header] of headers.entries()) {
521
- // If a bitmask is set
522
- if (options.mask) {
523
- // If header doesn't equal `buf` with bits masked off
524
- if (header !== (options.mask[index] & buffer[index + options.offset])) {
525
- return false;
526
- }
527
- } else if (header !== buffer[index + options.offset]) {
528
- return false;
529
- }
530
- }
531
-
532
- return true;
533
- }
534
-
535
- export function normalizeSampleSize(sampleSize) {
536
- // Accept odd caller input, but preserve valid caller-requested probe depth.
537
- if (!Number.isFinite(sampleSize)) {
538
- return reasonableDetectionSizeInBytes;
539
- }
540
-
541
- return Math.max(1, Math.trunc(sampleSize));
542
- }
543
-
544
- function normalizeMpegOffsetTolerance(mpegOffsetTolerance) {
545
- // This value controls scan depth and therefore worst-case CPU work.
546
- if (!Number.isFinite(mpegOffsetTolerance)) {
547
- return 0;
548
- }
549
-
550
- return Math.max(0, Math.min(maximumMpegOffsetTolerance, Math.trunc(mpegOffsetTolerance)));
551
- }
552
-
553
- function getKnownFileSizeOrMaximum(fileSize) {
554
- if (!Number.isFinite(fileSize)) {
555
- return Number.MAX_SAFE_INTEGER;
556
- }
557
-
558
- return Math.max(0, fileSize);
559
- }
560
-
561
- function hasUnknownFileSize(tokenizer) {
562
- const fileSize = tokenizer.fileInfo.size;
563
- return (
564
- !Number.isFinite(fileSize)
565
- || fileSize === Number.MAX_SAFE_INTEGER
566
- );
567
- }
568
-
569
- function hasExceededUnknownSizeScanBudget(tokenizer, startOffset, maximumBytes) {
570
- return (
571
- hasUnknownFileSize(tokenizer)
572
- && tokenizer.position - startOffset > maximumBytes
573
- );
574
- }
575
-
576
- function getMaximumZipBufferedReadLength(tokenizer) {
577
- const fileSize = tokenizer.fileInfo.size;
578
- const remainingBytes = Number.isFinite(fileSize)
579
- ? Math.max(0, fileSize - tokenizer.position)
580
- : Number.MAX_SAFE_INTEGER;
581
-
582
- return Math.min(remainingBytes, maximumZipBufferedReadSizeInBytes);
583
- }
584
-
585
- function isRecoverableZipError(error) {
586
- if (error instanceof strtok3.EndOfStreamError) {
587
- return true;
588
- }
589
-
590
- if (error instanceof ParserHardLimitError) {
591
- return true;
592
- }
593
-
594
- if (!(error instanceof Error)) {
595
- return false;
596
- }
597
-
598
- if (recoverableZipErrorMessages.has(error.message)) {
599
- return true;
600
- }
601
-
602
- if (recoverableZipErrorCodes.has(error.code)) {
603
- return true;
604
- }
605
-
606
- for (const prefix of recoverableZipErrorMessagePrefixes) {
607
- if (error.message.startsWith(prefix)) {
608
- return true;
609
- }
610
- }
611
-
612
- return false;
613
- }
614
-
615
- function canReadZipEntryForDetection(zipHeader, maximumSize = maximumZipEntrySizeInBytes) {
616
- const sizes = [zipHeader.compressedSize, zipHeader.uncompressedSize];
617
- for (const size of sizes) {
618
- if (
619
- !Number.isFinite(size)
620
- || size < 0
621
- || size > maximumSize
622
- ) {
623
- return false;
624
- }
625
- }
626
-
627
- return true;
628
- }
629
-
630
- function createOpenXmlZipDetectionState() {
631
- return {
632
- hasContentTypesEntry: false,
633
- hasParsedContentTypesEntry: false,
634
- isParsingContentTypes: false,
635
- hasUnparseableContentTypes: false,
636
- hasWordDirectory: false,
637
- hasPresentationDirectory: false,
638
- hasSpreadsheetDirectory: false,
639
- hasThreeDimensionalModelEntry: false,
640
- };
641
- }
642
-
643
- function updateOpenXmlZipDetectionStateFromFilename(openXmlState, filename) {
644
- if (filename.startsWith('word/')) {
645
- openXmlState.hasWordDirectory = true;
646
- }
647
-
648
- if (filename.startsWith('ppt/')) {
649
- openXmlState.hasPresentationDirectory = true;
650
- }
651
-
652
- if (filename.startsWith('xl/')) {
653
- openXmlState.hasSpreadsheetDirectory = true;
654
- }
655
-
656
- if (
657
- filename.startsWith('3D/')
658
- && filename.endsWith('.model')
659
- ) {
660
- openXmlState.hasThreeDimensionalModelEntry = true;
661
- }
662
- }
663
-
664
- function getOpenXmlFileTypeFromZipEntries(openXmlState) {
665
- // Only use directory-name heuristic when [Content_Types].xml was present in the archive
666
- // but its handler was skipped (not invoked, not currently running, and not already resolved).
667
- // This avoids guessing from directory names when content-type parsing already gave a definitive answer or failed.
668
- if (
669
- !openXmlState.hasContentTypesEntry
670
- || openXmlState.hasUnparseableContentTypes
671
- || openXmlState.isParsingContentTypes
672
- || openXmlState.hasParsedContentTypesEntry
673
- ) {
674
- return;
675
- }
676
-
677
- if (openXmlState.hasWordDirectory) {
678
- return {
679
- ext: 'docx',
680
- mime: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
681
- };
682
- }
683
-
684
- if (openXmlState.hasPresentationDirectory) {
685
- return {
686
- ext: 'pptx',
687
- mime: 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
688
- };
689
- }
690
-
691
- if (openXmlState.hasSpreadsheetDirectory) {
692
- return {
693
- ext: 'xlsx',
694
- mime: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
695
- };
696
- }
697
-
698
- if (openXmlState.hasThreeDimensionalModelEntry) {
699
- return {
700
- ext: '3mf',
701
- mime: 'model/3mf',
702
- };
703
- }
704
- }
705
-
706
- function getOpenXmlMimeTypeFromContentTypesXml(xmlContent) {
707
- // We only need the `ContentType="...main+xml"` value, so a small string scan is enough and avoids full XML parsing.
708
- const endPosition = xmlContent.indexOf('.main+xml"');
709
- if (endPosition === -1) {
710
- const mimeType = 'application/vnd.ms-package.3dmanufacturing-3dmodel+xml';
711
- if (xmlContent.includes(`ContentType="${mimeType}"`)) {
712
- return mimeType;
713
- }
714
-
715
- return;
716
- }
717
-
718
- const truncatedContent = xmlContent.slice(0, endPosition);
719
- const firstQuotePosition = truncatedContent.lastIndexOf('"');
720
- // If no quote is found, `lastIndexOf` returns -1 and this intentionally falls back to the full truncated prefix.
721
- return truncatedContent.slice(firstQuotePosition + 1);
722
- }
723
-
724
158
  export async function fileTypeFromTokenizer(tokenizer, options) {
725
159
  return new FileTypeParser(options).fromTokenizer(tokenizer);
726
160
  }
@@ -752,7 +186,11 @@ export class FileTypeParser {
752
186
  };
753
187
  }
754
188
 
755
- async fromTokenizer(tokenizer, detectionReentryCount = 0) {
189
+ createTokenizerFromWebStream(stream) {
190
+ return strtok3.fromWebStream(toDefaultStream(stream), this.getTokenizerOptions());
191
+ }
192
+
193
+ async parseTokenizer(tokenizer, detectionReentryCount = 0) {
756
194
  this.detectionReentryCount = detectionReentryCount;
757
195
  const initialPosition = tokenizer.position;
758
196
  // Iterate through all file-type detectors
@@ -782,6 +220,14 @@ export class FileTypeParser {
782
220
  }
783
221
  }
784
222
 
223
+ async fromTokenizer(tokenizer) {
224
+ try {
225
+ return await this.parseTokenizer(tokenizer);
226
+ } finally {
227
+ await tokenizer.close();
228
+ }
229
+ }
230
+
785
231
  async fromBuffer(input) {
786
232
  if (!(input instanceof Uint8Array || input instanceof ArrayBuffer)) {
787
233
  throw new TypeError(`Expected the \`input\` argument to be of type \`Uint8Array\` or \`ArrayBuffer\`, got \`${typeof input}\``);
@@ -797,58 +243,107 @@ export class FileTypeParser {
797
243
  }
798
244
 
799
245
  async fromBlob(blob) {
246
+ this.options.signal?.throwIfAborted();
800
247
  const tokenizer = strtok3.fromBlob(blob, this.getTokenizerOptions());
801
- try {
802
- return await this.fromTokenizer(tokenizer);
803
- } finally {
804
- await tokenizer.close();
805
- }
248
+ return this.fromTokenizer(tokenizer);
806
249
  }
807
250
 
808
251
  async fromStream(stream) {
809
- const tokenizer = strtok3.fromWebStream(stream, this.getTokenizerOptions());
810
- try {
811
- return await this.fromTokenizer(tokenizer);
812
- } finally {
813
- await tokenizer.close();
252
+ this.options.signal?.throwIfAborted();
253
+ const tokenizer = this.createTokenizerFromWebStream(stream);
254
+ return this.fromTokenizer(tokenizer);
255
+ }
256
+
257
+ async fromFile(path) {
258
+ this.options.signal?.throwIfAborted();
259
+ // TODO: Remove this when `strtok3.fromFile()` safely rejects non-regular filesystem objects without a pathname race.
260
+ const [{default: fsPromises}, {FileTokenizer}] = await Promise.all([
261
+ import('node:fs/promises'),
262
+ import('strtok3'),
263
+ ]);
264
+ const fileHandle = await fsPromises.open(path, fsPromises.constants.O_RDONLY | fsPromises.constants.O_NONBLOCK);
265
+ const fileStat = await fileHandle.stat();
266
+ if (!fileStat.isFile()) {
267
+ await fileHandle.close();
268
+ return;
814
269
  }
270
+
271
+ const tokenizer = new FileTokenizer(fileHandle, {
272
+ ...this.getTokenizerOptions(),
273
+ fileInfo: {path, size: fileStat.size},
274
+ });
275
+ return this.fromTokenizer(tokenizer);
815
276
  }
816
277
 
817
278
  async toDetectionStream(stream, options) {
279
+ this.options.signal?.throwIfAborted();
818
280
  const sampleSize = normalizeSampleSize(options?.sampleSize ?? reasonableDetectionSizeInBytes);
819
281
  let detectedFileType;
820
- let firstChunk;
282
+ let streamEnded = false;
821
283
 
822
- const reader = stream.getReader({mode: 'byob'});
823
- try {
824
- // Read the first chunk from the stream
825
- const {value: chunk, done} = await reader.read(new Uint8Array(sampleSize));
826
- firstChunk = chunk;
827
- if (!done && chunk) {
828
- try {
829
- // Attempt to detect the file type from the chunk
830
- detectedFileType = await this.fromBuffer(chunk.subarray(0, sampleSize));
831
- } catch (error) {
832
- if (!(error instanceof strtok3.EndOfStreamError)) {
833
- throw error; // Re-throw non-EndOfStreamError
834
- }
284
+ const reader = stream.getReader();
285
+ const chunks = [];
286
+ let totalSize = 0;
835
287
 
836
- detectedFileType = undefined;
288
+ try {
289
+ while (totalSize < sampleSize) {
290
+ const {value, done} = await readWithSignal(reader, this.options.signal);
291
+ if (done || !value) {
292
+ streamEnded = true;
293
+ break;
837
294
  }
295
+
296
+ chunks.push(value);
297
+ totalSize += value.length;
838
298
  }
839
299
 
840
- firstChunk = chunk;
300
+ if (
301
+ !streamEnded
302
+ && totalSize === sampleSize
303
+ ) {
304
+ const {value, done} = await readWithSignal(reader, this.options.signal);
305
+ if (done || !value) {
306
+ streamEnded = true;
307
+ } else {
308
+ chunks.push(value);
309
+ totalSize += value.length;
310
+ }
311
+ }
841
312
  } finally {
842
- reader.releaseLock(); // Ensure the reader is released
313
+ reader.releaseLock();
314
+ }
315
+
316
+ if (totalSize > 0) {
317
+ const sample = chunks.length === 1 ? chunks[0] : concatUint8Arrays(chunks);
318
+ try {
319
+ detectedFileType = await this.fromBuffer(sample.subarray(0, sampleSize));
320
+ } catch (error) {
321
+ if (!(error instanceof strtok3.EndOfStreamError)) {
322
+ throw error;
323
+ }
324
+
325
+ detectedFileType = undefined;
326
+ }
327
+
328
+ if (
329
+ !streamEnded
330
+ && detectedFileType?.ext === 'pages'
331
+ ) {
332
+ detectedFileType = {
333
+ ext: 'zip',
334
+ mime: 'application/zip',
335
+ };
336
+ }
843
337
  }
844
338
 
845
- // Create a new ReadableStream to manage locking issues
339
+ // Prepend collected chunks and pipe the rest through
846
340
  const transformStream = new TransformStream({
847
- async start(controller) {
848
- controller.enqueue(firstChunk); // Enqueue the initial chunk
341
+ start(controller) {
342
+ for (const chunk of chunks) {
343
+ controller.enqueue(chunk);
344
+ }
849
345
  },
850
346
  transform(chunk, controller) {
851
- // Pass through the chunks without modification
852
347
  controller.enqueue(chunk);
853
348
  },
854
349
  });
@@ -859,8 +354,72 @@ export class FileTypeParser {
859
354
  return newStream;
860
355
  }
861
356
 
357
+ async detectGzip(tokenizer) {
358
+ if (this.gzipProbeDepth >= maximumNestedGzipProbeDepth) {
359
+ return {
360
+ ext: 'gz',
361
+ mime: 'application/gzip',
362
+ };
363
+ }
364
+
365
+ const gzipHandler = new GzipHandler(tokenizer);
366
+ const limitedInflatedStream = createByteLimitedReadableStream(gzipHandler.inflate(), maximumNestedGzipDetectionSizeInBytes);
367
+ const hasUnknownSize = hasUnknownFileSize(tokenizer);
368
+ let timeout;
369
+ let probeSignal;
370
+ let probeParser;
371
+ let compressedFileType;
372
+
373
+ if (hasUnknownSize) {
374
+ const timeoutController = new AbortController();
375
+ timeout = setTimeout(() => {
376
+ timeoutController.abort(new DOMException(`Operation timed out after ${unknownSizeGzipProbeTimeoutInMilliseconds} ms`, 'TimeoutError'));
377
+ }, unknownSizeGzipProbeTimeoutInMilliseconds);
378
+ probeSignal = this.options.signal === undefined
379
+ ? timeoutController.signal
380
+ : AbortSignal.any([this.options.signal, timeoutController.signal]);
381
+ probeParser = new FileTypeParser({
382
+ ...this.options,
383
+ signal: probeSignal,
384
+ });
385
+ probeParser.gzipProbeDepth = this.gzipProbeDepth + 1;
386
+ } else {
387
+ this.gzipProbeDepth++;
388
+ }
389
+
390
+ try {
391
+ compressedFileType = await (probeParser ?? this).fromStream(limitedInflatedStream);
392
+ } catch (error) {
393
+ if (
394
+ error?.name === 'AbortError'
395
+ && probeSignal?.reason?.name !== 'TimeoutError'
396
+ ) {
397
+ throw error;
398
+ }
399
+
400
+ // Timeout, decompression, or inner-detection failures are expected for non-tar gzip files.
401
+ } finally {
402
+ clearTimeout(timeout);
403
+ if (!hasUnknownSize) {
404
+ this.gzipProbeDepth--;
405
+ }
406
+ }
407
+
408
+ if (compressedFileType?.ext === 'tar') {
409
+ return {
410
+ ext: 'tar.gz',
411
+ mime: 'application/gzip',
412
+ };
413
+ }
414
+
415
+ return {
416
+ ext: 'gz',
417
+ mime: 'application/gzip',
418
+ };
419
+ }
420
+
862
421
  check(header, options) {
863
- return _check(this.buffer, header, options);
422
+ return checkBytes(this.buffer, header, options);
864
423
  }
865
424
 
866
425
  checkString(header, options) {
@@ -878,6 +437,13 @@ export class FileTypeParser {
878
437
 
879
438
  this.tokenizer = tokenizer;
880
439
 
440
+ if (hasUnknownFileSize(tokenizer)) {
441
+ await tokenizer.peekBuffer(this.buffer, {length: 3, mayBeLess: true});
442
+ if (this.check([0x1F, 0x8B, 0x8])) {
443
+ return this.detectGzip(tokenizer);
444
+ }
445
+ }
446
+
881
447
  await tokenizer.peekBuffer(this.buffer, {length: 32, mayBeLess: true});
882
448
 
883
449
  // -- 2-byte signatures --
@@ -981,41 +547,7 @@ export class FileTypeParser {
981
547
  }
982
548
 
983
549
  if (this.check([0x1F, 0x8B, 0x8])) {
984
- if (this.gzipProbeDepth >= maximumNestedGzipProbeDepth) {
985
- return {
986
- ext: 'gz',
987
- mime: 'application/gzip',
988
- };
989
- }
990
-
991
- const gzipHandler = new GzipHandler(tokenizer);
992
- const limitedInflatedStream = createByteLimitedReadableStream(gzipHandler.inflate(), maximumNestedGzipDetectionSizeInBytes);
993
- let compressedFileType;
994
- try {
995
- this.gzipProbeDepth++;
996
- compressedFileType = await this.fromStream(limitedInflatedStream);
997
- } catch (error) {
998
- if (error?.name === 'AbortError') {
999
- throw error;
1000
- }
1001
-
1002
- // Decompression or inner-detection failures are expected for non-tar gzip files.
1003
- } finally {
1004
- this.gzipProbeDepth--;
1005
- }
1006
-
1007
- // We only need enough inflated bytes to confidently decide whether this is tar.gz.
1008
- if (compressedFileType?.ext === 'tar') {
1009
- return {
1010
- ext: 'tar.gz',
1011
- mime: 'application/gzip',
1012
- };
1013
- }
1014
-
1015
- return {
1016
- ext: 'gz',
1017
- mime: 'application/gzip',
1018
- };
550
+ return this.detectGzip(tokenizer);
1019
551
  }
1020
552
 
1021
553
  if (this.check([0x42, 0x5A, 0x68])) {
@@ -1034,7 +566,7 @@ export class FileTypeParser {
1034
566
  const isUnknownFileSize = hasUnknownFileSize(tokenizer);
1035
567
  if (
1036
568
  !Number.isFinite(id3HeaderLength)
1037
- || id3HeaderLength < 0
569
+ || id3HeaderLength < 0
1038
570
  // Keep ID3 probing bounded for unknown-size streams to avoid attacker-controlled large skips.
1039
571
  || (
1040
572
  isUnknownFileSize
@@ -1076,7 +608,7 @@ export class FileTypeParser {
1076
608
  }
1077
609
 
1078
610
  this.detectionReentryCount++;
1079
- return this.fromTokenizer(tokenizer, this.detectionReentryCount); // Skip ID3 header, recursion
611
+ return this.parseTokenizer(tokenizer, this.detectionReentryCount); // Skip ID3 header, recursion
1080
612
  }
1081
613
 
1082
614
  // Musepack, SV7
@@ -1160,108 +692,7 @@ export class FileTypeParser {
1160
692
  // Zip-based file formats
1161
693
  // Need to be before the `zip` check
1162
694
  if (this.check([0x50, 0x4B, 0x3, 0x4])) { // Local file header signature
1163
- let fileType;
1164
- const openXmlState = createOpenXmlZipDetectionState();
1165
-
1166
- try {
1167
- await new ZipHandler(tokenizer).unzip(zipHeader => {
1168
- updateOpenXmlZipDetectionStateFromFilename(openXmlState, zipHeader.filename);
1169
-
1170
- const isOpenXmlContentTypesEntry = zipHeader.filename === '[Content_Types].xml';
1171
- const openXmlFileTypeFromEntries = getOpenXmlFileTypeFromZipEntries(openXmlState);
1172
- if (
1173
- !isOpenXmlContentTypesEntry
1174
- && openXmlFileTypeFromEntries
1175
- ) {
1176
- fileType = openXmlFileTypeFromEntries;
1177
- return {
1178
- stop: true,
1179
- };
1180
- }
1181
-
1182
- switch (zipHeader.filename) {
1183
- case 'META-INF/mozilla.rsa':
1184
- fileType = {
1185
- ext: 'xpi',
1186
- mime: 'application/x-xpinstall',
1187
- };
1188
- return {
1189
- stop: true,
1190
- };
1191
- case 'META-INF/MANIFEST.MF':
1192
- fileType = {
1193
- ext: 'jar',
1194
- mime: 'application/java-archive',
1195
- };
1196
- return {
1197
- stop: true,
1198
- };
1199
- case 'mimetype':
1200
- if (!canReadZipEntryForDetection(zipHeader, maximumZipTextEntrySizeInBytes)) {
1201
- return {};
1202
- }
1203
-
1204
- return {
1205
- async handler(fileData) {
1206
- // Use TextDecoder to decode the UTF-8 encoded data
1207
- const mimeType = new TextDecoder('utf-8').decode(fileData).trim();
1208
- fileType = getFileTypeFromMimeType(mimeType);
1209
- },
1210
- stop: true,
1211
- };
1212
-
1213
- case '[Content_Types].xml': {
1214
- openXmlState.hasContentTypesEntry = true;
1215
-
1216
- if (!canReadZipEntryForDetection(zipHeader, maximumZipTextEntrySizeInBytes)) {
1217
- openXmlState.hasUnparseableContentTypes = true;
1218
- return {};
1219
- }
1220
-
1221
- openXmlState.isParsingContentTypes = true;
1222
- return {
1223
- async handler(fileData) {
1224
- // Use TextDecoder to decode the UTF-8 encoded data
1225
- const xmlContent = new TextDecoder('utf-8').decode(fileData);
1226
- const mimeType = getOpenXmlMimeTypeFromContentTypesXml(xmlContent);
1227
- if (mimeType) {
1228
- fileType = getFileTypeFromMimeType(mimeType);
1229
- }
1230
-
1231
- openXmlState.hasParsedContentTypesEntry = true;
1232
- openXmlState.isParsingContentTypes = false;
1233
- },
1234
- stop: true,
1235
- };
1236
- }
1237
-
1238
- default:
1239
- if (/classes\d*\.dex/.test(zipHeader.filename)) {
1240
- fileType = {
1241
- ext: 'apk',
1242
- mime: 'application/vnd.android.package-archive',
1243
- };
1244
- return {stop: true};
1245
- }
1246
-
1247
- return {};
1248
- }
1249
- });
1250
- } catch (error) {
1251
- if (!isRecoverableZipError(error)) {
1252
- throw error;
1253
- }
1254
-
1255
- if (openXmlState.isParsingContentTypes) {
1256
- openXmlState.isParsingContentTypes = false;
1257
- openXmlState.hasUnparseableContentTypes = true;
1258
- }
1259
- }
1260
-
1261
- return fileType ?? getOpenXmlFileTypeFromZipEntries(openXmlState) ?? {
1262
- ext: 'zip',
1263
- mime: 'application/zip',
1264
- };
695
+ return detectZip(tokenizer);
1265
696
  }
1266
697
 
1267
698
  if (this.checkString('OggS')) {
@@ -1271,7 +702,7 @@ export class FileTypeParser {
1271
702
  await tokenizer.readBuffer(type);
1272
703
 
1273
704
  // Needs to be before `ogg` check
1274
- if (_check(type, [0x4F, 0x70, 0x75, 0x73, 0x48, 0x65, 0x61, 0x64])) {
705
+ if (checkBytes(type, [0x4F, 0x70, 0x75, 0x73, 0x48, 0x65, 0x61, 0x64])) {
1275
706
  return {
1276
707
  ext: 'opus',
1277
708
  mime: 'audio/ogg; codecs=opus',
@@ -1279,7 +710,7 @@ export class FileTypeParser {
1279
710
  }
1280
711
 
1281
712
  // If ' theora' in header.
1282
- if (_check(type, [0x80, 0x74, 0x68, 0x65, 0x6F, 0x72, 0x61])) {
713
+ if (checkBytes(type, [0x80, 0x74, 0x68, 0x65, 0x6F, 0x72, 0x61])) {
1283
714
  return {
1284
715
  ext: 'ogv',
1285
716
  mime: 'video/ogg',
@@ -1287,7 +718,7 @@ export class FileTypeParser {
1287
718
  }
1288
719
 
1289
720
  // If '\x01video' in header.
1290
- if (_check(type, [0x01, 0x76, 0x69, 0x64, 0x65, 0x6F, 0x00])) {
721
+ if (checkBytes(type, [0x01, 0x76, 0x69, 0x64, 0x65, 0x6F, 0x00])) {
1291
722
  return {
1292
723
  ext: 'ogm',
1293
724
  mime: 'video/ogg',
@@ -1295,7 +726,7 @@ export class FileTypeParser {
1295
726
  }
1296
727
 
1297
728
  // If ' FLAC' in header https://xiph.org/flac/faq.html
1298
- if (_check(type, [0x7F, 0x46, 0x4C, 0x41, 0x43])) {
729
+ if (checkBytes(type, [0x7F, 0x46, 0x4C, 0x41, 0x43])) {
1299
730
  return {
1300
731
  ext: 'oga',
1301
732
  mime: 'audio/ogg',
@@ -1303,7 +734,7 @@ export class FileTypeParser {
1303
734
  }
1304
735
 
1305
736
  // 'Speex ' in header https://en.wikipedia.org/wiki/Speex
1306
- if (_check(type, [0x53, 0x70, 0x65, 0x65, 0x78, 0x20, 0x20])) {
737
+ if (checkBytes(type, [0x53, 0x70, 0x65, 0x65, 0x78, 0x20, 0x20])) {
1307
738
  return {
1308
739
  ext: 'spx',
1309
740
  mime: 'audio/ogg',
@@ -1311,7 +742,7 @@ export class FileTypeParser {
1311
742
  }
1312
743
 
1313
744
  // If '\x01vorbis' in header
1314
- if (_check(type, [0x01, 0x76, 0x6F, 0x72, 0x62, 0x69, 0x73])) {
745
+ if (checkBytes(type, [0x01, 0x76, 0x6F, 0x72, 0x62, 0x69, 0x73])) {
1315
746
  return {
1316
747
  ext: 'ogg',
1317
748
  mime: 'audio/ogg',
@@ -1387,7 +818,7 @@ export class FileTypeParser {
1387
818
  if (this.checkString('LZIP')) {
1388
819
  return {
1389
820
  ext: 'lz',
1390
- mime: 'application/x-lzip',
821
+ mime: 'application/lzip',
1391
822
  };
1392
823
  }
1393
824
 
@@ -1452,110 +883,7 @@ export class FileTypeParser {
1452
883
 
1453
884
  // https://github.com/file/file/blob/master/magic/Magdir/matroska
1454
885
  if (this.check([0x1A, 0x45, 0xDF, 0xA3])) { // Root element: EBML
1455
- async function readField() {
1456
- const msb = await tokenizer.peekNumber(Token.UINT8);
1457
- let mask = 0x80;
1458
- let ic = 0; // 0 = A, 1 = B, 2 = C, 3 = D
1459
-
1460
- while ((msb & mask) === 0 && mask !== 0) {
1461
- ++ic;
1462
- mask >>= 1;
1463
- }
1464
-
1465
- const id = new Uint8Array(ic + 1);
1466
- await safeReadBuffer(tokenizer, id, undefined, {
1467
- maximumLength: id.length,
1468
- reason: 'EBML field',
1469
- });
1470
- return id;
1471
- }
1472
-
1473
- async function readElement() {
1474
- const idField = await readField();
1475
- const lengthField = await readField();
1476
-
1477
- lengthField[0] ^= 0x80 >> (lengthField.length - 1);
1478
- const nrLength = Math.min(6, lengthField.length); // JavaScript can max read 6 bytes integer
1479
-
1480
- const idView = new DataView(idField.buffer);
1481
- const lengthView = new DataView(lengthField.buffer, lengthField.length - nrLength, nrLength);
1482
-
1483
- return {
1484
- id: getUintBE(idView),
1485
- len: getUintBE(lengthView),
1486
- };
1487
- }
1488
-
1489
- async function readChildren(children) {
1490
- let ebmlElementCount = 0;
1491
- while (children > 0) {
1492
- ebmlElementCount++;
1493
- if (ebmlElementCount > maximumEbmlElementCount) {
1494
- return;
1495
- }
1496
-
1497
- if (hasExceededUnknownSizeScanBudget(tokenizer, ebmlScanStart, maximumUntrustedSkipSizeInBytes)) {
1498
- return;
1499
- }
1500
-
1501
- const previousPosition = tokenizer.position;
1502
- const element = await readElement();
1503
-
1504
- if (element.id === 0x42_82) {
1505
- // `DocType` is a short string ("webm", "matroska", ...), reject implausible lengths to avoid large allocations.
1506
- if (element.len > maximumEbmlDocumentTypeSizeInBytes) {
1507
- return;
1508
- }
1509
-
1510
- const documentTypeLength = getSafeBound(element.len, maximumEbmlDocumentTypeSizeInBytes, 'EBML DocType');
1511
- const rawValue = await tokenizer.readToken(new Token.StringType(documentTypeLength));
1512
- return rawValue.replaceAll(/\00.*$/g, ''); // Return DocType
1513
- }
1514
-
1515
- if (
1516
- hasUnknownFileSize(tokenizer)
1517
- && (
1518
- !Number.isFinite(element.len)
1519
- || element.len < 0
1520
- || element.len > maximumEbmlElementPayloadSizeInBytes
1521
- )
1522
- ) {
1523
- return;
1524
- }
1525
-
1526
- await safeIgnore(tokenizer, element.len, {
1527
- maximumLength: hasUnknownFileSize(tokenizer) ? maximumEbmlElementPayloadSizeInBytes : tokenizer.fileInfo.size,
1528
- reason: 'EBML payload',
1529
- }); // ignore payload
1530
- --children;
1531
-
1532
- // Safeguard against malformed files: bail if the position did not advance.
1533
- if (tokenizer.position <= previousPosition) {
1534
- return;
1535
- }
1536
- }
1537
- }
1538
-
1539
- const rootElement = await readElement();
1540
- const ebmlScanStart = tokenizer.position;
1541
- const documentType = await readChildren(rootElement.len);
1542
-
1543
- switch (documentType) {
1544
- case 'webm':
1545
- return {
1546
- ext: 'webm',
1547
- mime: 'video/webm',
1548
- };
1549
-
1550
- case 'matroska':
1551
- return {
1552
- ext: 'mkv',
1553
- mime: 'video/matroska',
1554
- };
1555
-
1556
- default:
1557
- return;
1558
- }
886
+ return detectEbml(tokenizer);
1559
887
  }
1560
888
 
1561
889
  if (this.checkString('SQLi')) {
@@ -1653,7 +981,7 @@ export class FileTypeParser {
1653
981
  if (this.check([0x04, 0x22, 0x4D, 0x18])) {
1654
982
  return {
1655
983
  ext: 'lz4',
1656
- mime: 'application/x-lz4', // Invented by us
984
+ mime: 'application/x-lz4', // Informal, used by freedesktop.org shared-mime-info
1657
985
  };
1658
986
  }
1659
987
 
@@ -1688,7 +1016,7 @@ export class FileTypeParser {
1688
1016
  };
1689
1017
  }
1690
1018
 
1691
- if (this.checkString('{\\rtf')) {
1019
+ if (this.checkString(String.raw`{\rtf`)) {
1692
1020
  return {
1693
1021
  ext: 'rtf',
1694
1022
  mime: 'application/rtf',
@@ -1789,7 +1117,7 @@ export class FileTypeParser {
1789
1117
  if (this.checkString('DRACO')) {
1790
1118
  return {
1791
1119
  ext: 'drc',
1792
- mime: 'application/vnd.google.draco', // Invented by us
1120
+ mime: 'application/x-ft-draco',
1793
1121
  };
1794
1122
  }
1795
1123
 
@@ -1835,7 +1163,7 @@ export class FileTypeParser {
1835
1163
 
1836
1164
  if (this.checkString('AC')) {
1837
1165
  const version = new Token.StringType(4, 'latin1').get(this.buffer, 2);
1838
- if (version.match('^d*') && version >= 1000 && version <= 1050) {
1166
+ if (/^\d+$/v.test(version) && version >= 1000 && version <= 1050) {
1839
1167
  return {
1840
1168
  ext: 'dwg',
1841
1169
  mime: 'image/vnd.dwg',
@@ -1890,110 +1218,7 @@ export class FileTypeParser {
1890
1218
  // -- 8-byte signatures --
1891
1219
 
1892
1220
  if (this.check([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A])) {
1893
- const pngFileType = {
1894
- ext: 'png',
1895
- mime: 'image/png',
1896
- };
1897
-
1898
- const apngFileType = {
1899
- ext: 'apng',
1900
- mime: 'image/apng',
1901
- };
1902
-
1903
- // APNG format (https://wiki.mozilla.org/APNG_Specification)
1904
- // 1. Find the first IDAT (image data) chunk (49 44 41 54)
1905
- // 2. Check if there is an "acTL" chunk before the IDAT one (61 63 54 4C)
1906
-
1907
- // Offset calculated as follows:
1908
- // - 8 bytes: PNG signature
1909
- // - 4 (length) + 4 (chunk type) + 13 (chunk data) + 4 (CRC): IHDR chunk
1910
-
1911
- await tokenizer.ignore(8); // ignore PNG signature
1912
-
1913
- async function readChunkHeader() {
1914
- return {
1915
- length: await tokenizer.readToken(Token.INT32_BE),
1916
- type: await tokenizer.readToken(new Token.StringType(4, 'latin1')),
1917
- };
1918
- }
1919
-
1920
- const isUnknownPngStream = hasUnknownFileSize(tokenizer);
1921
- const pngScanStart = tokenizer.position;
1922
- let pngChunkCount = 0;
1923
- let hasSeenImageHeader = false;
1924
- do {
1925
- pngChunkCount++;
1926
- if (pngChunkCount > maximumPngChunkCount) {
1927
- break;
1928
- }
1929
-
1930
- if (hasExceededUnknownSizeScanBudget(tokenizer, pngScanStart, maximumPngStreamScanBudgetInBytes)) {
1931
- break;
1932
- }
1933
-
1934
- const previousPosition = tokenizer.position;
1935
- const chunk = await readChunkHeader();
1936
- if (chunk.length < 0) {
1937
- return; // Invalid chunk length
1938
- }
1939
-
1940
- if (chunk.type === 'IHDR') {
1941
- // PNG requires the first real image header to be a 13-byte IHDR chunk.
1942
- if (chunk.length !== 13) {
1943
- return;
1944
- }
1945
-
1946
- hasSeenImageHeader = true;
1947
- }
1948
-
1949
- switch (chunk.type) {
1950
- case 'IDAT':
1951
- return pngFileType;
1952
- case 'acTL':
1953
- return apngFileType;
1954
- default:
1955
- if (
1956
- !hasSeenImageHeader
1957
- && chunk.type !== 'CgBI'
1958
- ) {
1959
- return;
1960
- }
1961
-
1962
- if (
1963
- isUnknownPngStream
1964
- && chunk.length > maximumPngChunkSizeInBytes
1965
- ) {
1966
- // Avoid huge attacker-controlled skips when probing unknown-size streams.
1967
- return hasSeenImageHeader && isPngAncillaryChunk(chunk.type) ? pngFileType : undefined;
1968
- }
1969
-
1970
- try {
1971
- await safeIgnore(tokenizer, chunk.length + 4, {
1972
- maximumLength: isUnknownPngStream ? maximumPngChunkSizeInBytes + 4 : tokenizer.fileInfo.size,
1973
- reason: 'PNG chunk payload',
1974
- }); // Ignore chunk-data + CRC
1975
- } catch (error) {
1976
- if (
1977
- !isUnknownPngStream
1978
- && (
1979
- error instanceof ParserHardLimitError
1980
- || error instanceof strtok3.EndOfStreamError
1981
- )
1982
- ) {
1983
- return pngFileType;
1984
- }
1985
-
1986
- throw error;
1987
- }
1988
- }
1989
-
1990
- // Safeguard against malformed files: bail if the position did not advance.
1991
- if (tokenizer.position <= previousPosition) {
1992
- break;
1993
- }
1994
- } while (tokenizer.position + 8 < tokenizer.fileInfo.size);
1995
-
1996
- return pngFileType;
1221
+ return detectPng(tokenizer);
1997
1222
  }
1998
1223
 
1999
1224
  if (this.check([0x41, 0x52, 0x52, 0x4F, 0x57, 0x31, 0x00, 0x00])) {
@@ -2151,116 +1376,7 @@ export class FileTypeParser {
2151
1376
 
2152
1377
  // ASF_Header_Object first 80 bytes
2153
1378
  if (this.check([0x30, 0x26, 0xB2, 0x75, 0x8E, 0x66, 0xCF, 0x11, 0xA6, 0xD9])) {
2154
- let isMalformedAsf = false;
2155
- try {
2156
- async function readHeader() {
2157
- const guid = new Uint8Array(16);
2158
- await safeReadBuffer(tokenizer, guid, undefined, {
2159
- maximumLength: guid.length,
2160
- reason: 'ASF header GUID',
2161
- });
2162
- return {
2163
- id: guid,
2164
- size: Number(await tokenizer.readToken(Token.UINT64_LE)),
2165
- };
2166
- }
2167
-
2168
- await safeIgnore(tokenizer, 30, {
2169
- maximumLength: 30,
2170
- reason: 'ASF header prelude',
2171
- });
2172
- const isUnknownFileSize = hasUnknownFileSize(tokenizer);
2173
- const asfHeaderScanStart = tokenizer.position;
2174
- let asfHeaderObjectCount = 0;
2175
- while (tokenizer.position + 24 < tokenizer.fileInfo.size) {
2176
- asfHeaderObjectCount++;
2177
- if (asfHeaderObjectCount > maximumAsfHeaderObjectCount) {
2178
- break;
2179
- }
2180
-
2181
- if (hasExceededUnknownSizeScanBudget(tokenizer, asfHeaderScanStart, maximumUntrustedSkipSizeInBytes)) {
2182
- break;
2183
- }
2184
-
2185
- const previousPosition = tokenizer.position;
2186
- const header = await readHeader();
2187
- let payload = header.size - 24;
2188
- if (
2189
- !Number.isFinite(payload)
2190
- || payload < 0
2191
- ) {
2192
- isMalformedAsf = true;
2193
- break;
2194
- }
2195
-
2196
- if (_check(header.id, [0x91, 0x07, 0xDC, 0xB7, 0xB7, 0xA9, 0xCF, 0x11, 0x8E, 0xE6, 0x00, 0xC0, 0x0C, 0x20, 0x53, 0x65])) {
2197
- // Sync on Stream-Properties-Object (B7DC0791-A9B7-11CF-8EE6-00C00C205365)
2198
- const typeId = new Uint8Array(16);
2199
- payload -= await safeReadBuffer(tokenizer, typeId, undefined, {
2200
- maximumLength: typeId.length,
2201
- reason: 'ASF stream type GUID',
2202
- });
2203
-
2204
- if (_check(typeId, [0x40, 0x9E, 0x69, 0xF8, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
2205
- // Found audio:
2206
- return {
2207
- ext: 'asf',
2208
- mime: 'audio/x-ms-asf',
2209
- };
2210
- }
2211
-
2212
- if (_check(typeId, [0xC0, 0xEF, 0x19, 0xBC, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
2213
- // Found video:
2214
- return {
2215
- ext: 'asf',
2216
- mime: 'video/x-ms-asf',
2217
- };
2218
- }
2219
-
2220
- break;
2221
- }
2222
-
2223
- if (
2224
- isUnknownFileSize
2225
- && payload > maximumAsfHeaderPayloadSizeInBytes
2226
- ) {
2227
- isMalformedAsf = true;
2228
- break;
2229
- }
2230
-
2231
- await safeIgnore(tokenizer, payload, {
2232
- maximumLength: isUnknownFileSize ? maximumAsfHeaderPayloadSizeInBytes : tokenizer.fileInfo.size,
2233
- reason: 'ASF header payload',
2234
- });
2235
-
2236
- // Safeguard against malformed files: break if the position did not advance.
2237
- if (tokenizer.position <= previousPosition) {
2238
- isMalformedAsf = true;
2239
- break;
2240
- }
2241
- }
2242
- } catch (error) {
2243
- if (
2244
- error instanceof strtok3.EndOfStreamError
2245
- || error instanceof ParserHardLimitError
2246
- ) {
2247
- if (hasUnknownFileSize(tokenizer)) {
2248
- isMalformedAsf = true;
2249
- }
2250
- } else {
2251
- throw error;
2252
- }
2253
- }
2254
-
2255
- if (isMalformedAsf) {
2256
- return;
2257
- }
2258
-
2259
- // Default to ASF generic extension
2260
- return {
2261
- ext: 'asf',
2262
- mime: 'application/vnd.ms-asf',
2263
- };
1379
+ return detectAsf(tokenizer);
2264
1380
  }
2265
1381
 
2266
1382
  if (this.check([0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A])) {
@@ -2474,21 +1590,21 @@ export class FileTypeParser {
2474
1590
  if (this.check([0x4C, 0x00, 0x00, 0x00, 0x01, 0x14, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46])) {
2475
1591
  return {
2476
1592
  ext: 'lnk',
2477
- mime: 'application/x.ms.shortcut', // Invented by us
1593
+ mime: 'application/x-ms-shortcut', // Informal, used by freedesktop.org shared-mime-info
2478
1594
  };
2479
1595
  }
2480
1596
 
2481
1597
  if (this.check([0x62, 0x6F, 0x6F, 0x6B, 0x00, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x72, 0x6B, 0x00, 0x00, 0x00, 0x00])) {
2482
1598
  return {
2483
1599
  ext: 'alias',
2484
- mime: 'application/x.apple.alias', // Invented by us
1600
+ mime: 'application/x-ft-apple.alias',
2485
1601
  };
2486
1602
  }
2487
1603
 
2488
1604
  if (this.checkString('Kaydara FBX Binary \u0000')) {
2489
1605
  return {
2490
1606
  ext: 'fbx',
2491
- mime: 'application/x.autodesk.fbx', // Invented by us
1607
+ mime: 'application/x-ft-fbx',
2492
1608
  };
2493
1609
  }
2494
1610
 
@@ -2790,3 +1906,7 @@ export class FileTypeParser {
2790
1906
 
2791
1907
  export const supportedExtensions = new Set(extensions);
2792
1908
  export const supportedMimeTypes = new Set(mimeTypes);
1909
+
1910
+ export async function fileTypeFromFile(path, options) {
1911
+ return (new FileTypeParser(options)).fromFile(path);
1912
+ }