file-type 16.5.4 → 21.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/core.js CHANGED
@@ -1,43 +1,543 @@
1
- 'use strict';
2
- const Token = require('token-types');
3
- const strtok3 = require('strtok3/lib/core');
4
- const {
1
+ /**
2
+ Primary entry point, Node.js specific entry point is index.js
3
+ */
4
+
5
+ import * as Token from 'token-types';
6
+ import * as strtok3 from 'strtok3/core';
7
+ import {ZipHandler, GzipHandler} from '@tokenizer/inflate';
8
+ import {getUintBE} from 'uint8array-extras';
9
+ import {
5
10
  stringToBytes,
6
11
  tarHeaderChecksumMatches,
7
- uint32SyncSafeToken
8
- } = require('./util');
9
- const supported = require('./supported');
12
+ uint32SyncSafeToken,
13
+ } from './util.js';
14
+ import {extensions, mimeTypes} from './supported.js';
15
+
16
// A fair amount of file-types are detectable within this range.
export const reasonableDetectionSizeInBytes = 4100;

// Keep defensive limits small enough to avoid accidental memory spikes from untrusted inputs.

// Base byte budgets; most other byte limits below alias one of these.
const maximumZipEntrySizeInBytes = 2 ** 20;
const maximumUntrustedSkipSizeInBytes = 16 * (2 ** 20);
const maximumUnknownSizePayloadProbeSizeInBytes = maximumZipEntrySizeInBytes;

// MPEG: the tolerance may never reach past the default detection sample.
const maximumMpegOffsetTolerance = reasonableDetectionSizeInBytes - 2;

// ZIP
const maximumZipEntryCount = 1024;
const maximumZipBufferedReadSizeInBytes = 0x7F_FF_FF_FF; // (2 ** 31) - 1
const maximumZipTextEntrySizeInBytes = maximumZipEntrySizeInBytes;

// Gzip
const maximumNestedGzipDetectionSizeInBytes = maximumUntrustedSkipSizeInBytes;
const maximumNestedGzipProbeDepth = 1;
const unknownSizeGzipProbeTimeoutInMilliseconds = 100;

// ID3
const maximumId3HeaderSizeInBytes = maximumUntrustedSkipSizeInBytes;

// EBML
const maximumEbmlDocumentTypeSizeInBytes = 64;
const maximumEbmlElementPayloadSizeInBytes = maximumUnknownSizePayloadProbeSizeInBytes;
const maximumEbmlElementCount = 256;

// PNG
const maximumPngChunkCount = 512;
const maximumPngStreamScanBudgetInBytes = maximumUntrustedSkipSizeInBytes;
const maximumPngChunkSizeInBytes = maximumUnknownSizePayloadProbeSizeInBytes;

// ASF
const maximumAsfHeaderObjectCount = 512;
const maximumAsfHeaderPayloadSizeInBytes = maximumUnknownSizePayloadProbeSizeInBytes;

// TIFF
const maximumTiffTagCount = 512;
const maximumTiffStreamIfdOffsetInBytes = maximumUnknownSizePayloadProbeSizeInBytes;
const maximumTiffIfdOffsetInBytes = maximumUntrustedSkipSizeInBytes;

// Upper bound on how often detection may re-enter itself.
const maximumDetectionReentryCount = 256;

// ZIP parsing failures that are matched by exact message, by message prefix, or by `error.code`.
const recoverableZipErrorMessages = new Set([
	'Unexpected signature',
	'Encrypted ZIP',
	'Expected Central-File-Header signature',
]);
const recoverableZipErrorMessagePrefixes = [
	'ZIP entry count exceeds ',
	'Unsupported ZIP compression method:',
	'ZIP entry compressed data exceeds ',
	'ZIP entry decompressed data exceeds ',
	'Expected data-descriptor-signature at position ',
];
const recoverableZipErrorCodes = new Set([
	'Z_BUF_ERROR',
	'Z_DATA_ERROR',
	'ERR_INVALID_STATE',
]);
58
+
59
/**
Error thrown when a size or count taken from the parsed input falls outside a defensive hard limit.

The `name` property is set explicitly so that stack traces and logs identify the error type instead of showing a generic `Error`.
*/
class ParserHardLimitError extends Error {
	constructor(...arguments_) {
		super(...arguments_);
		this.name = 'ParserHardLimitError';
	}
}
60
+
61
/**
Replace `close()` and `abort()` on a tokenizer's stream reader when that reader's constructor is named `WebStreamByobReader`.

The replacements cancel the underlying reader and then release its lock. The lock is released in a `finally` block so that a rejected `cancel()` cannot leave the stream locked; the rejection still propagates to the caller.

@param tokenizer - Tokenizer to patch. May be nullish or lack a `streamReader`, in which case it is returned untouched.
@returns The same `tokenizer` that was passed in.
*/
function patchWebByobTokenizerClose(tokenizer) {
	const streamReader = tokenizer?.streamReader;
	if (streamReader?.constructor?.name !== 'WebStreamByobReader') {
		return tokenizer;
	}

	const {reader} = streamReader;
	const cancelAndRelease = async () => {
		try {
			await reader.cancel();
		} finally {
			// Always give the lock back, even when cancellation fails.
			reader.releaseLock();
		}
	};

	streamReader.close = cancelAndRelease;
	streamReader.abort = async () => {
		// Flag the interruption before tearing the reader down.
		streamReader.interrupted = true;
		await cancelAndRelease();
	};

	return tokenizer;
}
81
+
82
/**
Validate a size taken from parsed input against an upper bound.

@param value - Size to validate.
@param maximum - Largest accepted size, inclusive.
@param reason - Label for what is being bounded; used as the start of the error message.
@returns `value` unchanged when it is finite, non-negative, and not greater than `maximum`.
@throws {ParserHardLimitError} When `value` fails any of those checks.
*/
function getSafeBound(value, maximum, reason) {
	const isInvalid = !Number.isFinite(value) || value < 0 || value > maximum;
	if (!isInvalid) {
		return value;
	}

	throw new ParserHardLimitError(`${reason} has invalid size ${value} (maximum ${maximum} bytes)`);
}
93
+
94
/**
Skip `length` bytes on the tokenizer after validating the length with `getSafeBound`.

@param tokenizer - Tokenizer whose `ignore()` is called.
@param length - Number of bytes to skip.
@param limits - Optional `maximumLength` (defaults to `maximumUntrustedSkipSizeInBytes`) and `reason` (defaults to `'skip'`), both forwarded to `getSafeBound`.
*/
async function safeIgnore(tokenizer, length, limits = {}) {
	const {maximumLength = maximumUntrustedSkipSizeInBytes, reason = 'skip'} = limits;
	const boundedLength = getSafeBound(length, maximumLength, reason);
	await tokenizer.ignore(boundedLength);
}
98
+
99
/**
Read into `buffer` through the tokenizer after validating the requested length with `getSafeBound`.

@param tokenizer - Tokenizer whose `readBuffer()` is called.
@param buffer - Destination buffer.
@param options - Read options forwarded to `readBuffer()`; `options.length` defaults to `buffer.length`.
@param limits - Optional `maximumLength` (defaults to `buffer.length`) and `reason` (defaults to `'read'`), both forwarded to `getSafeBound`.
@returns Whatever `tokenizer.readBuffer()` returns.
*/
async function safeReadBuffer(tokenizer, buffer, options, limits = {}) {
	const {maximumLength = buffer.length, reason = 'read'} = limits;
	const requestedLength = options?.length ?? buffer.length;
	const boundedLength = getSafeBound(requestedLength, maximumLength, reason);
	const readOptions = {...options, length: boundedLength};
	return tokenizer.readBuffer(buffer, readOptions);
}
10
107
 
11
- const minimumBytes = 4100; // A fair amount of file-types are detectable within this range
108
+ async function decompressDeflateRawWithLimit(data, {maximumLength = maximumZipEntrySizeInBytes} = {}) {
109
+ const input = new ReadableStream({
110
+ start(controller) {
111
+ controller.enqueue(data);
112
+ controller.close();
113
+ },
114
+ });
115
+ const output = input.pipeThrough(new DecompressionStream('deflate-raw'));
116
+ const reader = output.getReader();
117
+ const chunks = [];
118
+ let totalLength = 0;
12
119
 
13
- async function fromStream(stream) {
14
- const tokenizer = await strtok3.fromStream(stream);
15
120
  try {
16
- return await fromTokenizer(tokenizer);
121
+ for (;;) {
122
+ const {done, value} = await reader.read();
123
+ if (done) {
124
+ break;
125
+ }
126
+
127
+ totalLength += value.length;
128
+ if (totalLength > maximumLength) {
129
+ await reader.cancel();
130
+ throw new Error(`ZIP entry decompressed data exceeds ${maximumLength} bytes`);
131
+ }
132
+
133
+ chunks.push(value);
134
+ }
17
135
  } finally {
18
- await tokenizer.close();
136
+ reader.releaseLock();
137
+ }
138
+
139
+ const uncompressedData = new Uint8Array(totalLength);
140
+ let offset = 0;
141
+ for (const chunk of chunks) {
142
+ uncompressedData.set(chunk, offset);
143
+ offset += chunk.length;
144
+ }
145
+
146
+ return uncompressedData;
147
+ }
148
+
149
const zipDataDescriptorSignature = 0x08_07_4B_50;
const zipDataDescriptorLengthInBytes = 16;
// One byte less than a full descriptor.
const zipDataDescriptorOverlapLengthInBytes = zipDataDescriptorLengthInBytes - 1;

/**
Search `buffer` for a ZIP data descriptor whose size field is consistent with its position.

A position matches when the little-endian 32-bit value at that position equals the descriptor signature and the little-endian 32-bit value 8 bytes later equals `bytesConsumed` plus the position.

@param buffer - Bytes to search.
@param bytesConsumed - Number of entry bytes already consumed before `buffer` starts.
@returns The offset of the first match, or `-1` when there is none or the buffer is shorter than one descriptor.
*/
function findZipDataDescriptorOffset(buffer, bytesConsumed) {
	// Number of start positions that still leave room for a whole descriptor.
	const candidateCount = buffer.length - zipDataDescriptorLengthInBytes + 1;

	for (let offset = 0; offset < candidateCount; offset++) {
		const hasSignature = Token.UINT32_LE.get(buffer, offset) === zipDataDescriptorSignature;
		if (hasSignature && Token.UINT32_LE.get(buffer, offset + 8) === bytesConsumed + offset) {
			return offset;
		}
	}

	return -1;
}
170
+
171
/**
Tell whether bit `0x20` is set in the first code point of a chunk type string.

@param type - Chunk type string.
@returns `true` when that bit is set, otherwise `false`.
*/
function isPngAncillaryChunk(type) {
	const ancillaryBit = 0x20;
	const firstCodePoint = type.codePointAt(0);
	return (firstCodePoint & ancillaryBit) !== 0;
}
174
+
175
/**
Copy a sequence of byte chunks back to back into one new `Uint8Array`.

@param chunks - Iterable of byte arrays, copied in iteration order.
@param totalLength - Length of the returned array.
@returns A new `Uint8Array` of `totalLength` bytes holding the chunks in order.
*/
function mergeByteChunks(chunks, totalLength) {
	const combined = new Uint8Array(totalLength);

	let writePosition = 0;
	for (const piece of chunks) {
		combined.set(piece, writePosition);
		writePosition += piece.length;
	}

	return combined;
}
186
+
187
/**
Consume ZIP entry data up to the next data descriptor located by `findZipDataDescriptorOffset`.

The tokenizer is peeked one `syncBuffer` window at a time. When a full window contains no descriptor, its tail is left unconsumed and is peeked again together with the following bytes.

@param zipHandler - Handler providing `tokenizer`, `syncBuffer`, and the `knownSizeDescriptorScannedBytes` counter.
@param options - `shouldBuffer` selects between collecting the data and skipping it; `maximumLength` (defaults to `maximumZipEntrySizeInBytes`) caps the consumed byte count.
@returns The consumed bytes as one `Uint8Array` when `shouldBuffer` is truthy, otherwise `undefined`.
@throws {Error} When more than `maximumLength` bytes would be consumed.
*/
async function readZipDataDescriptorEntryWithLimit(zipHandler, {shouldBuffer, maximumLength = maximumZipEntrySizeInBytes} = {}) {
	const {syncBuffer, tokenizer} = zipHandler;
	const windowSize = syncBuffer.length;
	const bufferedChunks = [];
	let consumedLength = 0;
	let isDescriptorFound = false;

	while (!isDescriptorFound) {
		const peekedLength = await tokenizer.peekBuffer(syncBuffer, {mayBeLess: true});
		const descriptorOffset = findZipDataDescriptorOffset(syncBuffer.subarray(0, peekedLength), consumedLength);
		isDescriptorFound = descriptorOffset >= 0;

		let chunkLength;
		if (isDescriptorFound) {
			// Everything in front of the descriptor belongs to the entry.
			chunkLength = descriptorOffset;
		} else if (peekedLength === windowSize) {
			// Full window without a match: hold back the tail so it is scanned again with the next peek.
			chunkLength = peekedLength - Math.min(zipDataDescriptorOverlapLengthInBytes, peekedLength - 1);
		} else {
			// Short peek: nothing is held back.
			chunkLength = peekedLength;
		}

		if (chunkLength === 0) {
			break;
		}

		consumedLength += chunkLength;
		if (consumedLength > maximumLength) {
			throw new Error(`ZIP entry compressed data exceeds ${maximumLength} bytes`);
		}

		if (shouldBuffer) {
			const chunk = new Uint8Array(chunkLength);
			await tokenizer.readBuffer(chunk);
			bufferedChunks.push(chunk);
		} else {
			await tokenizer.ignore(chunkLength);
		}
	}

	// Only known-size inputs add to the handler's running descriptor-scan total.
	if (!hasUnknownFileSize(tokenizer)) {
		zipHandler.knownSizeDescriptorScannedBytes += consumedLength;
	}

	return shouldBuffer ? mergeByteChunks(bufferedChunks, consumedLength) : undefined;
}
21
237
 
22
- async function fromBuffer(input) {
23
- if (!(input instanceof Uint8Array || input instanceof ArrayBuffer || Buffer.isBuffer(input))) {
24
- throw new TypeError(`Expected the \`input\` argument to be of type \`Uint8Array\` or \`Buffer\` or \`ArrayBuffer\`, got \`${typeof input}\``);
238
+ function getRemainingZipScanBudget(zipHandler, startOffset) {
239
+ if (hasUnknownFileSize(zipHandler.tokenizer)) {
240
+ return Math.max(0, maximumUntrustedSkipSizeInBytes - (zipHandler.tokenizer.position - startOffset));
25
241
  }
26
242
 
27
- const buffer = input instanceof Buffer ? input : Buffer.from(input);
243
+ return Math.max(0, maximumZipEntrySizeInBytes - zipHandler.knownSizeDescriptorScannedBytes);
244
+ }
245
+
246
+ async function readZipEntryData(zipHandler, zipHeader, {shouldBuffer, maximumDescriptorLength = maximumZipEntrySizeInBytes} = {}) {
247
+ if (
248
+ zipHeader.dataDescriptor
249
+ && zipHeader.compressedSize === 0
250
+ ) {
251
+ return readZipDataDescriptorEntryWithLimit(zipHandler, {
252
+ shouldBuffer,
253
+ maximumLength: maximumDescriptorLength,
254
+ });
255
+ }
28
256
 
29
- if (!(buffer && buffer.length > 1)) {
257
+ if (!shouldBuffer) {
258
+ await safeIgnore(zipHandler.tokenizer, zipHeader.compressedSize, {
259
+ maximumLength: hasUnknownFileSize(zipHandler.tokenizer) ? maximumZipEntrySizeInBytes : zipHandler.tokenizer.fileInfo.size,
260
+ reason: 'ZIP entry compressed data',
261
+ });
30
262
  return;
31
263
  }
32
264
 
33
- const tokenizer = strtok3.fromBuffer(buffer);
34
- return fromTokenizer(tokenizer);
265
+ const maximumLength = getMaximumZipBufferedReadLength(zipHandler.tokenizer);
266
+ if (
267
+ !Number.isFinite(zipHeader.compressedSize)
268
+ || zipHeader.compressedSize < 0
269
+ || zipHeader.compressedSize > maximumLength
270
+ ) {
271
+ throw new Error(`ZIP entry compressed data exceeds ${maximumLength} bytes`);
272
+ }
273
+
274
+ const fileData = new Uint8Array(zipHeader.compressedSize);
275
+ await zipHandler.tokenizer.readBuffer(fileData);
276
+ return fileData;
277
+ }
278
+
279
// Override the default inflate to enforce decompression size limits, since @tokenizer/inflate does not expose a configuration hook for this.
// Method 0 hands the entry data to the callback unchanged, method 8 goes through the size-limited raw-deflate decompressor, and any other method throws.
ZipHandler.prototype.inflate = async function (zipHeader, fileData, callback) {
	const {compressedMethod} = zipHeader;

	switch (compressedMethod) {
		case 0: {
			return callback(fileData);
		}

		case 8: {
			const inflatedData = await decompressDeflateRawWithLimit(fileData, {maximumLength: maximumZipEntrySizeInBytes});
			return callback(inflatedData);
		}

		default: {
			throw new Error(`Unsupported ZIP compression method: ${compressedMethod}`);
		}
	}
};
292
+
293
// Replacement for `unzip` that walks the local file headers while enforcing the entry-count limit and,
// for inputs of unknown size, the total scan budget. `fileCallback(zipHeader)` returns an object whose
// optional `handler` receives the entry data and whose truthy `stop` ends the walk after the current entry.
ZipHandler.prototype.unzip = async function (fileCallback) {
	const {tokenizer} = this;
	const scanStartPosition = tokenizer.position;
	this.knownSizeDescriptorScannedBytes = 0;

	const assertWithinScanBudget = () => {
		if (hasExceededUnknownSizeScanBudget(tokenizer, scanStartPosition, maximumUntrustedSkipSizeInBytes)) {
			throw new ParserHardLimitError(`ZIP stream probing exceeds ${maximumUntrustedSkipSizeInBytes} bytes`);
		}
	};

	let entryCount = 0;
	for (;;) {
		assertWithinScanBudget();

		const zipHeader = await this.readLocalFileHeader();
		if (!zipHeader) {
			break;
		}

		entryCount++;
		if (entryCount > maximumZipEntryCount) {
			throw new Error(`ZIP entry count exceeds ${maximumZipEntryCount}`);
		}

		const next = fileCallback(zipHeader);
		const shouldStop = Boolean(next.stop);
		await tokenizer.ignore(zipHeader.extraFieldLength);

		// Entry data is only buffered when the callback supplied a handler for it.
		const fileData = await readZipEntryData(this, zipHeader, {
			shouldBuffer: Boolean(next.handler),
			maximumDescriptorLength: Math.min(maximumZipEntrySizeInBytes, getRemainingZipScanBudget(this, scanStartPosition)),
		});

		if (next.handler) {
			await this.inflate(zipHeader, fileData, next.handler);
		}

		if (zipHeader.dataDescriptor) {
			const dataDescriptor = new Uint8Array(zipDataDescriptorLengthInBytes);
			await tokenizer.readBuffer(dataDescriptor);
			if (Token.UINT32_LE.get(dataDescriptor, 0) !== zipDataDescriptorSignature) {
				throw new Error(`Expected data-descriptor-signature at position ${tokenizer.position - dataDescriptor.length}`);
			}
		}

		assertWithinScanBudget();

		if (shouldStop) {
			break;
		}
	}
};
338
+
339
/**
Wrap a readable stream so that at most `maximumBytes` bytes are passed on.

A chunk that would cross the limit is truncated with `subarray()`. Once the limit is reached the wrapper closes and the source reader is cancelled. Cancelling the wrapper cancels the source, unless the source has already finished or been cancelled.

@param stream - Source stream; a reader is acquired from it immediately.
@param maximumBytes - Maximum number of bytes to emit.
@returns A new `ReadableStream` that emits the limited data.
*/
function createByteLimitedReadableStream(stream, maximumBytes) {
	const sourceReader = stream.getReader();
	let forwardedBytes = 0;
	let isSourceFinished = false;
	let isSourceCanceled = false;

	// Cancel the source at most once, and never after it has ended on its own.
	async function cancelSource(reason) {
		if (isSourceFinished || isSourceCanceled) {
			return;
		}

		isSourceCanceled = true;
		await sourceReader.cancel(reason);
	}

	async function closeAndCancelSource(controller) {
		controller.close();
		await cancelSource();
	}

	async function pull(controller) {
		if (forwardedBytes >= maximumBytes) {
			await closeAndCancelSource(controller);
			return;
		}

		const {done, value} = await sourceReader.read();
		if (done || !value) {
			isSourceFinished = true;
			controller.close();
			return;
		}

		const budget = maximumBytes - forwardedBytes;
		if (value.length > budget) {
			// Emit only the part of the chunk that still fits.
			controller.enqueue(value.subarray(0, budget));
			forwardedBytes += budget;
			await closeAndCancelSource(controller);
			return;
		}

		controller.enqueue(value);
		forwardedBytes += value.length;
	}

	async function cancel(reason) {
		await cancelSource(reason);
	}

	return new ReadableStream({pull, cancel});
}
392
+
393
/**
Detect the file type of a stream using a freshly constructed `FileTypeParser`.

@param stream - Stream handed to `FileTypeParser#fromStream()`.
@param options - Options handed to the `FileTypeParser` constructor.
@returns The result of `FileTypeParser#fromStream()`.
*/
export async function fileTypeFromStream(stream, options) {
	const parser = new FileTypeParser(options);
	return parser.fromStream(stream);
}
396
+
397
/**
Detect the file type of in-memory data using a freshly constructed `FileTypeParser`.

@param input - Data handed to `FileTypeParser#fromBuffer()`.
@param options - Options handed to the `FileTypeParser` constructor.
@returns The result of `FileTypeParser#fromBuffer()`.
*/
export async function fileTypeFromBuffer(input, options) {
	const parser = new FileTypeParser(options);
	return parser.fromBuffer(input);
}
400
+
401
/**
Detect the file type of a blob using a freshly constructed `FileTypeParser`.

@param blob - Blob handed to `FileTypeParser#fromBlob()`.
@param options - Options handed to the `FileTypeParser` constructor.
@returns The result of `FileTypeParser#fromBlob()`.
*/
export async function fileTypeFromBlob(blob, options) {
	const parser = new FileTypeParser(options);
	return parser.fromBlob(blob);
}
404
+
405
// Lowercase MIME type → file type. An entry without `mime` reports the looked-up MIME type itself.
const fileTypeByLowercaseMimeType = new Map([
	['application/epub+zip', {ext: 'epub'}],
	['application/vnd.oasis.opendocument.text', {ext: 'odt'}],
	['application/vnd.oasis.opendocument.text-template', {ext: 'ott'}],
	['application/vnd.oasis.opendocument.spreadsheet', {ext: 'ods'}],
	['application/vnd.oasis.opendocument.spreadsheet-template', {ext: 'ots'}],
	['application/vnd.oasis.opendocument.presentation', {ext: 'odp'}],
	['application/vnd.oasis.opendocument.presentation-template', {ext: 'otp'}],
	['application/vnd.oasis.opendocument.graphics', {ext: 'odg'}],
	['application/vnd.oasis.opendocument.graphics-template', {ext: 'otg'}],
	['application/vnd.openxmlformats-officedocument.presentationml.slideshow', {ext: 'ppsx'}],
	['application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', {ext: 'xlsx'}],
	['application/vnd.ms-excel.sheet.macroenabled', {ext: 'xlsm', mime: 'application/vnd.ms-excel.sheet.macroenabled.12'}],
	['application/vnd.openxmlformats-officedocument.spreadsheetml.template', {ext: 'xltx'}],
	['application/vnd.ms-excel.template.macroenabled', {ext: 'xltm', mime: 'application/vnd.ms-excel.template.macroenabled.12'}],
	['application/vnd.ms-powerpoint.slideshow.macroenabled', {ext: 'ppsm', mime: 'application/vnd.ms-powerpoint.slideshow.macroenabled.12'}],
	['application/vnd.openxmlformats-officedocument.wordprocessingml.document', {ext: 'docx'}],
	['application/vnd.ms-word.document.macroenabled', {ext: 'docm', mime: 'application/vnd.ms-word.document.macroenabled.12'}],
	['application/vnd.openxmlformats-officedocument.wordprocessingml.template', {ext: 'dotx'}],
	['application/vnd.ms-word.template.macroenabledtemplate', {ext: 'dotm', mime: 'application/vnd.ms-word.template.macroenabled.12'}],
	['application/vnd.openxmlformats-officedocument.presentationml.template', {ext: 'potx'}],
	['application/vnd.ms-powerpoint.template.macroenabled', {ext: 'potm', mime: 'application/vnd.ms-powerpoint.template.macroenabled.12'}],
	['application/vnd.openxmlformats-officedocument.presentationml.presentation', {ext: 'pptx'}],
	['application/vnd.ms-powerpoint.presentation.macroenabled', {ext: 'pptm', mime: 'application/vnd.ms-powerpoint.presentation.macroenabled.12'}],
	['application/vnd.ms-visio.drawing', {ext: 'vsdx', mime: 'application/vnd.visio'}],
	['application/vnd.ms-package.3dmanufacturing-3dmodel+xml', {ext: '3mf', mime: 'model/3mf'}],
]);

/**
Map a MIME type string to a file type, ignoring letter case.

@param mimeType - MIME type string to look up.
@returns A new `{ext, mime}` object for a known MIME type, otherwise `undefined`. `mime` is either the lowercased input or the replacement value listed in the table.
*/
function getFileTypeFromMimeType(mimeType) {
	const lowercaseMimeType = mimeType.toLowerCase();
	const entry = fileTypeByLowercaseMimeType.get(lowercaseMimeType);
	if (entry === undefined) {
		return;
	}

	return {
		ext: entry.ext,
		mime: entry.mime ?? lowercaseMimeType,
	};
}
36
536
 
37
537
  function _check(buffer, headers, options) {
38
538
  options = {
39
539
  offset: 0,
40
- ...options
540
+ ...options,
41
541
  };
42
542
 
43
543
  for (const [index, header] of headers.entries()) {
@@ -55,1411 +555,2345 @@ function _check(buffer, headers, options) {
55
555
  return true;
56
556
  }
57
557
 
58
- async function fromTokenizer(tokenizer) {
59
- try {
60
- return _fromTokenizer(tokenizer);
61
- } catch (error) {
62
- if (!(error instanceof strtok3.EndOfStreamError)) {
63
- throw error;
64
- }
558
+ export function normalizeSampleSize(sampleSize) {
559
+ // `sampleSize` is an explicit caller-controlled tuning knob, not untrusted file input.
560
+ // Preserve valid caller-requested probe depth here; applications must bound attacker-derived option values themselves.
561
+ if (!Number.isFinite(sampleSize)) {
562
+ return reasonableDetectionSizeInBytes;
65
563
  }
66
- }
67
564
 
68
- async function _fromTokenizer(tokenizer) {
69
- let buffer = Buffer.alloc(minimumBytes);
70
- const bytesRead = 12;
71
- const check = (header, options) => _check(buffer, header, options);
72
- const checkString = (header, options) => check(stringToBytes(header), options);
565
+ return Math.max(1, Math.trunc(sampleSize));
566
+ }
73
567
 
74
- // Keep reading until EOF if the file size is unknown.
75
- if (!tokenizer.fileInfo.size) {
76
- tokenizer.fileInfo.size = Number.MAX_SAFE_INTEGER;
568
+ function readByobReaderWithSignal(reader, buffer, signal) {
569
+ if (signal === undefined) {
570
+ return reader.read(buffer);
77
571
  }
78
572
 
79
- await tokenizer.peekBuffer(buffer, {length: bytesRead, mayBeLess: true});
573
+ signal.throwIfAborted();
80
574
 
81
- // -- 2-byte signatures --
82
-
83
- if (check([0x42, 0x4D])) {
84
- return {
85
- ext: 'bmp',
86
- mime: 'image/bmp'
575
+ return new Promise((resolve, reject) => {
576
+ const cleanup = () => {
577
+ signal.removeEventListener('abort', onAbort);
87
578
  };
88
- }
89
579
 
90
- if (check([0x0B, 0x77])) {
91
- return {
92
- ext: 'ac3',
93
- mime: 'audio/vnd.dolby.dd-raw'
94
- };
95
- }
580
+ const onAbort = () => {
581
+ const abortReason = signal.reason;
582
+ cleanup();
96
583
 
97
- if (check([0x78, 0x01])) {
98
- return {
99
- ext: 'dmg',
100
- mime: 'application/x-apple-diskimage'
584
+ (async () => {
585
+ try {
586
+ await reader.cancel(abortReason);
587
+ } catch {}
588
+ })();
589
+
590
+ reject(abortReason);
101
591
  };
592
+
593
+ signal.addEventListener('abort', onAbort, {once: true});
594
+ (async () => {
595
+ try {
596
+ const result = await reader.read(buffer);
597
+ cleanup();
598
+ resolve(result);
599
+ } catch (error) {
600
+ cleanup();
601
+ reject(error);
602
+ }
603
+ })();
604
+ });
605
+ }
606
+
607
+ function normalizeMpegOffsetTolerance(mpegOffsetTolerance) {
608
+ // This value controls scan depth and therefore worst-case CPU work.
609
+ if (!Number.isFinite(mpegOffsetTolerance)) {
610
+ return 0;
102
611
  }
103
612
 
104
- if (check([0x4D, 0x5A])) {
105
- return {
106
- ext: 'exe',
107
- mime: 'application/x-msdownload'
108
- };
613
+ return Math.max(0, Math.min(maximumMpegOffsetTolerance, Math.trunc(mpegOffsetTolerance)));
614
+ }
615
+
616
+ function getKnownFileSizeOrMaximum(fileSize) {
617
+ if (!Number.isFinite(fileSize)) {
618
+ return Number.MAX_SAFE_INTEGER;
109
619
  }
110
620
 
111
- if (check([0x25, 0x21])) {
112
- await tokenizer.peekBuffer(buffer, {length: 24, mayBeLess: true});
621
+ return Math.max(0, fileSize);
622
+ }
113
623
 
114
- if (checkString('PS-Adobe-', {offset: 2}) &&
115
- checkString(' EPSF-', {offset: 14})) {
116
- return {
117
- ext: 'eps',
118
- mime: 'application/eps'
119
- };
120
- }
624
/**
Tell whether the tokenizer reports no usable file size.

@param tokenizer - Tokenizer whose `fileInfo.size` is inspected.
@returns `true` when the size is not a finite number or equals `Number.MAX_SAFE_INTEGER`, otherwise `false`.
*/
function hasUnknownFileSize(tokenizer) {
	const {size} = tokenizer.fileInfo;
	if (size === Number.MAX_SAFE_INTEGER) {
		return true;
	}

	return !Number.isFinite(size);
}
121
631
 
122
- return {
123
- ext: 'ps',
124
- mime: 'application/postscript'
125
- };
126
- }
632
/**
Tell whether a tokenizer of unknown file size has advanced more than `maximumBytes` past `startOffset`.

@param tokenizer - Tokenizer whose size and `position` are inspected.
@param startOffset - Position at which scanning began.
@param maximumBytes - Allowed number of bytes past `startOffset`.
@returns `true` only when the file size is unknown and the budget is exceeded; always `false` for a known size.
*/
function hasExceededUnknownSizeScanBudget(tokenizer, startOffset, maximumBytes) {
	if (!hasUnknownFileSize(tokenizer)) {
		return false;
	}

	const scannedBytes = tokenizer.position - startOffset;
	return scannedBytes > maximumBytes;
}
127
638
 
128
- if (
129
- check([0x1F, 0xA0]) ||
130
- check([0x1F, 0x9D])
131
- ) {
132
- return {
133
- ext: 'Z',
134
- mime: 'application/x-compress'
135
- };
639
/**
Compute the largest ZIP entry length that may be read into memory in one go.

@param tokenizer - Tokenizer whose `fileInfo.size` and `position` are inspected.
@returns The smaller of `maximumZipBufferedReadSizeInBytes` and the bytes remaining after the current position. A non-finite file size counts as `Number.MAX_SAFE_INTEGER` remaining bytes.
*/
function getMaximumZipBufferedReadLength(tokenizer) {
	const {size} = tokenizer.fileInfo;

	let bytesLeft = Number.MAX_SAFE_INTEGER;
	if (Number.isFinite(size)) {
		bytesLeft = Math.max(0, size - tokenizer.position);
	}

	return Math.min(bytesLeft, maximumZipBufferedReadSizeInBytes);
}
647
+
648
+ function isRecoverableZipError(error) {
649
+ if (error instanceof strtok3.EndOfStreamError) {
650
+ return true;
136
651
  }
137
652
 
138
- // -- 3-byte signatures --
653
+ if (error instanceof ParserHardLimitError) {
654
+ return true;
655
+ }
139
656
 
140
- if (check([0xFF, 0xD8, 0xFF])) {
141
- return {
142
- ext: 'jpg',
143
- mime: 'image/jpeg'
144
- };
657
+ if (!(error instanceof Error)) {
658
+ return false;
145
659
  }
146
660
 
147
- if (check([0x49, 0x49, 0xBC])) {
148
- return {
149
- ext: 'jxr',
150
- mime: 'image/vnd.ms-photo'
151
- };
661
+ if (recoverableZipErrorMessages.has(error.message)) {
662
+ return true;
152
663
  }
153
664
 
154
- if (check([0x1F, 0x8B, 0x8])) {
155
- return {
156
- ext: 'gz',
157
- mime: 'application/gzip'
158
- };
665
+ if (recoverableZipErrorCodes.has(error.code)) {
666
+ return true;
159
667
  }
160
668
 
161
- if (check([0x42, 0x5A, 0x68])) {
162
- return {
163
- ext: 'bz2',
164
- mime: 'application/x-bzip2'
165
- };
669
+ for (const prefix of recoverableZipErrorMessagePrefixes) {
670
+ if (error.message.startsWith(prefix)) {
671
+ return true;
672
+ }
166
673
  }
167
674
 
168
- if (checkString('ID3')) {
169
- await tokenizer.ignore(6); // Skip ID3 header until the header size
170
- const id3HeaderLen = await tokenizer.readToken(uint32SyncSafeToken);
171
- if (tokenizer.position + id3HeaderLen > tokenizer.fileInfo.size) {
172
- // Guess file type based on ID3 header for backward compatibility
173
- return {
174
- ext: 'mp3',
175
- mime: 'audio/mpeg'
176
- };
675
+ return false;
676
+ }
677
+
678
/**
Tell whether both size fields of a ZIP header are acceptable for reading the entry.

@param zipHeader - Header providing `compressedSize` and `uncompressedSize`.
@param maximumSize - Largest accepted size, inclusive (defaults to `maximumZipEntrySizeInBytes`).
@returns `true` when both sizes are finite, non-negative, and not greater than `maximumSize`.
*/
function canReadZipEntryForDetection(zipHeader, maximumSize = maximumZipEntrySizeInBytes) {
	const {compressedSize, uncompressedSize} = zipHeader;
	const isAcceptableSize = size => Number.isFinite(size) && !(size < 0) && !(size > maximumSize);
	return [compressedSize, uncompressedSize].every(size => isAcceptableSize(size));
}
692
+
693
/**
Create a fresh state object for OpenXML detection inside a ZIP archive.

@returns A new object in which every detection flag is `false`.
*/
function createOpenXmlZipDetectionState() {
	const flagNames = [
		'hasContentTypesEntry',
		'hasParsedContentTypesEntry',
		'isParsingContentTypes',
		'hasUnparseableContentTypes',
		'hasWordDirectory',
		'hasPresentationDirectory',
		'hasSpreadsheetDirectory',
		'hasThreeDimensionalModelEntry',
	];

	return Object.fromEntries(flagNames.map(flagName => [flagName, false]));
}
178
705
 
179
- await tokenizer.ignore(id3HeaderLen);
180
- return fromTokenizer(tokenizer); // Skip ID3 header, recursion
706
+ function updateOpenXmlZipDetectionStateFromFilename(openXmlState, filename) {
707
+ if (filename.startsWith('word/')) {
708
+ openXmlState.hasWordDirectory = true;
181
709
  }
182
710
 
183
- // Musepack, SV7
184
- if (checkString('MP+')) {
185
- return {
186
- ext: 'mpc',
187
- mime: 'audio/x-musepack'
188
- };
711
+ if (filename.startsWith('ppt/')) {
712
+ openXmlState.hasPresentationDirectory = true;
713
+ }
714
+
715
+ if (filename.startsWith('xl/')) {
716
+ openXmlState.hasSpreadsheetDirectory = true;
189
717
  }
190
718
 
191
719
  if (
192
- (buffer[0] === 0x43 || buffer[0] === 0x46) &&
193
- check([0x57, 0x53], {offset: 1})
720
+ filename.startsWith('3D/')
721
+ && filename.endsWith('.model')
194
722
  ) {
195
- return {
196
- ext: 'swf',
197
- mime: 'application/x-shockwave-flash'
198
- };
723
+ openXmlState.hasThreeDimensionalModelEntry = true;
199
724
  }
725
+ }
200
726
 
201
- // -- 4-byte signatures --
202
-
203
- if (check([0x47, 0x49, 0x46])) {
204
- return {
205
- ext: 'gif',
206
- mime: 'image/gif'
207
- };
727
+ function getOpenXmlFileTypeFromZipEntries(openXmlState) {
728
+ // Only use directory-name heuristic when [Content_Types].xml was present in the archive
729
+ // but its handler was skipped (not invoked, not currently running, and not already resolved).
730
+ // This avoids guessing from directory names when content-type parsing already gave a definitive answer or failed.
731
+ if (
732
+ !openXmlState.hasContentTypesEntry
733
+ || openXmlState.hasUnparseableContentTypes
734
+ || openXmlState.isParsingContentTypes
735
+ || openXmlState.hasParsedContentTypesEntry
736
+ ) {
737
+ return;
208
738
  }
209
739
 
210
- if (checkString('FLIF')) {
740
+ if (openXmlState.hasWordDirectory) {
211
741
  return {
212
- ext: 'flif',
213
- mime: 'image/flif'
742
+ ext: 'docx',
743
+ mime: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
214
744
  };
215
745
  }
216
746
 
217
- if (checkString('8BPS')) {
747
+ if (openXmlState.hasPresentationDirectory) {
218
748
  return {
219
- ext: 'psd',
220
- mime: 'image/vnd.adobe.photoshop'
749
+ ext: 'pptx',
750
+ mime: 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
221
751
  };
222
752
  }
223
753
 
224
- if (checkString('WEBP', {offset: 8})) {
754
+ if (openXmlState.hasSpreadsheetDirectory) {
225
755
  return {
226
- ext: 'webp',
227
- mime: 'image/webp'
756
+ ext: 'xlsx',
757
+ mime: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
228
758
  };
229
759
  }
230
760
 
231
- // Musepack, SV8
232
- if (checkString('MPCK')) {
761
+ if (openXmlState.hasThreeDimensionalModelEntry) {
233
762
  return {
234
- ext: 'mpc',
235
- mime: 'audio/x-musepack'
763
+ ext: '3mf',
764
+ mime: 'model/3mf',
236
765
  };
237
766
  }
767
+ }
238
768
 
239
- if (checkString('FORM')) {
240
- return {
241
- ext: 'aif',
242
- mime: 'audio/aiff'
243
- };
244
- }
769
+ function getOpenXmlMimeTypeFromContentTypesXml(xmlContent) {
770
+ // We only need the `ContentType="...main+xml"` value, so a small string scan is enough and avoids full XML parsing.
771
+ const endPosition = xmlContent.indexOf('.main+xml"');
772
+ if (endPosition === -1) {
773
+ const mimeType = 'application/vnd.ms-package.3dmanufacturing-3dmodel+xml';
774
+ if (xmlContent.includes(`ContentType="${mimeType}"`)) {
775
+ return mimeType;
776
+ }
245
777
 
246
- if (checkString('icns', {offset: 0})) {
247
- return {
248
- ext: 'icns',
249
- mime: 'image/icns'
250
- };
778
+ return;
251
779
  }
252
780
 
253
- // Zip-based file formats
254
- // Need to be before the `zip` check
255
- if (check([0x50, 0x4B, 0x3, 0x4])) { // Local file header signature
256
- try {
257
- while (tokenizer.position + 30 < tokenizer.fileInfo.size) {
258
- await tokenizer.readBuffer(buffer, {length: 30});
259
-
260
- // https://en.wikipedia.org/wiki/Zip_(file_format)#File_headers
261
- const zipHeader = {
262
- compressedSize: buffer.readUInt32LE(18),
263
- uncompressedSize: buffer.readUInt32LE(22),
264
- filenameLength: buffer.readUInt16LE(26),
265
- extraFieldLength: buffer.readUInt16LE(28)
266
- };
781
+ const truncatedContent = xmlContent.slice(0, endPosition);
782
+ const firstQuotePosition = truncatedContent.lastIndexOf('"');
783
+ // If no quote is found, `lastIndexOf` returns -1 and this intentionally falls back to the full truncated prefix.
784
+ return truncatedContent.slice(firstQuotePosition + 1);
785
+ }
267
786
 
268
- zipHeader.filename = await tokenizer.readToken(new Token.StringType(zipHeader.filenameLength, 'utf-8'));
269
- await tokenizer.ignore(zipHeader.extraFieldLength);
270
-
271
- // Assumes signed `.xpi` from addons.mozilla.org
272
- if (zipHeader.filename === 'META-INF/mozilla.rsa') {
273
- return {
274
- ext: 'xpi',
275
- mime: 'application/x-xpinstall'
276
- };
277
- }
787
/**
Detect the file type from a tokenizer using a freshly constructed `FileTypeParser`.

@param tokenizer - Tokenizer handed to `FileTypeParser#fromTokenizer()`.
@param options - Options handed to the `FileTypeParser` constructor.
@returns The result of `FileTypeParser#fromTokenizer()`.
*/
export async function fileTypeFromTokenizer(tokenizer, options) {
	const parser = new FileTypeParser(options);
	return parser.fromTokenizer(tokenizer);
}
278
790
 
279
- if (zipHeader.filename.endsWith('.rels') || zipHeader.filename.endsWith('.xml')) {
280
- const type = zipHeader.filename.split('/')[0];
281
- switch (type) {
282
- case '_rels':
283
- break;
284
- case 'word':
285
- return {
286
- ext: 'docx',
287
- mime: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
288
- };
289
- case 'ppt':
290
- return {
291
- ext: 'pptx',
292
- mime: 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
293
- };
294
- case 'xl':
295
- return {
296
- ext: 'xlsx',
297
- mime: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
298
- };
299
- default:
300
- break;
301
- }
302
- }
791
/**
Create a detection stream from a web stream using a freshly constructed `FileTypeParser`.

@param webStream - Stream handed to `FileTypeParser#toDetectionStream()`.
@param options - Options handed both to the `FileTypeParser` constructor and to `toDetectionStream()`.
@returns The result of `FileTypeParser#toDetectionStream()`.
*/
export async function fileTypeStream(webStream, options) {
	const parser = new FileTypeParser(options);
	return parser.toDetectionStream(webStream, options);
}
303
794
 
304
- if (zipHeader.filename.startsWith('xl/')) {
305
- return {
306
- ext: 'xlsx',
307
- mime: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
308
- };
309
- }
795
+ export class FileTypeParser {
796
+ constructor(options) {
797
+ const normalizedMpegOffsetTolerance = normalizeMpegOffsetTolerance(options?.mpegOffsetTolerance);
798
+ this.options = {
799
+ ...options,
800
+ mpegOffsetTolerance: normalizedMpegOffsetTolerance,
801
+ };
310
802
 
311
- if (zipHeader.filename.startsWith('3D/') && zipHeader.filename.endsWith('.model')) {
312
- return {
313
- ext: '3mf',
314
- mime: 'model/3mf'
315
- };
316
- }
803
+ this.detectors = [...(this.options.customDetectors ?? []),
804
+ {id: 'core', detect: this.detectConfident},
805
+ {id: 'core.imprecise', detect: this.detectImprecise}];
806
+ this.tokenizerOptions = {
807
+ abortSignal: this.options.signal,
808
+ };
809
+ this.gzipProbeDepth = 0;
810
+ }
317
811
 
318
- // The docx, xlsx and pptx file types extend the Office Open XML file format:
319
- // https://en.wikipedia.org/wiki/Office_Open_XML_file_formats
320
- // We look for:
321
- // - one entry named '[Content_Types].xml' or '_rels/.rels',
322
- // - one entry indicating specific type of file.
323
- // MS Office, OpenOffice and LibreOffice may put the parts in different order, so the check should not rely on it.
324
- if (zipHeader.filename === 'mimetype' && zipHeader.compressedSize === zipHeader.uncompressedSize) {
325
- const mimeType = await tokenizer.readToken(new Token.StringType(zipHeader.compressedSize, 'utf-8'));
326
-
327
- switch (mimeType) {
328
- case 'application/epub+zip':
329
- return {
330
- ext: 'epub',
331
- mime: 'application/epub+zip'
332
- };
333
- case 'application/vnd.oasis.opendocument.text':
334
- return {
335
- ext: 'odt',
336
- mime: 'application/vnd.oasis.opendocument.text'
337
- };
338
- case 'application/vnd.oasis.opendocument.spreadsheet':
339
- return {
340
- ext: 'ods',
341
- mime: 'application/vnd.oasis.opendocument.spreadsheet'
342
- };
343
- case 'application/vnd.oasis.opendocument.presentation':
344
- return {
345
- ext: 'odp',
346
- mime: 'application/vnd.oasis.opendocument.presentation'
347
- };
348
- default:
349
- }
350
- }
812
+ getTokenizerOptions() {
813
+ return {
814
+ ...this.tokenizerOptions,
815
+ };
816
+ }
351
817
 
352
- // Try to find next header manually when current one is corrupted
353
- if (zipHeader.compressedSize === 0) {
354
- let nextHeaderIndex = -1;
818
+ createTokenizerFromWebStream(stream) {
819
+ return patchWebByobTokenizerClose(strtok3.fromWebStream(stream, this.getTokenizerOptions()));
820
+ }
355
821
 
356
- while (nextHeaderIndex < 0 && (tokenizer.position < tokenizer.fileInfo.size)) {
357
- await tokenizer.peekBuffer(buffer, {mayBeLess: true});
822
+ async parseTokenizer(tokenizer, detectionReentryCount = 0) {
823
+ this.detectionReentryCount = detectionReentryCount;
824
+ const initialPosition = tokenizer.position;
825
+ // Iterate through all file-type detectors
826
+ for (const detector of this.detectors) {
827
+ let fileType;
828
+ try {
829
+ fileType = await detector.detect(tokenizer);
830
+ } catch (error) {
831
+ if (error instanceof strtok3.EndOfStreamError) {
832
+ return;
833
+ }
358
834
 
359
- nextHeaderIndex = buffer.indexOf('504B0304', 0, 'hex');
360
- // Move position to the next header if found, skip the whole buffer otherwise
361
- await tokenizer.ignore(nextHeaderIndex >= 0 ? nextHeaderIndex : buffer.length);
362
- }
363
- } else {
364
- await tokenizer.ignore(zipHeader.compressedSize);
835
+ if (error instanceof ParserHardLimitError) {
836
+ return;
365
837
  }
366
- }
367
- } catch (error) {
368
- if (!(error instanceof strtok3.EndOfStreamError)) {
838
+
369
839
  throw error;
370
840
  }
371
- }
372
-
373
- return {
374
- ext: 'zip',
375
- mime: 'application/zip'
376
- };
377
- }
378
841
 
379
- if (checkString('OggS')) {
380
- // This is an OGG container
381
- await tokenizer.ignore(28);
382
- const type = Buffer.alloc(8);
383
- await tokenizer.readBuffer(type);
842
+ if (fileType) {
843
+ return fileType;
844
+ }
384
845
 
385
- // Needs to be before `ogg` check
386
- if (_check(type, [0x4F, 0x70, 0x75, 0x73, 0x48, 0x65, 0x61, 0x64])) {
387
- return {
388
- ext: 'opus',
389
- mime: 'audio/opus'
390
- };
846
+ if (initialPosition !== tokenizer.position) {
847
+ return undefined; // Cannot proceed scanning if the tokenizer is at an arbitrary position
848
+ }
391
849
  }
850
+ }
392
851
 
393
- // If ' theora' in header.
394
- if (_check(type, [0x80, 0x74, 0x68, 0x65, 0x6F, 0x72, 0x61])) {
395
- return {
396
- ext: 'ogv',
397
- mime: 'video/ogg'
398
- };
852
+ async fromTokenizer(tokenizer) {
853
+ try {
854
+ return await this.parseTokenizer(tokenizer);
855
+ } finally {
856
+ await tokenizer.close();
399
857
  }
858
+ }
400
859
 
401
- // If '\x01video' in header.
402
- if (_check(type, [0x01, 0x76, 0x69, 0x64, 0x65, 0x6F, 0x00])) {
403
- return {
404
- ext: 'ogm',
405
- mime: 'video/ogg'
406
- };
860
+ async fromBuffer(input) {
861
+ if (!(input instanceof Uint8Array || input instanceof ArrayBuffer)) {
862
+ throw new TypeError(`Expected the \`input\` argument to be of type \`Uint8Array\` or \`ArrayBuffer\`, got \`${typeof input}\``);
407
863
  }
408
864
 
409
- // If ' FLAC' in header https://xiph.org/flac/faq.html
410
- if (_check(type, [0x7F, 0x46, 0x4C, 0x41, 0x43])) {
411
- return {
412
- ext: 'oga',
413
- mime: 'audio/ogg'
414
- };
415
- }
865
+ const buffer = input instanceof Uint8Array ? input : new Uint8Array(input);
416
866
 
417
- // 'Speex ' in header https://en.wikipedia.org/wiki/Speex
418
- if (_check(type, [0x53, 0x70, 0x65, 0x65, 0x78, 0x20, 0x20])) {
419
- return {
420
- ext: 'spx',
421
- mime: 'audio/ogg'
422
- };
867
+ if (!(buffer?.length > 1)) {
868
+ return;
423
869
  }
424
870
 
425
- // If '\x01vorbis' in header
426
- if (_check(type, [0x01, 0x76, 0x6F, 0x72, 0x62, 0x69, 0x73])) {
427
- return {
428
- ext: 'ogg',
429
- mime: 'audio/ogg'
430
- };
431
- }
871
+ return this.fromTokenizer(strtok3.fromBuffer(buffer, this.getTokenizerOptions()));
872
+ }
432
873
 
433
- // Default OGG container https://www.iana.org/assignments/media-types/application/ogg
434
- return {
435
- ext: 'ogx',
436
- mime: 'application/ogg'
437
- };
874
+ async fromBlob(blob) {
875
+ this.options.signal?.throwIfAborted();
876
+ const tokenizer = strtok3.fromBlob(blob, this.getTokenizerOptions());
877
+ return this.fromTokenizer(tokenizer);
438
878
  }
439
879
 
440
- if (
441
- check([0x50, 0x4B]) &&
442
- (buffer[2] === 0x3 || buffer[2] === 0x5 || buffer[2] === 0x7) &&
443
- (buffer[3] === 0x4 || buffer[3] === 0x6 || buffer[3] === 0x8)
444
- ) {
445
- return {
446
- ext: 'zip',
447
- mime: 'application/zip'
448
- };
880
+ async fromStream(stream) {
881
+ this.options.signal?.throwIfAborted();
882
+ const tokenizer = this.createTokenizerFromWebStream(stream);
883
+ return this.fromTokenizer(tokenizer);
449
884
  }
450
885
 
451
- //
886
+ async toDetectionStream(stream, options) {
887
+ const sampleSize = normalizeSampleSize(options?.sampleSize ?? reasonableDetectionSizeInBytes);
888
+ let detectedFileType;
889
+ let firstChunk;
452
890
 
453
- // File Type Box (https://en.wikipedia.org/wiki/ISO_base_media_file_format)
454
- // It's not required to be first, but it's recommended to be. Almost all ISO base media files start with `ftyp` box.
455
- // `ftyp` box must contain a brand major identifier, which must consist of ISO 8859-1 printable characters.
456
- // Here we check for 8859-1 printable characters (for simplicity, it's a mask which also catches one non-printable character).
457
- if (
458
- checkString('ftyp', {offset: 4}) &&
459
- (buffer[8] & 0x60) !== 0x00 // Brand major, first character ASCII?
460
- ) {
461
- // They all can have MIME `video/mp4` except `application/mp4` special-case which is hard to detect.
462
- // For some cases, we're specific, everything else falls to `video/mp4` with `mp4` extension.
463
- const brandMajor = buffer.toString('binary', 8, 12).replace('\0', ' ').trim();
464
- switch (brandMajor) {
465
- case 'avif':
466
- return {ext: 'avif', mime: 'image/avif'};
467
- case 'mif1':
468
- return {ext: 'heic', mime: 'image/heif'};
469
- case 'msf1':
470
- return {ext: 'heic', mime: 'image/heif-sequence'};
471
- case 'heic':
472
- case 'heix':
473
- return {ext: 'heic', mime: 'image/heic'};
474
- case 'hevc':
475
- case 'hevx':
476
- return {ext: 'heic', mime: 'image/heic-sequence'};
477
- case 'qt':
478
- return {ext: 'mov', mime: 'video/quicktime'};
479
- case 'M4V':
480
- case 'M4VH':
481
- case 'M4VP':
482
- return {ext: 'm4v', mime: 'video/x-m4v'};
483
- case 'M4P':
484
- return {ext: 'm4p', mime: 'video/mp4'};
485
- case 'M4B':
486
- return {ext: 'm4b', mime: 'audio/mp4'};
487
- case 'M4A':
488
- return {ext: 'm4a', mime: 'audio/x-m4a'};
489
- case 'F4V':
490
- return {ext: 'f4v', mime: 'video/mp4'};
491
- case 'F4P':
492
- return {ext: 'f4p', mime: 'video/mp4'};
493
- case 'F4A':
494
- return {ext: 'f4a', mime: 'audio/mp4'};
495
- case 'F4B':
496
- return {ext: 'f4b', mime: 'audio/mp4'};
497
- case 'crx':
498
- return {ext: 'cr3', mime: 'image/x-canon-cr3'};
499
- default:
500
- if (brandMajor.startsWith('3g')) {
501
- if (brandMajor.startsWith('3g2')) {
502
- return {ext: '3g2', mime: 'video/3gpp2'};
891
+ const reader = stream.getReader({mode: 'byob'});
892
+ try {
893
+ // Read the first chunk from the stream
894
+ const {value: chunk, done} = await readByobReaderWithSignal(reader, new Uint8Array(sampleSize), this.options.signal);
895
+ firstChunk = chunk;
896
+ if (!done && chunk) {
897
+ try {
898
+ // Attempt to detect the file type from the chunk
899
+ detectedFileType = await this.fromBuffer(chunk.subarray(0, sampleSize));
900
+ } catch (error) {
901
+ if (!(error instanceof strtok3.EndOfStreamError)) {
902
+ throw error; // Re-throw non-EndOfStreamError
503
903
  }
504
904
 
505
- return {ext: '3gp', mime: 'video/3gpp'};
905
+ detectedFileType = undefined;
506
906
  }
907
+ }
507
908
 
508
- return {ext: 'mp4', mime: 'video/mp4'};
909
+ firstChunk = chunk;
910
+ } finally {
911
+ reader.releaseLock(); // Ensure the reader is released
509
912
  }
510
- }
511
913
 
512
- if (checkString('MThd')) {
513
- return {
514
- ext: 'mid',
515
- mime: 'audio/midi'
516
- };
517
- }
914
+ // Create a pass-through TransformStream that first re-emits the already-read first chunk, then forwards the remaining chunks unchanged
915
+ const transformStream = new TransformStream({
916
+ async start(controller) {
917
+ controller.enqueue(firstChunk); // Enqueue the initial chunk
918
+ },
919
+ transform(chunk, controller) {
920
+ // Pass through the chunks without modification
921
+ controller.enqueue(chunk);
922
+ },
923
+ });
518
924
 
519
- if (
520
- checkString('wOFF') &&
521
- (
522
- check([0x00, 0x01, 0x00, 0x00], {offset: 4}) ||
523
- checkString('OTTO', {offset: 4})
524
- )
525
- ) {
526
- return {
527
- ext: 'woff',
528
- mime: 'font/woff'
529
- };
530
- }
925
+ const newStream = stream.pipeThrough(transformStream);
926
+ newStream.fileType = detectedFileType;
531
927
 
532
- if (
533
- checkString('wOF2') &&
534
- (
535
- check([0x00, 0x01, 0x00, 0x00], {offset: 4}) ||
536
- checkString('OTTO', {offset: 4})
537
- )
538
- ) {
539
- return {
540
- ext: 'woff2',
541
- mime: 'font/woff2'
542
- };
928
+ return newStream;
543
929
  }
544
930
 
545
- if (check([0xD4, 0xC3, 0xB2, 0xA1]) || check([0xA1, 0xB2, 0xC3, 0xD4])) {
546
- return {
547
- ext: 'pcap',
548
- mime: 'application/vnd.tcpdump.pcap'
549
- };
550
- }
931
+ async detectGzip(tokenizer) {
932
+ if (this.gzipProbeDepth >= maximumNestedGzipProbeDepth) {
933
+ return {
934
+ ext: 'gz',
935
+ mime: 'application/gzip',
936
+ };
937
+ }
551
938
 
552
- // Sony DSD Stream File (DSF)
553
- if (checkString('DSD ')) {
554
- return {
555
- ext: 'dsf',
556
- mime: 'audio/x-dsf' // Non-standard
557
- };
558
- }
939
+ const gzipHandler = new GzipHandler(tokenizer);
940
+ const limitedInflatedStream = createByteLimitedReadableStream(gzipHandler.inflate(), maximumNestedGzipDetectionSizeInBytes);
941
+ const hasUnknownSize = hasUnknownFileSize(tokenizer);
942
+ let timeout;
943
+ let probeSignal;
944
+ let probeParser;
945
+ let compressedFileType;
946
+
947
+ if (hasUnknownSize) {
948
+ const timeoutController = new AbortController();
949
+ timeout = setTimeout(() => {
950
+ timeoutController.abort(new DOMException(`Operation timed out after ${unknownSizeGzipProbeTimeoutInMilliseconds} ms`, 'TimeoutError'));
951
+ }, unknownSizeGzipProbeTimeoutInMilliseconds);
952
+ probeSignal = this.options.signal === undefined
953
+ ? timeoutController.signal
954
+ // eslint-disable-next-line n/no-unsupported-features/node-builtins
955
+ : AbortSignal.any([this.options.signal, timeoutController.signal]);
956
+ probeParser = new FileTypeParser({
957
+ ...this.options,
958
+ signal: probeSignal,
959
+ });
960
+ probeParser.gzipProbeDepth = this.gzipProbeDepth + 1;
961
+ } else {
962
+ this.gzipProbeDepth++;
963
+ }
559
964
 
560
- if (checkString('LZIP')) {
561
- return {
562
- ext: 'lz',
563
- mime: 'application/x-lzip'
564
- };
565
- }
965
+ try {
966
+ compressedFileType = await (probeParser ?? this).fromStream(limitedInflatedStream);
967
+ } catch (error) {
968
+ if (
969
+ error?.name === 'AbortError'
970
+ && probeSignal?.reason?.name !== 'TimeoutError'
971
+ ) {
972
+ throw error;
973
+ }
974
+
975
+ // Timeout, decompression, or inner-detection failures are expected for non-tar gzip files.
976
+ } finally {
977
+ clearTimeout(timeout);
978
+ if (!hasUnknownSize) {
979
+ this.gzipProbeDepth--;
980
+ }
981
+ }
982
+
983
+ if (compressedFileType?.ext === 'tar') {
984
+ return {
985
+ ext: 'tar.gz',
986
+ mime: 'application/gzip',
987
+ };
988
+ }
566
989
 
567
- if (checkString('fLaC')) {
568
990
  return {
569
- ext: 'flac',
570
- mime: 'audio/x-flac'
991
+ ext: 'gz',
992
+ mime: 'application/gzip',
571
993
  };
572
994
  }
573
995
 
574
- if (check([0x42, 0x50, 0x47, 0xFB])) {
575
- return {
576
- ext: 'bpg',
577
- mime: 'image/bpg'
578
- };
996
+ check(header, options) {
997
+ return _check(this.buffer, header, options);
579
998
  }
580
999
 
581
- if (checkString('wvpk')) {
582
- return {
583
- ext: 'wv',
584
- mime: 'audio/wavpack'
585
- };
1000
+ checkString(header, options) {
1001
+ return this.check(stringToBytes(header, options?.encoding), options);
586
1002
  }
587
1003
 
588
- if (checkString('%PDF')) {
589
- await tokenizer.ignore(1350);
590
- const maxBufferSize = 10 * 1024 * 1024;
591
- const buffer = Buffer.alloc(Math.min(maxBufferSize, tokenizer.fileInfo.size));
592
- await tokenizer.readBuffer(buffer, {mayBeLess: true});
1004
+ // Detections with a high degree of certainty in identifying the correct file type
1005
+ detectConfident = async tokenizer => {
1006
+ this.buffer = new Uint8Array(reasonableDetectionSizeInBytes);
593
1007
 
594
- // Check if this is an Adobe Illustrator file
595
- if (buffer.includes(Buffer.from('AIPrivateData'))) {
596
- return {
597
- ext: 'ai',
598
- mime: 'application/postscript'
599
- };
1008
+ // Keep reading until EOF if the file size is unknown.
1009
+ if (tokenizer.fileInfo.size === undefined) {
1010
+ tokenizer.fileInfo.size = Number.MAX_SAFE_INTEGER;
600
1011
  }
601
1012
 
602
- // Assume this is just a normal PDF
603
- return {
604
- ext: 'pdf',
605
- mime: 'application/pdf'
606
- };
607
- }
1013
+ this.tokenizer = tokenizer;
608
1014
 
609
- if (check([0x00, 0x61, 0x73, 0x6D])) {
610
- return {
611
- ext: 'wasm',
612
- mime: 'application/wasm'
613
- };
614
- }
1015
+ if (hasUnknownFileSize(tokenizer)) {
1016
+ await tokenizer.peekBuffer(this.buffer, {length: 3, mayBeLess: true});
1017
+ if (this.check([0x1F, 0x8B, 0x8])) {
1018
+ return this.detectGzip(tokenizer);
1019
+ }
1020
+ }
615
1021
 
616
- // TIFF, little-endian type
617
- if (check([0x49, 0x49, 0x2A, 0x0])) {
618
- if (checkString('CR', {offset: 8})) {
1022
+ await tokenizer.peekBuffer(this.buffer, {length: 32, mayBeLess: true});
1023
+
1024
+ // -- 2-byte signatures --
1025
+
1026
+ if (this.check([0x42, 0x4D])) {
619
1027
  return {
620
- ext: 'cr2',
621
- mime: 'image/x-canon-cr2'
1028
+ ext: 'bmp',
1029
+ mime: 'image/bmp',
622
1030
  };
623
1031
  }
624
1032
 
625
- if (check([0x1C, 0x00, 0xFE, 0x00], {offset: 8}) || check([0x1F, 0x00, 0x0B, 0x00], {offset: 8})) {
1033
+ if (this.check([0x0B, 0x77])) {
626
1034
  return {
627
- ext: 'nef',
628
- mime: 'image/x-nikon-nef'
1035
+ ext: 'ac3',
1036
+ mime: 'audio/vnd.dolby.dd-raw',
629
1037
  };
630
1038
  }
631
1039
 
632
- if (
633
- check([0x08, 0x00, 0x00, 0x00], {offset: 4}) &&
634
- (check([0x2D, 0x00, 0xFE, 0x00], {offset: 8}) ||
635
- check([0x27, 0x00, 0xFE, 0x00], {offset: 8}))
636
- ) {
1040
+ if (this.check([0x78, 0x01])) {
637
1041
  return {
638
- ext: 'dng',
639
- mime: 'image/x-adobe-dng'
1042
+ ext: 'dmg',
1043
+ mime: 'application/x-apple-diskimage',
640
1044
  };
641
1045
  }
642
1046
 
643
- buffer = Buffer.alloc(24);
644
- await tokenizer.peekBuffer(buffer);
645
- if (
646
- (check([0x10, 0xFB, 0x86, 0x01], {offset: 4}) || check([0x08, 0x00, 0x00, 0x00], {offset: 4})) &&
647
- // This pattern differentiates ARW from other TIFF-ish file types:
648
- check([0x00, 0xFE, 0x00, 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x01], {offset: 9})
649
- ) {
1047
+ if (this.check([0x4D, 0x5A])) {
650
1048
  return {
651
- ext: 'arw',
652
- mime: 'image/x-sony-arw'
1049
+ ext: 'exe',
1050
+ mime: 'application/x-msdownload',
653
1051
  };
654
1052
  }
655
1053
 
656
- return {
657
- ext: 'tif',
658
- mime: 'image/tiff'
659
- };
660
- }
661
-
662
- // TIFF, big-endian type
663
- if (check([0x4D, 0x4D, 0x0, 0x2A])) {
664
- return {
665
- ext: 'tif',
666
- mime: 'image/tiff'
667
- };
668
- }
669
-
670
- if (checkString('MAC ')) {
671
- return {
672
- ext: 'ape',
673
- mime: 'audio/ape'
674
- };
675
- }
676
-
677
- // https://github.com/threatstack/libmagic/blob/master/magic/Magdir/matroska
678
- if (check([0x1A, 0x45, 0xDF, 0xA3])) { // Root element: EBML
679
- async function readField() {
680
- const msb = await tokenizer.peekNumber(Token.UINT8);
681
- let mask = 0x80;
682
- let ic = 0; // 0 = A, 1 = B, 2 = C, 3 = D
1054
+ if (this.check([0x25, 0x21])) {
1055
+ await tokenizer.peekBuffer(this.buffer, {length: 24, mayBeLess: true});
683
1056
 
684
- while ((msb & mask) === 0 && mask !== 0) {
685
- ++ic;
686
- mask >>= 1;
1057
+ if (
1058
+ this.checkString('PS-Adobe-', {offset: 2})
1059
+ && this.checkString(' EPSF-', {offset: 14})
1060
+ ) {
1061
+ return {
1062
+ ext: 'eps',
1063
+ mime: 'application/eps',
1064
+ };
687
1065
  }
688
1066
 
689
- const id = Buffer.alloc(ic + 1);
690
- await tokenizer.readBuffer(id);
691
- return id;
1067
+ return {
1068
+ ext: 'ps',
1069
+ mime: 'application/postscript',
1070
+ };
692
1071
  }
693
1072
 
694
- async function readElement() {
695
- const id = await readField();
696
- const lenField = await readField();
697
- lenField[0] ^= 0x80 >> (lenField.length - 1);
698
- const nrLen = Math.min(6, lenField.length); // JavaScript can max read 6 bytes integer
1073
+ if (
1074
+ this.check([0x1F, 0xA0])
1075
+ || this.check([0x1F, 0x9D])
1076
+ ) {
699
1077
  return {
700
- id: id.readUIntBE(0, id.length),
701
- len: lenField.readUIntBE(lenField.length - nrLen, nrLen)
1078
+ ext: 'Z',
1079
+ mime: 'application/x-compress',
702
1080
  };
703
1081
  }
704
1082
 
705
- async function readChildren(level, children) {
706
- while (children > 0) {
707
- const e = await readElement();
708
- if (e.id === 0x4282) {
709
- return tokenizer.readToken(new Token.StringType(e.len, 'utf-8')); // Return DocType
710
- }
711
-
712
- await tokenizer.ignore(e.len); // ignore payload
713
- --children;
714
- }
1083
+ if (this.check([0xC7, 0x71])) {
1084
+ return {
1085
+ ext: 'cpio',
1086
+ mime: 'application/x-cpio',
1087
+ };
715
1088
  }
716
1089
 
717
- const re = await readElement();
718
- const docType = await readChildren(1, re.len);
719
-
720
- switch (docType) {
721
- case 'webm':
722
- return {
723
- ext: 'webm',
724
- mime: 'video/webm'
725
- };
1090
+ if (this.check([0x60, 0xEA])) {
1091
+ return {
1092
+ ext: 'arj',
1093
+ mime: 'application/x-arj',
1094
+ };
1095
+ }
726
1096
 
727
- case 'matroska':
728
- return {
729
- ext: 'mkv',
730
- mime: 'video/x-matroska'
731
- };
1097
+ // -- 3-byte signatures --
732
1098
 
733
- default:
1099
+ if (this.check([0xEF, 0xBB, 0xBF])) { // UTF-8-BOM
1100
+ if (this.detectionReentryCount >= maximumDetectionReentryCount) {
734
1101
  return;
1102
+ }
1103
+
1104
+ this.detectionReentryCount++;
1105
+ // Strip off UTF-8-BOM
1106
+ await this.tokenizer.ignore(3);
1107
+ return this.detectConfident(tokenizer);
735
1108
  }
736
- }
737
1109
 
738
- // RIFF file format which might be AVI, WAV, QCP, etc
739
- if (check([0x52, 0x49, 0x46, 0x46])) {
740
- if (check([0x41, 0x56, 0x49], {offset: 8})) {
1110
+ if (this.check([0x47, 0x49, 0x46])) {
741
1111
  return {
742
- ext: 'avi',
743
- mime: 'video/vnd.avi'
1112
+ ext: 'gif',
1113
+ mime: 'image/gif',
744
1114
  };
745
1115
  }
746
1116
 
747
- if (check([0x57, 0x41, 0x56, 0x45], {offset: 8})) {
1117
+ if (this.check([0x49, 0x49, 0xBC])) {
748
1118
  return {
749
- ext: 'wav',
750
- mime: 'audio/vnd.wave'
1119
+ ext: 'jxr',
1120
+ mime: 'image/vnd.ms-photo',
751
1121
  };
752
1122
  }
753
1123
 
754
- // QLCM, QCP file
755
- if (check([0x51, 0x4C, 0x43, 0x4D], {offset: 8})) {
1124
+ if (this.check([0x1F, 0x8B, 0x8])) {
1125
+ return this.detectGzip(tokenizer);
1126
+ }
1127
+
1128
+ if (this.check([0x42, 0x5A, 0x68])) {
756
1129
  return {
757
- ext: 'qcp',
758
- mime: 'audio/qcelp'
1130
+ ext: 'bz2',
1131
+ mime: 'application/x-bzip2',
759
1132
  };
760
1133
  }
761
- }
762
-
763
- if (checkString('SQLi')) {
764
- return {
765
- ext: 'sqlite',
766
- mime: 'application/x-sqlite3'
767
- };
768
- }
769
-
770
- if (check([0x4E, 0x45, 0x53, 0x1A])) {
771
- return {
772
- ext: 'nes',
773
- mime: 'application/x-nintendo-nes-rom'
774
- };
775
- }
776
1134
 
777
- if (checkString('Cr24')) {
778
- return {
779
- ext: 'crx',
780
- mime: 'application/x-google-chrome-extension'
781
- };
782
- }
1135
+ if (this.checkString('ID3')) {
1136
+ await safeIgnore(tokenizer, 6, {
1137
+ maximumLength: 6,
1138
+ reason: 'ID3 header prefix',
1139
+ }); // Skip ID3 header until the header size
1140
+ const id3HeaderLength = await tokenizer.readToken(uint32SyncSafeToken);
1141
+ const isUnknownFileSize = hasUnknownFileSize(tokenizer);
1142
+ if (
1143
+ !Number.isFinite(id3HeaderLength)
1144
+ || id3HeaderLength < 0
1145
+ // Keep ID3 probing bounded for unknown-size streams to avoid attacker-controlled large skips.
1146
+ || (
1147
+ isUnknownFileSize
1148
+ && (
1149
+ id3HeaderLength > maximumId3HeaderSizeInBytes
1150
+ || (tokenizer.position + id3HeaderLength) > maximumId3HeaderSizeInBytes
1151
+ )
1152
+ )
1153
+ ) {
1154
+ return;
1155
+ }
783
1156
 
784
- if (
785
- checkString('MSCF') ||
786
- checkString('ISc(')
787
- ) {
788
- return {
789
- ext: 'cab',
790
- mime: 'application/vnd.ms-cab-compressed'
791
- };
792
- }
1157
+ if (tokenizer.position + id3HeaderLength > tokenizer.fileInfo.size) {
1158
+ if (isUnknownFileSize) {
1159
+ return;
1160
+ }
793
1161
 
794
- if (check([0xED, 0xAB, 0xEE, 0xDB])) {
795
- return {
796
- ext: 'rpm',
797
- mime: 'application/x-rpm'
798
- };
799
- }
1162
+ return {
1163
+ ext: 'mp3',
1164
+ mime: 'audio/mpeg',
1165
+ };
1166
+ }
800
1167
 
801
- if (check([0xC5, 0xD0, 0xD3, 0xC6])) {
802
- return {
803
- ext: 'eps',
804
- mime: 'application/eps'
805
- };
806
- }
1168
+ try {
1169
+ await safeIgnore(tokenizer, id3HeaderLength, {
1170
+ maximumLength: isUnknownFileSize ? maximumId3HeaderSizeInBytes : tokenizer.fileInfo.size,
1171
+ reason: 'ID3 payload',
1172
+ });
1173
+ } catch (error) {
1174
+ if (error instanceof strtok3.EndOfStreamError) {
1175
+ return;
1176
+ }
807
1177
 
808
- if (check([0x28, 0xB5, 0x2F, 0xFD])) {
809
- return {
810
- ext: 'zst',
811
- mime: 'application/zstd'
812
- };
813
- }
1178
+ throw error;
1179
+ }
814
1180
 
815
- // -- 5-byte signatures --
1181
+ if (this.detectionReentryCount >= maximumDetectionReentryCount) {
1182
+ return;
1183
+ }
816
1184
 
817
- if (check([0x4F, 0x54, 0x54, 0x4F, 0x00])) {
818
- return {
819
- ext: 'otf',
820
- mime: 'font/otf'
821
- };
822
- }
1185
+ this.detectionReentryCount++;
1186
+ return this.parseTokenizer(tokenizer, this.detectionReentryCount); // Skip ID3 header, recursion
1187
+ }
823
1188
 
824
- if (checkString('#!AMR')) {
825
- return {
826
- ext: 'amr',
827
- mime: 'audio/amr'
828
- };
829
- }
1189
+ // Musepack, SV7
1190
+ if (this.checkString('MP+')) {
1191
+ return {
1192
+ ext: 'mpc',
1193
+ mime: 'audio/x-musepack',
1194
+ };
1195
+ }
830
1196
 
831
- if (checkString('{\\rtf')) {
832
- return {
833
- ext: 'rtf',
834
- mime: 'application/rtf'
835
- };
836
- }
1197
+ if (
1198
+ (this.buffer[0] === 0x43 || this.buffer[0] === 0x46)
1199
+ && this.check([0x57, 0x53], {offset: 1})
1200
+ ) {
1201
+ return {
1202
+ ext: 'swf',
1203
+ mime: 'application/x-shockwave-flash',
1204
+ };
1205
+ }
837
1206
 
838
- if (check([0x46, 0x4C, 0x56, 0x01])) {
839
- return {
840
- ext: 'flv',
841
- mime: 'video/x-flv'
842
- };
843
- }
1207
+ // -- 4-byte signatures --
844
1208
 
845
- if (checkString('IMPM')) {
846
- return {
847
- ext: 'it',
848
- mime: 'audio/x-it'
849
- };
850
- }
1209
+ // Requires a sample size of 4 bytes
1210
+ if (this.check([0xFF, 0xD8, 0xFF])) {
1211
+ if (this.check([0xF7], {offset: 3})) { // JPG7/SOF55, indicating an ISO/IEC 14495 / JPEG-LS file
1212
+ return {
1213
+ ext: 'jls',
1214
+ mime: 'image/jls',
1215
+ };
1216
+ }
851
1217
 
852
- if (
853
- checkString('-lh0-', {offset: 2}) ||
854
- checkString('-lh1-', {offset: 2}) ||
855
- checkString('-lh2-', {offset: 2}) ||
856
- checkString('-lh3-', {offset: 2}) ||
857
- checkString('-lh4-', {offset: 2}) ||
858
- checkString('-lh5-', {offset: 2}) ||
859
- checkString('-lh6-', {offset: 2}) ||
860
- checkString('-lh7-', {offset: 2}) ||
861
- checkString('-lzs-', {offset: 2}) ||
862
- checkString('-lz4-', {offset: 2}) ||
863
- checkString('-lz5-', {offset: 2}) ||
864
- checkString('-lhd-', {offset: 2})
865
- ) {
866
- return {
867
- ext: 'lzh',
868
- mime: 'application/x-lzh-compressed'
869
- };
870
- }
1218
+ return {
1219
+ ext: 'jpg',
1220
+ mime: 'image/jpeg',
1221
+ };
1222
+ }
871
1223
 
872
- // MPEG program stream (PS or MPEG-PS)
873
- if (check([0x00, 0x00, 0x01, 0xBA])) {
874
- // MPEG-PS, MPEG-1 Part 1
875
- if (check([0x21], {offset: 4, mask: [0xF1]})) {
1224
+ if (this.check([0x4F, 0x62, 0x6A, 0x01])) {
876
1225
  return {
877
- ext: 'mpg', // May also be .ps, .mpeg
878
- mime: 'video/MP1S'
1226
+ ext: 'avro',
1227
+ mime: 'application/avro',
879
1228
  };
880
1229
  }
881
1230
 
882
- // MPEG-PS, MPEG-2 Part 1
883
- if (check([0x44], {offset: 4, mask: [0xC4]})) {
1231
+ if (this.checkString('FLIF')) {
884
1232
  return {
885
- ext: 'mpg', // May also be .mpg, .m2p, .vob or .sub
886
- mime: 'video/MP2P'
1233
+ ext: 'flif',
1234
+ mime: 'image/flif',
887
1235
  };
888
1236
  }
889
- }
890
1237
 
891
- if (checkString('ITSF')) {
892
- return {
893
- ext: 'chm',
894
- mime: 'application/vnd.ms-htmlhelp'
895
- };
896
- }
1238
+ if (this.checkString('8BPS')) {
1239
+ return {
1240
+ ext: 'psd',
1241
+ mime: 'image/vnd.adobe.photoshop',
1242
+ };
1243
+ }
1244
+
1245
+ // Musepack, SV8
1246
+ if (this.checkString('MPCK')) {
1247
+ return {
1248
+ ext: 'mpc',
1249
+ mime: 'audio/x-musepack',
1250
+ };
1251
+ }
1252
+
1253
+ if (this.checkString('FORM')) {
1254
+ return {
1255
+ ext: 'aif',
1256
+ mime: 'audio/aiff',
1257
+ };
1258
+ }
1259
+
1260
+ if (this.checkString('icns', {offset: 0})) {
1261
+ return {
1262
+ ext: 'icns',
1263
+ mime: 'image/icns',
1264
+ };
1265
+ }
1266
+
1267
+ // Zip-based file formats
1268
+ // Need to be before the `zip` check
1269
+ if (this.check([0x50, 0x4B, 0x3, 0x4])) { // Local file header signature
1270
+ let fileType;
1271
+ const openXmlState = createOpenXmlZipDetectionState();
1272
+
1273
+ try {
1274
+ await new ZipHandler(tokenizer).unzip(zipHeader => {
1275
+ updateOpenXmlZipDetectionStateFromFilename(openXmlState, zipHeader.filename);
1276
+
1277
+ const isOpenXmlContentTypesEntry = zipHeader.filename === '[Content_Types].xml';
1278
+ const openXmlFileTypeFromEntries = getOpenXmlFileTypeFromZipEntries(openXmlState);
1279
+ if (
1280
+ !isOpenXmlContentTypesEntry
1281
+ && openXmlFileTypeFromEntries
1282
+ ) {
1283
+ fileType = openXmlFileTypeFromEntries;
1284
+ return {
1285
+ stop: true,
1286
+ };
1287
+ }
1288
+
1289
+ switch (zipHeader.filename) {
1290
+ case 'META-INF/mozilla.rsa':
1291
+ fileType = {
1292
+ ext: 'xpi',
1293
+ mime: 'application/x-xpinstall',
1294
+ };
1295
+ return {
1296
+ stop: true,
1297
+ };
1298
+ case 'META-INF/MANIFEST.MF':
1299
+ fileType = {
1300
+ ext: 'jar',
1301
+ mime: 'application/java-archive',
1302
+ };
1303
+ return {
1304
+ stop: true,
1305
+ };
1306
+ case 'mimetype':
1307
+ if (!canReadZipEntryForDetection(zipHeader, maximumZipTextEntrySizeInBytes)) {
1308
+ return {};
1309
+ }
1310
+
1311
+ return {
1312
+ async handler(fileData) {
1313
+ // Use TextDecoder to decode the UTF-8 encoded data
1314
+ const mimeType = new TextDecoder('utf-8').decode(fileData).trim();
1315
+ fileType = getFileTypeFromMimeType(mimeType);
1316
+ },
1317
+ stop: true,
1318
+ };
1319
+
1320
+ case '[Content_Types].xml': {
1321
+ openXmlState.hasContentTypesEntry = true;
1322
+
1323
+ if (!canReadZipEntryForDetection(zipHeader, maximumZipTextEntrySizeInBytes)) {
1324
+ openXmlState.hasUnparseableContentTypes = true;
1325
+ return {};
1326
+ }
1327
+
1328
+ openXmlState.isParsingContentTypes = true;
1329
+ return {
1330
+ async handler(fileData) {
1331
+ // Use TextDecoder to decode the UTF-8 encoded data
1332
+ const xmlContent = new TextDecoder('utf-8').decode(fileData);
1333
+ const mimeType = getOpenXmlMimeTypeFromContentTypesXml(xmlContent);
1334
+ if (mimeType) {
1335
+ fileType = getFileTypeFromMimeType(mimeType);
1336
+ }
1337
+
1338
+ openXmlState.hasParsedContentTypesEntry = true;
1339
+ openXmlState.isParsingContentTypes = false;
1340
+ },
1341
+ stop: true,
1342
+ };
1343
+ }
1344
+
1345
+ default:
1346
+ if (/classes\d*\.dex/.test(zipHeader.filename)) {
1347
+ fileType = {
1348
+ ext: 'apk',
1349
+ mime: 'application/vnd.android.package-archive',
1350
+ };
1351
+ return {stop: true};
1352
+ }
1353
+
1354
+ return {};
1355
+ }
1356
+ });
1357
+ } catch (error) {
1358
+ if (!isRecoverableZipError(error)) {
1359
+ throw error;
1360
+ }
1361
+
1362
+ if (openXmlState.isParsingContentTypes) {
1363
+ openXmlState.isParsingContentTypes = false;
1364
+ openXmlState.hasUnparseableContentTypes = true;
1365
+ }
1366
+ }
1367
+
1368
+ return fileType ?? getOpenXmlFileTypeFromZipEntries(openXmlState) ?? {
1369
+ ext: 'zip',
1370
+ mime: 'application/zip',
1371
+ };
1372
+ }
1373
+
1374
+ if (this.checkString('OggS')) {
1375
+ // This is an OGG container
1376
+ await tokenizer.ignore(28);
1377
+ const type = new Uint8Array(8);
1378
+ await tokenizer.readBuffer(type);
1379
+
1380
+ // Needs to be before `ogg` check
1381
+ if (_check(type, [0x4F, 0x70, 0x75, 0x73, 0x48, 0x65, 0x61, 0x64])) {
1382
+ return {
1383
+ ext: 'opus',
1384
+ mime: 'audio/ogg; codecs=opus',
1385
+ };
1386
+ }
1387
+
1388
+ // If '\x80theora' in header.
1389
+ if (_check(type, [0x80, 0x74, 0x68, 0x65, 0x6F, 0x72, 0x61])) {
1390
+ return {
1391
+ ext: 'ogv',
1392
+ mime: 'video/ogg',
1393
+ };
1394
+ }
1395
+
1396
+ // If '\x01video' in header.
1397
+ if (_check(type, [0x01, 0x76, 0x69, 0x64, 0x65, 0x6F, 0x00])) {
1398
+ return {
1399
+ ext: 'ogm',
1400
+ mime: 'video/ogg',
1401
+ };
1402
+ }
1403
+
1404
+ // If '\x7FFLAC' in header https://xiph.org/flac/faq.html
1405
+ if (_check(type, [0x7F, 0x46, 0x4C, 0x41, 0x43])) {
1406
+ return {
1407
+ ext: 'oga',
1408
+ mime: 'audio/ogg',
1409
+ };
1410
+ }
1411
+
1412
+ // 'Speex ' in header https://en.wikipedia.org/wiki/Speex
1413
+ if (_check(type, [0x53, 0x70, 0x65, 0x65, 0x78, 0x20, 0x20])) {
1414
+ return {
1415
+ ext: 'spx',
1416
+ mime: 'audio/ogg',
1417
+ };
1418
+ }
1419
+
1420
+ // If '\x01vorbis' in header
1421
+ if (_check(type, [0x01, 0x76, 0x6F, 0x72, 0x62, 0x69, 0x73])) {
1422
+ return {
1423
+ ext: 'ogg',
1424
+ mime: 'audio/ogg',
1425
+ };
1426
+ }
1427
+
1428
+ // Default OGG container https://www.iana.org/assignments/media-types/application/ogg
1429
+ return {
1430
+ ext: 'ogx',
1431
+ mime: 'application/ogg',
1432
+ };
1433
+ }
1434
+
1435
+ if (
1436
+ this.check([0x50, 0x4B])
1437
+ && (this.buffer[2] === 0x3 || this.buffer[2] === 0x5 || this.buffer[2] === 0x7)
1438
+ && (this.buffer[3] === 0x4 || this.buffer[3] === 0x6 || this.buffer[3] === 0x8)
1439
+ ) {
1440
+ return {
1441
+ ext: 'zip',
1442
+ mime: 'application/zip',
1443
+ };
1444
+ }
1445
+
1446
+ if (this.checkString('MThd')) {
1447
+ return {
1448
+ ext: 'mid',
1449
+ mime: 'audio/midi',
1450
+ };
1451
+ }
1452
+
1453
+ if (
1454
+ this.checkString('wOFF')
1455
+ && (
1456
+ this.check([0x00, 0x01, 0x00, 0x00], {offset: 4})
1457
+ || this.checkString('OTTO', {offset: 4})
1458
+ )
1459
+ ) {
1460
+ return {
1461
+ ext: 'woff',
1462
+ mime: 'font/woff',
1463
+ };
1464
+ }
1465
+
1466
+ if (
1467
+ this.checkString('wOF2')
1468
+ && (
1469
+ this.check([0x00, 0x01, 0x00, 0x00], {offset: 4})
1470
+ || this.checkString('OTTO', {offset: 4})
1471
+ )
1472
+ ) {
1473
+ return {
1474
+ ext: 'woff2',
1475
+ mime: 'font/woff2',
1476
+ };
1477
+ }
1478
+
1479
+ if (this.check([0xD4, 0xC3, 0xB2, 0xA1]) || this.check([0xA1, 0xB2, 0xC3, 0xD4])) {
1480
+ return {
1481
+ ext: 'pcap',
1482
+ mime: 'application/vnd.tcpdump.pcap',
1483
+ };
1484
+ }
1485
+
1486
+ // Sony DSD Stream File (DSF)
1487
+ if (this.checkString('DSD ')) {
1488
+ return {
1489
+ ext: 'dsf',
1490
+ mime: 'audio/x-dsf', // Non-standard
1491
+ };
1492
+ }
1493
+
1494
+ if (this.checkString('LZIP')) {
1495
+ return {
1496
+ ext: 'lz',
1497
+ mime: 'application/x-lzip',
1498
+ };
1499
+ }
1500
+
1501
+ if (this.checkString('fLaC')) {
1502
+ return {
1503
+ ext: 'flac',
1504
+ mime: 'audio/flac',
1505
+ };
1506
+ }
1507
+
1508
+ if (this.check([0x42, 0x50, 0x47, 0xFB])) {
1509
+ return {
1510
+ ext: 'bpg',
1511
+ mime: 'image/bpg',
1512
+ };
1513
+ }
1514
+
1515
+ if (this.checkString('wvpk')) {
1516
+ return {
1517
+ ext: 'wv',
1518
+ mime: 'audio/wavpack',
1519
+ };
1520
+ }
1521
+
1522
+ if (this.checkString('%PDF')) {
1523
+ // Assume this is just a normal PDF
1524
+ return {
1525
+ ext: 'pdf',
1526
+ mime: 'application/pdf',
1527
+ };
1528
+ }
1529
+
1530
+ if (this.check([0x00, 0x61, 0x73, 0x6D])) {
1531
+ return {
1532
+ ext: 'wasm',
1533
+ mime: 'application/wasm',
1534
+ };
1535
+ }
1536
+
1537
+ // TIFF, little-endian type
1538
+ if (this.check([0x49, 0x49])) {
1539
+ const fileType = await this.readTiffHeader(false);
1540
+ if (fileType) {
1541
+ return fileType;
1542
+ }
1543
+ }
1544
+
1545
+ // TIFF, big-endian type
1546
+ if (this.check([0x4D, 0x4D])) {
1547
+ const fileType = await this.readTiffHeader(true);
1548
+ if (fileType) {
1549
+ return fileType;
1550
+ }
1551
+ }
1552
+
1553
+ if (this.checkString('MAC ')) {
1554
+ return {
1555
+ ext: 'ape',
1556
+ mime: 'audio/ape',
1557
+ };
1558
+ }
1559
+
1560
+ // https://github.com/file/file/blob/master/magic/Magdir/matroska
1561
+ if (this.check([0x1A, 0x45, 0xDF, 0xA3])) { // Root element: EBML
1562
/**
Read one EBML variable-length field from the tokenizer.

The first byte is peeked (not consumed) and its leading zero bits decide how many bytes the field spans; the whole field, first byte included, is then read in one go.

@returns {Promise<Uint8Array>} The raw field bytes, with the first byte left unmodified.
*/
async function readField() {
	const firstByte = await tokenizer.peekNumber(Token.UINT8);

	// Count leading zero bits: 0 = A, 1 = B, 2 = C, 3 = D.
	// A zero first byte exhausts all eight probes, which yields a 9-byte field.
	let leadingZeroBits = 0;
	for (let probe = 0x80; probe !== 0 && (firstByte & probe) === 0; probe >>= 1) {
		leadingZeroBits++;
	}

	const fieldBytes = new Uint8Array(leadingZeroBits + 1);
	const readLimits = {
		maximumLength: fieldBytes.length,
		reason: 'EBML field',
	};
	await safeReadBuffer(tokenizer, fieldBytes, undefined, readLimits);
	return fieldBytes;
}
1579
+
1580
/**
Read one EBML element header: an ID field followed by a length field.

@returns {Promise<{id: *, len: *}>} The ID and payload length, each as decoded by `getUintBE`.
*/
async function readElement() {
	const idBytes = await readField();
	const sizeBytes = await readField();

	// Flip the bit at the position implied by the field width, so only the value bits of the length remain.
	const markerBit = 0x80 >> (sizeBytes.length - 1);
	sizeBytes[0] ^= markerBit;

	// JavaScript can max read 6 bytes integer, so only the trailing bytes of the length are decoded.
	const decodedByteCount = Math.min(6, sizeBytes.length);
	const decodedByteOffset = sizeBytes.length - decodedByteCount;

	const id = getUintBE(new DataView(idBytes.buffer));
	const len = getUintBE(new DataView(sizeBytes.buffer, decodedByteOffset, decodedByteCount));
	return {id, len};
}
1595
+
1596
/**
Scan successive EBML elements looking for the `DocType` element (ID 0x4282).

Scanning stops without a result when the element-count limit or the unknown-size scan budget is exceeded, when a length is implausible, or when the tokenizer position fails to advance.

@param {number} children - Loop budget, decremented once per skipped element. The caller passes the root element's decoded length.
@returns {Promise<string | undefined>} The DocType value with everything from the first NUL character onward removed, or `undefined` when none was found.
*/
async function readChildren(children) {
	let remaining = children;

	for (let visited = 1; remaining > 0; visited++) {
		if (visited > maximumEbmlElementCount) {
			return;
		}

		if (hasExceededUnknownSizeScanBudget(tokenizer, ebmlScanStart, maximumUntrustedSkipSizeInBytes)) {
			return;
		}

		const startPosition = tokenizer.position;
		const {id, len} = await readElement();

		if (id === 0x42_82) {
			// `DocType` is a short string ("webm", "matroska", ...), reject implausible lengths to avoid large allocations.
			if (len > maximumEbmlDocumentTypeSizeInBytes) {
				return;
			}

			const safeLength = getSafeBound(len, maximumEbmlDocumentTypeSizeInBytes, 'EBML DocType');
			const docType = await tokenizer.readToken(new Token.StringType(safeLength));
			return docType.replaceAll(/\00.*$/g, ''); // Return DocType
		}

		// On unknown-size input, refuse to skip a payload whose length cannot be trusted.
		const isPlausibleLength = Number.isFinite(len)
			&& len >= 0
			&& len <= maximumEbmlElementPayloadSizeInBytes;
		if (hasUnknownFileSize(tokenizer) && !isPlausibleLength) {
			return;
		}

		let skipLimit;
		if (hasUnknownFileSize(tokenizer)) {
			skipLimit = maximumEbmlElementPayloadSizeInBytes;
		} else {
			skipLimit = tokenizer.fileInfo.size;
		}

		// Ignore payload
		await safeIgnore(tokenizer, len, {
			maximumLength: skipLimit,
			reason: 'EBML payload',
		});
		remaining--;

		// Safeguard against malformed files: bail if the position did not advance.
		if (tokenizer.position <= startPosition) {
			return;
		}
	}
}
1645
+
1646
+ const rootElement = await readElement();
1647
+ const ebmlScanStart = tokenizer.position;
1648
+ const documentType = await readChildren(rootElement.len);
1649
+
1650
+ switch (documentType) {
1651
+ case 'webm':
1652
+ return {
1653
+ ext: 'webm',
1654
+ mime: 'video/webm',
1655
+ };
1656
+
1657
+ case 'matroska':
1658
+ return {
1659
+ ext: 'mkv',
1660
+ mime: 'video/matroska',
1661
+ };
1662
+
1663
+ default:
1664
+ return;
1665
+ }
1666
+ }
1667
+
1668
+ if (this.checkString('SQLi')) {
1669
+ return {
1670
+ ext: 'sqlite',
1671
+ mime: 'application/x-sqlite3',
1672
+ };
1673
+ }
1674
+
1675
+ if (this.check([0x4E, 0x45, 0x53, 0x1A])) {
1676
+ return {
1677
+ ext: 'nes',
1678
+ mime: 'application/x-nintendo-nes-rom',
1679
+ };
1680
+ }
1681
+
1682
+ if (this.checkString('Cr24')) {
1683
+ return {
1684
+ ext: 'crx',
1685
+ mime: 'application/x-google-chrome-extension',
1686
+ };
1687
+ }
1688
+
1689
+ if (
1690
+ this.checkString('MSCF')
1691
+ || this.checkString('ISc(')
1692
+ ) {
1693
+ return {
1694
+ ext: 'cab',
1695
+ mime: 'application/vnd.ms-cab-compressed',
1696
+ };
1697
+ }
1698
+
1699
+ if (this.check([0xED, 0xAB, 0xEE, 0xDB])) {
1700
+ return {
1701
+ ext: 'rpm',
1702
+ mime: 'application/x-rpm',
1703
+ };
1704
+ }
1705
+
1706
+ if (this.check([0xC5, 0xD0, 0xD3, 0xC6])) {
1707
+ return {
1708
+ ext: 'eps',
1709
+ mime: 'application/eps',
1710
+ };
1711
+ }
1712
+
1713
+ if (this.check([0x28, 0xB5, 0x2F, 0xFD])) {
1714
+ return {
1715
+ ext: 'zst',
1716
+ mime: 'application/zstd',
1717
+ };
1718
+ }
1719
+
1720
+ if (this.check([0x7F, 0x45, 0x4C, 0x46])) {
1721
+ return {
1722
+ ext: 'elf',
1723
+ mime: 'application/x-elf',
1724
+ };
1725
+ }
1726
+
1727
+ if (this.check([0x21, 0x42, 0x44, 0x4E])) {
1728
+ return {
1729
+ ext: 'pst',
1730
+ mime: 'application/vnd.ms-outlook',
1731
+ };
1732
+ }
1733
+
1734
+ if (this.checkString('PAR1') || this.checkString('PARE')) {
1735
+ return {
1736
+ ext: 'parquet',
1737
+ mime: 'application/vnd.apache.parquet',
1738
+ };
1739
+ }
1740
+
1741
+ if (this.checkString('ttcf')) {
1742
+ return {
1743
+ ext: 'ttc',
1744
+ mime: 'font/collection',
1745
+ };
1746
+ }
1747
+
1748
+ if (
1749
+ this.check([0xFE, 0xED, 0xFA, 0xCE]) // 32-bit, big-endian
1750
+ || this.check([0xFE, 0xED, 0xFA, 0xCF]) // 64-bit, big-endian
1751
+ || this.check([0xCE, 0xFA, 0xED, 0xFE]) // 32-bit, little-endian
1752
+ || this.check([0xCF, 0xFA, 0xED, 0xFE]) // 64-bit, little-endian
1753
+ ) {
1754
+ return {
1755
+ ext: 'macho',
1756
+ mime: 'application/x-mach-binary',
1757
+ };
1758
+ }
1759
+
1760
+ if (this.check([0x04, 0x22, 0x4D, 0x18])) {
1761
+ return {
1762
+ ext: 'lz4',
1763
+ mime: 'application/x-lz4', // Invented by us
1764
+ };
1765
+ }
1766
+
1767
+ if (this.checkString('regf')) {
1768
+ return {
1769
+ ext: 'dat',
1770
+ mime: 'application/x-ft-windows-registry-hive',
1771
+ };
1772
+ }
1773
+
1774
+ // SPSS Statistical Data File
1775
+ if (this.checkString('$FL2') || this.checkString('$FL3')) {
1776
+ return {
1777
+ ext: 'sav',
1778
+ mime: 'application/x-spss-sav',
1779
+ };
1780
+ }
1781
+
1782
+ // -- 5-byte signatures --
1783
+
1784
+ if (this.check([0x4F, 0x54, 0x54, 0x4F, 0x00])) {
1785
+ return {
1786
+ ext: 'otf',
1787
+ mime: 'font/otf',
1788
+ };
1789
+ }
1790
+
1791
+ if (this.checkString('#!AMR')) {
1792
+ return {
1793
+ ext: 'amr',
1794
+ mime: 'audio/amr',
1795
+ };
1796
+ }
1797
+
1798
+ if (this.checkString('{\\rtf')) {
1799
+ return {
1800
+ ext: 'rtf',
1801
+ mime: 'application/rtf',
1802
+ };
1803
+ }
1804
+
1805
+ if (this.check([0x46, 0x4C, 0x56, 0x01])) {
1806
+ return {
1807
+ ext: 'flv',
1808
+ mime: 'video/x-flv',
1809
+ };
1810
+ }
1811
+
1812
+ if (this.checkString('IMPM')) {
1813
+ return {
1814
+ ext: 'it',
1815
+ mime: 'audio/x-it',
1816
+ };
1817
+ }
1818
+
1819
+ if (
1820
+ this.checkString('-lh0-', {offset: 2})
1821
+ || this.checkString('-lh1-', {offset: 2})
1822
+ || this.checkString('-lh2-', {offset: 2})
1823
+ || this.checkString('-lh3-', {offset: 2})
1824
+ || this.checkString('-lh4-', {offset: 2})
1825
+ || this.checkString('-lh5-', {offset: 2})
1826
+ || this.checkString('-lh6-', {offset: 2})
1827
+ || this.checkString('-lh7-', {offset: 2})
1828
+ || this.checkString('-lzs-', {offset: 2})
1829
+ || this.checkString('-lz4-', {offset: 2})
1830
+ || this.checkString('-lz5-', {offset: 2})
1831
+ || this.checkString('-lhd-', {offset: 2})
1832
+ ) {
1833
+ return {
1834
+ ext: 'lzh',
1835
+ mime: 'application/x-lzh-compressed',
1836
+ };
1837
+ }
1838
+
1839
+ // MPEG program stream (PS or MPEG-PS)
1840
+ if (this.check([0x00, 0x00, 0x01, 0xBA])) {
1841
+ // MPEG-PS, MPEG-1 Part 1
1842
+ if (this.check([0x21], {offset: 4, mask: [0xF1]})) {
1843
+ return {
1844
+ ext: 'mpg', // May also be .ps, .mpeg
1845
+ mime: 'video/MP1S',
1846
+ };
1847
+ }
1848
+
1849
+ // MPEG-PS, MPEG-2 Part 1
1850
+ if (this.check([0x44], {offset: 4, mask: [0xC4]})) {
1851
+ return {
1852
+ ext: 'mpg', // May also be .mpg, .m2p, .vob or .sub
1853
+ mime: 'video/MP2P',
1854
+ };
1855
+ }
1856
+ }
1857
+
1858
+ if (this.checkString('ITSF')) {
1859
+ return {
1860
+ ext: 'chm',
1861
+ mime: 'application/vnd.ms-htmlhelp',
1862
+ };
1863
+ }
1864
+
1865
+ if (this.check([0xCA, 0xFE, 0xBA, 0xBE])) {
1866
+ // Java bytecode and Mach-O universal binaries have the same magic number.
1867
+ // We disambiguate based on the next 4 bytes, as done by `file`.
1868
+ // See https://github.com/file/file/blob/master/magic/Magdir/cafebabe
1869
+ const machOArchitectureCount = Token.UINT32_BE.get(this.buffer, 4);
1870
+ const javaClassFileMajorVersion = Token.UINT16_BE.get(this.buffer, 6);
1871
+
1872
+ if (machOArchitectureCount > 0 && machOArchitectureCount <= 30) {
1873
+ return {
1874
+ ext: 'macho',
1875
+ mime: 'application/x-mach-binary',
1876
+ };
1877
+ }
1878
+
1879
+ if (javaClassFileMajorVersion > 30) {
1880
+ return {
1881
+ ext: 'class',
1882
+ mime: 'application/java-vm',
1883
+ };
1884
+ }
1885
+ }
1886
+
1887
+ if (this.checkString('.RMF')) {
1888
+ return {
1889
+ ext: 'rm',
1890
+ mime: 'application/vnd.rn-realmedia',
1891
+ };
1892
+ }
1893
+
1894
+ // -- 5-byte signatures --
1895
+
1896
+ if (this.checkString('DRACO')) {
1897
+ return {
1898
+ ext: 'drc',
1899
+ mime: 'application/vnd.google.draco', // Invented by us
1900
+ };
1901
+ }
1902
+
1903
+ // -- 6-byte signatures --
1904
+
1905
+ if (this.check([0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00])) {
1906
+ return {
1907
+ ext: 'xz',
1908
+ mime: 'application/x-xz',
1909
+ };
1910
+ }
1911
+
1912
+ if (this.checkString('<?xml ')) {
1913
+ return {
1914
+ ext: 'xml',
1915
+ mime: 'application/xml',
1916
+ };
1917
+ }
1918
+
1919
+ if (this.check([0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C])) {
1920
+ return {
1921
+ ext: '7z',
1922
+ mime: 'application/x-7z-compressed',
1923
+ };
1924
+ }
1925
+
1926
+ if (
1927
+ this.check([0x52, 0x61, 0x72, 0x21, 0x1A, 0x7])
1928
+ && (this.buffer[6] === 0x0 || this.buffer[6] === 0x1)
1929
+ ) {
1930
+ return {
1931
+ ext: 'rar',
1932
+ mime: 'application/x-rar-compressed',
1933
+ };
1934
+ }
1935
+
1936
+ if (this.checkString('solid ')) {
1937
+ return {
1938
+ ext: 'stl',
1939
+ mime: 'model/stl',
1940
+ };
1941
+ }
1942
+
1943
+ if (this.checkString('AC')) {
1944
+ const version = new Token.StringType(4, 'latin1').get(this.buffer, 2);
1945
+ if (version.match('^d*') && version >= 1000 && version <= 1050) {
1946
+ return {
1947
+ ext: 'dwg',
1948
+ mime: 'image/vnd.dwg',
1949
+ };
1950
+ }
1951
+ }
1952
+
1953
+ if (this.checkString('070707')) {
1954
+ return {
1955
+ ext: 'cpio',
1956
+ mime: 'application/x-cpio',
1957
+ };
1958
+ }
897
1959
 
898
- // -- 6-byte signatures --
1960
+ // -- 7-byte signatures --
899
1961
 
900
- if (check([0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00])) {
901
- return {
902
- ext: 'xz',
903
- mime: 'application/x-xz'
904
- };
905
- }
1962
+ if (this.checkString('BLENDER')) {
1963
+ return {
1964
+ ext: 'blend',
1965
+ mime: 'application/x-blender',
1966
+ };
1967
+ }
906
1968
 
907
- if (checkString('<?xml ')) {
908
- return {
909
- ext: 'xml',
910
- mime: 'application/xml'
911
- };
912
- }
1969
+ if (this.checkString('!<arch>')) {
1970
+ await tokenizer.ignore(8);
1971
+ const string = await tokenizer.readToken(new Token.StringType(13, 'ascii'));
1972
+ if (string === 'debian-binary') {
1973
+ return {
1974
+ ext: 'deb',
1975
+ mime: 'application/x-deb',
1976
+ };
1977
+ }
913
1978
 
914
- if (check([0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C])) {
915
- return {
916
- ext: '7z',
917
- mime: 'application/x-7z-compressed'
918
- };
919
- }
1979
+ return {
1980
+ ext: 'ar',
1981
+ mime: 'application/x-unix-archive',
1982
+ };
1983
+ }
920
1984
 
921
- if (
922
- check([0x52, 0x61, 0x72, 0x21, 0x1A, 0x7]) &&
923
- (buffer[6] === 0x0 || buffer[6] === 0x1)
924
- ) {
925
- return {
926
- ext: 'rar',
927
- mime: 'application/x-rar-compressed'
928
- };
929
- }
1985
+ if (
1986
+ this.checkString('WEBVTT')
1987
+ && (
1988
+ // One of LF, CR, tab, space, or end of file must follow "WEBVTT" per the spec (see `fixture/fixture-vtt-*.vtt` for examples). Note that `\0` is technically the null character (there is no such thing as an EOF character). However, checking for `\0` gives us the same result as checking for the end of the stream.
1989
+ (['\n', '\r', '\t', ' ', '\0'].some(char7 => this.checkString(char7, {offset: 6}))))
1990
+ ) {
1991
+ return {
1992
+ ext: 'vtt',
1993
+ mime: 'text/vtt',
1994
+ };
1995
+ }
930
1996
 
931
- if (checkString('solid ')) {
932
- return {
933
- ext: 'stl',
934
- mime: 'model/stl'
935
- };
936
- }
1997
+ // -- 8-byte signatures --
1998
+
1999
+ if (this.check([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A])) {
2000
+ const pngFileType = {
2001
+ ext: 'png',
2002
+ mime: 'image/png',
2003
+ };
2004
+
2005
+ const apngFileType = {
2006
+ ext: 'apng',
2007
+ mime: 'image/apng',
2008
+ };
2009
+
2010
+ // APNG format (https://wiki.mozilla.org/APNG_Specification)
2011
+ // 1. Find the first IDAT (image data) chunk (49 44 41 54)
2012
+ // 2. Check if there is an "acTL" chunk before the IDAT one (61 63 54 4C)
2013
+
2014
+ // Offset calculated as follows:
2015
+ // - 8 bytes: PNG signature
2016
+ // - 4 (length) + 4 (chunk type) + 13 (chunk data) + 4 (CRC): IHDR chunk
2017
+
2018
+ await tokenizer.ignore(8); // ignore PNG signature
2019
+
2020
/**
Read a PNG chunk header from the tokenizer: an `INT32_BE` length followed by a 4-character latin1 chunk type.

@returns {Promise<{length: number, type: string}>} The chunk's declared data length and its type code.
*/
async function readChunkHeader() {
	const length = await tokenizer.readToken(Token.INT32_BE);
	const chunkTypeToken = new Token.StringType(4, 'latin1');
	const type = await tokenizer.readToken(chunkTypeToken);
	return {length, type};
}
2026
+
2027
+ const isUnknownPngStream = hasUnknownFileSize(tokenizer);
2028
+ const pngScanStart = tokenizer.position;
2029
+ let pngChunkCount = 0;
2030
+ let hasSeenImageHeader = false;
2031
+ do {
2032
+ pngChunkCount++;
2033
+ if (pngChunkCount > maximumPngChunkCount) {
2034
+ break;
2035
+ }
2036
+
2037
+ if (hasExceededUnknownSizeScanBudget(tokenizer, pngScanStart, maximumPngStreamScanBudgetInBytes)) {
2038
+ break;
2039
+ }
2040
+
2041
+ const previousPosition = tokenizer.position;
2042
+ const chunk = await readChunkHeader();
2043
+ if (chunk.length < 0) {
2044
+ return; // Invalid chunk length
2045
+ }
2046
+
2047
+ if (chunk.type === 'IHDR') {
2048
+ // PNG requires the first real image header to be a 13-byte IHDR chunk.
2049
+ if (chunk.length !== 13) {
2050
+ return;
2051
+ }
2052
+
2053
+ hasSeenImageHeader = true;
2054
+ }
2055
+
2056
+ switch (chunk.type) {
2057
+ case 'IDAT':
2058
+ return pngFileType;
2059
+ case 'acTL':
2060
+ return apngFileType;
2061
+ default:
2062
+ if (
2063
+ !hasSeenImageHeader
2064
+ && chunk.type !== 'CgBI'
2065
+ ) {
2066
+ return;
2067
+ }
2068
+
2069
+ if (
2070
+ isUnknownPngStream
2071
+ && chunk.length > maximumPngChunkSizeInBytes
2072
+ ) {
2073
+ // Avoid huge attacker-controlled skips when probing unknown-size streams.
2074
+ return hasSeenImageHeader && isPngAncillaryChunk(chunk.type) ? pngFileType : undefined;
2075
+ }
2076
+
2077
+ try {
2078
+ await safeIgnore(tokenizer, chunk.length + 4, {
2079
+ maximumLength: isUnknownPngStream ? maximumPngChunkSizeInBytes + 4 : tokenizer.fileInfo.size,
2080
+ reason: 'PNG chunk payload',
2081
+ }); // Ignore chunk-data + CRC
2082
+ } catch (error) {
2083
+ if (
2084
+ !isUnknownPngStream
2085
+ && (
2086
+ error instanceof ParserHardLimitError
2087
+ || error instanceof strtok3.EndOfStreamError
2088
+ )
2089
+ ) {
2090
+ return pngFileType;
2091
+ }
2092
+
2093
+ throw error;
2094
+ }
2095
+ }
2096
+
2097
+ // Safeguard against malformed files: bail if the position did not advance.
2098
+ if (tokenizer.position <= previousPosition) {
2099
+ break;
2100
+ }
2101
+ } while (tokenizer.position + 8 < tokenizer.fileInfo.size);
2102
+
2103
+ return pngFileType;
2104
+ }
2105
+
2106
+ if (this.check([0x41, 0x52, 0x52, 0x4F, 0x57, 0x31, 0x00, 0x00])) {
2107
+ return {
2108
+ ext: 'arrow',
2109
+ mime: 'application/vnd.apache.arrow.file',
2110
+ };
2111
+ }
2112
+
2113
+ if (this.check([0x67, 0x6C, 0x54, 0x46, 0x02, 0x00, 0x00, 0x00])) {
2114
+ return {
2115
+ ext: 'glb',
2116
+ mime: 'model/gltf-binary',
2117
+ };
2118
+ }
2119
+
2120
+ // `mov` format variants
2121
+ if (
2122
+ this.check([0x66, 0x72, 0x65, 0x65], {offset: 4}) // `free`
2123
+ || this.check([0x6D, 0x64, 0x61, 0x74], {offset: 4}) // `mdat` MJPEG
2124
+ || this.check([0x6D, 0x6F, 0x6F, 0x76], {offset: 4}) // `moov`
2125
+ || this.check([0x77, 0x69, 0x64, 0x65], {offset: 4}) // `wide`
2126
+ ) {
2127
+ return {
2128
+ ext: 'mov',
2129
+ mime: 'video/quicktime',
2130
+ };
2131
+ }
2132
+
2133
+ // -- 9-byte signatures --
2134
+
2135
+ if (this.check([0x49, 0x49, 0x52, 0x4F, 0x08, 0x00, 0x00, 0x00, 0x18])) {
2136
+ return {
2137
+ ext: 'orf',
2138
+ mime: 'image/x-olympus-orf',
2139
+ };
2140
+ }
2141
+
2142
+ if (this.checkString('gimp xcf ')) {
2143
+ return {
2144
+ ext: 'xcf',
2145
+ mime: 'image/x-xcf',
2146
+ };
2147
+ }
2148
+
2149
+ // File Type Box (https://en.wikipedia.org/wiki/ISO_base_media_file_format)
2150
+ // It's not required to be first, but it's recommended to be. Almost all ISO base media files start with `ftyp` box.
2151
+ // `ftyp` box must contain a brand major identifier, which must consist of ISO 8859-1 printable characters.
2152
+ // Here we check for 8859-1 printable characters (for simplicity, it's a mask which also catches one non-printable character).
2153
+ if (
2154
+ this.checkString('ftyp', {offset: 4})
2155
+ && (this.buffer[8] & 0x60) !== 0x00 // Brand major, first character ASCII?
2156
+ ) {
2157
+ // They all can have MIME `video/mp4` except `application/mp4` special-case which is hard to detect.
2158
+ // For some cases, we're specific, everything else falls to `video/mp4` with `mp4` extension.
2159
+ const brandMajor = new Token.StringType(4, 'latin1').get(this.buffer, 8).replace('\0', ' ').trim();
2160
+ switch (brandMajor) {
2161
+ case 'avif':
2162
+ case 'avis':
2163
+ return {ext: 'avif', mime: 'image/avif'};
2164
+ case 'mif1':
2165
+ return {ext: 'heic', mime: 'image/heif'};
2166
+ case 'msf1':
2167
+ return {ext: 'heic', mime: 'image/heif-sequence'};
2168
+ case 'heic':
2169
+ case 'heix':
2170
+ return {ext: 'heic', mime: 'image/heic'};
2171
+ case 'hevc':
2172
+ case 'hevx':
2173
+ return {ext: 'heic', mime: 'image/heic-sequence'};
2174
+ case 'qt':
2175
+ return {ext: 'mov', mime: 'video/quicktime'};
2176
+ case 'M4V':
2177
+ case 'M4VH':
2178
+ case 'M4VP':
2179
+ return {ext: 'm4v', mime: 'video/x-m4v'};
2180
+ case 'M4P':
2181
+ return {ext: 'm4p', mime: 'video/mp4'};
2182
+ case 'M4B':
2183
+ return {ext: 'm4b', mime: 'audio/mp4'};
2184
+ case 'M4A':
2185
+ return {ext: 'm4a', mime: 'audio/x-m4a'};
2186
+ case 'F4V':
2187
+ return {ext: 'f4v', mime: 'video/mp4'};
2188
+ case 'F4P':
2189
+ return {ext: 'f4p', mime: 'video/mp4'};
2190
+ case 'F4A':
2191
+ return {ext: 'f4a', mime: 'audio/mp4'};
2192
+ case 'F4B':
2193
+ return {ext: 'f4b', mime: 'audio/mp4'};
2194
+ case 'crx':
2195
+ return {ext: 'cr3', mime: 'image/x-canon-cr3'};
2196
+ default:
2197
+ if (brandMajor.startsWith('3g')) {
2198
+ if (brandMajor.startsWith('3g2')) {
2199
+ return {ext: '3g2', mime: 'video/3gpp2'};
2200
+ }
2201
+
2202
+ return {ext: '3gp', mime: 'video/3gpp'};
2203
+ }
2204
+
2205
+ return {ext: 'mp4', mime: 'video/mp4'};
2206
+ }
2207
+ }
2208
+
2209
+ // -- 10-byte signatures --
2210
+
2211
+ if (this.checkString('REGEDIT4\r\n')) {
2212
+ return {
2213
+ ext: 'reg',
2214
+ mime: 'application/x-ms-regedit',
2215
+ };
2216
+ }
2217
+
2218
+ // -- 12-byte signatures --
2219
+
2220
+ // RIFF file format which might be AVI, WAV, QCP, etc
2221
+ if (this.check([0x52, 0x49, 0x46, 0x46])) {
2222
+ if (this.checkString('WEBP', {offset: 8})) {
2223
+ return {
2224
+ ext: 'webp',
2225
+ mime: 'image/webp',
2226
+ };
2227
+ }
2228
+
2229
+ if (this.check([0x41, 0x56, 0x49], {offset: 8})) {
2230
+ return {
2231
+ ext: 'avi',
2232
+ mime: 'video/vnd.avi',
2233
+ };
2234
+ }
2235
+
2236
+ if (this.check([0x57, 0x41, 0x56, 0x45], {offset: 8})) {
2237
+ return {
2238
+ ext: 'wav',
2239
+ mime: 'audio/wav',
2240
+ };
2241
+ }
2242
+
2243
+ // QLCM, QCP file
2244
+ if (this.check([0x51, 0x4C, 0x43, 0x4D], {offset: 8})) {
2245
+ return {
2246
+ ext: 'qcp',
2247
+ mime: 'audio/qcelp',
2248
+ };
2249
+ }
2250
+ }
2251
+
2252
+ if (this.check([0x49, 0x49, 0x55, 0x00, 0x18, 0x00, 0x00, 0x00, 0x88, 0xE7, 0x74, 0xD8])) {
2253
+ return {
2254
+ ext: 'rw2',
2255
+ mime: 'image/x-panasonic-rw2',
2256
+ };
2257
+ }
2258
+
2259
+ // ASF_Header_Object, first 80 bits (10 bytes)
2260
+ if (this.check([0x30, 0x26, 0xB2, 0x75, 0x8E, 0x66, 0xCF, 0x11, 0xA6, 0xD9])) {
2261
+ let isMalformedAsf = false;
2262
+ try {
2263
/**
Read one ASF object header from the tokenizer: a 16-byte GUID followed by a `UINT64_LE` size.

@returns {Promise<{id: Uint8Array, size: number}>} The GUID bytes and the size converted with `Number()`.
*/
async function readHeader() {
	const id = new Uint8Array(16);
	const guidLimits = {
		maximumLength: id.length,
		reason: 'ASF header GUID',
	};
	await safeReadBuffer(tokenizer, id, undefined, guidLimits);

	const rawSize = await tokenizer.readToken(Token.UINT64_LE);
	return {id, size: Number(rawSize)};
}
2274
+
2275
+ await safeIgnore(tokenizer, 30, {
2276
+ maximumLength: 30,
2277
+ reason: 'ASF header prelude',
2278
+ });
2279
+ const isUnknownFileSize = hasUnknownFileSize(tokenizer);
2280
+ const asfHeaderScanStart = tokenizer.position;
2281
+ let asfHeaderObjectCount = 0;
2282
+ while (tokenizer.position + 24 < tokenizer.fileInfo.size) {
2283
+ asfHeaderObjectCount++;
2284
+ if (asfHeaderObjectCount > maximumAsfHeaderObjectCount) {
2285
+ break;
2286
+ }
2287
+
2288
+ if (hasExceededUnknownSizeScanBudget(tokenizer, asfHeaderScanStart, maximumUntrustedSkipSizeInBytes)) {
2289
+ break;
2290
+ }
2291
+
2292
+ const previousPosition = tokenizer.position;
2293
+ const header = await readHeader();
2294
+ let payload = header.size - 24;
2295
+ if (
2296
+ !Number.isFinite(payload)
2297
+ || payload < 0
2298
+ ) {
2299
+ isMalformedAsf = true;
2300
+ break;
2301
+ }
2302
+
2303
+ if (_check(header.id, [0x91, 0x07, 0xDC, 0xB7, 0xB7, 0xA9, 0xCF, 0x11, 0x8E, 0xE6, 0x00, 0xC0, 0x0C, 0x20, 0x53, 0x65])) {
2304
+ // Sync on Stream-Properties-Object (B7DC0791-A9B7-11CF-8EE6-00C00C205365)
2305
+ const typeId = new Uint8Array(16);
2306
+ payload -= await safeReadBuffer(tokenizer, typeId, undefined, {
2307
+ maximumLength: typeId.length,
2308
+ reason: 'ASF stream type GUID',
2309
+ });
2310
+
2311
+ if (_check(typeId, [0x40, 0x9E, 0x69, 0xF8, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
2312
+ // Found audio:
2313
+ return {
2314
+ ext: 'asf',
2315
+ mime: 'audio/x-ms-asf',
2316
+ };
2317
+ }
2318
+
2319
+ if (_check(typeId, [0xC0, 0xEF, 0x19, 0xBC, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
2320
+ // Found video:
2321
+ return {
2322
+ ext: 'asf',
2323
+ mime: 'video/x-ms-asf',
2324
+ };
2325
+ }
2326
+
2327
+ break;
2328
+ }
2329
+
2330
+ if (
2331
+ isUnknownFileSize
2332
+ && payload > maximumAsfHeaderPayloadSizeInBytes
2333
+ ) {
2334
+ isMalformedAsf = true;
2335
+ break;
2336
+ }
2337
+
2338
+ await safeIgnore(tokenizer, payload, {
2339
+ maximumLength: isUnknownFileSize ? maximumAsfHeaderPayloadSizeInBytes : tokenizer.fileInfo.size,
2340
+ reason: 'ASF header payload',
2341
+ });
2342
+
2343
+ // Safeguard against malformed files: break if the position did not advance.
2344
+ if (tokenizer.position <= previousPosition) {
2345
+ isMalformedAsf = true;
2346
+ break;
2347
+ }
2348
+ }
2349
+ } catch (error) {
2350
+ if (
2351
+ error instanceof strtok3.EndOfStreamError
2352
+ || error instanceof ParserHardLimitError
2353
+ ) {
2354
+ if (hasUnknownFileSize(tokenizer)) {
2355
+ isMalformedAsf = true;
2356
+ }
2357
+ } else {
2358
+ throw error;
2359
+ }
2360
+ }
2361
+
2362
+ if (isMalformedAsf) {
2363
+ return;
2364
+ }
2365
+
2366
+ // Default to ASF generic extension
2367
+ return {
2368
+ ext: 'asf',
2369
+ mime: 'application/vnd.ms-asf',
2370
+ };
2371
+ }
2372
+
2373
+ if (this.check([0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A])) {
2374
+ return {
2375
+ ext: 'ktx',
2376
+ mime: 'image/ktx',
2377
+ };
2378
+ }
2379
+
2380
+ if ((this.check([0x7E, 0x10, 0x04]) || this.check([0x7E, 0x18, 0x04])) && this.check([0x30, 0x4D, 0x49, 0x45], {offset: 4})) {
2381
+ return {
2382
+ ext: 'mie',
2383
+ mime: 'application/x-mie',
2384
+ };
2385
+ }
2386
+
2387
+ if (this.check([0x27, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], {offset: 2})) {
2388
+ return {
2389
+ ext: 'shp',
2390
+ mime: 'application/x-esri-shape',
2391
+ };
2392
+ }
2393
+
2394
+ if (this.check([0xFF, 0x4F, 0xFF, 0x51])) {
2395
+ return {
2396
+ ext: 'j2c',
2397
+ mime: 'image/j2c',
2398
+ };
2399
+ }
2400
+
2401
+ if (this.check([0x00, 0x00, 0x00, 0x0C, 0x6A, 0x50, 0x20, 0x20, 0x0D, 0x0A, 0x87, 0x0A])) {
2402
+ // JPEG-2000 family
2403
+
2404
+ await tokenizer.ignore(20);
2405
+ const type = await tokenizer.readToken(new Token.StringType(4, 'ascii'));
2406
+ switch (type) {
2407
+ case 'jp2 ':
2408
+ return {
2409
+ ext: 'jp2',
2410
+ mime: 'image/jp2',
2411
+ };
2412
+ case 'jpx ':
2413
+ return {
2414
+ ext: 'jpx',
2415
+ mime: 'image/jpx',
2416
+ };
2417
+ case 'jpm ':
2418
+ return {
2419
+ ext: 'jpm',
2420
+ mime: 'image/jpm',
2421
+ };
2422
+ case 'mjp2':
2423
+ return {
2424
+ ext: 'mj2',
2425
+ mime: 'image/mj2',
2426
+ };
2427
+ default:
2428
+ return;
2429
+ }
2430
+ }
2431
+
2432
+ if (
2433
+ this.check([0xFF, 0x0A])
2434
+ || this.check([0x00, 0x00, 0x00, 0x0C, 0x4A, 0x58, 0x4C, 0x20, 0x0D, 0x0A, 0x87, 0x0A])
2435
+ ) {
2436
+ return {
2437
+ ext: 'jxl',
2438
+ mime: 'image/jxl',
2439
+ };
2440
+ }
2441
+
2442
+ if (this.check([0xFE, 0xFF])) { // UTF-16-BOM-BE
2443
+ if (this.checkString('<?xml ', {offset: 2, encoding: 'utf-16be'})) {
2444
+ return {
2445
+ ext: 'xml',
2446
+ mime: 'application/xml',
2447
+ };
2448
+ }
2449
+
2450
+ return undefined; // Some unknown text based format
2451
+ }
2452
+
2453
+ if (this.check([0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1])) {
2454
+ // Detected Microsoft Compound File Binary File (MS-CFB) Format.
2455
+ return {
2456
+ ext: 'cfb',
2457
+ mime: 'application/x-cfb',
2458
+ };
2459
+ }
2460
+
2461
+ // Increase sample size from 32 to 256.
2462
+ await tokenizer.peekBuffer(this.buffer, {length: Math.min(256, tokenizer.fileInfo.size), mayBeLess: true});
2463
+
2464
+ if (this.check([0x61, 0x63, 0x73, 0x70], {offset: 36})) {
2465
+ return {
2466
+ ext: 'icc',
2467
+ mime: 'application/vnd.iccprofile',
2468
+ };
2469
+ }
2470
+
2471
+ // ACE: requires 14 bytes in the buffer
2472
+ if (this.checkString('**ACE', {offset: 7}) && this.checkString('**', {offset: 12})) {
2473
+ return {
2474
+ ext: 'ace',
2475
+ mime: 'application/x-ace-compressed',
2476
+ };
2477
+ }
2478
+
2479
+ // -- 15-byte signatures --
2480
+
2481
+ if (this.checkString('BEGIN:')) {
2482
+ if (this.checkString('VCARD', {offset: 6})) {
2483
+ return {
2484
+ ext: 'vcf',
2485
+ mime: 'text/vcard',
2486
+ };
2487
+ }
2488
+
2489
+ if (this.checkString('VCALENDAR', {offset: 6})) {
2490
+ return {
2491
+ ext: 'ics',
2492
+ mime: 'text/calendar',
2493
+ };
2494
+ }
2495
+ }
2496
+
2497
+ // `raf` is here just to keep all the raw image detectors together.
2498
+ if (this.checkString('FUJIFILMCCD-RAW')) {
2499
+ return {
2500
+ ext: 'raf',
2501
+ mime: 'image/x-fujifilm-raf',
2502
+ };
2503
+ }
2504
+
2505
+ if (this.checkString('Extended Module:')) {
2506
+ return {
2507
+ ext: 'xm',
2508
+ mime: 'audio/x-xm',
2509
+ };
2510
+ }
2511
+
2512
+ if (this.checkString('Creative Voice File')) {
2513
+ return {
2514
+ ext: 'voc',
2515
+ mime: 'audio/x-voc',
2516
+ };
2517
+ }
2518
+
2519
+ if (this.check([0x04, 0x00, 0x00, 0x00]) && this.buffer.length >= 16) { // Rough & quick check Pickle/ASAR
2520
+ const jsonSize = new DataView(this.buffer.buffer).getUint32(12, true);
2521
+
2522
+ if (jsonSize > 12 && this.buffer.length >= jsonSize + 16) {
2523
+ try {
2524
+ const header = new TextDecoder().decode(this.buffer.subarray(16, jsonSize + 16));
2525
+ const json = JSON.parse(header);
2526
+ // Check if Pickle is ASAR
2527
+ if (json.files) { // Final check, assuring Pickle/ASAR format
2528
+ return {
2529
+ ext: 'asar',
2530
+ mime: 'application/x-asar',
2531
+ };
2532
+ }
2533
+ } catch {}
2534
+ }
2535
+ }
2536
+
2537
+ if (this.check([0x06, 0x0E, 0x2B, 0x34, 0x02, 0x05, 0x01, 0x01, 0x0D, 0x01, 0x02, 0x01, 0x01, 0x02])) {
2538
+ return {
2539
+ ext: 'mxf',
2540
+ mime: 'application/mxf',
2541
+ };
2542
+ }
2543
+
2544
+ if (this.checkString('SCRM', {offset: 44})) {
2545
+ return {
2546
+ ext: 's3m',
2547
+ mime: 'audio/x-s3m',
2548
+ };
2549
+ }
2550
+
2551
+ // Raw MPEG-2 transport stream (188-byte packets)
2552
+ if (this.check([0x47]) && this.check([0x47], {offset: 188})) {
2553
+ return {
2554
+ ext: 'mts',
2555
+ mime: 'video/mp2t',
2556
+ };
2557
+ }
2558
+
2559
+ // Blu-ray Disc Audio-Video (BDAV) MPEG-2 transport stream has 4-byte TP_extra_header before each 188-byte packet
2560
+ if (this.check([0x47], {offset: 4}) && this.check([0x47], {offset: 196})) {
2561
+ return {
2562
+ ext: 'mts',
2563
+ mime: 'video/mp2t',
2564
+ };
2565
+ }
2566
+
2567
+ if (this.check([0x42, 0x4F, 0x4F, 0x4B, 0x4D, 0x4F, 0x42, 0x49], {offset: 60})) {
2568
+ return {
2569
+ ext: 'mobi',
2570
+ mime: 'application/x-mobipocket-ebook',
2571
+ };
2572
+ }
2573
+
2574
+ if (this.check([0x44, 0x49, 0x43, 0x4D], {offset: 128})) {
2575
+ return {
2576
+ ext: 'dcm',
2577
+ mime: 'application/dicom',
2578
+ };
2579
+ }
937
2580
 
938
- // -- 7-byte signatures --
2581
+ if (this.check([0x4C, 0x00, 0x00, 0x00, 0x01, 0x14, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46])) {
2582
+ return {
2583
+ ext: 'lnk',
2584
+ mime: 'application/x.ms.shortcut', // Invented by us
2585
+ };
2586
+ }
939
2587
 
940
- if (checkString('BLENDER')) {
941
- return {
942
- ext: 'blend',
943
- mime: 'application/x-blender'
944
- };
945
- }
2588
+ if (this.check([0x62, 0x6F, 0x6F, 0x6B, 0x00, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x72, 0x6B, 0x00, 0x00, 0x00, 0x00])) {
2589
+ return {
2590
+ ext: 'alias',
2591
+ mime: 'application/x.apple.alias', // Invented by us
2592
+ };
2593
+ }
946
2594
 
947
- if (checkString('!<arch>')) {
948
- await tokenizer.ignore(8);
949
- const str = await tokenizer.readToken(new Token.StringType(13, 'ascii'));
950
- if (str === 'debian-binary') {
2595
+ if (this.checkString('Kaydara FBX Binary \u0000')) {
951
2596
  return {
952
- ext: 'deb',
953
- mime: 'application/x-deb'
2597
+ ext: 'fbx',
2598
+ mime: 'application/x.autodesk.fbx', // Invented by us
954
2599
  };
955
2600
  }
956
2601
 
957
- return {
958
- ext: 'ar',
959
- mime: 'application/x-unix-archive'
960
- };
961
- }
2602
+ if (
2603
+ this.check([0x4C, 0x50], {offset: 34})
2604
+ && (
2605
+ this.check([0x00, 0x00, 0x01], {offset: 8})
2606
+ || this.check([0x01, 0x00, 0x02], {offset: 8})
2607
+ || this.check([0x02, 0x00, 0x02], {offset: 8})
2608
+ )
2609
+ ) {
2610
+ return {
2611
+ ext: 'eot',
2612
+ mime: 'application/vnd.ms-fontobject',
2613
+ };
2614
+ }
962
2615
 
963
- // -- 8-byte signatures --
2616
+ if (this.check([0x06, 0x06, 0xED, 0xF5, 0xD8, 0x1D, 0x46, 0xE5, 0xBD, 0x31, 0xEF, 0xE7, 0xFE, 0x74, 0xB7, 0x1D])) {
2617
+ return {
2618
+ ext: 'indd',
2619
+ mime: 'application/x-indesign',
2620
+ };
2621
+ }
964
2622
 
965
- if (check([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A])) {
966
- // APNG format (https://wiki.mozilla.org/APNG_Specification)
967
- // 1. Find the first IDAT (image data) chunk (49 44 41 54)
968
- // 2. Check if there is an "acTL" chunk before the IDAT one (61 63 54 4C)
2623
+ // -- 16-byte signatures --
969
2624
 
970
- // Offset calculated as follows:
971
- // - 8 bytes: PNG signature
972
- // - 4 (length) + 4 (chunk type) + 13 (chunk data) + 4 (CRC): IHDR chunk
2625
+ // JMP files - check for both Little Endian and Big Endian signatures
2626
+ if (this.check([0xFF, 0xFF, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00])
2627
+ || this.check([0x00, 0x00, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x04, 0x00, 0x01, 0x00, 0x01])) {
2628
+ return {
2629
+ ext: 'jmp',
2630
+ mime: 'application/x-jmp-data',
2631
+ };
2632
+ }
973
2633
 
974
- await tokenizer.ignore(8); // ignore PNG signature
2634
+ // Increase sample size from 256 to 512
2635
+ await tokenizer.peekBuffer(this.buffer, {length: Math.min(512, tokenizer.fileInfo.size), mayBeLess: true});
975
2636
 
976
- async function readChunkHeader() {
2637
+ // Requires a buffer size of 512 bytes
2638
+ if ((this.checkString('ustar', {offset: 257}) && (this.checkString('\0', {offset: 262}) || this.checkString(' ', {offset: 262})))
2639
+ || (this.check([0, 0, 0, 0, 0, 0], {offset: 257}) && tarHeaderChecksumMatches(this.buffer))) {
977
2640
  return {
978
- length: await tokenizer.readToken(Token.INT32_BE),
979
- type: await tokenizer.readToken(new Token.StringType(4, 'binary'))
2641
+ ext: 'tar',
2642
+ mime: 'application/x-tar',
980
2643
  };
981
2644
  }
982
2645
 
983
- do {
984
- const chunk = await readChunkHeader();
985
- if (chunk.length < 0) {
986
- return; // Invalid chunk length
2646
+ if (this.check([0xFF, 0xFE])) { // UTF-16-BOM-LE
2647
+ const encoding = 'utf-16le';
2648
+ if (this.checkString('<?xml ', {offset: 2, encoding})) {
2649
+ return {
2650
+ ext: 'xml',
2651
+ mime: 'application/xml',
2652
+ };
987
2653
  }
988
2654
 
989
- switch (chunk.type) {
990
- case 'IDAT':
991
- return {
992
- ext: 'png',
993
- mime: 'image/png'
994
- };
995
- case 'acTL':
996
- return {
997
- ext: 'apng',
998
- mime: 'image/apng'
999
- };
1000
- default:
1001
- await tokenizer.ignore(chunk.length + 4); // Ignore chunk-data + CRC
2655
+ if (this.check([0xFF, 0x0E], {offset: 2}) && this.checkString('SketchUp Model', {offset: 4, encoding})) {
2656
+ return {
2657
+ ext: 'skp',
2658
+ mime: 'application/vnd.sketchup.skp',
2659
+ };
1002
2660
  }
1003
- } while (tokenizer.position + 8 < tokenizer.fileInfo.size);
1004
-
1005
- return {
1006
- ext: 'png',
1007
- mime: 'image/png'
1008
- };
1009
- }
1010
-
1011
- if (check([0x41, 0x52, 0x52, 0x4F, 0x57, 0x31, 0x00, 0x00])) {
1012
- return {
1013
- ext: 'arrow',
1014
- mime: 'application/x-apache-arrow'
1015
- };
1016
- }
1017
-
1018
- if (check([0x67, 0x6C, 0x54, 0x46, 0x02, 0x00, 0x00, 0x00])) {
1019
- return {
1020
- ext: 'glb',
1021
- mime: 'model/gltf-binary'
1022
- };
1023
- }
1024
2661
 
1025
- // `mov` format variants
1026
- if (
1027
- check([0x66, 0x72, 0x65, 0x65], {offset: 4}) || // `free`
1028
- check([0x6D, 0x64, 0x61, 0x74], {offset: 4}) || // `mdat` MJPEG
1029
- check([0x6D, 0x6F, 0x6F, 0x76], {offset: 4}) || // `moov`
1030
- check([0x77, 0x69, 0x64, 0x65], {offset: 4}) // `wide`
1031
- ) {
1032
- return {
1033
- ext: 'mov',
1034
- mime: 'video/quicktime'
1035
- };
1036
- }
2662
+ if (this.checkString('Windows Registry Editor Version 5.00\r\n', {offset: 2, encoding})) {
2663
+ return {
2664
+ ext: 'reg',
2665
+ mime: 'application/x-ms-regedit',
2666
+ };
2667
+ }
1037
2668
 
1038
- // -- 9-byte signatures --
2669
+ return undefined; // Some text based format
2670
+ }
1039
2671
 
1040
- if (check([0x49, 0x49, 0x52, 0x4F, 0x08, 0x00, 0x00, 0x00, 0x18])) {
1041
- return {
1042
- ext: 'orf',
1043
- mime: 'image/x-olympus-orf'
1044
- };
1045
- }
2672
+ if (this.checkString('-----BEGIN PGP MESSAGE-----')) {
2673
+ return {
2674
+ ext: 'pgp',
2675
+ mime: 'application/pgp-encrypted',
2676
+ };
2677
+ }
2678
+ };
2679
+ // Detections with limited supporting data, resulting in a higher likelihood of false positives
2680
+ detectImprecise = async tokenizer => {
2681
+ this.buffer = new Uint8Array(reasonableDetectionSizeInBytes);
2682
+ const fileSize = getKnownFileSizeOrMaximum(tokenizer.fileInfo.size);
1046
2683
 
1047
- if (checkString('gimp xcf ')) {
1048
- return {
1049
- ext: 'xcf',
1050
- mime: 'image/x-xcf'
1051
- };
1052
- }
2684
+ // Read initial sample size of 8 bytes
2685
+ await tokenizer.peekBuffer(this.buffer, {length: Math.min(8, fileSize), mayBeLess: true});
1053
2686
 
1054
- // -- 12-byte signatures --
2687
+ if (
2688
+ this.check([0x0, 0x0, 0x1, 0xBA])
2689
+ || this.check([0x0, 0x0, 0x1, 0xB3])
2690
+ ) {
2691
+ return {
2692
+ ext: 'mpg',
2693
+ mime: 'video/mpeg',
2694
+ };
2695
+ }
1055
2696
 
1056
- if (check([0x49, 0x49, 0x55, 0x00, 0x18, 0x00, 0x00, 0x00, 0x88, 0xE7, 0x74, 0xD8])) {
1057
- return {
1058
- ext: 'rw2',
1059
- mime: 'image/x-panasonic-rw2'
1060
- };
1061
- }
2697
+ if (this.check([0x00, 0x01, 0x00, 0x00, 0x00])) {
2698
+ return {
2699
+ ext: 'ttf',
2700
+ mime: 'font/ttf',
2701
+ };
2702
+ }
1062
2703
 
1063
- // ASF_Header_Object first 80 bytes
1064
- if (check([0x30, 0x26, 0xB2, 0x75, 0x8E, 0x66, 0xCF, 0x11, 0xA6, 0xD9])) {
1065
- async function readHeader() {
1066
- const guid = Buffer.alloc(16);
1067
- await tokenizer.readBuffer(guid);
2704
+ if (this.check([0x00, 0x00, 0x01, 0x00])) {
1068
2705
  return {
1069
- id: guid,
1070
- size: Number(await tokenizer.readToken(Token.UINT64_LE))
2706
+ ext: 'ico',
2707
+ mime: 'image/x-icon',
1071
2708
  };
1072
2709
  }
1073
2710
 
1074
- await tokenizer.ignore(30);
1075
- // Search for header should be in first 1KB of file.
1076
- while (tokenizer.position + 24 < tokenizer.fileInfo.size) {
1077
- const header = await readHeader();
1078
- let payload = header.size - 24;
1079
- if (_check(header.id, [0x91, 0x07, 0xDC, 0xB7, 0xB7, 0xA9, 0xCF, 0x11, 0x8E, 0xE6, 0x00, 0xC0, 0x0C, 0x20, 0x53, 0x65])) {
1080
- // Sync on Stream-Properties-Object (B7DC0791-A9B7-11CF-8EE6-00C00C205365)
1081
- const typeId = Buffer.alloc(16);
1082
- payload -= await tokenizer.readBuffer(typeId);
2711
+ if (this.check([0x00, 0x00, 0x02, 0x00])) {
2712
+ return {
2713
+ ext: 'cur',
2714
+ mime: 'image/x-icon',
2715
+ };
2716
+ }
1083
2717
 
1084
- if (_check(typeId, [0x40, 0x9E, 0x69, 0xF8, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
1085
- // Found audio:
1086
- return {
1087
- ext: 'asf',
1088
- mime: 'audio/x-ms-asf'
1089
- };
1090
- }
2718
+ // Adjust buffer to `mpegOffsetTolerance`
2719
+ await tokenizer.peekBuffer(this.buffer, {length: Math.min(2 + this.options.mpegOffsetTolerance, fileSize), mayBeLess: true});
1091
2720
 
1092
- if (_check(typeId, [0xC0, 0xEF, 0x19, 0xBC, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
1093
- // Found video:
1094
- return {
1095
- ext: 'asf',
1096
- mime: 'video/x-ms-asf'
1097
- };
2721
+ // Check MPEG 1 or 2 Layer 3 header, or 'layer 0' for ADTS (MPEG sync-word 0xFFE)
2722
+ if (this.buffer.length >= (2 + this.options.mpegOffsetTolerance)) {
2723
+ for (let depth = 0; depth <= this.options.mpegOffsetTolerance; ++depth) {
2724
+ const type = this.scanMpeg(depth);
2725
+ if (type) {
2726
+ return type;
1098
2727
  }
1099
-
1100
- break;
1101
2728
  }
1102
-
1103
- await tokenizer.ignore(payload);
1104
2729
  }
2730
+ };
1105
2731
 
1106
- // Default to ASF generic extension
1107
- return {
1108
- ext: 'asf',
1109
- mime: 'application/vnd.ms-asf'
1110
- };
1111
- }
1112
-
1113
- if (check([0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A])) {
1114
- return {
1115
- ext: 'ktx',
1116
- mime: 'image/ktx'
1117
- };
1118
- }
1119
-
1120
- if ((check([0x7E, 0x10, 0x04]) || check([0x7E, 0x18, 0x04])) && check([0x30, 0x4D, 0x49, 0x45], {offset: 4})) {
1121
- return {
1122
- ext: 'mie',
1123
- mime: 'application/x-mie'
1124
- };
1125
- }
1126
-
1127
- if (check([0x27, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], {offset: 2})) {
1128
- return {
1129
- ext: 'shp',
1130
- mime: 'application/x-esri-shape'
1131
- };
1132
- }
1133
-
1134
- if (check([0x00, 0x00, 0x00, 0x0C, 0x6A, 0x50, 0x20, 0x20, 0x0D, 0x0A, 0x87, 0x0A])) {
1135
- // JPEG-2000 family
1136
-
1137
- await tokenizer.ignore(20);
1138
- const type = await tokenizer.readToken(new Token.StringType(4, 'ascii'));
1139
- switch (type) {
1140
- case 'jp2 ':
1141
- return {
1142
- ext: 'jp2',
1143
- mime: 'image/jp2'
1144
- };
1145
- case 'jpx ':
2732
+ async readTiffTag(bigEndian) {
2733
+ const tagId = await this.tokenizer.readToken(bigEndian ? Token.UINT16_BE : Token.UINT16_LE);
2734
+ await this.tokenizer.ignore(10);
2735
+ switch (tagId) {
2736
+ case 50_341:
1146
2737
  return {
1147
- ext: 'jpx',
1148
- mime: 'image/jpx'
2738
+ ext: 'arw',
2739
+ mime: 'image/x-sony-arw',
1149
2740
  };
1150
- case 'jpm ':
2741
+ case 50_706:
1151
2742
  return {
1152
- ext: 'jpm',
1153
- mime: 'image/jpm'
1154
- };
1155
- case 'mjp2':
1156
- return {
1157
- ext: 'mj2',
1158
- mime: 'image/mj2'
2743
+ ext: 'dng',
2744
+ mime: 'image/x-adobe-dng',
1159
2745
  };
1160
2746
  default:
1161
- return;
1162
2747
  }
1163
2748
  }
1164
2749
 
1165
- if (
1166
- check([0xFF, 0x0A]) ||
1167
- check([0x00, 0x00, 0x00, 0x0C, 0x4A, 0x58, 0x4C, 0x20, 0x0D, 0x0A, 0x87, 0x0A])
1168
- ) {
1169
- return {
1170
- ext: 'jxl',
1171
- mime: 'image/jxl'
1172
- };
1173
- }
1174
-
1175
- // -- Unsafe signatures --
1176
-
1177
- if (
1178
- check([0x0, 0x0, 0x1, 0xBA]) ||
1179
- check([0x0, 0x0, 0x1, 0xB3])
1180
- ) {
1181
- return {
1182
- ext: 'mpg',
1183
- mime: 'video/mpeg'
1184
- };
1185
- }
1186
-
1187
- if (check([0x00, 0x01, 0x00, 0x00, 0x00])) {
1188
- return {
1189
- ext: 'ttf',
1190
- mime: 'font/ttf'
1191
- };
1192
- }
1193
-
1194
- if (check([0x00, 0x00, 0x01, 0x00])) {
1195
- return {
1196
- ext: 'ico',
1197
- mime: 'image/x-icon'
1198
- };
1199
- }
1200
-
1201
- if (check([0x00, 0x00, 0x02, 0x00])) {
1202
- return {
1203
- ext: 'cur',
1204
- mime: 'image/x-icon'
1205
- };
1206
- }
1207
-
1208
- if (check([0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1])) {
1209
- // Detected Microsoft Compound File Binary File (MS-CFB) Format.
1210
- return {
1211
- ext: 'cfb',
1212
- mime: 'application/x-cfb'
1213
- };
1214
- }
1215
-
1216
- // Increase sample size from 12 to 256.
1217
- await tokenizer.peekBuffer(buffer, {length: Math.min(256, tokenizer.fileInfo.size), mayBeLess: true});
1218
-
1219
- // -- 15-byte signatures --
1220
-
1221
- if (checkString('BEGIN:')) {
1222
- if (checkString('VCARD', {offset: 6})) {
1223
- return {
1224
- ext: 'vcf',
1225
- mime: 'text/vcard'
1226
- };
2750
+ async readTiffIFD(bigEndian) {
2751
+ const numberOfTags = await this.tokenizer.readToken(bigEndian ? Token.UINT16_BE : Token.UINT16_LE);
2752
+ if (numberOfTags > maximumTiffTagCount) {
2753
+ return;
1227
2754
  }
1228
2755
 
1229
- if (checkString('VCALENDAR', {offset: 6})) {
1230
- return {
1231
- ext: 'ics',
1232
- mime: 'text/calendar'
1233
- };
2756
+ if (
2757
+ hasUnknownFileSize(this.tokenizer)
2758
+ && (2 + (numberOfTags * 12)) > maximumTiffIfdOffsetInBytes
2759
+ ) {
2760
+ return;
1234
2761
  }
1235
- }
1236
2762
 
1237
- // `raf` is here just to keep all the raw image detectors together.
1238
- if (checkString('FUJIFILMCCD-RAW')) {
1239
- return {
1240
- ext: 'raf',
1241
- mime: 'image/x-fujifilm-raf'
1242
- };
2763
+ for (let n = 0; n < numberOfTags; ++n) {
2764
+ const fileType = await this.readTiffTag(bigEndian);
2765
+ if (fileType) {
2766
+ return fileType;
2767
+ }
2768
+ }
1243
2769
  }
1244
2770
 
1245
- if (checkString('Extended Module:')) {
1246
- return {
1247
- ext: 'xm',
1248
- mime: 'audio/x-xm'
2771
+ async readTiffHeader(bigEndian) {
2772
+ const tiffFileType = {
2773
+ ext: 'tif',
2774
+ mime: 'image/tiff',
1249
2775
  };
1250
- }
1251
2776
 
1252
- if (checkString('Creative Voice File')) {
1253
- return {
1254
- ext: 'voc',
1255
- mime: 'audio/x-voc'
1256
- };
1257
- }
2777
+ const version = (bigEndian ? Token.UINT16_BE : Token.UINT16_LE).get(this.buffer, 2);
2778
+ const ifdOffset = (bigEndian ? Token.UINT32_BE : Token.UINT32_LE).get(this.buffer, 4);
1258
2779
 
1259
- if (check([0x04, 0x00, 0x00, 0x00]) && buffer.length >= 16) { // Rough & quick check Pickle/ASAR
1260
- const jsonSize = buffer.readUInt32LE(12);
1261
- if (jsonSize > 12 && buffer.length >= jsonSize + 16) {
1262
- try {
1263
- const header = buffer.slice(16, jsonSize + 16).toString();
1264
- const json = JSON.parse(header);
1265
- // Check if Pickle is ASAR
1266
- if (json.files) { // Final check, assuring Pickle/ASAR format
2780
+ if (version === 42) {
2781
+ // TIFF file header
2782
+ if (ifdOffset >= 6) {
2783
+ if (this.checkString('CR', {offset: 8})) {
1267
2784
  return {
1268
- ext: 'asar',
1269
- mime: 'application/x-asar'
2785
+ ext: 'cr2',
2786
+ mime: 'image/x-canon-cr2',
1270
2787
  };
1271
2788
  }
1272
- } catch (_) {
1273
- }
1274
- }
1275
- }
1276
-
1277
- if (check([0x06, 0x0E, 0x2B, 0x34, 0x02, 0x05, 0x01, 0x01, 0x0D, 0x01, 0x02, 0x01, 0x01, 0x02])) {
1278
- return {
1279
- ext: 'mxf',
1280
- mime: 'application/mxf'
1281
- };
1282
- }
1283
-
1284
- if (checkString('SCRM', {offset: 44})) {
1285
- return {
1286
- ext: 's3m',
1287
- mime: 'audio/x-s3m'
1288
- };
1289
- }
1290
2789
 
1291
- if (check([0x47], {offset: 4}) && (check([0x47], {offset: 192}) || check([0x47], {offset: 196}))) {
1292
- return {
1293
- ext: 'mts',
1294
- mime: 'video/mp2t'
1295
- };
1296
- }
2790
+ if (ifdOffset >= 8) {
2791
+ const someId1 = (bigEndian ? Token.UINT16_BE : Token.UINT16_LE).get(this.buffer, 8);
2792
+ const someId2 = (bigEndian ? Token.UINT16_BE : Token.UINT16_LE).get(this.buffer, 10);
2793
+
2794
+ if (
2795
+ (someId1 === 0x1C && someId2 === 0xFE)
2796
+ || (someId1 === 0x1F && someId2 === 0x0B)) {
2797
+ return {
2798
+ ext: 'nef',
2799
+ mime: 'image/x-nikon-nef',
2800
+ };
2801
+ }
2802
+ }
2803
+ }
1297
2804
 
1298
- if (check([0x42, 0x4F, 0x4F, 0x4B, 0x4D, 0x4F, 0x42, 0x49], {offset: 60})) {
1299
- return {
1300
- ext: 'mobi',
1301
- mime: 'application/x-mobipocket-ebook'
1302
- };
1303
- }
2805
+ if (
2806
+ hasUnknownFileSize(this.tokenizer)
2807
+ && ifdOffset > maximumTiffStreamIfdOffsetInBytes
2808
+ ) {
2809
+ return tiffFileType;
2810
+ }
1304
2811
 
1305
- if (check([0x44, 0x49, 0x43, 0x4D], {offset: 128})) {
1306
- return {
1307
- ext: 'dcm',
1308
- mime: 'application/dicom'
1309
- };
1310
- }
2812
+ const maximumTiffOffset = hasUnknownFileSize(this.tokenizer) ? maximumTiffIfdOffsetInBytes : this.tokenizer.fileInfo.size;
1311
2813
 
1312
- if (check([0x4C, 0x00, 0x00, 0x00, 0x01, 0x14, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46])) {
1313
- return {
1314
- ext: 'lnk',
1315
- mime: 'application/x.ms.shortcut' // Invented by us
1316
- };
1317
- }
2814
+ try {
2815
+ await safeIgnore(this.tokenizer, ifdOffset, {
2816
+ maximumLength: maximumTiffOffset,
2817
+ reason: 'TIFF IFD offset',
2818
+ });
2819
+ } catch (error) {
2820
+ if (error instanceof strtok3.EndOfStreamError) {
2821
+ return;
2822
+ }
1318
2823
 
1319
- if (check([0x62, 0x6F, 0x6F, 0x6B, 0x00, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x72, 0x6B, 0x00, 0x00, 0x00, 0x00])) {
1320
- return {
1321
- ext: 'alias',
1322
- mime: 'application/x.apple.alias' // Invented by us
1323
- };
1324
- }
2824
+ throw error;
2825
+ }
1325
2826
 
1326
- if (
1327
- check([0x4C, 0x50], {offset: 34}) &&
1328
- (
1329
- check([0x00, 0x00, 0x01], {offset: 8}) ||
1330
- check([0x01, 0x00, 0x02], {offset: 8}) ||
1331
- check([0x02, 0x00, 0x02], {offset: 8})
1332
- )
1333
- ) {
1334
- return {
1335
- ext: 'eot',
1336
- mime: 'application/vnd.ms-fontobject'
1337
- };
1338
- }
2827
+ let fileType;
2828
+ try {
2829
+ fileType = await this.readTiffIFD(bigEndian);
2830
+ } catch (error) {
2831
+ if (error instanceof strtok3.EndOfStreamError) {
2832
+ return;
2833
+ }
1339
2834
 
1340
- if (check([0x06, 0x06, 0xED, 0xF5, 0xD8, 0x1D, 0x46, 0xE5, 0xBD, 0x31, 0xEF, 0xE7, 0xFE, 0x74, 0xB7, 0x1D])) {
1341
- return {
1342
- ext: 'indd',
1343
- mime: 'application/x-indesign'
1344
- };
1345
- }
2835
+ throw error;
2836
+ }
1346
2837
 
1347
- // Increase sample size from 256 to 512
1348
- await tokenizer.peekBuffer(buffer, {length: Math.min(512, tokenizer.fileInfo.size), mayBeLess: true});
2838
+ return fileType ?? tiffFileType;
2839
+ }
1349
2840
 
1350
- // Requires a buffer size of 512 bytes
1351
- if (tarHeaderChecksumMatches(buffer)) {
1352
- return {
1353
- ext: 'tar',
1354
- mime: 'application/x-tar'
1355
- };
2841
+ if (version === 43) { // Big TIFF file header
2842
+ return tiffFileType;
2843
+ }
1356
2844
  }
1357
2845
 
1358
- if (check([0xFF, 0xFE, 0xFF, 0x0E, 0x53, 0x00, 0x6B, 0x00, 0x65, 0x00, 0x74, 0x00, 0x63, 0x00, 0x68, 0x00, 0x55, 0x00, 0x70, 0x00, 0x20, 0x00, 0x4D, 0x00, 0x6F, 0x00, 0x64, 0x00, 0x65, 0x00, 0x6C, 0x00])) {
1359
- return {
1360
- ext: 'skp',
1361
- mime: 'application/vnd.sketchup.skp'
1362
- };
1363
- }
2846
+ /**
2847
+ Scan check MPEG 1 or 2 Layer 3 header, or 'layer 0' for ADTS (MPEG sync-word 0xFFE).
1364
2848
 
1365
- if (checkString('-----BEGIN PGP MESSAGE-----')) {
1366
- return {
1367
- ext: 'pgp',
1368
- mime: 'application/pgp-encrypted'
1369
- };
1370
- }
2849
+ @param offset - Offset to scan for sync-preamble.
2850
+ @returns {{ext: string, mime: string}}
2851
+ */
2852
+ scanMpeg(offset) {
2853
+ if (this.check([0xFF, 0xE0], {offset, mask: [0xFF, 0xE0]})) {
2854
+ if (this.check([0x10], {offset: offset + 1, mask: [0x16]})) {
2855
+ // Check for (ADTS) MPEG-2
2856
+ if (this.check([0x08], {offset: offset + 1, mask: [0x08]})) {
2857
+ return {
2858
+ ext: 'aac',
2859
+ mime: 'audio/aac',
2860
+ };
2861
+ }
1371
2862
 
1372
- // Check MPEG 1 or 2 Layer 3 header, or 'layer 0' for ADTS (MPEG sync-word 0xFFE)
1373
- if (buffer.length >= 2 && check([0xFF, 0xE0], {offset: 0, mask: [0xFF, 0xE0]})) {
1374
- if (check([0x10], {offset: 1, mask: [0x16]})) {
1375
- // Check for (ADTS) MPEG-2
1376
- if (check([0x08], {offset: 1, mask: [0x08]})) {
2863
+ // Must be (ADTS) MPEG-4
1377
2864
  return {
1378
2865
  ext: 'aac',
1379
- mime: 'audio/aac'
2866
+ mime: 'audio/aac',
1380
2867
  };
1381
2868
  }
1382
2869
 
1383
- // Must be (ADTS) MPEG-4
1384
- return {
1385
- ext: 'aac',
1386
- mime: 'audio/aac'
1387
- };
1388
- }
1389
-
1390
- // MPEG 1 or 2 Layer 3 header
1391
- // Check for MPEG layer 3
1392
- if (check([0x02], {offset: 1, mask: [0x06]})) {
1393
- return {
1394
- ext: 'mp3',
1395
- mime: 'audio/mpeg'
1396
- };
1397
- }
2870
+ // MPEG 1 or 2 Layer 3 header
2871
+ // Check for MPEG layer 3
2872
+ if (this.check([0x02], {offset: offset + 1, mask: [0x06]})) {
2873
+ return {
2874
+ ext: 'mp3',
2875
+ mime: 'audio/mpeg',
2876
+ };
2877
+ }
1398
2878
 
1399
- // Check for MPEG layer 2
1400
- if (check([0x04], {offset: 1, mask: [0x06]})) {
1401
- return {
1402
- ext: 'mp2',
1403
- mime: 'audio/mpeg'
1404
- };
1405
- }
2879
+ // Check for MPEG layer 2
2880
+ if (this.check([0x04], {offset: offset + 1, mask: [0x06]})) {
2881
+ return {
2882
+ ext: 'mp2',
2883
+ mime: 'audio/mpeg',
2884
+ };
2885
+ }
1406
2886
 
1407
- // Check for MPEG layer 1
1408
- if (check([0x06], {offset: 1, mask: [0x06]})) {
1409
- return {
1410
- ext: 'mp1',
1411
- mime: 'audio/mpeg'
1412
- };
2887
+ // Check for MPEG layer 1
2888
+ if (this.check([0x06], {offset: offset + 1, mask: [0x06]})) {
2889
+ return {
2890
+ ext: 'mp1',
2891
+ mime: 'audio/mpeg',
2892
+ };
2893
+ }
1413
2894
  }
1414
2895
  }
1415
2896
  }
1416
2897
 
1417
- const stream = readableStream => new Promise((resolve, reject) => {
1418
- // Using `eval` to work around issues when bundling with Webpack
1419
- const stream = eval('require')('stream'); // eslint-disable-line no-eval
1420
-
1421
- readableStream.on('error', reject);
1422
- readableStream.once('readable', async () => {
1423
- // Set up output stream
1424
- const pass = new stream.PassThrough();
1425
- let outputStream;
1426
- if (stream.pipeline) {
1427
- outputStream = stream.pipeline(readableStream, pass, () => {
1428
- });
1429
- } else {
1430
- outputStream = readableStream.pipe(pass);
1431
- }
1432
-
1433
- // Read the input stream and detect the filetype
1434
- const chunk = readableStream.read(minimumBytes) || readableStream.read() || Buffer.alloc(0);
1435
- try {
1436
- const fileType = await fromBuffer(chunk);
1437
- pass.fileType = fileType;
1438
- } catch (error) {
1439
- reject(error);
1440
- }
1441
-
1442
- resolve(outputStream);
1443
- });
1444
- });
1445
-
1446
- const fileType = {
1447
- fromStream,
1448
- fromTokenizer,
1449
- fromBuffer,
1450
- stream
1451
- };
1452
-
1453
- Object.defineProperty(fileType, 'extensions', {
1454
- get() {
1455
- return new Set(supported.extensions);
1456
- }
1457
- });
1458
-
1459
- Object.defineProperty(fileType, 'mimeTypes', {
1460
- get() {
1461
- return new Set(supported.mimeTypes);
1462
- }
1463
- });
1464
-
1465
- module.exports = fileType;
2898
+ export const supportedExtensions = new Set(extensions);
2899
+ export const supportedMimeTypes = new Set(mimeTypes);