file-type 19.6.0 → 20.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core.d.ts +36 -335
- package/core.js +322 -246
- package/index.d.ts +9 -3
- package/index.js +12 -6
- package/package.json +27 -9
- package/readme.md +78 -33
- package/supported.js +34 -0
package/core.js
CHANGED
|
@@ -4,7 +4,8 @@ Primary entry point, Node.js specific entry point is index.js
|
|
|
4
4
|
|
|
5
5
|
import * as Token from 'token-types';
|
|
6
6
|
import * as strtok3 from 'strtok3/core';
|
|
7
|
-
import {
|
|
7
|
+
import {ZipHandler} from '@tokenizer/inflate';
|
|
8
|
+
import {includes, getUintBE} from 'uint8array-extras';
|
|
8
9
|
import {
|
|
9
10
|
stringToBytes,
|
|
10
11
|
tarHeaderChecksumMatches,
|
|
@@ -26,6 +27,127 @@ export async function fileTypeFromBlob(blob) {
|
|
|
26
27
|
return new FileTypeParser().fromBlob(blob);
|
|
27
28
|
}
|
|
28
29
|
|
|
30
|
+
// Content types a ZIP-based container can declare about itself, in the form
// `[declaredType, ext, reportedMime]`. `reportedMime` is omitted when the
// declared type is reported unchanged, so the entries that spell it out are
// exactly the ones where the two differ.
const fileTypeByDeclaredMimeType = new Map([
	['application/epub+zip', 'epub'],
	['application/vnd.oasis.opendocument.text', 'odt'],
	['application/vnd.oasis.opendocument.text-template', 'ott'],
	['application/vnd.oasis.opendocument.spreadsheet', 'ods'],
	['application/vnd.oasis.opendocument.spreadsheet-template', 'ots'],
	['application/vnd.oasis.opendocument.presentation', 'odp'],
	['application/vnd.oasis.opendocument.presentation-template', 'otp'],
	['application/vnd.oasis.opendocument.graphics', 'odg'],
	['application/vnd.oasis.opendocument.graphics-template', 'otg'],
	['application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', 'xlsx'],
	['application/vnd.ms-excel.sheet.macroEnabled', 'xlsm', 'application/vnd.ms-excel.sheet.macroEnabled.12'],
	['application/vnd.openxmlformats-officedocument.spreadsheetml.template', 'xltx'],
	// NOTE(review): lowercase `macroenabled` differs in case from the sibling
	// macro-enabled entries; kept as-is because callers may compare against it.
	['application/vnd.ms-excel.template.macroEnabled', 'xltm', 'application/vnd.ms-excel.template.macroenabled.12'],
	['application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'docx'],
	['application/vnd.ms-word.document.macroEnabled', 'docm', 'application/vnd.ms-word.document.macroEnabled.12'],
	['application/vnd.openxmlformats-officedocument.wordprocessingml.template', 'dotx'],
	['application/vnd.ms-word.template.macroEnabledTemplate', 'dotm', 'application/vnd.ms-word.template.macroEnabled.12'],
	['application/vnd.openxmlformats-officedocument.presentationml.template', 'potx'],
	['application/vnd.ms-powerpoint.template.macroEnabled', 'potm', 'application/vnd.ms-powerpoint.template.macroEnabled.12'],
	['application/vnd.openxmlformats-officedocument.presentationml.presentation', 'pptx'],
	['application/vnd.ms-powerpoint.presentation.macroEnabled', 'pptm', 'application/vnd.ms-powerpoint.presentation.macroEnabled.12'],
	['application/vnd.ms-visio.drawing', 'vsdx', 'application/vnd.visio'],
	['application/vnd.ms-package.3dmanufacturing-3dmodel+xml', '3mf', 'model/3mf'],
].map(([declaredType, ext, mime = declaredType]) => [declaredType, {ext, mime}]));

/**
Resolve the file type for a content type declared inside a ZIP-based container.

The lookup is an exact, case-sensitive string match; no trimming or parameter stripping is done here, so callers must pass the bare type.

@param {string} mimeType - Declared content type, for example the text of a `mimetype` entry or a content type taken from `[Content_Types].xml` with its `.main+xml` suffix removed.
@returns {{ext: string, mime: string} | undefined} The extension and MIME type to report, or `undefined` when the declared type is not recognized.
*/
function getFileTypeFromMimeType(mimeType) {
	const fileType = fileTypeByDeclaredMimeType.get(mimeType);

	// Return a fresh object on every call so a caller that mutates its result cannot corrupt the shared table
	return fileType ? {...fileType} : undefined;
}
|
|
150
|
+
|
|
29
151
|
function _check(buffer, headers, options) {
|
|
30
152
|
options = {
|
|
31
153
|
offset: 0,
|
|
@@ -57,20 +179,20 @@ export async function fileTypeStream(webStream, options) {
|
|
|
57
179
|
|
|
58
180
|
export class FileTypeParser {
|
|
59
181
|
constructor(options) {
|
|
60
|
-
this.detectors = options?.customDetectors
|
|
182
|
+
this.detectors = [...(options?.customDetectors ?? []),
|
|
183
|
+
{id: 'core', detect: this.detectConfident},
|
|
184
|
+
{id: 'core.imprecise', detect: this.detectImprecise}];
|
|
61
185
|
this.tokenizerOptions = {
|
|
62
186
|
abortSignal: options?.signal,
|
|
63
187
|
};
|
|
64
|
-
this.fromTokenizer = this.fromTokenizer.bind(this);
|
|
65
|
-
this.fromBuffer = this.fromBuffer.bind(this);
|
|
66
|
-
this.parse = this.parse.bind(this);
|
|
67
188
|
}
|
|
68
189
|
|
|
69
190
|
async fromTokenizer(tokenizer) {
|
|
70
191
|
const initialPosition = tokenizer.position;
|
|
71
192
|
|
|
72
|
-
|
|
73
|
-
|
|
193
|
+
// Iterate through all file-type detectors
|
|
194
|
+
for (const detector of this.detectors) {
|
|
195
|
+
const fileType = await detector.detect(tokenizer);
|
|
74
196
|
if (fileType) {
|
|
75
197
|
return fileType;
|
|
76
198
|
}
|
|
@@ -79,8 +201,6 @@ export class FileTypeParser {
|
|
|
79
201
|
return undefined; // Cannot proceed scanning of the tokenizer is at an arbitrary position
|
|
80
202
|
}
|
|
81
203
|
}
|
|
82
|
-
|
|
83
|
-
return this.parse(tokenizer);
|
|
84
204
|
}
|
|
85
205
|
|
|
86
206
|
async fromBuffer(input) {
|
|
@@ -163,7 +283,8 @@ export class FileTypeParser {
|
|
|
163
283
|
return this.check(stringToBytes(header), options);
|
|
164
284
|
}
|
|
165
285
|
|
|
166
|
-
|
|
286
|
+
// Detections with a high degree of certainty in identifying the correct file type
|
|
287
|
+
detectConfident = async tokenizer => {
|
|
167
288
|
this.buffer = new Uint8Array(reasonableDetectionSizeInBytes);
|
|
168
289
|
|
|
169
290
|
// Keep reading until EOF if the file size is unknown.
|
|
@@ -253,7 +374,7 @@ export class FileTypeParser {
|
|
|
253
374
|
if (this.check([0xEF, 0xBB, 0xBF])) { // UTF-8-BOM
|
|
254
375
|
// Strip off UTF-8-BOM
|
|
255
376
|
this.tokenizer.ignore(3);
|
|
256
|
-
return this.
|
|
377
|
+
return this.detectConfident(tokenizer);
|
|
257
378
|
}
|
|
258
379
|
|
|
259
380
|
if (this.check([0x47, 0x49, 0x46])) {
|
|
@@ -387,140 +508,69 @@ export class FileTypeParser {
|
|
|
387
508
|
// Zip-based file formats
|
|
388
509
|
// Need to be before the `zip` check
|
|
389
510
|
if (this.check([0x50, 0x4B, 0x3, 0x4])) { // Local file header signature
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
compressedSize: view.getUint32(18, true),
|
|
399
|
-
uncompressedSize: view.getUint32(22, true),
|
|
400
|
-
filenameLength: view.getUint16(26, true),
|
|
401
|
-
extraFieldLength: view.getUint16(28, true),
|
|
402
|
-
};
|
|
403
|
-
|
|
404
|
-
zipHeader.filename = await tokenizer.readToken(new Token.StringType(zipHeader.filenameLength, 'utf-8'));
|
|
405
|
-
await tokenizer.ignore(zipHeader.extraFieldLength);
|
|
406
|
-
|
|
407
|
-
if (/classes\d*\.dex/.test(zipHeader.filename)) {
|
|
511
|
+
let fileType;
|
|
512
|
+
await new ZipHandler(tokenizer).unzip(zipHeader => {
|
|
513
|
+
switch (zipHeader.filename) {
|
|
514
|
+
case 'META-INF/mozilla.rsa':
|
|
515
|
+
fileType = {
|
|
516
|
+
ext: 'xpi',
|
|
517
|
+
mime: 'application/x-xpinstall',
|
|
518
|
+
};
|
|
408
519
|
return {
|
|
409
|
-
|
|
410
|
-
|
|
520
|
+
stop: true,
|
|
521
|
+
};
|
|
522
|
+
case 'META-INF/MANIFEST.MF':
|
|
523
|
+
fileType = {
|
|
524
|
+
ext: 'jar',
|
|
525
|
+
mime: 'application/java-archive',
|
|
411
526
|
};
|
|
412
|
-
}
|
|
413
|
-
|
|
414
|
-
// Assumes signed `.xpi` from addons.mozilla.org
|
|
415
|
-
if (zipHeader.filename === 'META-INF/mozilla.rsa') {
|
|
416
527
|
return {
|
|
417
|
-
|
|
418
|
-
mime: 'application/x-xpinstall',
|
|
528
|
+
stop: true,
|
|
419
529
|
};
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
if (zipHeader.filename.endsWith('.rels') || zipHeader.filename.endsWith('.xml')) {
|
|
423
|
-
const type = zipHeader.filename.split('/')[0];
|
|
424
|
-
switch (type) {
|
|
425
|
-
case '_rels':
|
|
426
|
-
break;
|
|
427
|
-
case 'word':
|
|
428
|
-
return {
|
|
429
|
-
ext: 'docx',
|
|
430
|
-
mime: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
|
431
|
-
};
|
|
432
|
-
case 'ppt':
|
|
433
|
-
return {
|
|
434
|
-
ext: 'pptx',
|
|
435
|
-
mime: 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
|
436
|
-
};
|
|
437
|
-
case 'xl':
|
|
438
|
-
return {
|
|
439
|
-
ext: 'xlsx',
|
|
440
|
-
mime: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
|
441
|
-
};
|
|
442
|
-
case 'visio':
|
|
443
|
-
return {
|
|
444
|
-
ext: 'vsdx',
|
|
445
|
-
mime: 'application/vnd.visio',
|
|
446
|
-
};
|
|
447
|
-
default:
|
|
448
|
-
break;
|
|
449
|
-
}
|
|
450
|
-
}
|
|
451
|
-
|
|
452
|
-
if (zipHeader.filename.startsWith('xl/')) {
|
|
530
|
+
case 'mimetype':
|
|
453
531
|
return {
|
|
454
|
-
|
|
455
|
-
|
|
532
|
+
async handler(fileData) {
|
|
533
|
+
// Use TextDecoder to decode the UTF-8 encoded data
|
|
534
|
+
const mimeType = new TextDecoder('utf-8').decode(fileData).trim();
|
|
535
|
+
fileType = getFileTypeFromMimeType(mimeType);
|
|
536
|
+
},
|
|
537
|
+
stop: true,
|
|
456
538
|
};
|
|
457
|
-
}
|
|
458
539
|
|
|
459
|
-
|
|
540
|
+
case '[Content_Types].xml':
|
|
460
541
|
return {
|
|
461
|
-
|
|
462
|
-
|
|
542
|
+
async handler(fileData) {
|
|
543
|
+
// Use TextDecoder to decode the UTF-8 encoded data
|
|
544
|
+
let xmlContent = new TextDecoder('utf-8').decode(fileData);
|
|
545
|
+
const endPos = xmlContent.indexOf('.main+xml"');
|
|
546
|
+
if (endPos === -1) {
|
|
547
|
+
const mimeType = 'application/vnd.ms-package.3dmanufacturing-3dmodel+xml';
|
|
548
|
+
if (xmlContent.includes(`ContentType="${mimeType}"`)) {
|
|
549
|
+
fileType = getFileTypeFromMimeType(mimeType);
|
|
550
|
+
}
|
|
551
|
+
} else {
|
|
552
|
+
xmlContent = xmlContent.slice(0, Math.max(0, endPos));
|
|
553
|
+
const firstPos = xmlContent.lastIndexOf('"');
|
|
554
|
+
const mimeType = xmlContent.slice(Math.max(0, firstPos + 1));
|
|
555
|
+
fileType = getFileTypeFromMimeType(mimeType);
|
|
556
|
+
}
|
|
557
|
+
},
|
|
558
|
+
stop: true,
|
|
463
559
|
};
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
// MS Office, OpenOffice and LibreOffice may put the parts in different order, so the check should not rely on it.
|
|
472
|
-
if (zipHeader.filename === 'mimetype' && zipHeader.compressedSize === zipHeader.uncompressedSize) {
|
|
473
|
-
let mimeType = await tokenizer.readToken(new Token.StringType(zipHeader.compressedSize, 'utf-8'));
|
|
474
|
-
mimeType = mimeType.trim();
|
|
475
|
-
|
|
476
|
-
switch (mimeType) {
|
|
477
|
-
case 'application/epub+zip':
|
|
478
|
-
return {
|
|
479
|
-
ext: 'epub',
|
|
480
|
-
mime: 'application/epub+zip',
|
|
481
|
-
};
|
|
482
|
-
case 'application/vnd.oasis.opendocument.text':
|
|
483
|
-
return {
|
|
484
|
-
ext: 'odt',
|
|
485
|
-
mime: 'application/vnd.oasis.opendocument.text',
|
|
486
|
-
};
|
|
487
|
-
case 'application/vnd.oasis.opendocument.spreadsheet':
|
|
488
|
-
return {
|
|
489
|
-
ext: 'ods',
|
|
490
|
-
mime: 'application/vnd.oasis.opendocument.spreadsheet',
|
|
491
|
-
};
|
|
492
|
-
case 'application/vnd.oasis.opendocument.presentation':
|
|
493
|
-
return {
|
|
494
|
-
ext: 'odp',
|
|
495
|
-
mime: 'application/vnd.oasis.opendocument.presentation',
|
|
496
|
-
};
|
|
497
|
-
default:
|
|
560
|
+
default:
|
|
561
|
+
if (/classes\d*\.dex/.test(zipHeader.filename)) {
|
|
562
|
+
fileType = {
|
|
563
|
+
ext: 'apk',
|
|
564
|
+
mime: 'application/vnd.android.package-archive',
|
|
565
|
+
};
|
|
566
|
+
return {stop: true};
|
|
498
567
|
}
|
|
499
|
-
}
|
|
500
568
|
|
|
501
|
-
|
|
502
|
-
if (zipHeader.compressedSize === 0) {
|
|
503
|
-
let nextHeaderIndex = -1;
|
|
504
|
-
|
|
505
|
-
while (nextHeaderIndex < 0 && (tokenizer.position < tokenizer.fileInfo.size)) {
|
|
506
|
-
await tokenizer.peekBuffer(this.buffer, {mayBeLess: true});
|
|
507
|
-
|
|
508
|
-
nextHeaderIndex = indexOf(this.buffer, new Uint8Array([0x50, 0x4B, 0x03, 0x04]));
|
|
509
|
-
|
|
510
|
-
// Move position to the next header if found, skip the whole buffer otherwise
|
|
511
|
-
await tokenizer.ignore(nextHeaderIndex >= 0 ? nextHeaderIndex : this.buffer.length);
|
|
512
|
-
}
|
|
513
|
-
} else {
|
|
514
|
-
await tokenizer.ignore(zipHeader.compressedSize);
|
|
515
|
-
}
|
|
569
|
+
return {};
|
|
516
570
|
}
|
|
517
|
-
}
|
|
518
|
-
if (!(error instanceof strtok3.EndOfStreamError)) {
|
|
519
|
-
throw error;
|
|
520
|
-
}
|
|
521
|
-
}
|
|
571
|
+
});
|
|
522
572
|
|
|
523
|
-
return {
|
|
573
|
+
return fileType ?? {
|
|
524
574
|
ext: 'zip',
|
|
525
575
|
mime: 'application/zip',
|
|
526
576
|
};
|
|
@@ -598,68 +648,6 @@ export class FileTypeParser {
|
|
|
598
648
|
};
|
|
599
649
|
}
|
|
600
650
|
|
|
601
|
-
//
|
|
602
|
-
|
|
603
|
-
// File Type Box (https://en.wikipedia.org/wiki/ISO_base_media_file_format)
|
|
604
|
-
// It's not required to be first, but it's recommended to be. Almost all ISO base media files start with `ftyp` box.
|
|
605
|
-
// `ftyp` box must contain a brand major identifier, which must consist of ISO 8859-1 printable characters.
|
|
606
|
-
// Here we check for 8859-1 printable characters (for simplicity, it's a mask which also catches one non-printable character).
|
|
607
|
-
if (
|
|
608
|
-
this.checkString('ftyp', {offset: 4})
|
|
609
|
-
&& (this.buffer[8] & 0x60) !== 0x00 // Brand major, first character ASCII?
|
|
610
|
-
) {
|
|
611
|
-
// They all can have MIME `video/mp4` except `application/mp4` special-case which is hard to detect.
|
|
612
|
-
// For some cases, we're specific, everything else falls to `video/mp4` with `mp4` extension.
|
|
613
|
-
const brandMajor = new Token.StringType(4, 'latin1').get(this.buffer, 8).replace('\0', ' ').trim();
|
|
614
|
-
switch (brandMajor) {
|
|
615
|
-
case 'avif':
|
|
616
|
-
case 'avis':
|
|
617
|
-
return {ext: 'avif', mime: 'image/avif'};
|
|
618
|
-
case 'mif1':
|
|
619
|
-
return {ext: 'heic', mime: 'image/heif'};
|
|
620
|
-
case 'msf1':
|
|
621
|
-
return {ext: 'heic', mime: 'image/heif-sequence'};
|
|
622
|
-
case 'heic':
|
|
623
|
-
case 'heix':
|
|
624
|
-
return {ext: 'heic', mime: 'image/heic'};
|
|
625
|
-
case 'hevc':
|
|
626
|
-
case 'hevx':
|
|
627
|
-
return {ext: 'heic', mime: 'image/heic-sequence'};
|
|
628
|
-
case 'qt':
|
|
629
|
-
return {ext: 'mov', mime: 'video/quicktime'};
|
|
630
|
-
case 'M4V':
|
|
631
|
-
case 'M4VH':
|
|
632
|
-
case 'M4VP':
|
|
633
|
-
return {ext: 'm4v', mime: 'video/x-m4v'};
|
|
634
|
-
case 'M4P':
|
|
635
|
-
return {ext: 'm4p', mime: 'video/mp4'};
|
|
636
|
-
case 'M4B':
|
|
637
|
-
return {ext: 'm4b', mime: 'audio/mp4'};
|
|
638
|
-
case 'M4A':
|
|
639
|
-
return {ext: 'm4a', mime: 'audio/x-m4a'};
|
|
640
|
-
case 'F4V':
|
|
641
|
-
return {ext: 'f4v', mime: 'video/mp4'};
|
|
642
|
-
case 'F4P':
|
|
643
|
-
return {ext: 'f4p', mime: 'video/mp4'};
|
|
644
|
-
case 'F4A':
|
|
645
|
-
return {ext: 'f4a', mime: 'audio/mp4'};
|
|
646
|
-
case 'F4B':
|
|
647
|
-
return {ext: 'f4b', mime: 'audio/mp4'};
|
|
648
|
-
case 'crx':
|
|
649
|
-
return {ext: 'cr3', mime: 'image/x-canon-cr3'};
|
|
650
|
-
default:
|
|
651
|
-
if (brandMajor.startsWith('3g')) {
|
|
652
|
-
if (brandMajor.startsWith('3g2')) {
|
|
653
|
-
return {ext: '3g2', mime: 'video/3gpp2'};
|
|
654
|
-
}
|
|
655
|
-
|
|
656
|
-
return {ext: '3gp', mime: 'video/3gpp'};
|
|
657
|
-
}
|
|
658
|
-
|
|
659
|
-
return {ext: 'mp4', mime: 'video/mp4'};
|
|
660
|
-
}
|
|
661
|
-
}
|
|
662
|
-
|
|
663
651
|
if (this.checkString('MThd')) {
|
|
664
652
|
return {
|
|
665
653
|
ext: 'mid',
|
|
@@ -738,17 +726,19 @@ export class FileTypeParser {
|
|
|
738
726
|
|
|
739
727
|
if (this.checkString('%PDF')) {
|
|
740
728
|
try {
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
729
|
+
const skipBytes = 1350;
|
|
730
|
+
if (skipBytes === await tokenizer.ignore(skipBytes)) {
|
|
731
|
+
const maxBufferSize = 10 * 1024 * 1024;
|
|
732
|
+
const buffer = new Uint8Array(Math.min(maxBufferSize, tokenizer.fileInfo.size - skipBytes));
|
|
733
|
+
await tokenizer.readBuffer(buffer, {mayBeLess: true});
|
|
734
|
+
|
|
735
|
+
// Check if this is an Adobe Illustrator file
|
|
736
|
+
if (includes(buffer, new TextEncoder().encode('AIPrivateData'))) {
|
|
737
|
+
return {
|
|
738
|
+
ext: 'ai',
|
|
739
|
+
mime: 'application/postscript',
|
|
740
|
+
};
|
|
741
|
+
}
|
|
752
742
|
}
|
|
753
743
|
} catch (error) {
|
|
754
744
|
// Swallow end of stream error if file is too small for the Adobe AI check
|
|
@@ -841,9 +831,9 @@ export class FileTypeParser {
|
|
|
841
831
|
}
|
|
842
832
|
|
|
843
833
|
const re = await readElement();
|
|
844
|
-
const
|
|
834
|
+
const documentType = await readChildren(re.len);
|
|
845
835
|
|
|
846
|
-
switch (
|
|
836
|
+
switch (documentType) {
|
|
847
837
|
case 'webm':
|
|
848
838
|
return {
|
|
849
839
|
ext: 'webm',
|
|
@@ -966,6 +956,13 @@ export class FileTypeParser {
|
|
|
966
956
|
};
|
|
967
957
|
}
|
|
968
958
|
|
|
959
|
+
if (this.check([0x04, 0x22, 0x4D, 0x18])) {
|
|
960
|
+
return {
|
|
961
|
+
ext: 'lz4',
|
|
962
|
+
mime: 'application/x-lz4', // Invented by us
|
|
963
|
+
};
|
|
964
|
+
}
|
|
965
|
+
|
|
969
966
|
// -- 5-byte signatures --
|
|
970
967
|
|
|
971
968
|
if (this.check([0x4F, 0x54, 0x54, 0x4F, 0x00])) {
|
|
@@ -1056,6 +1053,13 @@ export class FileTypeParser {
|
|
|
1056
1053
|
};
|
|
1057
1054
|
}
|
|
1058
1055
|
|
|
1056
|
+
if (this.checkString('DRACO')) {
|
|
1057
|
+
return {
|
|
1058
|
+
ext: 'drc',
|
|
1059
|
+
mime: 'application/vnd.google.draco', // Invented by us
|
|
1060
|
+
};
|
|
1061
|
+
}
|
|
1062
|
+
|
|
1059
1063
|
// -- 6-byte signatures --
|
|
1060
1064
|
|
|
1061
1065
|
if (this.check([0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00])) {
|
|
@@ -1241,6 +1245,66 @@ export class FileTypeParser {
|
|
|
1241
1245
|
};
|
|
1242
1246
|
}
|
|
1243
1247
|
|
|
1248
|
+
// File Type Box (https://en.wikipedia.org/wiki/ISO_base_media_file_format)
|
|
1249
|
+
// It's not required to be first, but it's recommended to be. Almost all ISO base media files start with `ftyp` box.
|
|
1250
|
+
// `ftyp` box must contain a brand major identifier, which must consist of ISO 8859-1 printable characters.
|
|
1251
|
+
// Here we check for 8859-1 printable characters (for simplicity, it's a mask which also catches one non-printable character).
|
|
1252
|
+
if (
|
|
1253
|
+
this.checkString('ftyp', {offset: 4})
|
|
1254
|
+
&& (this.buffer[8] & 0x60) !== 0x00 // Brand major, first character ASCII?
|
|
1255
|
+
) {
|
|
1256
|
+
// They all can have MIME `video/mp4` except `application/mp4` special-case which is hard to detect.
|
|
1257
|
+
// For some cases, we're specific, everything else falls to `video/mp4` with `mp4` extension.
|
|
1258
|
+
const brandMajor = new Token.StringType(4, 'latin1').get(this.buffer, 8).replace('\0', ' ').trim();
|
|
1259
|
+
switch (brandMajor) {
|
|
1260
|
+
case 'avif':
|
|
1261
|
+
case 'avis':
|
|
1262
|
+
return {ext: 'avif', mime: 'image/avif'};
|
|
1263
|
+
case 'mif1':
|
|
1264
|
+
return {ext: 'heic', mime: 'image/heif'};
|
|
1265
|
+
case 'msf1':
|
|
1266
|
+
return {ext: 'heic', mime: 'image/heif-sequence'};
|
|
1267
|
+
case 'heic':
|
|
1268
|
+
case 'heix':
|
|
1269
|
+
return {ext: 'heic', mime: 'image/heic'};
|
|
1270
|
+
case 'hevc':
|
|
1271
|
+
case 'hevx':
|
|
1272
|
+
return {ext: 'heic', mime: 'image/heic-sequence'};
|
|
1273
|
+
case 'qt':
|
|
1274
|
+
return {ext: 'mov', mime: 'video/quicktime'};
|
|
1275
|
+
case 'M4V':
|
|
1276
|
+
case 'M4VH':
|
|
1277
|
+
case 'M4VP':
|
|
1278
|
+
return {ext: 'm4v', mime: 'video/x-m4v'};
|
|
1279
|
+
case 'M4P':
|
|
1280
|
+
return {ext: 'm4p', mime: 'video/mp4'};
|
|
1281
|
+
case 'M4B':
|
|
1282
|
+
return {ext: 'm4b', mime: 'audio/mp4'};
|
|
1283
|
+
case 'M4A':
|
|
1284
|
+
return {ext: 'm4a', mime: 'audio/x-m4a'};
|
|
1285
|
+
case 'F4V':
|
|
1286
|
+
return {ext: 'f4v', mime: 'video/mp4'};
|
|
1287
|
+
case 'F4P':
|
|
1288
|
+
return {ext: 'f4p', mime: 'video/mp4'};
|
|
1289
|
+
case 'F4A':
|
|
1290
|
+
return {ext: 'f4a', mime: 'audio/mp4'};
|
|
1291
|
+
case 'F4B':
|
|
1292
|
+
return {ext: 'f4b', mime: 'audio/mp4'};
|
|
1293
|
+
case 'crx':
|
|
1294
|
+
return {ext: 'cr3', mime: 'image/x-canon-cr3'};
|
|
1295
|
+
default:
|
|
1296
|
+
if (brandMajor.startsWith('3g')) {
|
|
1297
|
+
if (brandMajor.startsWith('3g2')) {
|
|
1298
|
+
return {ext: '3g2', mime: 'video/3gpp2'};
|
|
1299
|
+
}
|
|
1300
|
+
|
|
1301
|
+
return {ext: '3gp', mime: 'video/3gpp'};
|
|
1302
|
+
}
|
|
1303
|
+
|
|
1304
|
+
return {ext: 'mp4', mime: 'video/mp4'};
|
|
1305
|
+
}
|
|
1306
|
+
}
|
|
1307
|
+
|
|
1244
1308
|
// -- 12-byte signatures --
|
|
1245
1309
|
|
|
1246
1310
|
if (this.check([0x49, 0x49, 0x55, 0x00, 0x18, 0x00, 0x00, 0x00, 0x88, 0xE7, 0x74, 0xD8])) {
|
|
@@ -1380,39 +1444,6 @@ export class FileTypeParser {
|
|
|
1380
1444
|
return undefined; // Some unknown text based format
|
|
1381
1445
|
}
|
|
1382
1446
|
|
|
1383
|
-
// -- Unsafe signatures --
|
|
1384
|
-
|
|
1385
|
-
if (
|
|
1386
|
-
this.check([0x0, 0x0, 0x1, 0xBA])
|
|
1387
|
-
|| this.check([0x0, 0x0, 0x1, 0xB3])
|
|
1388
|
-
) {
|
|
1389
|
-
return {
|
|
1390
|
-
ext: 'mpg',
|
|
1391
|
-
mime: 'video/mpeg',
|
|
1392
|
-
};
|
|
1393
|
-
}
|
|
1394
|
-
|
|
1395
|
-
if (this.check([0x00, 0x01, 0x00, 0x00, 0x00])) {
|
|
1396
|
-
return {
|
|
1397
|
-
ext: 'ttf',
|
|
1398
|
-
mime: 'font/ttf',
|
|
1399
|
-
};
|
|
1400
|
-
}
|
|
1401
|
-
|
|
1402
|
-
if (this.check([0x00, 0x00, 0x01, 0x00])) {
|
|
1403
|
-
return {
|
|
1404
|
-
ext: 'ico',
|
|
1405
|
-
mime: 'image/x-icon',
|
|
1406
|
-
};
|
|
1407
|
-
}
|
|
1408
|
-
|
|
1409
|
-
if (this.check([0x00, 0x00, 0x02, 0x00])) {
|
|
1410
|
-
return {
|
|
1411
|
-
ext: 'cur',
|
|
1412
|
-
mime: 'image/x-icon',
|
|
1413
|
-
};
|
|
1414
|
-
}
|
|
1415
|
-
|
|
1416
1447
|
if (this.check([0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1])) {
|
|
1417
1448
|
// Detected Microsoft Compound File Binary File (MS-CFB) Format.
|
|
1418
1449
|
return {
|
|
@@ -1618,6 +1649,44 @@ export class FileTypeParser {
|
|
|
1618
1649
|
mime: 'application/pgp-encrypted',
|
|
1619
1650
|
};
|
|
1620
1651
|
}
|
|
1652
|
+
};
|
|
1653
|
+
// Detections with limited supporting data, resulting in a higher likelihood of false positives
|
|
1654
|
+
detectImprecise = async tokenizer => {
|
|
1655
|
+
this.buffer = new Uint8Array(reasonableDetectionSizeInBytes);
|
|
1656
|
+
|
|
1657
|
+
// Read initial sample size of 8 bytes
|
|
1658
|
+
await tokenizer.peekBuffer(this.buffer, {length: Math.min(8, tokenizer.fileInfo.size), mayBeLess: true});
|
|
1659
|
+
|
|
1660
|
+
if (
|
|
1661
|
+
this.check([0x0, 0x0, 0x1, 0xBA])
|
|
1662
|
+
|| this.check([0x0, 0x0, 0x1, 0xB3])
|
|
1663
|
+
) {
|
|
1664
|
+
return {
|
|
1665
|
+
ext: 'mpg',
|
|
1666
|
+
mime: 'video/mpeg',
|
|
1667
|
+
};
|
|
1668
|
+
}
|
|
1669
|
+
|
|
1670
|
+
if (this.check([0x00, 0x01, 0x00, 0x00, 0x00])) {
|
|
1671
|
+
return {
|
|
1672
|
+
ext: 'ttf',
|
|
1673
|
+
mime: 'font/ttf',
|
|
1674
|
+
};
|
|
1675
|
+
}
|
|
1676
|
+
|
|
1677
|
+
if (this.check([0x00, 0x00, 0x01, 0x00])) {
|
|
1678
|
+
return {
|
|
1679
|
+
ext: 'ico',
|
|
1680
|
+
mime: 'image/x-icon',
|
|
1681
|
+
};
|
|
1682
|
+
}
|
|
1683
|
+
|
|
1684
|
+
if (this.check([0x00, 0x00, 0x02, 0x00])) {
|
|
1685
|
+
return {
|
|
1686
|
+
ext: 'cur',
|
|
1687
|
+
mime: 'image/x-icon',
|
|
1688
|
+
};
|
|
1689
|
+
}
|
|
1621
1690
|
|
|
1622
1691
|
// Check MPEG 1 or 2 Layer 3 header, or 'layer 0' for ADTS (MPEG sync-word 0xFFE)
|
|
1623
1692
|
if (this.buffer.length >= 2 && this.check([0xFF, 0xE0], {offset: 0, mask: [0xFF, 0xE0]})) {
|
|
@@ -1662,7 +1731,7 @@ export class FileTypeParser {
|
|
|
1662
1731
|
};
|
|
1663
1732
|
}
|
|
1664
1733
|
}
|
|
1665
|
-
}
|
|
1734
|
+
};
|
|
1666
1735
|
|
|
1667
1736
|
async readTiffTag(bigEndian) {
|
|
1668
1737
|
const tagId = await this.tokenizer.readToken(bigEndian ? Token.UINT16_BE : Token.UINT16_LE);
|
|
@@ -1706,11 +1775,18 @@ export class FileTypeParser {
|
|
|
1706
1775
|
};
|
|
1707
1776
|
}
|
|
1708
1777
|
|
|
1709
|
-
if (ifdOffset >= 8
|
|
1710
|
-
|
|
1711
|
-
|
|
1712
|
-
|
|
1713
|
-
|
|
1778
|
+
if (ifdOffset >= 8) {
|
|
1779
|
+
const someId1 = (bigEndian ? Token.UINT16_BE : Token.UINT16_LE).get(this.buffer, 8);
|
|
1780
|
+
const someId2 = (bigEndian ? Token.UINT16_BE : Token.UINT16_LE).get(this.buffer, 10);
|
|
1781
|
+
|
|
1782
|
+
if (
|
|
1783
|
+
(someId1 === 0x1C && someId2 === 0xFE)
|
|
1784
|
+
|| (someId1 === 0x1F && someId2 === 0x0B)) {
|
|
1785
|
+
return {
|
|
1786
|
+
ext: 'nef',
|
|
1787
|
+
mime: 'image/x-nikon-nef',
|
|
1788
|
+
};
|
|
1789
|
+
}
|
|
1714
1790
|
}
|
|
1715
1791
|
}
|
|
1716
1792
|
|