file-type 19.5.0 → 20.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core.d.ts +36 -333
- package/core.js +324 -243
- package/index.d.ts +9 -3
- package/index.js +14 -8
- package/package.json +28 -9
- package/readme.md +89 -28
- package/supported.js +37 -1
package/core.js
CHANGED
|
@@ -4,7 +4,8 @@ Primary entry point, Node.js specific entry point is index.js
|
|
|
4
4
|
|
|
5
5
|
import * as Token from 'token-types';
|
|
6
6
|
import * as strtok3 from 'strtok3/core';
|
|
7
|
-
import {
|
|
7
|
+
import {ZipHandler} from '@tokenizer/inflate';
|
|
8
|
+
import {includes, getUintBE} from 'uint8array-extras';
|
|
8
9
|
import {
|
|
9
10
|
stringToBytes,
|
|
10
11
|
tarHeaderChecksumMatches,
|
|
@@ -26,6 +27,127 @@ export async function fileTypeFromBlob(blob) {
|
|
|
26
27
|
return new FileTypeParser().fromBlob(blob);
|
|
27
28
|
}
|
|
28
29
|
|
|
30
|
+
function getFileTypeFromMimeType(mimeType) {
|
|
31
|
+
switch (mimeType) {
|
|
32
|
+
case 'application/epub+zip':
|
|
33
|
+
return {
|
|
34
|
+
ext: 'epub',
|
|
35
|
+
mime: 'application/epub+zip',
|
|
36
|
+
};
|
|
37
|
+
case 'application/vnd.oasis.opendocument.text':
|
|
38
|
+
return {
|
|
39
|
+
ext: 'odt',
|
|
40
|
+
mime: 'application/vnd.oasis.opendocument.text',
|
|
41
|
+
};
|
|
42
|
+
case 'application/vnd.oasis.opendocument.text-template':
|
|
43
|
+
return {
|
|
44
|
+
ext: 'ott',
|
|
45
|
+
mime: 'application/vnd.oasis.opendocument.text-template',
|
|
46
|
+
};
|
|
47
|
+
case 'application/vnd.oasis.opendocument.spreadsheet':
|
|
48
|
+
return {
|
|
49
|
+
ext: 'ods',
|
|
50
|
+
mime: 'application/vnd.oasis.opendocument.spreadsheet',
|
|
51
|
+
};
|
|
52
|
+
case 'application/vnd.oasis.opendocument.spreadsheet-template':
|
|
53
|
+
return {
|
|
54
|
+
ext: 'ots',
|
|
55
|
+
mime: 'application/vnd.oasis.opendocument.spreadsheet-template',
|
|
56
|
+
};
|
|
57
|
+
case 'application/vnd.oasis.opendocument.presentation':
|
|
58
|
+
return {
|
|
59
|
+
ext: 'odp',
|
|
60
|
+
mime: 'application/vnd.oasis.opendocument.presentation',
|
|
61
|
+
};
|
|
62
|
+
case 'application/vnd.oasis.opendocument.presentation-template':
|
|
63
|
+
return {
|
|
64
|
+
ext: 'otp',
|
|
65
|
+
mime: 'application/vnd.oasis.opendocument.presentation-template',
|
|
66
|
+
};
|
|
67
|
+
case 'application/vnd.oasis.opendocument.graphics':
|
|
68
|
+
return {
|
|
69
|
+
ext: 'odg',
|
|
70
|
+
mime: 'application/vnd.oasis.opendocument.graphics',
|
|
71
|
+
};
|
|
72
|
+
case 'application/vnd.oasis.opendocument.graphics-template':
|
|
73
|
+
return {
|
|
74
|
+
ext: 'otg',
|
|
75
|
+
mime: 'application/vnd.oasis.opendocument.graphics-template',
|
|
76
|
+
};
|
|
77
|
+
case 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':
|
|
78
|
+
return {
|
|
79
|
+
ext: 'xlsx',
|
|
80
|
+
mime: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
|
81
|
+
};
|
|
82
|
+
case 'application/vnd.ms-excel.sheet.macroEnabled':
|
|
83
|
+
return {
|
|
84
|
+
ext: 'xlsm',
|
|
85
|
+
mime: 'application/vnd.ms-excel.sheet.macroEnabled.12',
|
|
86
|
+
};
|
|
87
|
+
case 'application/vnd.openxmlformats-officedocument.spreadsheetml.template':
|
|
88
|
+
return {
|
|
89
|
+
ext: 'xltx',
|
|
90
|
+
mime: 'application/vnd.openxmlformats-officedocument.spreadsheetml.template',
|
|
91
|
+
};
|
|
92
|
+
case 'application/vnd.ms-excel.template.macroEnabled':
|
|
93
|
+
return {
|
|
94
|
+
ext: 'xltm',
|
|
95
|
+
mime: 'application/vnd.ms-excel.template.macroenabled.12',
|
|
96
|
+
};
|
|
97
|
+
case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
|
|
98
|
+
return {
|
|
99
|
+
ext: 'docx',
|
|
100
|
+
mime: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
|
101
|
+
};
|
|
102
|
+
case 'application/vnd.ms-word.document.macroEnabled':
|
|
103
|
+
return {
|
|
104
|
+
ext: 'docm',
|
|
105
|
+
mime: 'application/vnd.ms-word.document.macroEnabled.12',
|
|
106
|
+
};
|
|
107
|
+
case 'application/vnd.openxmlformats-officedocument.wordprocessingml.template':
|
|
108
|
+
return {
|
|
109
|
+
ext: 'dotx',
|
|
110
|
+
mime: 'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
|
|
111
|
+
};
|
|
112
|
+
case 'application/vnd.ms-word.template.macroEnabledTemplate':
|
|
113
|
+
return {
|
|
114
|
+
ext: 'dotm',
|
|
115
|
+
mime: 'application/vnd.ms-word.template.macroEnabled.12',
|
|
116
|
+
};
|
|
117
|
+
case 'application/vnd.openxmlformats-officedocument.presentationml.template':
|
|
118
|
+
return {
|
|
119
|
+
ext: 'potx',
|
|
120
|
+
mime: 'application/vnd.openxmlformats-officedocument.presentationml.template',
|
|
121
|
+
};
|
|
122
|
+
case 'application/vnd.ms-powerpoint.template.macroEnabled':
|
|
123
|
+
return {
|
|
124
|
+
ext: 'potm',
|
|
125
|
+
mime: 'application/vnd.ms-powerpoint.template.macroEnabled.12',
|
|
126
|
+
};
|
|
127
|
+
case 'application/vnd.openxmlformats-officedocument.presentationml.presentation':
|
|
128
|
+
return {
|
|
129
|
+
ext: 'pptx',
|
|
130
|
+
mime: 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
|
131
|
+
};
|
|
132
|
+
case 'application/vnd.ms-powerpoint.presentation.macroEnabled':
|
|
133
|
+
return {
|
|
134
|
+
ext: 'pptm',
|
|
135
|
+
mime: 'application/vnd.ms-powerpoint.presentation.macroEnabled.12',
|
|
136
|
+
};
|
|
137
|
+
case 'application/vnd.ms-visio.drawing':
|
|
138
|
+
return {
|
|
139
|
+
ext: 'vsdx',
|
|
140
|
+
mime: 'application/vnd.visio',
|
|
141
|
+
};
|
|
142
|
+
case 'application/vnd.ms-package.3dmanufacturing-3dmodel+xml':
|
|
143
|
+
return {
|
|
144
|
+
ext: '3mf',
|
|
145
|
+
mime: 'model/3mf',
|
|
146
|
+
};
|
|
147
|
+
default:
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
|
|
29
151
|
function _check(buffer, headers, options) {
|
|
30
152
|
options = {
|
|
31
153
|
offset: 0,
|
|
@@ -57,18 +179,20 @@ export async function fileTypeStream(webStream, options) {
|
|
|
57
179
|
|
|
58
180
|
export class FileTypeParser {
|
|
59
181
|
constructor(options) {
|
|
60
|
-
this.detectors = options?.customDetectors
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
this.
|
|
64
|
-
|
|
182
|
+
this.detectors = [...(options?.customDetectors ?? []),
|
|
183
|
+
{id: 'core', detect: this.detectConfident},
|
|
184
|
+
{id: 'core.imprecise', detect: this.detectImprecise}];
|
|
185
|
+
this.tokenizerOptions = {
|
|
186
|
+
abortSignal: options?.signal,
|
|
187
|
+
};
|
|
65
188
|
}
|
|
66
189
|
|
|
67
190
|
async fromTokenizer(tokenizer) {
|
|
68
191
|
const initialPosition = tokenizer.position;
|
|
69
192
|
|
|
70
|
-
|
|
71
|
-
|
|
193
|
+
// Iterate through all file-type detectors
|
|
194
|
+
for (const detector of this.detectors) {
|
|
195
|
+
const fileType = await detector.detect(tokenizer);
|
|
72
196
|
if (fileType) {
|
|
73
197
|
return fileType;
|
|
74
198
|
}
|
|
@@ -77,8 +201,6 @@ export class FileTypeParser {
|
|
|
77
201
|
return undefined; // Cannot proceed scanning of the tokenizer is at an arbitrary position
|
|
78
202
|
}
|
|
79
203
|
}
|
|
80
|
-
|
|
81
|
-
return this.parse(tokenizer);
|
|
82
204
|
}
|
|
83
205
|
|
|
84
206
|
async fromBuffer(input) {
|
|
@@ -92,7 +214,7 @@ export class FileTypeParser {
|
|
|
92
214
|
return;
|
|
93
215
|
}
|
|
94
216
|
|
|
95
|
-
return this.fromTokenizer(strtok3.fromBuffer(buffer));
|
|
217
|
+
return this.fromTokenizer(strtok3.fromBuffer(buffer, this.tokenizerOptions));
|
|
96
218
|
}
|
|
97
219
|
|
|
98
220
|
async fromBlob(blob) {
|
|
@@ -100,7 +222,7 @@ export class FileTypeParser {
|
|
|
100
222
|
}
|
|
101
223
|
|
|
102
224
|
async fromStream(stream) {
|
|
103
|
-
const tokenizer = await strtok3.fromWebStream(stream);
|
|
225
|
+
const tokenizer = await strtok3.fromWebStream(stream, this.tokenizerOptions);
|
|
104
226
|
try {
|
|
105
227
|
return await this.fromTokenizer(tokenizer);
|
|
106
228
|
} finally {
|
|
@@ -161,7 +283,8 @@ export class FileTypeParser {
|
|
|
161
283
|
return this.check(stringToBytes(header), options);
|
|
162
284
|
}
|
|
163
285
|
|
|
164
|
-
|
|
286
|
+
// Detections with a high degree of certainty in identifying the correct file type
|
|
287
|
+
detectConfident = async tokenizer => {
|
|
165
288
|
this.buffer = new Uint8Array(reasonableDetectionSizeInBytes);
|
|
166
289
|
|
|
167
290
|
// Keep reading until EOF if the file size is unknown.
|
|
@@ -251,7 +374,7 @@ export class FileTypeParser {
|
|
|
251
374
|
if (this.check([0xEF, 0xBB, 0xBF])) { // UTF-8-BOM
|
|
252
375
|
// Strip off UTF-8-BOM
|
|
253
376
|
this.tokenizer.ignore(3);
|
|
254
|
-
return this.
|
|
377
|
+
return this.detectConfident(tokenizer);
|
|
255
378
|
}
|
|
256
379
|
|
|
257
380
|
if (this.check([0x47, 0x49, 0x46])) {
|
|
@@ -385,133 +508,69 @@ export class FileTypeParser {
|
|
|
385
508
|
// Zip-based file formats
|
|
386
509
|
// Need to be before the `zip` check
|
|
387
510
|
if (this.check([0x50, 0x4B, 0x3, 0x4])) { // Local file header signature
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
// https://en.wikipedia.org/wiki/Zip_(file_format)#File_headers
|
|
395
|
-
const zipHeader = {
|
|
396
|
-
compressedSize: view.getUint32(18, true),
|
|
397
|
-
uncompressedSize: view.getUint32(22, true),
|
|
398
|
-
filenameLength: view.getUint16(26, true),
|
|
399
|
-
extraFieldLength: view.getUint16(28, true),
|
|
400
|
-
};
|
|
401
|
-
|
|
402
|
-
zipHeader.filename = await tokenizer.readToken(new Token.StringType(zipHeader.filenameLength, 'utf-8'));
|
|
403
|
-
await tokenizer.ignore(zipHeader.extraFieldLength);
|
|
404
|
-
|
|
405
|
-
// Assumes signed `.xpi` from addons.mozilla.org
|
|
406
|
-
if (zipHeader.filename === 'META-INF/mozilla.rsa') {
|
|
407
|
-
return {
|
|
511
|
+
let fileType;
|
|
512
|
+
await new ZipHandler(tokenizer).unzip(zipHeader => {
|
|
513
|
+
switch (zipHeader.filename) {
|
|
514
|
+
case 'META-INF/mozilla.rsa':
|
|
515
|
+
fileType = {
|
|
408
516
|
ext: 'xpi',
|
|
409
517
|
mime: 'application/x-xpinstall',
|
|
410
518
|
};
|
|
411
|
-
}
|
|
412
|
-
|
|
413
|
-
if (zipHeader.filename.endsWith('.rels') || zipHeader.filename.endsWith('.xml')) {
|
|
414
|
-
const type = zipHeader.filename.split('/')[0];
|
|
415
|
-
switch (type) {
|
|
416
|
-
case '_rels':
|
|
417
|
-
break;
|
|
418
|
-
case 'word':
|
|
419
|
-
return {
|
|
420
|
-
ext: 'docx',
|
|
421
|
-
mime: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
|
422
|
-
};
|
|
423
|
-
case 'ppt':
|
|
424
|
-
return {
|
|
425
|
-
ext: 'pptx',
|
|
426
|
-
mime: 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
|
427
|
-
};
|
|
428
|
-
case 'xl':
|
|
429
|
-
return {
|
|
430
|
-
ext: 'xlsx',
|
|
431
|
-
mime: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
|
432
|
-
};
|
|
433
|
-
case 'visio':
|
|
434
|
-
return {
|
|
435
|
-
ext: 'vsdx',
|
|
436
|
-
mime: 'application/vnd.visio',
|
|
437
|
-
};
|
|
438
|
-
default:
|
|
439
|
-
break;
|
|
440
|
-
}
|
|
441
|
-
}
|
|
442
|
-
|
|
443
|
-
if (zipHeader.filename.startsWith('xl/')) {
|
|
444
519
|
return {
|
|
445
|
-
|
|
446
|
-
|
|
520
|
+
stop: true,
|
|
521
|
+
};
|
|
522
|
+
case 'META-INF/MANIFEST.MF':
|
|
523
|
+
fileType = {
|
|
524
|
+
ext: 'jar',
|
|
525
|
+
mime: 'application/java-archive',
|
|
447
526
|
};
|
|
448
|
-
}
|
|
449
|
-
|
|
450
|
-
if (zipHeader.filename.startsWith('3D/') && zipHeader.filename.endsWith('.model')) {
|
|
451
527
|
return {
|
|
452
|
-
|
|
453
|
-
|
|
528
|
+
stop: true,
|
|
529
|
+
};
|
|
530
|
+
case 'mimetype':
|
|
531
|
+
return {
|
|
532
|
+
async handler(fileData) {
|
|
533
|
+
// Use TextDecoder to decode the UTF-8 encoded data
|
|
534
|
+
const mimeType = new TextDecoder('utf-8').decode(fileData).trim();
|
|
535
|
+
fileType = getFileTypeFromMimeType(mimeType);
|
|
536
|
+
},
|
|
537
|
+
stop: true,
|
|
454
538
|
};
|
|
455
|
-
}
|
|
456
539
|
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
return {
|
|
485
|
-
ext: 'odp',
|
|
486
|
-
mime: 'application/vnd.oasis.opendocument.presentation',
|
|
487
|
-
};
|
|
488
|
-
default:
|
|
540
|
+
case '[Content_Types].xml':
|
|
541
|
+
return {
|
|
542
|
+
async handler(fileData) {
|
|
543
|
+
// Use TextDecoder to decode the UTF-8 encoded data
|
|
544
|
+
let xmlContent = new TextDecoder('utf-8').decode(fileData);
|
|
545
|
+
const endPos = xmlContent.indexOf('.main+xml"');
|
|
546
|
+
if (endPos === -1) {
|
|
547
|
+
const mimeType = 'application/vnd.ms-package.3dmanufacturing-3dmodel+xml';
|
|
548
|
+
if (xmlContent.includes(`ContentType="${mimeType}"`)) {
|
|
549
|
+
fileType = getFileTypeFromMimeType(mimeType);
|
|
550
|
+
}
|
|
551
|
+
} else {
|
|
552
|
+
xmlContent = xmlContent.slice(0, Math.max(0, endPos));
|
|
553
|
+
const firstPos = xmlContent.lastIndexOf('"');
|
|
554
|
+
const mimeType = xmlContent.slice(Math.max(0, firstPos + 1));
|
|
555
|
+
fileType = getFileTypeFromMimeType(mimeType);
|
|
556
|
+
}
|
|
557
|
+
},
|
|
558
|
+
stop: true,
|
|
559
|
+
};
|
|
560
|
+
default:
|
|
561
|
+
if (/classes\d*\.dex/.test(zipHeader.filename)) {
|
|
562
|
+
fileType = {
|
|
563
|
+
ext: 'apk',
|
|
564
|
+
mime: 'application/vnd.android.package-archive',
|
|
565
|
+
};
|
|
566
|
+
return {stop: true};
|
|
489
567
|
}
|
|
490
|
-
}
|
|
491
|
-
|
|
492
|
-
// Try to find next header manually when current one is corrupted
|
|
493
|
-
if (zipHeader.compressedSize === 0) {
|
|
494
|
-
let nextHeaderIndex = -1;
|
|
495
|
-
|
|
496
|
-
while (nextHeaderIndex < 0 && (tokenizer.position < tokenizer.fileInfo.size)) {
|
|
497
|
-
await tokenizer.peekBuffer(this.buffer, {mayBeLess: true});
|
|
498
568
|
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
// Move position to the next header if found, skip the whole buffer otherwise
|
|
502
|
-
await tokenizer.ignore(nextHeaderIndex >= 0 ? nextHeaderIndex : this.buffer.length);
|
|
503
|
-
}
|
|
504
|
-
} else {
|
|
505
|
-
await tokenizer.ignore(zipHeader.compressedSize);
|
|
506
|
-
}
|
|
507
|
-
}
|
|
508
|
-
} catch (error) {
|
|
509
|
-
if (!(error instanceof strtok3.EndOfStreamError)) {
|
|
510
|
-
throw error;
|
|
569
|
+
return {};
|
|
511
570
|
}
|
|
512
|
-
}
|
|
571
|
+
});
|
|
513
572
|
|
|
514
|
-
return {
|
|
573
|
+
return fileType ?? {
|
|
515
574
|
ext: 'zip',
|
|
516
575
|
mime: 'application/zip',
|
|
517
576
|
};
|
|
@@ -527,7 +586,7 @@ export class FileTypeParser {
|
|
|
527
586
|
if (_check(type, [0x4F, 0x70, 0x75, 0x73, 0x48, 0x65, 0x61, 0x64])) {
|
|
528
587
|
return {
|
|
529
588
|
ext: 'opus',
|
|
530
|
-
mime: 'audio/opus',
|
|
589
|
+
mime: 'audio/ogg; codecs=opus',
|
|
531
590
|
};
|
|
532
591
|
}
|
|
533
592
|
|
|
@@ -589,68 +648,6 @@ export class FileTypeParser {
|
|
|
589
648
|
};
|
|
590
649
|
}
|
|
591
650
|
|
|
592
|
-
//
|
|
593
|
-
|
|
594
|
-
// File Type Box (https://en.wikipedia.org/wiki/ISO_base_media_file_format)
|
|
595
|
-
// It's not required to be first, but it's recommended to be. Almost all ISO base media files start with `ftyp` box.
|
|
596
|
-
// `ftyp` box must contain a brand major identifier, which must consist of ISO 8859-1 printable characters.
|
|
597
|
-
// Here we check for 8859-1 printable characters (for simplicity, it's a mask which also catches one non-printable character).
|
|
598
|
-
if (
|
|
599
|
-
this.checkString('ftyp', {offset: 4})
|
|
600
|
-
&& (this.buffer[8] & 0x60) !== 0x00 // Brand major, first character ASCII?
|
|
601
|
-
) {
|
|
602
|
-
// They all can have MIME `video/mp4` except `application/mp4` special-case which is hard to detect.
|
|
603
|
-
// For some cases, we're specific, everything else falls to `video/mp4` with `mp4` extension.
|
|
604
|
-
const brandMajor = new Token.StringType(4, 'latin1').get(this.buffer, 8).replace('\0', ' ').trim();
|
|
605
|
-
switch (brandMajor) {
|
|
606
|
-
case 'avif':
|
|
607
|
-
case 'avis':
|
|
608
|
-
return {ext: 'avif', mime: 'image/avif'};
|
|
609
|
-
case 'mif1':
|
|
610
|
-
return {ext: 'heic', mime: 'image/heif'};
|
|
611
|
-
case 'msf1':
|
|
612
|
-
return {ext: 'heic', mime: 'image/heif-sequence'};
|
|
613
|
-
case 'heic':
|
|
614
|
-
case 'heix':
|
|
615
|
-
return {ext: 'heic', mime: 'image/heic'};
|
|
616
|
-
case 'hevc':
|
|
617
|
-
case 'hevx':
|
|
618
|
-
return {ext: 'heic', mime: 'image/heic-sequence'};
|
|
619
|
-
case 'qt':
|
|
620
|
-
return {ext: 'mov', mime: 'video/quicktime'};
|
|
621
|
-
case 'M4V':
|
|
622
|
-
case 'M4VH':
|
|
623
|
-
case 'M4VP':
|
|
624
|
-
return {ext: 'm4v', mime: 'video/x-m4v'};
|
|
625
|
-
case 'M4P':
|
|
626
|
-
return {ext: 'm4p', mime: 'video/mp4'};
|
|
627
|
-
case 'M4B':
|
|
628
|
-
return {ext: 'm4b', mime: 'audio/mp4'};
|
|
629
|
-
case 'M4A':
|
|
630
|
-
return {ext: 'm4a', mime: 'audio/x-m4a'};
|
|
631
|
-
case 'F4V':
|
|
632
|
-
return {ext: 'f4v', mime: 'video/mp4'};
|
|
633
|
-
case 'F4P':
|
|
634
|
-
return {ext: 'f4p', mime: 'video/mp4'};
|
|
635
|
-
case 'F4A':
|
|
636
|
-
return {ext: 'f4a', mime: 'audio/mp4'};
|
|
637
|
-
case 'F4B':
|
|
638
|
-
return {ext: 'f4b', mime: 'audio/mp4'};
|
|
639
|
-
case 'crx':
|
|
640
|
-
return {ext: 'cr3', mime: 'image/x-canon-cr3'};
|
|
641
|
-
default:
|
|
642
|
-
if (brandMajor.startsWith('3g')) {
|
|
643
|
-
if (brandMajor.startsWith('3g2')) {
|
|
644
|
-
return {ext: '3g2', mime: 'video/3gpp2'};
|
|
645
|
-
}
|
|
646
|
-
|
|
647
|
-
return {ext: '3gp', mime: 'video/3gpp'};
|
|
648
|
-
}
|
|
649
|
-
|
|
650
|
-
return {ext: 'mp4', mime: 'video/mp4'};
|
|
651
|
-
}
|
|
652
|
-
}
|
|
653
|
-
|
|
654
651
|
if (this.checkString('MThd')) {
|
|
655
652
|
return {
|
|
656
653
|
ext: 'mid',
|
|
@@ -832,9 +829,9 @@ export class FileTypeParser {
|
|
|
832
829
|
}
|
|
833
830
|
|
|
834
831
|
const re = await readElement();
|
|
835
|
-
const
|
|
832
|
+
const documentType = await readChildren(re.len);
|
|
836
833
|
|
|
837
|
-
switch (
|
|
834
|
+
switch (documentType) {
|
|
838
835
|
case 'webm':
|
|
839
836
|
return {
|
|
840
837
|
ext: 'webm',
|
|
@@ -957,6 +954,13 @@ export class FileTypeParser {
|
|
|
957
954
|
};
|
|
958
955
|
}
|
|
959
956
|
|
|
957
|
+
if (this.check([0x04, 0x22, 0x4D, 0x18])) {
|
|
958
|
+
return {
|
|
959
|
+
ext: 'lz4',
|
|
960
|
+
mime: 'application/x-lz4', // Invented by us
|
|
961
|
+
};
|
|
962
|
+
}
|
|
963
|
+
|
|
960
964
|
// -- 5-byte signatures --
|
|
961
965
|
|
|
962
966
|
if (this.check([0x4F, 0x54, 0x54, 0x4F, 0x00])) {
|
|
@@ -1047,6 +1051,13 @@ export class FileTypeParser {
|
|
|
1047
1051
|
};
|
|
1048
1052
|
}
|
|
1049
1053
|
|
|
1054
|
+
if (this.checkString('DRACO')) {
|
|
1055
|
+
return {
|
|
1056
|
+
ext: 'drc',
|
|
1057
|
+
mime: 'application/vnd.google.draco', // Invented by us
|
|
1058
|
+
};
|
|
1059
|
+
}
|
|
1060
|
+
|
|
1050
1061
|
// -- 6-byte signatures --
|
|
1051
1062
|
|
|
1052
1063
|
if (this.check([0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00])) {
|
|
@@ -1129,16 +1140,6 @@ export class FileTypeParser {
|
|
|
1129
1140
|
};
|
|
1130
1141
|
}
|
|
1131
1142
|
|
|
1132
|
-
if (this.checkString('**ACE', {offset: 7})) {
|
|
1133
|
-
await tokenizer.peekBuffer(this.buffer, {length: 14, mayBeLess: true});
|
|
1134
|
-
if (this.checkString('**', {offset: 12})) {
|
|
1135
|
-
return {
|
|
1136
|
-
ext: 'ace',
|
|
1137
|
-
mime: 'application/x-ace-compressed',
|
|
1138
|
-
};
|
|
1139
|
-
}
|
|
1140
|
-
}
|
|
1141
|
-
|
|
1142
1143
|
if (
|
|
1143
1144
|
this.checkString('WEBVTT')
|
|
1144
1145
|
&& (
|
|
@@ -1242,6 +1243,66 @@ export class FileTypeParser {
|
|
|
1242
1243
|
};
|
|
1243
1244
|
}
|
|
1244
1245
|
|
|
1246
|
+
// File Type Box (https://en.wikipedia.org/wiki/ISO_base_media_file_format)
|
|
1247
|
+
// It's not required to be first, but it's recommended to be. Almost all ISO base media files start with `ftyp` box.
|
|
1248
|
+
// `ftyp` box must contain a brand major identifier, which must consist of ISO 8859-1 printable characters.
|
|
1249
|
+
// Here we check for 8859-1 printable characters (for simplicity, it's a mask which also catches one non-printable character).
|
|
1250
|
+
if (
|
|
1251
|
+
this.checkString('ftyp', {offset: 4})
|
|
1252
|
+
&& (this.buffer[8] & 0x60) !== 0x00 // Brand major, first character ASCII?
|
|
1253
|
+
) {
|
|
1254
|
+
// They all can have MIME `video/mp4` except `application/mp4` special-case which is hard to detect.
|
|
1255
|
+
// For some cases, we're specific, everything else falls to `video/mp4` with `mp4` extension.
|
|
1256
|
+
const brandMajor = new Token.StringType(4, 'latin1').get(this.buffer, 8).replace('\0', ' ').trim();
|
|
1257
|
+
switch (brandMajor) {
|
|
1258
|
+
case 'avif':
|
|
1259
|
+
case 'avis':
|
|
1260
|
+
return {ext: 'avif', mime: 'image/avif'};
|
|
1261
|
+
case 'mif1':
|
|
1262
|
+
return {ext: 'heic', mime: 'image/heif'};
|
|
1263
|
+
case 'msf1':
|
|
1264
|
+
return {ext: 'heic', mime: 'image/heif-sequence'};
|
|
1265
|
+
case 'heic':
|
|
1266
|
+
case 'heix':
|
|
1267
|
+
return {ext: 'heic', mime: 'image/heic'};
|
|
1268
|
+
case 'hevc':
|
|
1269
|
+
case 'hevx':
|
|
1270
|
+
return {ext: 'heic', mime: 'image/heic-sequence'};
|
|
1271
|
+
case 'qt':
|
|
1272
|
+
return {ext: 'mov', mime: 'video/quicktime'};
|
|
1273
|
+
case 'M4V':
|
|
1274
|
+
case 'M4VH':
|
|
1275
|
+
case 'M4VP':
|
|
1276
|
+
return {ext: 'm4v', mime: 'video/x-m4v'};
|
|
1277
|
+
case 'M4P':
|
|
1278
|
+
return {ext: 'm4p', mime: 'video/mp4'};
|
|
1279
|
+
case 'M4B':
|
|
1280
|
+
return {ext: 'm4b', mime: 'audio/mp4'};
|
|
1281
|
+
case 'M4A':
|
|
1282
|
+
return {ext: 'm4a', mime: 'audio/x-m4a'};
|
|
1283
|
+
case 'F4V':
|
|
1284
|
+
return {ext: 'f4v', mime: 'video/mp4'};
|
|
1285
|
+
case 'F4P':
|
|
1286
|
+
return {ext: 'f4p', mime: 'video/mp4'};
|
|
1287
|
+
case 'F4A':
|
|
1288
|
+
return {ext: 'f4a', mime: 'audio/mp4'};
|
|
1289
|
+
case 'F4B':
|
|
1290
|
+
return {ext: 'f4b', mime: 'audio/mp4'};
|
|
1291
|
+
case 'crx':
|
|
1292
|
+
return {ext: 'cr3', mime: 'image/x-canon-cr3'};
|
|
1293
|
+
default:
|
|
1294
|
+
if (brandMajor.startsWith('3g')) {
|
|
1295
|
+
if (brandMajor.startsWith('3g2')) {
|
|
1296
|
+
return {ext: '3g2', mime: 'video/3gpp2'};
|
|
1297
|
+
}
|
|
1298
|
+
|
|
1299
|
+
return {ext: '3gp', mime: 'video/3gpp'};
|
|
1300
|
+
}
|
|
1301
|
+
|
|
1302
|
+
return {ext: 'mp4', mime: 'video/mp4'};
|
|
1303
|
+
}
|
|
1304
|
+
}
|
|
1305
|
+
|
|
1245
1306
|
// -- 12-byte signatures --
|
|
1246
1307
|
|
|
1247
1308
|
if (this.check([0x49, 0x49, 0x55, 0x00, 0x18, 0x00, 0x00, 0x00, 0x88, 0xE7, 0x74, 0xD8])) {
|
|
@@ -1381,39 +1442,6 @@ export class FileTypeParser {
|
|
|
1381
1442
|
return undefined; // Some unknown text based format
|
|
1382
1443
|
}
|
|
1383
1444
|
|
|
1384
|
-
// -- Unsafe signatures --
|
|
1385
|
-
|
|
1386
|
-
if (
|
|
1387
|
-
this.check([0x0, 0x0, 0x1, 0xBA])
|
|
1388
|
-
|| this.check([0x0, 0x0, 0x1, 0xB3])
|
|
1389
|
-
) {
|
|
1390
|
-
return {
|
|
1391
|
-
ext: 'mpg',
|
|
1392
|
-
mime: 'video/mpeg',
|
|
1393
|
-
};
|
|
1394
|
-
}
|
|
1395
|
-
|
|
1396
|
-
if (this.check([0x00, 0x01, 0x00, 0x00, 0x00])) {
|
|
1397
|
-
return {
|
|
1398
|
-
ext: 'ttf',
|
|
1399
|
-
mime: 'font/ttf',
|
|
1400
|
-
};
|
|
1401
|
-
}
|
|
1402
|
-
|
|
1403
|
-
if (this.check([0x00, 0x00, 0x01, 0x00])) {
|
|
1404
|
-
return {
|
|
1405
|
-
ext: 'ico',
|
|
1406
|
-
mime: 'image/x-icon',
|
|
1407
|
-
};
|
|
1408
|
-
}
|
|
1409
|
-
|
|
1410
|
-
if (this.check([0x00, 0x00, 0x02, 0x00])) {
|
|
1411
|
-
return {
|
|
1412
|
-
ext: 'cur',
|
|
1413
|
-
mime: 'image/x-icon',
|
|
1414
|
-
};
|
|
1415
|
-
}
|
|
1416
|
-
|
|
1417
1445
|
if (this.check([0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1])) {
|
|
1418
1446
|
// Detected Microsoft Compound File Binary File (MS-CFB) Format.
|
|
1419
1447
|
return {
|
|
@@ -1432,6 +1460,14 @@ export class FileTypeParser {
|
|
|
1432
1460
|
};
|
|
1433
1461
|
}
|
|
1434
1462
|
|
|
1463
|
+
// ACE: requires 14 bytes in the buffer
|
|
1464
|
+
if (this.checkString('**ACE', {offset: 7}) && this.checkString('**', {offset: 12})) {
|
|
1465
|
+
return {
|
|
1466
|
+
ext: 'ace',
|
|
1467
|
+
mime: 'application/x-ace-compressed',
|
|
1468
|
+
};
|
|
1469
|
+
}
|
|
1470
|
+
|
|
1435
1471
|
// -- 15-byte signatures --
|
|
1436
1472
|
|
|
1437
1473
|
if (this.checkString('BEGIN:')) {
|
|
@@ -1611,6 +1647,44 @@ export class FileTypeParser {
|
|
|
1611
1647
|
mime: 'application/pgp-encrypted',
|
|
1612
1648
|
};
|
|
1613
1649
|
}
|
|
1650
|
+
};
|
|
1651
|
+
// Detections with limited supporting data, resulting in a higher likelihood of false positives
|
|
1652
|
+
detectImprecise = async tokenizer => {
|
|
1653
|
+
this.buffer = new Uint8Array(reasonableDetectionSizeInBytes);
|
|
1654
|
+
|
|
1655
|
+
// Read initial sample size of 8 bytes
|
|
1656
|
+
await tokenizer.peekBuffer(this.buffer, {length: Math.min(8, tokenizer.fileInfo.size), mayBeLess: true});
|
|
1657
|
+
|
|
1658
|
+
if (
|
|
1659
|
+
this.check([0x0, 0x0, 0x1, 0xBA])
|
|
1660
|
+
|| this.check([0x0, 0x0, 0x1, 0xB3])
|
|
1661
|
+
) {
|
|
1662
|
+
return {
|
|
1663
|
+
ext: 'mpg',
|
|
1664
|
+
mime: 'video/mpeg',
|
|
1665
|
+
};
|
|
1666
|
+
}
|
|
1667
|
+
|
|
1668
|
+
if (this.check([0x00, 0x01, 0x00, 0x00, 0x00])) {
|
|
1669
|
+
return {
|
|
1670
|
+
ext: 'ttf',
|
|
1671
|
+
mime: 'font/ttf',
|
|
1672
|
+
};
|
|
1673
|
+
}
|
|
1674
|
+
|
|
1675
|
+
if (this.check([0x00, 0x00, 0x01, 0x00])) {
|
|
1676
|
+
return {
|
|
1677
|
+
ext: 'ico',
|
|
1678
|
+
mime: 'image/x-icon',
|
|
1679
|
+
};
|
|
1680
|
+
}
|
|
1681
|
+
|
|
1682
|
+
if (this.check([0x00, 0x00, 0x02, 0x00])) {
|
|
1683
|
+
return {
|
|
1684
|
+
ext: 'cur',
|
|
1685
|
+
mime: 'image/x-icon',
|
|
1686
|
+
};
|
|
1687
|
+
}
|
|
1614
1688
|
|
|
1615
1689
|
// Check MPEG 1 or 2 Layer 3 header, or 'layer 0' for ADTS (MPEG sync-word 0xFFE)
|
|
1616
1690
|
if (this.buffer.length >= 2 && this.check([0xFF, 0xE0], {offset: 0, mask: [0xFF, 0xE0]})) {
|
|
@@ -1655,7 +1729,7 @@ export class FileTypeParser {
|
|
|
1655
1729
|
};
|
|
1656
1730
|
}
|
|
1657
1731
|
}
|
|
1658
|
-
}
|
|
1732
|
+
};
|
|
1659
1733
|
|
|
1660
1734
|
async readTiffTag(bigEndian) {
|
|
1661
1735
|
const tagId = await this.tokenizer.readToken(bigEndian ? Token.UINT16_BE : Token.UINT16_LE);
|
|
@@ -1699,11 +1773,18 @@ export class FileTypeParser {
|
|
|
1699
1773
|
};
|
|
1700
1774
|
}
|
|
1701
1775
|
|
|
1702
|
-
if (ifdOffset >= 8
|
|
1703
|
-
|
|
1704
|
-
|
|
1705
|
-
|
|
1706
|
-
|
|
1776
|
+
if (ifdOffset >= 8) {
|
|
1777
|
+
const someId1 = (bigEndian ? Token.UINT16_BE : Token.UINT16_LE).get(this.buffer, 8);
|
|
1778
|
+
const someId2 = (bigEndian ? Token.UINT16_BE : Token.UINT16_LE).get(this.buffer, 10);
|
|
1779
|
+
|
|
1780
|
+
if (
|
|
1781
|
+
(someId1 === 0x1C && someId2 === 0xFE)
|
|
1782
|
+
|| (someId1 === 0x1F && someId2 === 0x0B)) {
|
|
1783
|
+
return {
|
|
1784
|
+
ext: 'nef',
|
|
1785
|
+
mime: 'image/x-nikon-nef',
|
|
1786
|
+
};
|
|
1787
|
+
}
|
|
1707
1788
|
}
|
|
1708
1789
|
}
|
|
1709
1790
|
|