file-type 20.5.0 → 21.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/core.d.ts CHANGED
@@ -24,16 +24,17 @@ export type FileTypeResult = {
24
24
  };
25
25
 
26
26
  /**
27
- Detect the file type of a `Uint8Array`, or `ArrayBuffer`.
27
+ Detect the file type of a `Uint8Array` or `ArrayBuffer`.
28
28
 
29
29
  The file type is detected by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files) of the buffer.
30
30
 
31
31
  If file access is available, it is recommended to use `.fromFile()` instead.
32
32
 
33
33
  @param buffer - A `Uint8Array` or `ArrayBuffer` representing file data. It works best if the buffer contains the entire file. It may work with a smaller portion as well.
34
+ @param options - Options to override default behavior.
34
35
  @returns The detected file type, or `undefined` when there is no match.
35
36
  */
36
- export function fileTypeFromBuffer(buffer: Uint8Array | ArrayBuffer): Promise<FileTypeResult | undefined>;
37
+ export function fileTypeFromBuffer(buffer: Uint8Array | ArrayBuffer, options?: FileTypeOptions): Promise<FileTypeResult | undefined>;
37
38
 
38
39
  /**
39
40
  Detect the file type of a [web `ReadableStream`](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream).
@@ -41,9 +42,10 @@ Detect the file type of a [web `ReadableStream`](https://developer.mozilla.org/e
41
42
  The file type is detected by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files) of the buffer.
42
43
 
43
44
  @param stream - A [web `ReadableStream`](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream) streaming a file to examine.
45
+ @param options - Options to override default behavior.
44
46
  @returns A `Promise` for an object with the detected file type, or `undefined` when there is no match.
45
47
  */
46
- export function fileTypeFromStream(stream: AnyWebByteStream): Promise<FileTypeResult | undefined>;
48
+ export function fileTypeFromStream(stream: AnyWebByteStream, options?: FileTypeOptions): Promise<FileTypeResult | undefined>;
47
49
 
48
50
  /**
49
51
  Detect the file type from an [`ITokenizer`](https://github.com/Borewit/strtok3#tokenizer) source.
@@ -53,6 +55,7 @@ This method is used internally, but can also be used for a special "tokenizer" r
53
55
  A tokenizer propagates the internal read functions, allowing alternative transport mechanisms for accessing files to be implemented and used.
54
56
 
55
57
  @param tokenizer - File source implementing the tokenizer interface.
58
+ @param options - Options to override default behavior.
56
59
  @returns The detected file type, or `undefined` when there is no match.
57
60
 
58
61
  An example is [`@tokenizer/http`](https://github.com/Borewit/tokenizer-http), which requests data using [HTTP-range-requests](https://developer.mozilla.org/en-US/docs/Web/HTTP/Range_requests). The difference between a conventional stream and the [*tokenizer*](https://github.com/Borewit/strtok3#tokenizer) is that the tokenizer is able to *ignore* (seek, fast-forward) parts of the stream. For example, you may only need to read the first 6 bytes and the last 128 bytes, which may be an advantage in case reading the entire file would take longer.
@@ -71,7 +74,7 @@ console.log(fileType);
71
74
  //=> {ext: 'mp3', mime: 'audio/mpeg'}
72
75
  ```
73
76
  */
74
- export function fileTypeFromTokenizer(tokenizer: ITokenizer): Promise<FileTypeResult | undefined>;
77
+ export function fileTypeFromTokenizer(tokenizer: ITokenizer, options?: FileTypeOptions): Promise<FileTypeResult | undefined>;
75
78
 
76
79
  /**
77
80
  Supported file extensions.
@@ -96,6 +99,7 @@ export type StreamOptions = {
96
99
  Detect the file type of a [`Blob`](https://nodejs.org/api/buffer.html#class-blob) or [`File`](https://developer.mozilla.org/en-US/docs/Web/API/File).
97
100
 
98
101
  @param blob - The [`Blob`](https://nodejs.org/api/buffer.html#class-blob) used for file detection.
102
+ @param options - Options to override default behavior.
99
103
  @returns The detected file type, or `undefined` when there is no match.
100
104
 
101
105
  @example
@@ -111,7 +115,7 @@ console.log(await fileTypeFromBlob(blob));
111
115
  //=> {ext: 'txt', mime: 'text/plain'}
112
116
  ```
113
117
  */
114
- export declare function fileTypeFromBlob(blob: Blob): Promise<FileTypeResult | undefined>;
118
+ export declare function fileTypeFromBlob(blob: Blob, options?: FileTypeOptions): Promise<FileTypeResult | undefined>;
115
119
 
116
120
  /**
117
121
  A custom file type detector.
@@ -184,6 +188,17 @@ export type Detector = {
184
188
 
185
189
  export type FileTypeOptions = {
186
190
  customDetectors?: Iterable<Detector>;
191
+
192
+ /**
193
+ Specifies the byte tolerance for locating the first MPEG audio frame (e.g. `.mp1`, `.mp2`, `.mp3`, `.aac`).
194
+
195
+ Allows detection to handle slight sync offsets between the expected and actual frame start. Such offsets are common in malformed or incorrectly muxed files, which, while technically invalid, do occur in the wild.
196
+
197
+ A tolerance of 10 bytes covers most cases.
198
+
199
+ @default 0
200
+ */
201
+ mpegOffsetTolerance?: number;
187
202
  };
188
203
 
189
204
  export declare class TokenizerPositionError extends Error {
package/core.js CHANGED
@@ -4,8 +4,8 @@ Primary entry point, Node.js specific entry point is index.js
4
4
 
5
5
  import * as Token from 'token-types';
6
6
  import * as strtok3 from 'strtok3/core';
7
- import {ZipHandler} from '@tokenizer/inflate';
8
- import {includes, getUintBE} from 'uint8array-extras';
7
+ import {ZipHandler, GzipHandler} from '@tokenizer/inflate';
8
+ import {getUintBE} from 'uint8array-extras';
9
9
  import {
10
10
  stringToBytes,
11
11
  tarHeaderChecksumMatches,
@@ -15,74 +15,75 @@ import {extensions, mimeTypes} from './supported.js';
15
15
 
16
16
  export const reasonableDetectionSizeInBytes = 4100; // A fair amount of file-types are detectable within this range.
17
17
 
18
- export async function fileTypeFromStream(stream) {
19
- return new FileTypeParser().fromStream(stream);
18
+ export async function fileTypeFromStream(stream, options) {
19
+ return new FileTypeParser(options).fromStream(stream);
20
20
  }
21
21
 
22
- export async function fileTypeFromBuffer(input) {
23
- return new FileTypeParser().fromBuffer(input);
22
+ export async function fileTypeFromBuffer(input, options) {
23
+ return new FileTypeParser(options).fromBuffer(input);
24
24
  }
25
25
 
26
- export async function fileTypeFromBlob(blob) {
27
- return new FileTypeParser().fromBlob(blob);
26
+ export async function fileTypeFromBlob(blob, options) {
27
+ return new FileTypeParser(options).fromBlob(blob);
28
28
  }
29
29
 
30
30
  function getFileTypeFromMimeType(mimeType) {
31
- switch (mimeType.toLowerCase()) {
31
+ mimeType = mimeType.toLowerCase();
32
+ switch (mimeType) {
32
33
  case 'application/epub+zip':
33
34
  return {
34
35
  ext: 'epub',
35
- mime: 'application/epub+zip',
36
+ mime: mimeType,
36
37
  };
37
38
  case 'application/vnd.oasis.opendocument.text':
38
39
  return {
39
40
  ext: 'odt',
40
- mime: 'application/vnd.oasis.opendocument.text',
41
+ mime: mimeType,
41
42
  };
42
43
  case 'application/vnd.oasis.opendocument.text-template':
43
44
  return {
44
45
  ext: 'ott',
45
- mime: 'application/vnd.oasis.opendocument.text-template',
46
+ mime: mimeType,
46
47
  };
47
48
  case 'application/vnd.oasis.opendocument.spreadsheet':
48
49
  return {
49
50
  ext: 'ods',
50
- mime: 'application/vnd.oasis.opendocument.spreadsheet',
51
+ mime: mimeType,
51
52
  };
52
53
  case 'application/vnd.oasis.opendocument.spreadsheet-template':
53
54
  return {
54
55
  ext: 'ots',
55
- mime: 'application/vnd.oasis.opendocument.spreadsheet-template',
56
+ mime: mimeType,
56
57
  };
57
58
  case 'application/vnd.oasis.opendocument.presentation':
58
59
  return {
59
60
  ext: 'odp',
60
- mime: 'application/vnd.oasis.opendocument.presentation',
61
+ mime: mimeType,
61
62
  };
62
63
  case 'application/vnd.oasis.opendocument.presentation-template':
63
64
  return {
64
65
  ext: 'otp',
65
- mime: 'application/vnd.oasis.opendocument.presentation-template',
66
+ mime: mimeType,
66
67
  };
67
68
  case 'application/vnd.oasis.opendocument.graphics':
68
69
  return {
69
70
  ext: 'odg',
70
- mime: 'application/vnd.oasis.opendocument.graphics',
71
+ mime: mimeType,
71
72
  };
72
73
  case 'application/vnd.oasis.opendocument.graphics-template':
73
74
  return {
74
75
  ext: 'otg',
75
- mime: 'application/vnd.oasis.opendocument.graphics-template',
76
+ mime: mimeType,
76
77
  };
77
78
  case 'application/vnd.openxmlformats-officedocument.presentationml.slideshow':
78
79
  return {
79
80
  ext: 'ppsx',
80
- mime: 'application/vnd.openxmlformats-officedocument.presentationml.slideshow',
81
+ mime: mimeType,
81
82
  };
82
83
  case 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':
83
84
  return {
84
85
  ext: 'xlsx',
85
- mime: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
86
+ mime: mimeType,
86
87
  };
87
88
  case 'application/vnd.ms-excel.sheet.macroenabled':
88
89
  return {
@@ -92,7 +93,7 @@ function getFileTypeFromMimeType(mimeType) {
92
93
  case 'application/vnd.openxmlformats-officedocument.spreadsheetml.template':
93
94
  return {
94
95
  ext: 'xltx',
95
- mime: 'application/vnd.openxmlformats-officedocument.spreadsheetml.template',
96
+ mime: mimeType,
96
97
  };
97
98
  case 'application/vnd.ms-excel.template.macroenabled':
98
99
  return {
@@ -107,7 +108,7 @@ function getFileTypeFromMimeType(mimeType) {
107
108
  case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
108
109
  return {
109
110
  ext: 'docx',
110
- mime: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
111
+ mime: mimeType,
111
112
  };
112
113
  case 'application/vnd.ms-word.document.macroenabled':
113
114
  return {
@@ -117,7 +118,7 @@ function getFileTypeFromMimeType(mimeType) {
117
118
  case 'application/vnd.openxmlformats-officedocument.wordprocessingml.template':
118
119
  return {
119
120
  ext: 'dotx',
120
- mime: 'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
121
+ mime: mimeType,
121
122
  };
122
123
  case 'application/vnd.ms-word.template.macroenabledtemplate':
123
124
  return {
@@ -127,7 +128,7 @@ function getFileTypeFromMimeType(mimeType) {
127
128
  case 'application/vnd.openxmlformats-officedocument.presentationml.template':
128
129
  return {
129
130
  ext: 'potx',
130
- mime: 'application/vnd.openxmlformats-officedocument.presentationml.template',
131
+ mime: mimeType,
131
132
  };
132
133
  case 'application/vnd.ms-powerpoint.template.macroenabled':
133
134
  return {
@@ -137,7 +138,7 @@ function getFileTypeFromMimeType(mimeType) {
137
138
  case 'application/vnd.openxmlformats-officedocument.presentationml.presentation':
138
139
  return {
139
140
  ext: 'pptx',
140
- mime: 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
141
+ mime: mimeType,
141
142
  };
142
143
  case 'application/vnd.ms-powerpoint.presentation.macroenabled':
143
144
  return {
@@ -179,8 +180,8 @@ function _check(buffer, headers, options) {
179
180
  return true;
180
181
  }
181
182
 
182
- export async function fileTypeFromTokenizer(tokenizer) {
183
- return new FileTypeParser().fromTokenizer(tokenizer);
183
+ export async function fileTypeFromTokenizer(tokenizer, options) {
184
+ return new FileTypeParser(options).fromTokenizer(tokenizer);
184
185
  }
185
186
 
186
187
  export async function fileTypeStream(webStream, options) {
@@ -189,6 +190,11 @@ export async function fileTypeStream(webStream, options) {
189
190
 
190
191
  export class FileTypeParser {
191
192
  constructor(options) {
193
+ this.options = {
194
+ mpegOffsetTolerance: 0,
195
+ ...options,
196
+ };
197
+
192
198
  this.detectors = [...(options?.customDetectors ?? []),
193
199
  {id: 'core', detect: this.detectConfident},
194
200
  {id: 'core.imprecise', detect: this.detectImprecise}];
@@ -228,11 +234,16 @@ export class FileTypeParser {
228
234
  }
229
235
 
230
236
  async fromBlob(blob) {
231
- return this.fromStream(blob.stream());
237
+ const tokenizer = strtok3.fromBlob(blob, this.tokenizerOptions);
238
+ try {
239
+ return await this.fromTokenizer(tokenizer);
240
+ } finally {
241
+ await tokenizer.close();
242
+ }
232
243
  }
233
244
 
234
245
  async fromStream(stream) {
235
- const tokenizer = await strtok3.fromWebStream(stream, this.tokenizerOptions);
246
+ const tokenizer = strtok3.fromWebStream(stream, this.tokenizerOptions);
236
247
  try {
237
248
  return await this.fromTokenizer(tokenizer);
238
249
  } finally {
@@ -253,7 +264,7 @@ export class FileTypeParser {
253
264
  if (!done && chunk) {
254
265
  try {
255
266
  // Attempt to detect the file type from the chunk
256
- detectedFileType = await this.fromBuffer(chunk.slice(0, sampleSize));
267
+ detectedFileType = await this.fromBuffer(chunk.subarray(0, sampleSize));
257
268
  } catch (error) {
258
269
  if (!(error instanceof strtok3.EndOfStreamError)) {
259
270
  throw error; // Re-throw non-EndOfStreamError
@@ -290,7 +301,7 @@ export class FileTypeParser {
290
301
  }
291
302
 
292
303
  checkString(header, options) {
293
- return this.check(stringToBytes(header), options);
304
+ return this.check(stringToBytes(header, options?.encoding), options);
294
305
  }
295
306
 
296
307
  // Detections with a high degree of certainty in identifying the correct file type
@@ -304,7 +315,7 @@ export class FileTypeParser {
304
315
 
305
316
  this.tokenizer = tokenizer;
306
317
 
307
- await tokenizer.peekBuffer(this.buffer, {length: 12, mayBeLess: true});
318
+ await tokenizer.peekBuffer(this.buffer, {length: 32, mayBeLess: true});
308
319
 
309
320
  // -- 2-byte signatures --
310
321
 
@@ -402,6 +413,21 @@ export class FileTypeParser {
402
413
  }
403
414
 
404
415
  if (this.check([0x1F, 0x8B, 0x8])) {
416
+ const gzipHandler = new GzipHandler(tokenizer);
417
+
418
+ const stream = gzipHandler.inflate();
419
+ try {
420
+ const compressedFileType = await this.fromStream(stream);
421
+ if (compressedFileType && compressedFileType.ext === 'tar') {
422
+ return {
423
+ ext: 'tar.gz',
424
+ mime: 'application/gzip',
425
+ };
426
+ }
427
+ } finally {
428
+ await stream.cancel();
429
+ }
430
+
405
431
  return {
406
432
  ext: 'gz',
407
433
  mime: 'application/gzip',
@@ -571,6 +597,10 @@ export class FileTypeParser {
571
597
 
572
598
  return {};
573
599
  }
600
+ }).catch(error => {
601
+ if (!(error instanceof strtok3.EndOfStreamError)) {
602
+ throw error; // Re-throw non-EndOfStreamError
603
+ }
574
604
  });
575
605
 
576
606
  return fileType ?? {
@@ -709,7 +739,7 @@ export class FileTypeParser {
709
739
  if (this.checkString('fLaC')) {
710
740
  return {
711
741
  ext: 'flac',
712
- mime: 'audio/x-flac',
742
+ mime: 'audio/flac',
713
743
  };
714
744
  }
715
745
 
@@ -728,28 +758,6 @@ export class FileTypeParser {
728
758
  }
729
759
 
730
760
  if (this.checkString('%PDF')) {
731
- try {
732
- const skipBytes = 1350;
733
- if (skipBytes === await tokenizer.ignore(skipBytes)) {
734
- const maxBufferSize = 10 * 1024 * 1024;
735
- const buffer = new Uint8Array(Math.min(maxBufferSize, tokenizer.fileInfo.size - skipBytes));
736
- await tokenizer.readBuffer(buffer, {mayBeLess: true});
737
-
738
- // Check if this is an Adobe Illustrator file
739
- if (includes(buffer, new TextEncoder().encode('AIPrivateData'))) {
740
- return {
741
- ext: 'ai',
742
- mime: 'application/postscript',
743
- };
744
- }
745
- }
746
- } catch (error) {
747
- // Swallow end of stream error if file is too small for the Adobe AI check
748
- if (!(error instanceof strtok3.EndOfStreamError)) {
749
- throw error;
750
- }
751
- }
752
-
753
761
  // Assume this is just a normal PDF
754
762
  return {
755
763
  ext: 'pdf',
@@ -846,7 +854,7 @@ export class FileTypeParser {
846
854
  case 'matroska':
847
855
  return {
848
856
  ext: 'mkv',
849
- mime: 'video/x-matroska',
857
+ mime: 'video/matroska',
850
858
  };
851
859
 
852
860
  default:
@@ -920,10 +928,10 @@ export class FileTypeParser {
920
928
  };
921
929
  }
922
930
 
923
- if (this.checkString('PAR1')) {
931
+ if (this.checkString('PAR1') || this.checkString('PARE')) {
924
932
  return {
925
933
  ext: 'parquet',
926
- mime: 'application/x-parquet',
934
+ mime: 'application/vnd.apache.parquet',
927
935
  };
928
936
  }
929
937
 
@@ -948,6 +956,13 @@ export class FileTypeParser {
948
956
  };
949
957
  }
950
958
 
959
+ if (this.checkString('regf')) {
960
+ return {
961
+ ext: 'dat',
962
+ mime: 'application/x-ft-windows-registry-hive',
963
+ };
964
+ }
965
+
951
966
  // -- 5-byte signatures --
952
967
 
953
968
  if (this.check([0x4F, 0x54, 0x54, 0x4F, 0x00])) {
@@ -1199,7 +1214,7 @@ export class FileTypeParser {
1199
1214
  if (this.check([0x41, 0x52, 0x52, 0x4F, 0x57, 0x31, 0x00, 0x00])) {
1200
1215
  return {
1201
1216
  ext: 'arrow',
1202
- mime: 'application/x-apache-arrow',
1217
+ mime: 'application/vnd.apache.arrow.file',
1203
1218
  };
1204
1219
  }
1205
1220
 
@@ -1299,6 +1314,15 @@ export class FileTypeParser {
1299
1314
  }
1300
1315
  }
1301
1316
 
1317
+ // -- 10-byte signatures --
1318
+
1319
+ if (this.checkString('REGEDIT4\r\n')) {
1320
+ return {
1321
+ ext: 'reg',
1322
+ mime: 'application/x-ms-regedit',
1323
+ };
1324
+ }
1325
+
1302
1326
  // -- 12-byte signatures --
1303
1327
 
1304
1328
  // RIFF file format which might be AVI, WAV, QCP, etc
@@ -1459,8 +1483,8 @@ export class FileTypeParser {
1459
1483
  };
1460
1484
  }
1461
1485
 
1462
- if (this.check([0xFE, 0xFF])) { // UTF-16-BOM-LE
1463
- if (this.check([0, 60, 0, 63, 0, 120, 0, 109, 0, 108], {offset: 2})) {
1486
+ if (this.check([0xFE, 0xFF])) { // UTF-16-BOM-BE
1487
+ if (this.checkString('<?xml ', {offset: 2, encoding: 'utf-16be'})) {
1464
1488
  return {
1465
1489
  ext: 'xml',
1466
1490
  mime: 'application/xml',
@@ -1478,7 +1502,7 @@ export class FileTypeParser {
1478
1502
  };
1479
1503
  }
1480
1504
 
1481
- // Increase sample size from 12 to 256.
1505
+ // Increase sample size from 32 to 256.
1482
1506
  await tokenizer.peekBuffer(this.buffer, {length: Math.min(256, tokenizer.fileInfo.size), mayBeLess: true});
1483
1507
 
1484
1508
  if (this.check([0x61, 0x63, 0x73, 0x70], {offset: 36})) {
@@ -1541,7 +1565,7 @@ export class FileTypeParser {
1541
1565
 
1542
1566
  if (jsonSize > 12 && this.buffer.length >= jsonSize + 16) {
1543
1567
  try {
1544
- const header = new TextDecoder().decode(this.buffer.slice(16, jsonSize + 16));
1568
+ const header = new TextDecoder().decode(this.buffer.subarray(16, jsonSize + 16));
1545
1569
  const json = JSON.parse(header);
1546
1570
  // Check if Pickle is ASAR
1547
1571
  if (json.files) { // Final check, assuring Pickle/ASAR format
@@ -1644,28 +1668,37 @@ export class FileTypeParser {
1644
1668
  await tokenizer.peekBuffer(this.buffer, {length: Math.min(512, tokenizer.fileInfo.size), mayBeLess: true});
1645
1669
 
1646
1670
  // Requires a buffer size of 512 bytes
1647
- if (tarHeaderChecksumMatches(this.buffer)) {
1671
+ if ((this.checkString('ustar', {offset: 257}) && (this.checkString('\0', {offset: 262}) || this.checkString(' ', {offset: 262})))
1672
+ || (this.check([0, 0, 0, 0, 0, 0], {offset: 257}) && tarHeaderChecksumMatches(this.buffer))) {
1648
1673
  return {
1649
1674
  ext: 'tar',
1650
1675
  mime: 'application/x-tar',
1651
1676
  };
1652
1677
  }
1653
1678
 
1654
- if (this.check([0xFF, 0xFE])) { // UTF-16-BOM-BE
1655
- if (this.check([60, 0, 63, 0, 120, 0, 109, 0, 108, 0], {offset: 2})) {
1679
+ if (this.check([0xFF, 0xFE])) { // UTF-16-BOM-LE
1680
+ const encoding = 'utf-16le';
1681
+ if (this.checkString('<?xml ', {offset: 2, encoding})) {
1656
1682
  return {
1657
1683
  ext: 'xml',
1658
1684
  mime: 'application/xml',
1659
1685
  };
1660
1686
  }
1661
1687
 
1662
- if (this.check([0xFF, 0x0E, 0x53, 0x00, 0x6B, 0x00, 0x65, 0x00, 0x74, 0x00, 0x63, 0x00, 0x68, 0x00, 0x55, 0x00, 0x70, 0x00, 0x20, 0x00, 0x4D, 0x00, 0x6F, 0x00, 0x64, 0x00, 0x65, 0x00, 0x6C, 0x00], {offset: 2})) {
1688
+ if (this.check([0xFF, 0x0E], {offset: 2}) && this.checkString('SketchUp Model', {offset: 4, encoding})) {
1663
1689
  return {
1664
1690
  ext: 'skp',
1665
1691
  mime: 'application/vnd.sketchup.skp',
1666
1692
  };
1667
1693
  }
1668
1694
 
1695
+ if (this.checkString('Windows Registry Editor Version 5.00\r\n', {offset: 2, encoding})) {
1696
+ return {
1697
+ ext: 'reg',
1698
+ mime: 'application/x-ms-regedit',
1699
+ };
1700
+ }
1701
+
1669
1702
  return undefined; // Some text based format
1670
1703
  }
1671
1704
 
@@ -1714,47 +1747,16 @@ export class FileTypeParser {
1714
1747
  };
1715
1748
  }
1716
1749
 
1750
+ // Adjust buffer to `mpegOffsetTolerance`
1751
+ await tokenizer.peekBuffer(this.buffer, {length: Math.min(2 + this.options.mpegOffsetTolerance, tokenizer.fileInfo.size), mayBeLess: true});
1752
+
1717
1753
  // Check MPEG 1 or 2 Layer 3 header, or 'layer 0' for ADTS (MPEG sync-word 0xFFE)
1718
- if (this.buffer.length >= 2 && this.check([0xFF, 0xE0], {offset: 0, mask: [0xFF, 0xE0]})) {
1719
- if (this.check([0x10], {offset: 1, mask: [0x16]})) {
1720
- // Check for (ADTS) MPEG-2
1721
- if (this.check([0x08], {offset: 1, mask: [0x08]})) {
1722
- return {
1723
- ext: 'aac',
1724
- mime: 'audio/aac',
1725
- };
1754
+ if (this.buffer.length >= (2 + this.options.mpegOffsetTolerance)) {
1755
+ for (let depth = 0; depth <= this.options.mpegOffsetTolerance; ++depth) {
1756
+ const type = this.scanMpeg(depth);
1757
+ if (type) {
1758
+ return type;
1726
1759
  }
1727
-
1728
- // Must be (ADTS) MPEG-4
1729
- return {
1730
- ext: 'aac',
1731
- mime: 'audio/aac',
1732
- };
1733
- }
1734
-
1735
- // MPEG 1 or 2 Layer 3 header
1736
- // Check for MPEG layer 3
1737
- if (this.check([0x02], {offset: 1, mask: [0x06]})) {
1738
- return {
1739
- ext: 'mp3',
1740
- mime: 'audio/mpeg',
1741
- };
1742
- }
1743
-
1744
- // Check for MPEG layer 2
1745
- if (this.check([0x04], {offset: 1, mask: [0x06]})) {
1746
- return {
1747
- ext: 'mp2',
1748
- mime: 'audio/mpeg',
1749
- };
1750
- }
1751
-
1752
- // Check for MPEG layer 1
1753
- if (this.check([0x06], {offset: 1, mask: [0x06]})) {
1754
- return {
1755
- ext: 'mp1',
1756
- mime: 'audio/mpeg',
1757
- };
1758
1760
  }
1759
1761
  }
1760
1762
  };
@@ -1831,6 +1833,57 @@ export class FileTypeParser {
1831
1833
  };
1832
1834
  }
1833
1835
  }
1836
+
1837
+ /**
1838
+ Scan for an MPEG 1 or 2 Layer 3 header, or 'layer 0' for ADTS (MPEG sync-word 0xFFE), at the given offset.
1839
+
1840
+ @param offset - Offset to scan for sync-preamble.
1841
+ @returns {{ext: string, mime: string}}
1842
+ */
1843
+ scanMpeg(offset) {
1844
+ if (this.check([0xFF, 0xE0], {offset, mask: [0xFF, 0xE0]})) {
1845
+ if (this.check([0x10], {offset: offset + 1, mask: [0x16]})) {
1846
+ // Check for (ADTS) MPEG-2
1847
+ if (this.check([0x08], {offset: offset + 1, mask: [0x08]})) {
1848
+ return {
1849
+ ext: 'aac',
1850
+ mime: 'audio/aac',
1851
+ };
1852
+ }
1853
+
1854
+ // Must be (ADTS) MPEG-4
1855
+ return {
1856
+ ext: 'aac',
1857
+ mime: 'audio/aac',
1858
+ };
1859
+ }
1860
+
1861
+ // MPEG 1 or 2 Layer 3 header
1862
+ // Check for MPEG layer 3
1863
+ if (this.check([0x02], {offset: offset + 1, mask: [0x06]})) {
1864
+ return {
1865
+ ext: 'mp3',
1866
+ mime: 'audio/mpeg',
1867
+ };
1868
+ }
1869
+
1870
+ // Check for MPEG layer 2
1871
+ if (this.check([0x04], {offset: offset + 1, mask: [0x06]})) {
1872
+ return {
1873
+ ext: 'mp2',
1874
+ mime: 'audio/mpeg',
1875
+ };
1876
+ }
1877
+
1878
+ // Check for MPEG layer 1
1879
+ if (this.check([0x06], {offset: offset + 1, mask: [0x06]})) {
1880
+ return {
1881
+ ext: 'mp1',
1882
+ mime: 'audio/mpeg',
1883
+ };
1884
+ }
1885
+ }
1886
+ }
1834
1887
  }
1835
1888
 
1836
1889
  export const supportedExtensions = new Set(extensions);
package/index.d.ts CHANGED
@@ -8,8 +8,8 @@ import {
8
8
  type FileTypeResult,
9
9
  type StreamOptions,
10
10
  type AnyWebReadableStream,
11
- type Detector,
12
11
  type AnyWebReadableByteStreamWithFileType,
12
+ type FileTypeOptions,
13
13
  FileTypeParser as DefaultFileTypeParser,
14
14
  } from './core.js';
15
15
 
@@ -31,8 +31,8 @@ export declare class FileTypeParser extends DefaultFileTypeParser {
31
31
  /**
32
32
  Works the same way as {@link fileTypeStream}, additionally taking into account custom detectors (if any were provided to the constructor).
33
33
  */
34
- toDetectionStream(readableStream: NodeReadableStream, options?: StreamOptions): Promise<ReadableStreamWithFileType>;
35
- toDetectionStream(webStream: AnyWebReadableStream<Uint8Array>, options?: StreamOptions): Promise<AnyWebReadableByteStreamWithFileType>;
34
+ toDetectionStream(readableStream: NodeReadableStream, options?: FileTypeOptions & StreamOptions): Promise<ReadableStreamWithFileType>;
35
+ toDetectionStream(webStream: AnyWebReadableStream<Uint8Array>, options?: FileTypeOptions & StreamOptions): Promise<AnyWebReadableByteStreamWithFileType>;
36
36
  }
37
37
 
38
38
  /**
@@ -48,7 +48,7 @@ The file type is detected by checking the [magic number](https://en.wikipedia.or
48
48
 
49
49
  @returns The detected file type and MIME type or `undefined` when there is no match.
50
50
  */
51
- export function fileTypeFromFile(filePath: string, options?: {customDetectors?: Iterable<Detector>}): Promise<FileTypeResult | undefined>;
51
+ export function fileTypeFromFile(filePath: string, options?: FileTypeOptions): Promise<FileTypeResult | undefined>;
52
52
 
53
53
  /**
54
54
  Detect the file type of a [web `ReadableStream`](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream).
@@ -60,9 +60,10 @@ Direct support for Node.js streams will be dropped in the future, when Node.js s
60
60
  The file type is detected by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files) of the buffer.
61
61
 
62
62
  @param stream - A [web `ReadableStream`](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream) or [Node.js `stream.Readable`](https://nodejs.org/api/stream.html#stream_class_stream_readable) streaming a file to examine.
63
- @returns A `Promise` for an object with the detected file type, or `undefined` when there is no match.
63
+ @param options - Options to override default behavior.
64
+ @returns A `Promise` for an object with the detected file type, or `undefined` when there is no match.
64
65
  */
65
- export function fileTypeFromStream(stream: AnyWebReadableStream<Uint8Array> | NodeReadableStream): Promise<FileTypeResult | undefined>;
66
+ export function fileTypeFromStream(stream: AnyWebReadableStream<Uint8Array> | NodeReadableStream, options?: FileTypeOptions): Promise<FileTypeResult | undefined>;
66
67
 
67
68
  /**
68
69
  Returns a `Promise` which resolves to the original readable stream argument, but with an added `fileType` property, which is an object like the one returned from `fileTypeFromFile()`.
@@ -91,7 +92,7 @@ if (stream2.fileType?.mime === 'image/jpeg') {
91
92
  }
92
93
  ```
93
94
  */
94
- export function fileTypeStream(readableStream: NodeReadableStream, options?: StreamOptions): Promise<ReadableStreamWithFileType>;
95
- export function fileTypeStream(webStream: AnyWebByteStream, options?: StreamOptions): Promise<AnyWebReadableByteStreamWithFileType>;
95
+ export function fileTypeStream(readableStream: NodeReadableStream, options?: FileTypeOptions & StreamOptions): Promise<ReadableStreamWithFileType>;
96
+ export function fileTypeStream(webStream: AnyWebByteStream, options?: FileTypeOptions & StreamOptions): Promise<AnyWebReadableByteStreamWithFileType>;
96
97
 
97
98
  export * from './core.js';
package/index.js CHANGED
@@ -65,12 +65,12 @@ export class FileTypeParser extends DefaultFileTypeParser {
65
65
  }
66
66
  }
67
67
 
68
- export async function fileTypeFromFile(path, fileTypeOptions) {
69
- return (new FileTypeParser(fileTypeOptions)).fromFile(path, fileTypeOptions);
68
+ export async function fileTypeFromFile(path, options) {
69
+ return (new FileTypeParser(options)).fromFile(path, options);
70
70
  }
71
71
 
72
- export async function fileTypeFromStream(stream, fileTypeOptions) {
73
- return (new FileTypeParser(fileTypeOptions)).fromStream(stream);
72
+ export async function fileTypeFromStream(stream, options) {
73
+ return (new FileTypeParser(options)).fromStream(stream);
74
74
  }
75
75
 
76
76
  export async function fileTypeStream(readableStream, options = {}) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "file-type",
3
- "version": "20.5.0",
3
+ "version": "21.1.0",
4
4
  "description": "Detect the file type of a file, stream, or data",
5
5
  "license": "MIT",
6
6
  "repository": "sindresorhus/file-type",
@@ -35,7 +35,7 @@
35
35
  },
36
36
  "sideEffects": false,
37
37
  "engines": {
38
- "node": ">=18"
38
+ "node": ">=20"
39
39
  },
40
40
  "scripts": {
41
41
  "test": "xo && ava && tsd"
@@ -198,7 +198,6 @@
198
198
  "it",
199
199
  "s3m",
200
200
  "xm",
201
- "ai",
202
201
  "skp",
203
202
  "avif",
204
203
  "eps",
@@ -244,22 +243,25 @@
244
243
  "jar",
245
244
  "rm",
246
245
  "ppsm",
247
- "ppsx"
246
+ "ppsx",
247
+ "tar.gz",
248
+ "reg",
249
+ "dat"
248
250
  ],
249
251
  "dependencies": {
250
- "@tokenizer/inflate": "^0.2.6",
251
- "strtok3": "^10.2.0",
252
+ "@tokenizer/inflate": "^0.3.1",
253
+ "strtok3": "^10.3.1",
252
254
  "token-types": "^6.0.0",
253
255
  "uint8array-extras": "^1.4.0"
254
256
  },
255
257
  "devDependencies": {
256
258
  "@tokenizer/token": "^0.3.0",
257
- "@types/node": "^22.10.5",
258
- "ava": "^6.0.1",
259
+ "@types/node": "^22.15.21",
260
+ "ava": "^6.3.0",
259
261
  "commonmark": "^0.31.2",
260
262
  "get-stream": "^9.0.1",
261
263
  "noop-stream": "^1.0.0",
262
- "tsd": "^0.31.2",
264
+ "tsd": "^0.32.0",
263
265
  "xo": "^0.60.0"
264
266
  },
265
267
  "xo": {
package/readme.md CHANGED
@@ -109,7 +109,7 @@ console.log(fileType);
109
109
 
110
110
  ## API
111
111
 
112
- ### fileTypeFromBuffer(buffer)
112
+ ### fileTypeFromBuffer(buffer, options)
113
113
 
114
114
  Detect the file type of a `Uint8Array` or `ArrayBuffer`.
115
115
 
@@ -130,13 +130,13 @@ Type: `Uint8Array | ArrayBuffer`
130
130
 
131
131
  A buffer representing file data. It works best if the buffer contains the entire file. It may work with a smaller portion as well.
132
132
 
133
- ### fileTypeFromFile(filePath)
133
+ ### fileTypeFromFile(filePath, options)
134
134
 
135
135
  Detect the file type of a file path.
136
136
 
137
137
  This is for Node.js only.
138
138
 
139
- To read from a [`File`](https://developer.mozilla.org/docs/Web/API/File), see [`fileTypeFromBlob()`](#filetypefromblobblob).
139
+ To read from a [`File`](https://developer.mozilla.org/docs/Web/API/File), see [`fileTypeFromBlob()`](#filetypefromblobblob-options).
140
140
 
141
141
  The file type is detected by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files) of the buffer.
142
142
 
@@ -176,7 +176,7 @@ Type: [Web `ReadableStream`](https://developer.mozilla.org/en-US/docs/Web/API/Re
176
176
 
177
177
  A readable stream representing file data.
178
178
 
179
- ### fileTypeFromBlob(blob)
179
+ ### fileTypeFromBlob(blob, options)
180
180
 
181
181
  Detect the file type of a [`Blob`](https://developer.mozilla.org/docs/Web/API/Blob),
182
182
 
@@ -225,7 +225,7 @@ async function readFromBlobWithoutStreaming(blob) {
225
225
 
226
226
  Type: [`Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob)
227
227
 
228
- ### fileTypeFromTokenizer(tokenizer)
228
+ ### fileTypeFromTokenizer(tokenizer, options)
229
229
 
230
230
  Detect the file type from an `ITokenizer` source.
231
231
 
@@ -304,6 +304,8 @@ Type: [`stream.Readable`](https://nodejs.org/api/stream.html#stream_class_stream
304
304
 
305
305
  Type: `object`
306
306
 
307
+ Supports the following options in addition to the [general options](#options):
308
+
307
309
  ##### sampleSize
308
310
 
309
311
  Type: `number`\
@@ -341,6 +343,32 @@ Returns a `Set<string>` of supported file extensions.
341
343
 
342
344
  Returns a `Set<string>` of supported MIME types.
343
345
 
346
+ ### Options
347
+
348
+ #### customDetectors
349
+
350
+ Array of custom file type detectors to run before default detectors.
351
+
352
+ For example:
353
+
354
+ ```js
355
+ import {fileTypeFromFile} from 'file-type';
356
+ import {detectXml} from '@file-type/xml';
357
+
358
+ const fileType = await fileTypeFromFile('sample.kml', {customDetectors: [detectXml]});
359
+ console.log(fileType);
360
+ ```
361
+
362
+ #### mpegOffsetTolerance
363
+
364
+ Default: `0`
365
+
366
+ Specifies the byte tolerance for locating the first MPEG audio frame (e.g. `.mp1`, `.mp2`, `.mp3`, `.aac`).
367
+
368
+ Allows detection to handle slight sync offsets between the expected and actual frame start. Such offsets are common in malformed or incorrectly muxed files, which, while technically invalid, do occur in the wild.
369
+
370
+ A tolerance of 10 bytes covers most cases.
371
+
344
372
  ## Custom detectors
345
373
 
346
374
  Custom file type detectors are plugins designed to extend the default detection capabilities.
@@ -353,6 +381,8 @@ Detectors can be added via the constructor options or by directly modifying `Fil
353
381
 
354
382
  ### Example adding a detector
355
383
 
384
+ For example:
385
+
356
386
  ```js
357
387
  import {FileTypeParser} from 'file-type';
358
388
  import {detectXml} from '@file-type/xml';
@@ -364,6 +394,7 @@ console.log(fileType);
364
394
 
365
395
  ### Available third-party file-type detectors
366
396
 
397
+ - [@file-type/av](https://github.com/Borewit/file-type-av): Improves detection of audio and video file formats, with accurate differentiation between the two
367
398
  - [@file-type/xml](https://github.com/Borewit/file-type-xml): Detects common XML file types, such as GML, KML, MusicXML, RSS, SVG, and XHTML
368
399
 
369
400
  ### Detector execution flow
@@ -436,7 +467,6 @@ abortController.abort(); // Abort file-type reading from the Blob stream.
436
467
  - [`aac`](https://en.wikipedia.org/wiki/Advanced_Audio_Coding) - Advanced Audio Coding
437
468
  - [`ac3`](https://www.atsc.org/standard/a522012-digital-audio-compression-ac-3-e-ac-3-standard-12172012/) - ATSC A/52 Audio File
438
469
  - [`ace`](https://en.wikipedia.org/wiki/ACE_(compressed_file_format)) - ACE archive
439
- - [`ai`](https://en.wikipedia.org/wiki/Adobe_Illustrator_Artwork) - Adobe Illustrator Artwork
440
470
  - [`aif`](https://en.wikipedia.org/wiki/Audio_Interchange_File_Format) - Audio Interchange file
441
471
  - [`alias`](https://en.wikipedia.org/wiki/Alias_%28Mac_OS%29) - macOS Alias file
442
472
  - [`amr`](https://en.wikipedia.org/wiki/Adaptive_Multi-Rate_audio_codec) - Adaptive Multi-Rate audio codec
@@ -465,6 +495,7 @@ abortController.abort(); // Abort file-type reading from the Blob stream.
465
495
  - [`cr3`](https://fileinfo.com/extension/cr3) - Canon Raw image file (v3)
466
496
  - [`crx`](https://developer.chrome.com/extensions/crx) - Google Chrome extension
467
497
  - [`cur`](https://en.wikipedia.org/wiki/ICO_(file_format)) - Windows cursor file
498
+ - [`dat`](https://en.wikipedia.org/wiki/Windows_Registry) - Windows registry hive file
468
499
  - [`dcm`](https://en.wikipedia.org/wiki/DICOM#Data_format) - DICOM Image File
469
500
  - [`deb`](https://en.wikipedia.org/wiki/Deb_(file_format)) - Debian package
470
501
  - [`dmg`](https://en.wikipedia.org/wiki/Apple_Disk_Image) - Apple Disk Image
@@ -567,6 +598,7 @@ abortController.abort(); // Abort file-type reading from the Blob stream.
567
598
  - [`qcp`](https://en.wikipedia.org/wiki/QCP) - Tagged and chunked data
568
599
  - [`raf`](https://en.wikipedia.org/wiki/Raw_image_format) - Fujifilm RAW image file
569
600
  - [`rar`](https://en.wikipedia.org/wiki/RAR_(file_format)) - Archive file
601
+ - [`reg`](https://en.wikipedia.org/wiki/Windows_Registry) - Windows registry (entries) file format
570
602
  - [`rm`](https://en.wikipedia.org/wiki/RealMedia) - RealMedia
571
603
  - [`rpm`](https://fileinfo.com/extension/rpm) - Red Hat Package Manager file
572
604
  - [`rtf`](https://en.wikipedia.org/wiki/Rich_Text_Format) - Rich Text Format
@@ -578,7 +610,8 @@ abortController.abort(); // Abort file-type reading from the Blob stream.
578
610
  - [`sqlite`](https://www.sqlite.org/fileformat2.html) - SQLite file
579
611
  - [`stl`](https://en.wikipedia.org/wiki/STL_(file_format)) - Standard Tessellated Geometry File Format (ASCII only)
580
612
  - [`swf`](https://en.wikipedia.org/wiki/SWF) - Adobe Flash Player file
581
- - [`tar`](https://en.wikipedia.org/wiki/Tar_(computing)#File_format) - Tarball archive file
613
+ - [`tar`](https://en.wikipedia.org/wiki/Tar_(computing)#File_format) - Tape archive or tarball
614
+ - [`tar.gz`](https://en.wikipedia.org/wiki/Gzip) - Gzipped tape archive (tarball)
582
615
  - [`tif`](https://en.wikipedia.org/wiki/Tagged_Image_File_Format) - Tagged Image file
583
616
  - [`ttc`](https://en.wikipedia.org/wiki/TrueType#TrueType_Collection) - TrueType Collection font
584
617
  - [`ttf`](https://en.wikipedia.org/wiki/TrueType) - TrueType font
package/supported.js CHANGED
@@ -128,7 +128,6 @@ export const extensions = [
128
128
  'it',
129
129
  's3m',
130
130
  'xm',
131
- 'ai',
132
131
  'skp',
133
132
  'avif',
134
133
  'eps',
@@ -175,6 +174,9 @@ export const extensions = [
175
174
  'rm',
176
175
  'ppsm',
177
176
  'ppsx',
177
+ 'tar.gz',
178
+ 'reg',
179
+ 'dat',
178
180
  ];
179
181
 
180
182
  export const mimeTypes = [
@@ -208,10 +210,10 @@ export const mimeTypes = [
208
210
  'application/x-bzip2',
209
211
  'application/x-7z-compressed',
210
212
  'application/x-apple-diskimage',
211
- 'application/x-apache-arrow',
213
+ 'application/vnd.apache.arrow.file',
212
214
  'video/mp4',
213
215
  'audio/midi',
214
- 'video/x-matroska',
216
+ 'video/matroska',
215
217
  'video/webm',
216
218
  'video/quicktime',
217
219
  'video/vnd.avi',
@@ -228,7 +230,7 @@ export const mimeTypes = [
228
230
  'audio/ogg',
229
231
  'audio/ogg; codecs=opus',
230
232
  'application/ogg',
231
- 'audio/x-flac',
233
+ 'audio/flac',
232
234
  'audio/ape',
233
235
  'audio/wavpack',
234
236
  'audio/amr',
@@ -321,7 +323,7 @@ export const mimeTypes = [
321
323
  'image/jls',
322
324
  'application/vnd.ms-outlook',
323
325
  'image/vnd.dwg',
324
- 'application/x-parquet',
326
+ 'application/vnd.apache.parquet',
325
327
  'application/java-vm',
326
328
  'application/x-arj',
327
329
  'application/x-cpio',
@@ -349,4 +351,6 @@ export const mimeTypes = [
349
351
  'application/vnd.ms-powerpoint.presentation.macroenabled.12',
350
352
  'application/java-archive',
351
353
  'application/vnd.rn-realmedia',
354
+ 'application/x-ms-regedit',
355
+ 'application/x-ft-windows-registry-hive',
352
356
  ];
package/util.js CHANGED
@@ -1,6 +1,26 @@
1
1
  import {StringType} from 'token-types';
2
2
 
3
// Reviewer note on the new (`+`) version of stringToBytes shown in this hunk:
// converts `string` into a plain array of numbers.
// - encoding 'utf-16le': every UTF-16 code unit yields two bytes, low byte then high byte.
// - encoding 'utf-16be': every UTF-16 code unit yields two bytes, high byte then low byte.
// - any other (or omitted) encoding: one number per iterated character, taken from
//   charCodeAt(0) and NOT masked to 0xFF, so values above 255 are possible.
//   Presumably callers only pass ASCII/Latin-1 text here; confirm against call sites.
- export function stringToBytes(string) {
3
+ export function stringToBytes(string, encoding) {
4
+ if (encoding === 'utf-16le') {
5
+ const bytes = [];
6
+ for (let index = 0; index < string.length; index++) {
7
+ const code = string.charCodeAt(index); // eslint-disable-line unicorn/prefer-code-point
8
+ bytes.push(code & 0xFF, (code >> 8) & 0xFF); // Low byte first, then high byte (little-endian)
9
+ }
10
+
11
+ return bytes;
12
+ }
13
+
14
+ if (encoding === 'utf-16be') {
15
+ const bytes = [];
16
+ for (let index = 0; index < string.length; index++) {
17
+ const code = string.charCodeAt(index); // eslint-disable-line unicorn/prefer-code-point
18
+ bytes.push((code >> 8) & 0xFF, code & 0xFF); // High byte first, then low byte (big-endian)
19
+ }
20
+
21
+ return bytes;
22
+ }
23
+
4
24
  return [...string].map(character => character.charCodeAt(0)); // eslint-disable-line unicorn/prefer-code-point
5
25
  }
6
26