file-type 21.0.0 → 21.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/core.js CHANGED
@@ -4,7 +4,7 @@ Primary entry point, Node.js specific entry point is index.js
4
4
 
5
5
  import * as Token from 'token-types';
6
6
  import * as strtok3 from 'strtok3/core';
7
- import {ZipHandler} from '@tokenizer/inflate';
7
+ import {ZipHandler, GzipHandler} from '@tokenizer/inflate';
8
8
  import {getUintBE} from 'uint8array-extras';
9
9
  import {
10
10
  stringToBytes,
@@ -234,11 +234,16 @@ export class FileTypeParser {
234
234
  }
235
235
 
236
236
  async fromBlob(blob) {
237
- return this.fromStream(blob.stream());
237
+ const tokenizer = strtok3.fromBlob(blob, this.tokenizerOptions);
238
+ try {
239
+ return await this.fromTokenizer(tokenizer);
240
+ } finally {
241
+ await tokenizer.close();
242
+ }
238
243
  }
239
244
 
240
245
  async fromStream(stream) {
241
- const tokenizer = await strtok3.fromWebStream(stream, this.tokenizerOptions);
246
+ const tokenizer = strtok3.fromWebStream(stream, this.tokenizerOptions);
242
247
  try {
243
248
  return await this.fromTokenizer(tokenizer);
244
249
  } finally {
@@ -296,7 +301,7 @@ export class FileTypeParser {
296
301
  }
297
302
 
298
303
  checkString(header, options) {
299
- return this.check(stringToBytes(header), options);
304
+ return this.check(stringToBytes(header, options?.encoding), options);
300
305
  }
301
306
 
302
307
  // Detections with a high degree of certainty in identifying the correct file type
@@ -310,7 +315,7 @@ export class FileTypeParser {
310
315
 
311
316
  this.tokenizer = tokenizer;
312
317
 
313
- await tokenizer.peekBuffer(this.buffer, {length: 12, mayBeLess: true});
318
+ await tokenizer.peekBuffer(this.buffer, {length: 32, mayBeLess: true});
314
319
 
315
320
  // -- 2-byte signatures --
316
321
 
@@ -408,6 +413,21 @@ export class FileTypeParser {
408
413
  }
409
414
 
410
415
  if (this.check([0x1F, 0x8B, 0x8])) {
416
+ const gzipHandler = new GzipHandler(tokenizer);
417
+
418
+ const stream = gzipHandler.inflate();
419
+ try {
420
+ const compressedFileType = await this.fromStream(stream);
421
+ if (compressedFileType && compressedFileType.ext === 'tar') {
422
+ return {
423
+ ext: 'tar.gz',
424
+ mime: 'application/gzip',
425
+ };
426
+ }
427
+ } finally {
428
+ await stream.cancel();
429
+ }
430
+
411
431
  return {
412
432
  ext: 'gz',
413
433
  mime: 'application/gzip',
@@ -577,6 +597,10 @@ export class FileTypeParser {
577
597
 
578
598
  return {};
579
599
  }
600
+ }).catch(error => {
601
+ if (!(error instanceof strtok3.EndOfStreamError)) {
602
+ throw error; // Re-throw non-EndOfStreamError
603
+ }
580
604
  });
581
605
 
582
606
  return fileType ?? {
@@ -932,6 +956,13 @@ export class FileTypeParser {
932
956
  };
933
957
  }
934
958
 
959
+ if (this.checkString('regf')) {
960
+ return {
961
+ ext: 'dat',
962
+ mime: 'application/x-ft-windows-registry-hive',
963
+ };
964
+ }
965
+
935
966
  // -- 5-byte signatures --
936
967
 
937
968
  if (this.check([0x4F, 0x54, 0x54, 0x4F, 0x00])) {
@@ -1283,6 +1314,15 @@ export class FileTypeParser {
1283
1314
  }
1284
1315
  }
1285
1316
 
1317
+ // -- 10-byte signatures --
1318
+
1319
+ if (this.checkString('REGEDIT4\r\n')) {
1320
+ return {
1321
+ ext: 'reg',
1322
+ mime: 'application/x-ms-regedit',
1323
+ };
1324
+ }
1325
+
1286
1326
  // -- 12-byte signatures --
1287
1327
 
1288
1328
  // RIFF file format which might be AVI, WAV, QCP, etc
@@ -1443,8 +1483,8 @@ export class FileTypeParser {
1443
1483
  };
1444
1484
  }
1445
1485
 
1446
- if (this.check([0xFE, 0xFF])) { // UTF-16-BOM-LE
1447
- if (this.check([0, 60, 0, 63, 0, 120, 0, 109, 0, 108], {offset: 2})) {
1486
+ if (this.check([0xFE, 0xFF])) { // UTF-16-BOM-BE
1487
+ if (this.checkString('<?xml ', {offset: 2, encoding: 'utf-16be'})) {
1448
1488
  return {
1449
1489
  ext: 'xml',
1450
1490
  mime: 'application/xml',
@@ -1462,7 +1502,7 @@ export class FileTypeParser {
1462
1502
  };
1463
1503
  }
1464
1504
 
1465
- // Increase sample size from 12 to 256.
1505
+ // Increase sample size from 32 to 256.
1466
1506
  await tokenizer.peekBuffer(this.buffer, {length: Math.min(256, tokenizer.fileInfo.size), mayBeLess: true});
1467
1507
 
1468
1508
  if (this.check([0x61, 0x63, 0x73, 0x70], {offset: 36})) {
@@ -1636,21 +1676,29 @@ export class FileTypeParser {
1636
1676
  };
1637
1677
  }
1638
1678
 
1639
- if (this.check([0xFF, 0xFE])) { // UTF-16-BOM-BE
1640
- if (this.check([60, 0, 63, 0, 120, 0, 109, 0, 108, 0], {offset: 2})) {
1679
+ if (this.check([0xFF, 0xFE])) { // UTF-16-BOM-LE
1680
+ const encoding = 'utf-16le';
1681
+ if (this.checkString('<?xml ', {offset: 2, encoding})) {
1641
1682
  return {
1642
1683
  ext: 'xml',
1643
1684
  mime: 'application/xml',
1644
1685
  };
1645
1686
  }
1646
1687
 
1647
- if (this.check([0xFF, 0x0E, 0x53, 0x00, 0x6B, 0x00, 0x65, 0x00, 0x74, 0x00, 0x63, 0x00, 0x68, 0x00, 0x55, 0x00, 0x70, 0x00, 0x20, 0x00, 0x4D, 0x00, 0x6F, 0x00, 0x64, 0x00, 0x65, 0x00, 0x6C, 0x00], {offset: 2})) {
1688
+ if (this.check([0xFF, 0x0E], {offset: 2}) && this.checkString('SketchUp Model', {offset: 4, encoding})) {
1648
1689
  return {
1649
1690
  ext: 'skp',
1650
1691
  mime: 'application/vnd.sketchup.skp',
1651
1692
  };
1652
1693
  }
1653
1694
 
1695
+ if (this.checkString('Windows Registry Editor Version 5.00\r\n', {offset: 2, encoding})) {
1696
+ return {
1697
+ ext: 'reg',
1698
+ mime: 'application/x-ms-regedit',
1699
+ };
1700
+ }
1701
+
1654
1702
  return undefined; // Some text based format
1655
1703
  }
1656
1704
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "file-type",
3
- "version": "21.0.0",
3
+ "version": "21.1.0",
4
4
  "description": "Detect the file type of a file, stream, or data",
5
5
  "license": "MIT",
6
6
  "repository": "sindresorhus/file-type",
@@ -243,11 +243,14 @@
243
243
  "jar",
244
244
  "rm",
245
245
  "ppsm",
246
- "ppsx"
246
+ "ppsx",
247
+ "tar.gz",
248
+ "reg",
249
+ "dat"
247
250
  ],
248
251
  "dependencies": {
249
- "@tokenizer/inflate": "^0.2.7",
250
- "strtok3": "^10.2.2",
252
+ "@tokenizer/inflate": "^0.3.1",
253
+ "strtok3": "^10.3.1",
251
254
  "token-types": "^6.0.0",
252
255
  "uint8array-extras": "^1.4.0"
253
256
  },
package/readme.md CHANGED
@@ -394,6 +394,7 @@ console.log(fileType);
394
394
 
395
395
  ### Available third-party file-type detectors
396
396
 
397
+ - [@file-type/av](https://github.com/Borewit/file-type-av): Improves detection of audio and video file formats, with accurate differentiation between the two
397
398
  - [@file-type/xml](https://github.com/Borewit/file-type-xml): Detects common XML file types, such as GLM, KML, MusicXML, RSS, SVG, and XHTML
398
399
 
399
400
  ### Detector execution flow
@@ -494,6 +495,7 @@ abortController.abort(); // Abort file-type reading from the Blob stream.
494
495
  - [`cr3`](https://fileinfo.com/extension/cr3) - Canon Raw image file (v3)
495
496
  - [`crx`](https://developer.chrome.com/extensions/crx) - Google Chrome extension
496
497
  - [`cur`](https://en.wikipedia.org/wiki/ICO_(file_format)) - Icon file
498
+ - [`dat`](https://en.wikipedia.org/wiki/Windows_Registry) - Windows registry hive file
497
499
  - [`dcm`](https://en.wikipedia.org/wiki/DICOM#Data_format) - DICOM Image File
498
500
  - [`deb`](https://en.wikipedia.org/wiki/Deb_(file_format)) - Debian package
499
501
  - [`dmg`](https://en.wikipedia.org/wiki/Apple_Disk_Image) - Apple Disk Image
@@ -596,6 +598,7 @@ abortController.abort(); // Abort file-type reading from the Blob stream.
596
598
  - [`qcp`](https://en.wikipedia.org/wiki/QCP) - Tagged and chunked data
597
599
  - [`raf`](https://en.wikipedia.org/wiki/Raw_image_format) - Fujifilm RAW image file
598
600
  - [`rar`](https://en.wikipedia.org/wiki/RAR_(file_format)) - Archive file
601
+ - [`reg`](https://en.wikipedia.org/wiki/Windows_Registry) - Windows registry (entries) file format
599
602
  - [`rm`](https://en.wikipedia.org/wiki/RealMedia) - RealMedia
600
603
  - [`rpm`](https://fileinfo.com/extension/rpm) - Red Hat Package Manager file
601
604
  - [`rtf`](https://en.wikipedia.org/wiki/Rich_Text_Format) - Rich Text Format
@@ -607,7 +610,8 @@ abortController.abort(); // Abort file-type reading from the Blob stream.
607
610
  - [`sqlite`](https://www.sqlite.org/fileformat2.html) - SQLite file
608
611
  - [`stl`](https://en.wikipedia.org/wiki/STL_(file_format)) - Standard Tessellated Geometry File Format (ASCII only)
609
612
  - [`swf`](https://en.wikipedia.org/wiki/SWF) - Adobe Flash Player file
610
- - [`tar`](https://en.wikipedia.org/wiki/Tar_(computing)#File_format) - Tarball archive file
613
+ - [`tar`](https://en.wikipedia.org/wiki/Tar_(computing)#File_format) - Tape archive or tarball
614
+ - [`tar.gz`](https://en.wikipedia.org/wiki/Gzip) - Gzipped tape archive (tarball)
611
615
  - [`tif`](https://en.wikipedia.org/wiki/Tagged_Image_File_Format) - Tagged Image file
612
616
  - [`ttc`](https://en.wikipedia.org/wiki/TrueType#TrueType_Collection) - TrueType Collection font
613
617
  - [`ttf`](https://en.wikipedia.org/wiki/TrueType) - TrueType font
package/supported.js CHANGED
@@ -174,6 +174,9 @@ export const extensions = [
174
174
  'rm',
175
175
  'ppsm',
176
176
  'ppsx',
177
+ 'tar.gz',
178
+ 'reg',
179
+ 'dat',
177
180
  ];
178
181
 
179
182
  export const mimeTypes = [
@@ -348,4 +351,6 @@ export const mimeTypes = [
348
351
  'application/vnd.ms-powerpoint.presentation.macroenabled.12',
349
352
  'application/java-archive',
350
353
  'application/vnd.rn-realmedia',
354
+ 'application/x-ms-regedit',
355
+ 'application/x-ft-windows-registry-hive',
351
356
  ];
package/util.js CHANGED
@@ -1,6 +1,26 @@
1
1
  import {StringType} from 'token-types';
2
2
 
3
- export function stringToBytes(string) {
3
+ export function stringToBytes(string, encoding) {
4
+ if (encoding === 'utf-16le') {
5
+ const bytes = [];
6
+ for (let index = 0; index < string.length; index++) {
7
+ const code = string.charCodeAt(index); // eslint-disable-line unicorn/prefer-code-point
8
+ bytes.push(code & 0xFF, (code >> 8) & 0xFF); // Low byte first, then high byte (little-endian)
9
+ }
10
+
11
+ return bytes;
12
+ }
13
+
14
+ if (encoding === 'utf-16be') {
15
+ const bytes = [];
16
+ for (let index = 0; index < string.length; index++) {
17
+ const code = string.charCodeAt(index); // eslint-disable-line unicorn/prefer-code-point
18
+ bytes.push((code >> 8) & 0xFF, code & 0xFF); // High byte first, then low byte (big-endian)
19
+ }
20
+
21
+ return bytes;
22
+ }
23
+
4
24
  return [...string].map(character => character.charCodeAt(0)); // eslint-disable-line unicorn/prefer-code-point
5
25
  }
6
26