file-type 21.0.0 → 21.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/core.js CHANGED
@@ -4,7 +4,7 @@ Primary entry point, Node.js specific entry point is index.js
4
4
 
5
5
  import * as Token from 'token-types';
6
6
  import * as strtok3 from 'strtok3/core';
7
- import {ZipHandler} from '@tokenizer/inflate';
7
+ import {ZipHandler, GzipHandler} from '@tokenizer/inflate';
8
8
  import {getUintBE} from 'uint8array-extras';
9
9
  import {
10
10
  stringToBytes,
@@ -234,11 +234,16 @@ export class FileTypeParser {
234
234
  }
235
235
 
236
236
  async fromBlob(blob) {
237
- return this.fromStream(blob.stream());
237
+ const tokenizer = strtok3.fromBlob(blob, this.tokenizerOptions);
238
+ try {
239
+ return await this.fromTokenizer(tokenizer);
240
+ } finally {
241
+ await tokenizer.close();
242
+ }
238
243
  }
239
244
 
240
245
  async fromStream(stream) {
241
- const tokenizer = await strtok3.fromWebStream(stream, this.tokenizerOptions);
246
+ const tokenizer = strtok3.fromWebStream(stream, this.tokenizerOptions);
242
247
  try {
243
248
  return await this.fromTokenizer(tokenizer);
244
249
  } finally {
@@ -296,7 +301,7 @@ export class FileTypeParser {
296
301
  }
297
302
 
298
303
  checkString(header, options) {
299
- return this.check(stringToBytes(header), options);
304
+ return this.check(stringToBytes(header, options?.encoding), options);
300
305
  }
301
306
 
302
307
  // Detections with a high degree of certainty in identifying the correct file type
@@ -310,7 +315,7 @@ export class FileTypeParser {
310
315
 
311
316
  this.tokenizer = tokenizer;
312
317
 
313
- await tokenizer.peekBuffer(this.buffer, {length: 12, mayBeLess: true});
318
+ await tokenizer.peekBuffer(this.buffer, {length: 32, mayBeLess: true});
314
319
 
315
320
  // -- 2-byte signatures --
316
321
 
@@ -408,6 +413,30 @@ export class FileTypeParser {
408
413
  }
409
414
 
410
415
  if (this.check([0x1F, 0x8B, 0x8])) {
416
+ const gzipHandler = new GzipHandler(tokenizer);
417
+
418
+ const stream = gzipHandler.inflate();
419
+ let shouldCancelStream = true;
420
+ try {
421
+ let compressedFileType;
422
+ try {
423
+ compressedFileType = await this.fromStream(stream);
424
+ } catch {
425
+ shouldCancelStream = false;
426
+ }
427
+
428
+ if (compressedFileType && compressedFileType.ext === 'tar') {
429
+ return {
430
+ ext: 'tar.gz',
431
+ mime: 'application/gzip',
432
+ };
433
+ }
434
+ } finally {
435
+ if (shouldCancelStream) {
436
+ await stream.cancel();
437
+ }
438
+ }
439
+
411
440
  return {
412
441
  ext: 'gz',
413
442
  mime: 'application/gzip',
@@ -577,6 +606,10 @@ export class FileTypeParser {
577
606
 
578
607
  return {};
579
608
  }
609
+ }).catch(error => {
610
+ if (!(error instanceof strtok3.EndOfStreamError)) {
611
+ throw error; // Re-throw non-EndOfStreamError
612
+ }
580
613
  });
581
614
 
582
615
  return fileType ?? {
@@ -932,6 +965,13 @@ export class FileTypeParser {
932
965
  };
933
966
  }
934
967
 
968
+ if (this.checkString('regf')) {
969
+ return {
970
+ ext: 'dat',
971
+ mime: 'application/x-ft-windows-registry-hive',
972
+ };
973
+ }
974
+
935
975
  // -- 5-byte signatures --
936
976
 
937
977
  if (this.check([0x4F, 0x54, 0x54, 0x4F, 0x00])) {
@@ -1283,6 +1323,15 @@ export class FileTypeParser {
1283
1323
  }
1284
1324
  }
1285
1325
 
1326
+ // -- 10-byte signatures --
1327
+
1328
+ if (this.checkString('REGEDIT4\r\n')) {
1329
+ return {
1330
+ ext: 'reg',
1331
+ mime: 'application/x-ms-regedit',
1332
+ };
1333
+ }
1334
+
1286
1335
  // -- 12-byte signatures --
1287
1336
 
1288
1337
  // RIFF file format which might be AVI, WAV, QCP, etc
@@ -1443,8 +1492,8 @@ export class FileTypeParser {
1443
1492
  };
1444
1493
  }
1445
1494
 
1446
- if (this.check([0xFE, 0xFF])) { // UTF-16-BOM-LE
1447
- if (this.check([0, 60, 0, 63, 0, 120, 0, 109, 0, 108], {offset: 2})) {
1495
+ if (this.check([0xFE, 0xFF])) { // UTF-16-BOM-BE
1496
+ if (this.checkString('<?xml ', {offset: 2, encoding: 'utf-16be'})) {
1448
1497
  return {
1449
1498
  ext: 'xml',
1450
1499
  mime: 'application/xml',
@@ -1462,7 +1511,7 @@ export class FileTypeParser {
1462
1511
  };
1463
1512
  }
1464
1513
 
1465
- // Increase sample size from 12 to 256.
1514
+ // Increase sample size from 32 to 256.
1466
1515
  await tokenizer.peekBuffer(this.buffer, {length: Math.min(256, tokenizer.fileInfo.size), mayBeLess: true});
1467
1516
 
1468
1517
  if (this.check([0x61, 0x63, 0x73, 0x70], {offset: 36})) {
@@ -1636,21 +1685,29 @@ export class FileTypeParser {
1636
1685
  };
1637
1686
  }
1638
1687
 
1639
- if (this.check([0xFF, 0xFE])) { // UTF-16-BOM-BE
1640
- if (this.check([60, 0, 63, 0, 120, 0, 109, 0, 108, 0], {offset: 2})) {
1688
+ if (this.check([0xFF, 0xFE])) { // UTF-16-BOM-LE
1689
+ const encoding = 'utf-16le';
1690
+ if (this.checkString('<?xml ', {offset: 2, encoding})) {
1641
1691
  return {
1642
1692
  ext: 'xml',
1643
1693
  mime: 'application/xml',
1644
1694
  };
1645
1695
  }
1646
1696
 
1647
- if (this.check([0xFF, 0x0E, 0x53, 0x00, 0x6B, 0x00, 0x65, 0x00, 0x74, 0x00, 0x63, 0x00, 0x68, 0x00, 0x55, 0x00, 0x70, 0x00, 0x20, 0x00, 0x4D, 0x00, 0x6F, 0x00, 0x64, 0x00, 0x65, 0x00, 0x6C, 0x00], {offset: 2})) {
1697
+ if (this.check([0xFF, 0x0E], {offset: 2}) && this.checkString('SketchUp Model', {offset: 4, encoding})) {
1648
1698
  return {
1649
1699
  ext: 'skp',
1650
1700
  mime: 'application/vnd.sketchup.skp',
1651
1701
  };
1652
1702
  }
1653
1703
 
1704
+ if (this.checkString('Windows Registry Editor Version 5.00\r\n', {offset: 2, encoding})) {
1705
+ return {
1706
+ ext: 'reg',
1707
+ mime: 'application/x-ms-regedit',
1708
+ };
1709
+ }
1710
+
1654
1711
  return undefined; // Some text based format
1655
1712
  }
1656
1713
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "file-type",
3
- "version": "21.0.0",
3
+ "version": "21.1.1",
4
4
  "description": "Detect the file type of a file, stream, or data",
5
5
  "license": "MIT",
6
6
  "repository": "sindresorhus/file-type",
@@ -243,12 +243,15 @@
243
243
  "jar",
244
244
  "rm",
245
245
  "ppsm",
246
- "ppsx"
246
+ "ppsx",
247
+ "tar.gz",
248
+ "reg",
249
+ "dat"
247
250
  ],
248
251
  "dependencies": {
249
- "@tokenizer/inflate": "^0.2.7",
250
- "strtok3": "^10.2.2",
251
- "token-types": "^6.0.0",
252
+ "@tokenizer/inflate": "^0.4.1",
253
+ "strtok3": "^10.3.4",
254
+ "token-types": "^6.1.1",
252
255
  "uint8array-extras": "^1.4.0"
253
256
  },
254
257
  "devDependencies": {
package/readme.md CHANGED
@@ -394,6 +394,7 @@ console.log(fileType);
394
394
 
395
395
  ### Available third-party file-type detectors
396
396
 
397
+ - [@file-type/av](https://github.com/Borewit/file-type-av): Improves detection of audio and video file formats, with accurate differentiation between the two
397
398
  - [@file-type/xml](https://github.com/Borewit/file-type-xml): Detects common XML file types, such as GLM, KML, MusicXML, RSS, SVG, and XHTML
398
399
 
399
400
  ### Detector execution flow
@@ -494,6 +495,7 @@ abortController.abort(); // Abort file-type reading from the Blob stream.
494
495
  - [`cr3`](https://fileinfo.com/extension/cr3) - Canon Raw image file (v3)
495
496
  - [`crx`](https://developer.chrome.com/extensions/crx) - Google Chrome extension
496
497
  - [`cur`](https://en.wikipedia.org/wiki/ICO_(file_format)) - Icon file
498
+ - [`dat`](https://en.wikipedia.org/wiki/Windows_Registry) - Windows registry hive file
497
499
  - [`dcm`](https://en.wikipedia.org/wiki/DICOM#Data_format) - DICOM Image File
498
500
  - [`deb`](https://en.wikipedia.org/wiki/Deb_(file_format)) - Debian package
499
501
  - [`dmg`](https://en.wikipedia.org/wiki/Apple_Disk_Image) - Apple Disk Image
@@ -596,6 +598,7 @@ abortController.abort(); // Abort file-type reading from the Blob stream.
596
598
  - [`qcp`](https://en.wikipedia.org/wiki/QCP) - Tagged and chunked data
597
599
  - [`raf`](https://en.wikipedia.org/wiki/Raw_image_format) - Fujifilm RAW image file
598
600
  - [`rar`](https://en.wikipedia.org/wiki/RAR_(file_format)) - Archive file
601
+ - [`reg`](https://en.wikipedia.org/wiki/Windows_Registry) - Windows registry (entries) file format
599
602
  - [`rm`](https://en.wikipedia.org/wiki/RealMedia) - RealMedia
600
603
  - [`rpm`](https://fileinfo.com/extension/rpm) - Red Hat Package Manager file
601
604
  - [`rtf`](https://en.wikipedia.org/wiki/Rich_Text_Format) - Rich Text Format
@@ -607,7 +610,8 @@ abortController.abort(); // Abort file-type reading from the Blob stream.
607
610
  - [`sqlite`](https://www.sqlite.org/fileformat2.html) - SQLite file
608
611
  - [`stl`](https://en.wikipedia.org/wiki/STL_(file_format)) - Standard Tesselated Geometry File Format (ASCII only)
609
612
  - [`swf`](https://en.wikipedia.org/wiki/SWF) - Adobe Flash Player file
610
- - [`tar`](https://en.wikipedia.org/wiki/Tar_(computing)#File_format) - Tarball archive file
613
+ - [`tar`](https://en.wikipedia.org/wiki/Tar_(computing)#File_format) - Tape archive or tarball
614
+ - [`tar.gz`](https://en.wikipedia.org/wiki/Gzip) - Gzipped tape archive (tarball)
611
615
  - [`tif`](https://en.wikipedia.org/wiki/Tagged_Image_File_Format) - Tagged Image file
612
616
  - [`ttc`](https://en.wikipedia.org/wiki/TrueType#TrueType_Collection) - TrueType Collection font
613
617
  - [`ttf`](https://en.wikipedia.org/wiki/TrueType) - TrueType font
package/supported.js CHANGED
@@ -174,6 +174,9 @@ export const extensions = [
174
174
  'rm',
175
175
  'ppsm',
176
176
  'ppsx',
177
+ 'tar.gz',
178
+ 'reg',
179
+ 'dat',
177
180
  ];
178
181
 
179
182
  export const mimeTypes = [
@@ -348,4 +351,6 @@ export const mimeTypes = [
348
351
  'application/vnd.ms-powerpoint.presentation.macroenabled.12',
349
352
  'application/java-archive',
350
353
  'application/vnd.rn-realmedia',
354
+ 'application/x-ms-regedit',
355
+ 'application/x-ft-windows-registry-hive',
351
356
  ];
package/util.js CHANGED
@@ -1,6 +1,26 @@
1
1
  import {StringType} from 'token-types';
2
2
 
3
- export function stringToBytes(string) {
3
+ export function stringToBytes(string, encoding) {
4
+ if (encoding === 'utf-16le') {
5
+ const bytes = [];
6
+ for (let index = 0; index < string.length; index++) {
7
+ const code = string.charCodeAt(index); // eslint-disable-line unicorn/prefer-code-point
8
+ bytes.push(code & 0xFF, (code >> 8) & 0xFF); // High byte
9
+ }
10
+
11
+ return bytes;
12
+ }
13
+
14
+ if (encoding === 'utf-16be') {
15
+ const bytes = [];
16
+ for (let index = 0; index < string.length; index++) {
17
+ const code = string.charCodeAt(index); // eslint-disable-line unicorn/prefer-code-point
18
+ bytes.push((code >> 8) & 0xFF, code & 0xFF); // Low byte
19
+ }
20
+
21
+ return bytes;
22
+ }
23
+
4
24
  return [...string].map(character => character.charCodeAt(0)); // eslint-disable-line unicorn/prefer-code-point
5
25
  }
6
26