file-type 21.3.4 → 22.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "file-type",
3
- "version": "21.3.4",
3
+ "version": "22.0.0",
4
4
  "description": "Detect the file type of a file, stream, or data",
5
5
  "license": "MIT",
6
6
  "repository": "sindresorhus/file-type",
@@ -12,41 +12,18 @@
12
12
  },
13
13
  "type": "module",
14
14
  "exports": {
15
- ".": {
16
- "node": {
17
- "types": "./index.d.ts",
18
- "import": "./index.js",
19
- "module-sync": "./index.js"
20
- },
21
- "default": {
22
- "types": "./core.d.ts",
23
- "import": "./core.js",
24
- "module-sync": "./core.js"
25
- }
26
- },
27
- "./core": {
28
- "types": "./core.d.ts",
29
- "default": "./core.js"
30
- },
31
- "./node": {
32
- "types": "./index.d.ts",
33
- "default": "./index.js"
34
- }
15
+ "types": "./source/index.d.ts",
16
+ "default": "./source/index.js"
35
17
  },
36
18
  "sideEffects": false,
37
19
  "engines": {
38
- "node": ">=20"
20
+ "node": ">=22"
39
21
  },
40
22
  "scripts": {
41
- "test": "xo && ava && tsd"
23
+ "test": "xo && ava && tsd --typings source/index.d.ts --files source/index.test-d.ts"
42
24
  },
43
25
  "files": [
44
- "index.js",
45
- "index.d.ts",
46
- "core.js",
47
- "core.d.ts",
48
- "supported.js",
49
- "util.js"
26
+ "source"
50
27
  ],
51
28
  "keywords": [
52
29
  "mime",
@@ -248,42 +225,48 @@
248
225
  "ppsx",
249
226
  "tar.gz",
250
227
  "reg",
251
- "dat"
228
+ "dat",
229
+ "key",
230
+ "numbers",
231
+ "pages"
252
232
  ],
253
233
  "dependencies": {
254
234
  "@tokenizer/inflate": "^0.4.1",
255
- "strtok3": "^10.3.4",
256
- "token-types": "^6.1.1",
257
- "uint8array-extras": "^1.4.0"
235
+ "strtok3": "^10.3.5",
236
+ "token-types": "^6.1.2",
237
+ "uint8array-extras": "^1.5.0"
258
238
  },
259
239
  "devDependencies": {
260
240
  "@tokenizer/token": "^0.3.0",
261
- "@types/node": "^25.3.3",
241
+ "@types/node": "^25.5.0",
262
242
  "ava": "^7.0.0",
263
243
  "commonmark": "^0.31.2",
264
244
  "get-stream": "^9.0.1",
265
- "noop-stream": "^1.0.0",
266
245
  "tsd": "^0.33.0",
267
- "xo": "^0.60.0"
246
+ "xo": "^2.0.2"
268
247
  },
269
- "xo": {
270
- "envs": [
271
- "node",
272
- "browser"
273
- ],
274
- "ignores": [
275
- "fixture"
276
- ],
277
- "rules": {
278
- "no-inner-declarations": "warn",
279
- "no-await-in-loop": "warn",
280
- "no-bitwise": "off",
281
- "@typescript-eslint/no-unsafe-assignment": "off",
282
- "unicorn/text-encoding-identifier-case": "off",
283
- "unicorn/switch-case-braces": "off",
284
- "unicorn/prefer-top-level-await": "off"
248
+ "xo": [
249
+ {
250
+ "ignores": [
251
+ "fixture/**"
252
+ ]
253
+ },
254
+ {
255
+ "rules": {
256
+ "no-inner-declarations": "warn",
257
+ "no-await-in-loop": "warn",
258
+ "no-bitwise": "off",
259
+ "@typescript-eslint/no-unsafe-assignment": "off",
260
+ "unicorn/text-encoding-identifier-case": "off",
261
+ "unicorn/switch-case-braces": "off",
262
+ "unicorn/prefer-top-level-await": "off",
263
+ "n/prefer-global/buffer": "off",
264
+ "@stylistic/curly-newline": "off",
265
+ "ava/no-useless-t-pass": "off",
266
+ "ava/no-conditional-assertion": "off"
267
+ }
285
268
  }
286
- },
269
+ ],
287
270
  "ava": {
288
271
  "serial": true
289
272
  }
package/readme.md CHANGED
@@ -10,9 +10,6 @@ This package is for detecting binary-based file formats, not text-based formats
10
10
 
11
11
  We accept contributions for commonly used modern file formats, not historical or obscure ones. Open an issue first for discussion.
12
12
 
13
- > [!IMPORTANT]
14
- > NO SECURITY REPORTS WILL BE ACCEPTED RIGHT NOW. I'm currently hardening the parser and all the low-quality AI-generated security reports is just a huge waste of time.
15
-
16
13
  ## Install
17
14
 
18
15
  ```sh
@@ -53,52 +50,6 @@ console.log(await fileTypeFromBuffer(buffer));
53
50
 
54
51
  Determine file type from a stream:
55
52
 
56
- ```js
57
- import fs from 'node:fs';
58
- import {fileTypeFromStream} from 'file-type';
59
-
60
- const stream = fs.createReadStream('Unicorn.mp4');
61
-
62
- console.log(await fileTypeFromStream(stream));
63
- //=> {ext: 'mp4', mime: 'video/mp4'}
64
- ```
65
-
66
- The stream method can also be used to read from a remote location:
67
-
68
- ```js
69
- import got from 'got';
70
- import {fileTypeFromStream} from 'file-type';
71
-
72
- const url = 'https://upload.wikimedia.org/wikipedia/en/a/a9/Example.jpg';
73
-
74
- const stream = got.stream(url);
75
-
76
- console.log(await fileTypeFromStream(stream));
77
- //=> {ext: 'jpg', mime: 'image/jpeg'}
78
- ```
79
-
80
- Another stream example:
81
-
82
- ```js
83
- import stream from 'node:stream';
84
- import fs from 'node:fs';
85
- import crypto from 'node:crypto';
86
- import {fileTypeStream} from 'file-type';
87
-
88
- const read = fs.createReadStream('encrypted.enc');
89
- const decipher = crypto.createDecipheriv(alg, key, iv);
90
-
91
- const streamWithFileType = await fileTypeStream(stream.pipeline(read, decipher));
92
-
93
- console.log(streamWithFileType.fileType);
94
- //=> {ext: 'mov', mime: 'video/quicktime'}
95
-
96
- const write = fs.createWriteStream(`decrypted.${streamWithFileType.fileType.ext}`);
97
- streamWithFileType.pipe(write);
98
- ```
99
-
100
- ### Browser
101
-
102
53
  ```js
103
54
  import {fileTypeFromStream} from 'file-type';
104
55
 
@@ -115,7 +66,7 @@ console.log(fileType);
115
66
 
116
67
  ### fileTypeFromBuffer(buffer, options)
117
68
 
118
- Detect the file type of a `Uint8Array`, or `ArrayBuffer`.
69
+ Detect the file type of a `Uint8Array` or `ArrayBuffer`.
119
70
 
120
71
  The file type is detected by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files) of the buffer.
121
72
 
@@ -138,11 +89,9 @@ A buffer representing file data. It works best if the buffer contains the entire
138
89
 
139
90
  Detect the file type of a file path.
140
91
 
141
- This is for Node.js only.
92
+ Only available in environments where `node:fs` is available, such as Node.js. To read from a [`File`](https://developer.mozilla.org/docs/Web/API/File), see [`fileTypeFromBlob()`](#filetypefromblobblob-options).
142
93
 
143
- To read from a [`File`](https://developer.mozilla.org/docs/Web/API/File), see [`fileTypeFromBlob()`](#filetypefromblobblob-options).
144
-
145
- The file type is detected by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files) of the buffer.
94
+ The file type is detected by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files) of the file.
146
95
 
147
96
  Returns a `Promise` for an object with the detected file type:
148
97
 
@@ -157,14 +106,10 @@ Type: `string`
157
106
 
158
107
  The file path to parse.
159
108
 
160
- ### fileTypeFromStream(stream)
109
+ ### fileTypeFromStream(stream, options)
161
110
 
162
111
  Detect the file type of a [web `ReadableStream`](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream).
163
112
 
164
- If the engine is Node.js, this may also be a [Node.js `stream.Readable`](https://nodejs.org/api/stream.html#stream_class_stream_readable).
165
-
166
- Direct support for Node.js streams will be dropped in the future, when Node.js streams can be converted to Web streams (see [`toWeb()`](https://nodejs.org/api/stream.html#streamreadabletowebstreamreadable-options)).
167
-
168
113
  The file type is detected by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files) of the stream.
169
114
 
170
115
  Returns a `Promise` for an object with the detected file type:
@@ -176,13 +121,16 @@ Or `undefined` when there is no match.
176
121
 
177
122
  #### stream
178
123
 
179
- Type: [Web `ReadableStream`](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream) or [Node.js `stream.Readable`](https://nodejs.org/api/stream.html#stream_class_stream_readable)
124
+ Type: [Web `ReadableStream`](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream)
180
125
 
181
126
  A readable stream representing file data.
182
127
 
128
+ > [!TIP]
129
+ > If you have a Node.js `stream.Readable`, convert it with [`Readable.toWeb()`](https://nodejs.org/api/stream.html#streamreadabletowebstreamreadable-options).
130
+
183
131
  ### fileTypeFromBlob(blob, options)
184
132
 
185
- Detect the file type of a [`Blob`](https://developer.mozilla.org/docs/Web/API/Blob),
133
+ Detect the file type of a [`Blob`](https://developer.mozilla.org/docs/Web/API/Blob).
186
134
 
187
135
  > [!TIP]
188
136
  > A [`File` object](https://developer.mozilla.org/docs/Web/API/File) is a `Blob` and can be passed in here.
@@ -251,7 +199,6 @@ import {S3Client} from '@aws-sdk/client-s3';
251
199
  import {makeChunkedTokenizerFromS3} from '@tokenizer/s3';
252
200
  import {fileTypeFromTokenizer} from 'file-type';
253
201
 
254
- // Initialize the S3 client
255
202
  // Initialize S3 client
256
203
  const s3 = new S3Client();
257
204
 
@@ -278,16 +225,16 @@ A file source implementing the [tokenizer interface](https://github.com/Borewit/
278
225
 
279
226
  Returns a `Promise` which resolves to the original readable stream argument, but with an added `fileType` property, which is an object like the one returned from `fileTypeFromFile()`.
280
227
 
281
- This method can be handy to put in between a stream, but it comes with a price.
228
+ This method can be handy to put in a stream pipeline, but it comes with a price.
282
229
  Internally, `fileTypeStream()` builds up a buffer of `sampleSize` bytes, which is used as a sample to determine the file type.
283
230
  The sample size impacts the file detection resolution.
284
231
  A smaller sample size lowers the probability of detecting the correct file type.
285
232
 
286
- **Note:** When using Node.js, a `stream.Readable` may be provided as well.
233
+ #### webStream
287
234
 
288
- #### readableStream
235
+ Type: [Web `ReadableStream`](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream)
289
236
 
290
- Type: [`stream.Readable`](https://nodejs.org/api/stream.html#stream_class_stream_readable)
237
+ The input stream.
291
238
 
292
239
  #### options
293
240
 
@@ -305,25 +252,18 @@ The sample size in bytes.
305
252
  #### Example
306
253
 
307
254
  ```js
308
- import got from 'got';
309
255
  import {fileTypeStream} from 'file-type';
310
256
 
311
257
  const url = 'https://upload.wikimedia.org/wikipedia/en/a/a9/Example.jpg';
312
258
 
313
- const stream1 = got.stream(url);
314
- const stream2 = await fileTypeStream(stream1, {sampleSize: 1024});
259
+ const response = await fetch(url);
260
+ const stream = await fileTypeStream(response.body, {sampleSize: 1024});
315
261
 
316
- if (stream2.fileType?.mime === 'image/jpeg') {
317
- // stream2 can be used to stream the JPEG image (from the very beginning of the stream)
262
+ if (stream.fileType?.mime === 'image/jpeg') {
263
+ // stream can be used to stream the JPEG image (from the very beginning of the stream)
318
264
  }
319
265
  ```
320
266
 
321
- #### readableStream
322
-
323
- Type: [`stream.Readable`](https://nodejs.org/api/stream.html#stream_class_stream_readable)
324
-
325
- The input stream.
326
-
327
267
  ### supportedExtensions
328
268
 
329
269
  Returns a `Set<string>` of supported file extensions.
@@ -363,15 +303,11 @@ A tolerance of 10 bytes covers most cases.
363
303
  Custom file type detectors are plugins designed to extend the default detection capabilities.
364
304
  They allow support for uncommon file types, non-binary formats, or customized detection behavior.
365
305
 
366
- Detectors can be added via the constructor options or by modifying `FileTypeParser#detectors` directly.
367
- Detectors provided through the constructor are executed before the default ones.
368
-
369
306
  Detectors can be added via the constructor options or by directly modifying `FileTypeParser#detectors`.
307
+ Detectors provided through the constructor are executed before the default ones.
370
308
 
371
309
  ### Example adding a detector
372
310
 
373
- For example:
374
-
375
311
  ```js
376
312
  import {FileTypeParser} from 'file-type';
377
313
  import {detectXml} from '@file-type/xml';
@@ -397,14 +333,14 @@ If a detector returns `undefined`, the following rules apply:
397
333
 
398
334
  ### Writing your own custom detector
399
335
 
400
- Below is an example of a custom detector array. This can be passed to the `FileTypeParser` via the `fileTypeOptions` argument.
336
+ Below is an example of a custom detector. This can be passed to the `FileTypeParser` via the `customDetectors` option.
401
337
 
402
338
  ```js
403
339
  import {FileTypeParser} from 'file-type';
404
340
 
405
341
  const unicornDetector = {
406
342
  id: 'unicorn', // May be used to recognize the detector in the detector list
407
- async detect(tokenizer) {
343
+ async detect(tokenizer) {
408
344
  const unicornHeader = [85, 78, 73, 67, 79, 82, 78]; // "UNICORN" in ASCII decimal
409
345
 
410
346
  const buffer = new Uint8Array(unicornHeader.length);
@@ -429,7 +365,10 @@ console.log(fileType); // {ext: 'unicorn', mime: 'application/unicorn'}
429
365
  @param fileType - The file type detected by standard or previous custom detectors, or `undefined` if no match is found.
430
366
  @returns The detected file type, or `undefined` if no match is found.
431
367
  */
432
- export type Detector = (tokenizer: ITokenizer, fileType?: FileTypeResult) => Promise<FileTypeResult | undefined>;
368
+ export type Detector = {
369
+ id: string;
370
+ detect: (tokenizer: ITokenizer, fileType?: FileTypeResult) => Promise<FileTypeResult | undefined>;
371
+ };
433
372
  ```
434
373
 
435
374
  ## Abort signal
@@ -439,7 +378,7 @@ Some async operations can be aborted by passing an [`AbortSignal`](https://devel
439
378
  ```js
440
379
  import {FileTypeParser} from 'file-type';
441
380
 
442
- const abortController = new AbortController()
381
+ const abortController = new AbortController();
443
382
 
444
383
  const parser = new FileTypeParser({signal: abortController.signal});
445
384
 
@@ -450,6 +389,8 @@ abortController.abort(); // Abort file-type reading from the Blob stream.
450
389
 
451
390
  ## Supported file types
452
391
 
392
+ MIME media subtypes prefixed with `x-ft-` are custom and defined by us. They are neither formally registered with IANA nor based on any informal conventions.
393
+
453
394
  - [`3g2`](https://en.wikipedia.org/wiki/3GP_and_3G2#3G2) - Multimedia container format defined by the 3GPP2 for 3G CDMA2000 multimedia services
454
395
  - [`3gp`](https://en.wikipedia.org/wiki/3GP_and_3G2#3GP) - Multimedia container format defined by the Third Generation Partnership Project (3GPP) for 3G UMTS multimedia services
455
396
  - [`3mf`](https://en.wikipedia.org/wiki/3D_Manufacturing_Format) - 3D Manufacturing Format
@@ -531,6 +472,7 @@ abortController.abort(); // Abort file-type reading from the Blob stream.
531
472
  - [`jpx`](https://en.wikipedia.org/wiki/JPEG_2000) - JPEG 2000
532
473
  - [`jxl`](https://en.wikipedia.org/wiki/JPEG_XL) - JPEG XL image format
533
474
  - [`jxr`](https://en.wikipedia.org/wiki/JPEG_XR) - Joint Photographic Experts Group extended range
475
+ - [`key`](https://en.wikipedia.org/wiki/Keynote_(presentation_software)) - Apple Keynote presentation
534
476
  - [`ktx`](https://www.khronos.org/opengles/sdk/tools/KTX/file_format_spec/) - OpenGL and OpenGL ES textures
535
477
  - [`lnk`](https://en.wikipedia.org/wiki/Shortcut_%28computing%29#Microsoft_Windows) - Microsoft Windows file shortcut
536
478
  - [`lz`](https://en.wikipedia.org/wiki/Lzip) - Archive file
@@ -557,6 +499,7 @@ abortController.abort(); // Abort file-type reading from the Blob stream.
557
499
  - [`mxf`](https://en.wikipedia.org/wiki/Material_Exchange_Format) - Material Exchange Format
558
500
  - [`nef`](https://www.nikonusa.com/en/learn-and-explore/a/products-and-innovation/nikon-electronic-format-nef.html) - Nikon Electronic Format image file
559
501
  - [`nes`](https://fileinfo.com/extension/nes) - Nintendo NES ROM
502
+ - [`numbers`](https://en.wikipedia.org/wiki/Numbers_(spreadsheet)) - Apple Numbers spreadsheet
560
503
  - [`odg`](https://en.wikipedia.org/wiki/OpenDocument) - OpenDocument for drawing
561
504
  - [`odp`](https://en.wikipedia.org/wiki/OpenDocument) - OpenDocument for presentations
562
505
  - [`ods`](https://en.wikipedia.org/wiki/OpenDocument) - OpenDocument for spreadsheets
@@ -573,6 +516,7 @@ abortController.abort(); // Abort file-type reading from the Blob stream.
573
516
  - [`otp`](https://en.wikipedia.org/wiki/OpenDocument_technical_specification#Templates) - OpenDocument template for presentations
574
517
  - [`ots`](https://en.wikipedia.org/wiki/OpenDocument_technical_specification#Templates) - OpenDocument template for spreadsheets
575
518
  - [`ott`](https://en.wikipedia.org/wiki/OpenDocument_technical_specification#Templates) - OpenDocument template for word processing
519
+ - [`pages`](https://en.wikipedia.org/wiki/Pages_(word_processor)) - Apple Pages document
576
520
  - [`parquet`](https://en.wikipedia.org/wiki/Apache_Parquet) - Apache Parquet
577
521
  - [`pcap`](https://wiki.wireshark.org/Development/LibpcapFileFormat) - Libpcap File Format
578
522
  - [`pdf`](https://en.wikipedia.org/wiki/Portable_Document_Format) - Portable Document Format
@@ -584,7 +528,7 @@ abortController.abort(); // Abort file-type reading from the Blob stream.
584
528
  - [`ppsx`](https://en.wikipedia.org/wiki/List_of_Microsoft_Office_filename_extensions#PowerPoint) - Office PowerPoint 2007 slide show
585
529
  - [`pptm`](https://en.wikipedia.org/wiki/List_of_Microsoft_Office_filename_extensions) - Microsoft PowerPoint macro-enabled document
586
530
  - [`pptx`](https://en.wikipedia.org/wiki/Office_Open_XML) - Microsoft PowerPoint document
587
- - [`ps`](https://en.wikipedia.org/wiki/Postscript) - Postscript
531
+ - [`ps`](https://en.wikipedia.org/wiki/Postscript) - PostScript
588
532
  - [`psd`](https://en.wikipedia.org/wiki/Adobe_Photoshop#File_format) - Adobe Photoshop document
589
533
  - [`pst`](https://en.wikipedia.org/wiki/Personal_Storage_Table) - Personal Storage Table file
590
534
  - [`qcp`](https://en.wikipedia.org/wiki/QCP) - Tagged and chunked data
@@ -601,7 +545,7 @@ abortController.abort(); // Abort file-type reading from the Blob stream.
601
545
  - [`skp`](https://en.wikipedia.org/wiki/SketchUp) - SketchUp
602
546
  - [`spx`](https://en.wikipedia.org/wiki/Ogg) - Audio file
603
547
  - [`sqlite`](https://www.sqlite.org/fileformat2.html) - SQLite file
604
- - [`stl`](https://en.wikipedia.org/wiki/STL_(file_format)) - Standard Tesselated Geometry File Format (ASCII only)
548
+ - [`stl`](https://en.wikipedia.org/wiki/STL_(file_format)) - Standard Tessellated Geometry File Format (ASCII only)
605
549
  - [`swf`](https://en.wikipedia.org/wiki/SWF) - Adobe Flash Player file
606
550
  - [`tar`](https://en.wikipedia.org/wiki/Tar_(computing)#File_format) - Tape archive or tarball
607
551
  - [`tar.gz`](https://en.wikipedia.org/wiki/Gzip) - Gzipped tape archive (tarball)
@@ -633,29 +577,15 @@ abortController.abort(); // Abort file-type reading from the Blob stream.
633
577
 
634
578
  *[Pull requests](.github/pull_request_template.md) are welcome for additional commonly used file types.*
635
579
 
636
- The following file types will not be accepted, but most of them are supported by [third-party detector](#available-third-party-file-type-detectors)
580
+ The following file types will not be accepted, but most of them are supported by [third-party detectors](#available-third-party-file-type-detectors).
637
581
  - [MS-CFB: Microsoft Compound File Binary File Format based formats](https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-cfb/53989ce4-7b05-4f8d-829b-d08d6148375b)
638
582
  - `.doc` - Microsoft Word 97-2003 Document
639
583
  - `.xls` - Microsoft Excel 97-2003 Document
640
- - `.ppt` - Microsoft PowerPoint97-2003 Document
584
+ - `.ppt` - Microsoft PowerPoint 97-2003 Document
641
585
  - `.msi` - Microsoft Windows Installer
642
586
  - `.csv` - [Reason.](https://github.com/sindresorhus/file-type/issues/264#issuecomment-568439196)
643
587
  - `.svg`
644
588
 
645
- #### tokenizer
646
-
647
- Type: [`ITokenizer`](https://github.com/Borewit/strtok3#tokenizer)
648
-
649
- Usable as source of the examined file.
650
-
651
- #### fileType
652
-
653
- Type: `FileTypeResult`
654
-
655
- An object having an `ext` (extension) and `mime` (mime type) property.
656
-
657
- Detected by the standard detections or a previous custom detection. Undefined if no matching fileTypeResult could be found.
658
-
659
589
  ## Related
660
590
 
661
591
  - [file-type-cli](https://github.com/sindresorhus/file-type-cli) - CLI for this module
@@ -0,0 +1,127 @@
1
+ import * as Token from 'token-types';
2
+ import * as strtok3 from 'strtok3/core';
3
+ import {
4
+ maximumUntrustedSkipSizeInBytes,
5
+ ParserHardLimitError,
6
+ checkBytes,
7
+ safeReadBuffer,
8
+ safeIgnore,
9
+ hasUnknownFileSize,
10
+ hasExceededUnknownSizeScanBudget,
11
+ } from '../parser.js';
12
+
13
// Upper bound on the number of header objects examined before the scan stops.
const maximumAsfHeaderObjectCount = 512;
// Largest header-object payload (1 MiB) that is skipped when the file size is unknown.
const maximumAsfHeaderPayloadSizeInBytes = 1024 * 1024;

/**
Detect an ASF container and, when a Stream Properties Object is present, whether it describes audio or video.

The scan skips 30 bytes from the tokenizer's current position and then walks header objects, each introduced by a 16-byte GUID followed by a 64-bit little-endian size.

`EndOfStreamError` and `ParserHardLimitError` raised while scanning are swallowed; they mark the data as malformed only when the file size is unknown. Any other error is rethrown.

@param tokenizer - The tokenizer to read from.
@returns The audio or video ASF type when a matching stream type GUID is found, the generic ASF type otherwise, or `undefined` when the header is considered malformed.
*/
export async function detectAsf(tokenizer) {
	// Reads one header-object descriptor: GUID bytes first, then the object size.
	const readObjectHeader = async () => {
		const guidBytes = new Uint8Array(16);
		await safeReadBuffer(tokenizer, guidBytes, undefined, {
			maximumLength: guidBytes.length,
			reason: 'ASF header GUID',
		});
		const objectSize = Number(await tokenizer.readToken(Token.UINT64_LE));
		return {id: guidBytes, size: objectSize};
	};

	let malformed = false;

	try {
		await safeIgnore(tokenizer, 30, {
			maximumLength: 30,
			reason: 'ASF header prelude',
		});

		const sizeIsUnknown = hasUnknownFileSize(tokenizer);
		const scanStart = tokenizer.position;

		// `objectNumber` is the 1-based index of the header object about to be read.
		for (let objectNumber = 1; tokenizer.position + 24 < tokenizer.fileInfo.size; objectNumber++) {
			if (objectNumber > maximumAsfHeaderObjectCount) {
				break;
			}

			if (hasExceededUnknownSizeScanBudget(tokenizer, scanStart, maximumUntrustedSkipSizeInBytes)) {
				break;
			}

			const positionBeforeObject = tokenizer.position;
			const objectHeader = await readObjectHeader();

			// The declared size includes the 24 bytes of GUID + size that were just read.
			let remainingPayload = objectHeader.size - 24;
			const isPlausiblePayload = Number.isFinite(remainingPayload) && remainingPayload >= 0;
			if (!isPlausiblePayload) {
				malformed = true;
				break;
			}

			// Stream-Properties-Object (B7DC0791-A9B7-11CF-8EE6-00C00C205365)
			if (checkBytes(objectHeader.id, [0x91, 0x07, 0xDC, 0xB7, 0xB7, 0xA9, 0xCF, 0x11, 0x8E, 0xE6, 0x00, 0xC0, 0x0C, 0x20, 0x53, 0x65])) {
				const streamTypeGuid = new Uint8Array(16);
				// NOTE(review): `safeReadBuffer` presumably returns the number of bytes read — confirm in `../parser.js`.
				remainingPayload -= await safeReadBuffer(tokenizer, streamTypeGuid, undefined, {
					maximumLength: streamTypeGuid.length,
					reason: 'ASF stream type GUID',
				});

				// Audio stream type
				if (checkBytes(streamTypeGuid, [0x40, 0x9E, 0x69, 0xF8, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
					return {
						ext: 'asf',
						mime: 'audio/x-ms-asf',
					};
				}

				// Video stream type
				if (checkBytes(streamTypeGuid, [0xC0, 0xEF, 0x19, 0xBC, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
					return {
						ext: 'asf',
						mime: 'video/x-ms-asf',
					};
				}

				// Neither audio nor video: stop scanning and fall back to the generic type.
				break;
			}

			// Without a known file size, refuse to skip an oversized payload.
			if (sizeIsUnknown && remainingPayload > maximumAsfHeaderPayloadSizeInBytes) {
				malformed = true;
				break;
			}

			await safeIgnore(tokenizer, remainingPayload, {
				maximumLength: sizeIsUnknown ? maximumAsfHeaderPayloadSizeInBytes : tokenizer.fileInfo.size,
				reason: 'ASF header payload',
			});

			// Guard against malformed files: stop if the position did not advance.
			if (tokenizer.position <= positionBeforeObject) {
				malformed = true;
				break;
			}
		}
	} catch (error) {
		const isExpectedParserError = error instanceof strtok3.EndOfStreamError
			|| error instanceof ParserHardLimitError;
		if (!isExpectedParserError) {
			throw error;
		}

		if (hasUnknownFileSize(tokenizer)) {
			malformed = true;
		}
	}

	// Default to the generic ASF type unless the header was judged malformed.
	return malformed
		? undefined
		: {
			ext: 'asf',
			mime: 'application/vnd.ms-asf',
		};
}
@@ -0,0 +1,120 @@
1
+ import * as Token from 'token-types';
2
+ import {getUintBE} from 'uint8array-extras';
3
+ import {
4
+ maximumUntrustedSkipSizeInBytes,
5
+ getSafeBound,
6
+ safeReadBuffer,
7
+ safeIgnore,
8
+ hasUnknownFileSize,
9
+ hasExceededUnknownSizeScanBudget,
10
+ } from '../parser.js';
11
+
12
+ const maximumEbmlDocumentTypeSizeInBytes = 64;
13
+ const maximumEbmlElementPayloadSizeInBytes = 1024 * 1024;
14
+ const maximumEbmlElementCount = 256;
15
+
16
+ export async function detectEbml(tokenizer) {
17
+ async function readField() {
18
+ const msb = await tokenizer.peekNumber(Token.UINT8);
19
+ let mask = 0x80;
20
+ let ic = 0; // 0 = A, 1 = B, 2 = C, 3 = D
21
+
22
+ while ((msb & mask) === 0 && mask !== 0) {
23
+ ++ic;
24
+ mask >>= 1;
25
+ }
26
+
27
+ const id = new Uint8Array(ic + 1);
28
+ await safeReadBuffer(tokenizer, id, undefined, {
29
+ maximumLength: id.length,
30
+ reason: 'EBML field',
31
+ });
32
+ return id;
33
+ }
34
+
35
+ async function readElement() {
36
+ const idField = await readField();
37
+ const lengthField = await readField();
38
+
39
+ lengthField[0] ^= 0x80 >> (lengthField.length - 1);
40
+ const nrLength = Math.min(6, lengthField.length); // JavaScript can max read 6 bytes integer
41
+
42
+ const idView = new DataView(idField.buffer);
43
+ const lengthView = new DataView(lengthField.buffer, lengthField.length - nrLength, nrLength);
44
+
45
+ return {
46
+ id: getUintBE(idView),
47
+ len: getUintBE(lengthView),
48
+ };
49
+ }
50
+
51
+ async function readChildren(children) {
52
+ let ebmlElementCount = 0;
53
+ while (children > 0) {
54
+ ebmlElementCount++;
55
+ if (ebmlElementCount > maximumEbmlElementCount) {
56
+ return;
57
+ }
58
+
59
+ if (hasExceededUnknownSizeScanBudget(tokenizer, ebmlScanStart, maximumUntrustedSkipSizeInBytes)) {
60
+ return;
61
+ }
62
+
63
+ const previousPosition = tokenizer.position;
64
+ const element = await readElement();
65
+
66
+ if (element.id === 0x42_82) {
67
+ // `DocType` is a short string ("webm", "matroska", ...), reject implausible lengths to avoid large allocations.
68
+ if (element.len > maximumEbmlDocumentTypeSizeInBytes) {
69
+ return;
70
+ }
71
+
72
+ const documentTypeLength = getSafeBound(element.len, maximumEbmlDocumentTypeSizeInBytes, 'EBML DocType');
73
+ const rawValue = await tokenizer.readToken(new Token.StringType(documentTypeLength));
74
+ return rawValue.replaceAll(/\0.*$/gv, ''); // Return DocType
75
+ }
76
+
77
+ if (
78
+ hasUnknownFileSize(tokenizer)
79
+ && (
80
+ !Number.isFinite(element.len)
81
+ || element.len < 0
82
+ || element.len > maximumEbmlElementPayloadSizeInBytes
83
+ )
84
+ ) {
85
+ return;
86
+ }
87
+
88
+ await safeIgnore(tokenizer, element.len, {
89
+ maximumLength: hasUnknownFileSize(tokenizer) ? maximumEbmlElementPayloadSizeInBytes : tokenizer.fileInfo.size,
90
+ reason: 'EBML payload',
91
+ }); // ignore payload
92
+ --children;
93
+
94
+ // Safeguard against malformed files: bail if the position did not advance.
95
+ if (tokenizer.position <= previousPosition) {
96
+ return;
97
+ }
98
+ }
99
+ }
100
+
101
+ const rootElement = await readElement();
102
+ const ebmlScanStart = tokenizer.position;
103
+ const documentType = await readChildren(rootElement.len);
104
+
105
+ switch (documentType) {
106
+ case 'webm':
107
+ return {
108
+ ext: 'webm',
109
+ mime: 'video/webm',
110
+ };
111
+
112
+ case 'matroska':
113
+ return {
114
+ ext: 'mkv',
115
+ mime: 'video/matroska',
116
+ };
117
+
118
+ default:
119
+ }
120
+ }