file-type 21.3.4 → 22.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,120 @@
1
+ import * as Token from 'token-types';
2
+ import {getUintBE} from 'uint8array-extras';
3
+ import {
4
+ maximumUntrustedSkipSizeInBytes,
5
+ getSafeBound,
6
+ safeReadBuffer,
7
+ safeIgnore,
8
+ hasUnknownFileSize,
9
+ hasExceededUnknownSizeScanBudget,
10
+ } from '../parser.js';
11
+
12
// Longest `DocType` string (in bytes) that `detectEbml` will read; larger declared lengths abort detection.
+ const maximumEbmlDocumentTypeSizeInBytes = 64;
13
// Largest element payload that `detectEbml` will skip when the file size is unknown.
+ const maximumEbmlElementPayloadSizeInBytes = 1024 * 1024;
14
// Maximum number of header child elements `detectEbml` examines before giving up.
+ const maximumEbmlElementCount = 256;
15
+
16
+ export async function detectEbml(tokenizer) {
17
+ async function readField() {
18
+ const msb = await tokenizer.peekNumber(Token.UINT8);
19
+ let mask = 0x80;
20
+ let ic = 0; // 0 = A, 1 = B, 2 = C, 3 = D
21
+
22
+ while ((msb & mask) === 0 && mask !== 0) {
23
+ ++ic;
24
+ mask >>= 1;
25
+ }
26
+
27
+ const id = new Uint8Array(ic + 1);
28
+ await safeReadBuffer(tokenizer, id, undefined, {
29
+ maximumLength: id.length,
30
+ reason: 'EBML field',
31
+ });
32
+ return id;
33
+ }
34
+
35
+ async function readElement() {
36
+ const idField = await readField();
37
+ const lengthField = await readField();
38
+
39
+ lengthField[0] ^= 0x80 >> (lengthField.length - 1);
40
+ const nrLength = Math.min(6, lengthField.length); // JavaScript can max read 6 bytes integer
41
+
42
+ const idView = new DataView(idField.buffer);
43
+ const lengthView = new DataView(lengthField.buffer, lengthField.length - nrLength, nrLength);
44
+
45
+ return {
46
+ id: getUintBE(idView),
47
+ len: getUintBE(lengthView),
48
+ };
49
+ }
50
+
51
+ async function readChildren(children) {
52
+ let ebmlElementCount = 0;
53
+ while (children > 0) {
54
+ ebmlElementCount++;
55
+ if (ebmlElementCount > maximumEbmlElementCount) {
56
+ return;
57
+ }
58
+
59
+ if (hasExceededUnknownSizeScanBudget(tokenizer, ebmlScanStart, maximumUntrustedSkipSizeInBytes)) {
60
+ return;
61
+ }
62
+
63
+ const previousPosition = tokenizer.position;
64
+ const element = await readElement();
65
+
66
+ if (element.id === 0x42_82) {
67
+ // `DocType` is a short string ("webm", "matroska", ...), reject implausible lengths to avoid large allocations.
68
+ if (element.len > maximumEbmlDocumentTypeSizeInBytes) {
69
+ return;
70
+ }
71
+
72
+ const documentTypeLength = getSafeBound(element.len, maximumEbmlDocumentTypeSizeInBytes, 'EBML DocType');
73
+ const rawValue = await tokenizer.readToken(new Token.StringType(documentTypeLength));
74
+ return rawValue.replaceAll(/\0.*$/gv, ''); // Return DocType
75
+ }
76
+
77
+ if (
78
+ hasUnknownFileSize(tokenizer)
79
+ && (
80
+ !Number.isFinite(element.len)
81
+ || element.len < 0
82
+ || element.len > maximumEbmlElementPayloadSizeInBytes
83
+ )
84
+ ) {
85
+ return;
86
+ }
87
+
88
+ await safeIgnore(tokenizer, element.len, {
89
+ maximumLength: hasUnknownFileSize(tokenizer) ? maximumEbmlElementPayloadSizeInBytes : tokenizer.fileInfo.size,
90
+ reason: 'EBML payload',
91
+ }); // ignore payload
92
+ --children;
93
+
94
+ // Safeguard against malformed files: bail if the position did not advance.
95
+ if (tokenizer.position <= previousPosition) {
96
+ return;
97
+ }
98
+ }
99
+ }
100
+
101
+ const rootElement = await readElement();
102
+ const ebmlScanStart = tokenizer.position;
103
+ const documentType = await readChildren(rootElement.len);
104
+
105
+ switch (documentType) {
106
+ case 'webm':
107
+ return {
108
+ ext: 'webm',
109
+ mime: 'video/webm',
110
+ };
111
+
112
+ case 'matroska':
113
+ return {
114
+ ext: 'mkv',
115
+ mime: 'video/matroska',
116
+ };
117
+
118
+ default:
119
+ }
120
+ }
@@ -0,0 +1,123 @@
1
+ import * as Token from 'token-types';
2
+ import * as strtok3 from 'strtok3/core';
3
+ import {
4
+ ParserHardLimitError,
5
+ safeIgnore,
6
+ hasUnknownFileSize,
7
+ hasExceededUnknownSizeScanBudget,
8
+ } from '../parser.js';
9
+
10
// Maximum number of chunk headers `detectPng` reads before settling on plain PNG.
+ const maximumPngChunkCount = 512;
11
// Byte budget passed to `hasExceededUnknownSizeScanBudget` while scanning chunks.
+ const maximumPngStreamScanBudgetInBytes = 16 * 1024 * 1024;
12
// Largest chunk payload `detectPng` will skip when the file size is unknown.
+ const maximumPngChunkSizeInBytes = 1024 * 1024;
13
+
14
/**
 * Tell whether a PNG chunk type is ancillary, i.e. whether bit 5 (0x20) of the
 * first character's code point is set (a lowercase first letter for ASCII names).
 *
 * @param {string} type - Chunk type name, e.g. `tEXt` or `IHDR`.
 * @returns {boolean} `true` when the ancillary bit is set.
 */
function isPngAncillaryChunk(type) {
	const ancillaryBit = 0x20;
	const firstCodePoint = type.codePointAt(0);
	return (firstCodePoint & ancillaryBit) === ancillaryBit;
}
17
+
18
/**
 * Distinguish PNG from APNG by walking the chunk list that follows the PNG signature.
 *
 * APNG format (https://wiki.mozilla.org/APNG_Specification):
 * an "acTL" chunk appearing before the first "IDAT" chunk marks an animated PNG.
 *
 * @param {object} tokenizer - Tokenizer providing `ignore`, `readToken`, `position` and `fileInfo`.
 * @returns {Promise<{ext: string, mime: string} | undefined>} The PNG or APNG file type,
 * or `undefined` when the chunk stream is rejected as invalid.
 */
export async function detectPng(tokenizer) {
	const pngFileType = {
		ext: 'png',
		mime: 'image/png',
	};

	const apngFileType = {
		ext: 'apng',
		mime: 'image/apng',
	};

	// Skip the 8-byte PNG signature.
	await tokenizer.ignore(8);

	// Read the 4-byte signed big-endian length followed by the 4-character chunk type.
	async function readChunkHeader() {
		const length = await tokenizer.readToken(Token.INT32_BE);
		const type = await tokenizer.readToken(new Token.StringType(4, 'latin1'));
		return {length, type};
	}

	const isUnknownPngStream = hasUnknownFileSize(tokenizer);
	const scanStart = tokenizer.position;
	let hasSeenImageHeader = false;

	for (let chunkCount = 1; chunkCount <= maximumPngChunkCount; chunkCount++) {
		if (hasExceededUnknownSizeScanBudget(tokenizer, scanStart, maximumPngStreamScanBudgetInBytes)) {
			break;
		}

		const positionBefore = tokenizer.position;
		const {length, type} = await readChunkHeader();

		if (length < 0) {
			return; // Invalid chunk length
		}

		if (type === 'IDAT') {
			return pngFileType;
		}

		if (type === 'acTL') {
			return apngFileType;
		}

		if (type === 'IHDR') {
			// PNG requires the first real image header to be a 13-byte IHDR chunk.
			if (length !== 13) {
				return;
			}

			hasSeenImageHeader = true;
		} else if (!hasSeenImageHeader && type !== 'CgBI') {
			// Only a "CgBI" chunk is tolerated ahead of IHDR.
			return;
		}

		if (isUnknownPngStream && length > maximumPngChunkSizeInBytes) {
			// Avoid huge attacker-controlled skips when probing unknown-size streams.
			if (hasSeenImageHeader && isPngAncillaryChunk(type)) {
				return pngFileType;
			}

			return;
		}

		try {
			// Ignore chunk-data + CRC
			await safeIgnore(tokenizer, length + 4, {
				maximumLength: isUnknownPngStream ? maximumPngChunkSizeInBytes + 4 : tokenizer.fileInfo.size,
				reason: 'PNG chunk payload',
			});
		} catch (error) {
			if (isUnknownPngStream) {
				throw error;
			}

			// With a known file size, a chunk that cannot be skipped in full still counts as PNG.
			if (
				error instanceof ParserHardLimitError
				|| error instanceof strtok3.EndOfStreamError
			) {
				return pngFileType;
			}

			throw error;
		}

		// Safeguard against malformed files: bail if the position did not advance.
		if (tokenizer.position <= positionBefore) {
			break;
		}

		// Continue only while another 8-byte chunk header fits before the reported file size.
		// Written as a negated `<` so a non-numeric size ends the loop.
		if (!(tokenizer.position + 8 < tokenizer.fileInfo.size)) {
			break;
		}
	}

	return pngFileType;
}