llonebot-dist 7.11.0 → 7.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/llbot.js +38149 -71365
  2. package/llbot.js.map +1 -1
  3. package/node_modules/file-type/package.json +36 -53
  4. package/node_modules/file-type/readme.md +35 -102
  5. package/node_modules/file-type/source/detectors/asf.js +127 -0
  6. package/node_modules/file-type/source/detectors/ebml.js +120 -0
  7. package/node_modules/file-type/source/detectors/png.js +123 -0
  8. package/node_modules/file-type/source/detectors/zip.js +643 -0
  9. package/node_modules/file-type/{core.d.ts → source/index.d.ts} +49 -22
  10. package/node_modules/file-type/{core.js → source/index.js} +253 -1056
  11. package/node_modules/file-type/source/index.test-d.ts +53 -0
  12. package/node_modules/file-type/source/parser.js +65 -0
  13. package/node_modules/file-type/{supported.js → source/supported.js} +14 -6
  14. package/node_modules/file-type/{util.js → source/tokens.js} +2 -2
  15. package/node_modules/strtok3/LICENSE.txt +1 -1
  16. package/node_modules/strtok3/README.md +2 -2
  17. package/node_modules/strtok3/lib/AbstractTokenizer.d.ts +1 -1
  18. package/node_modules/strtok3/lib/AbstractTokenizer.js +4 -1
  19. package/node_modules/strtok3/lib/ReadStreamTokenizer.d.ts +3 -0
  20. package/node_modules/strtok3/lib/ReadStreamTokenizer.js +6 -1
  21. package/node_modules/strtok3/lib/stream/WebStreamByobReader.js +1 -1
  22. package/node_modules/strtok3/package.json +8 -8
  23. package/node_modules/ws/index.js +15 -6
  24. package/node_modules/ws/lib/permessage-deflate.js +6 -6
  25. package/node_modules/ws/lib/websocket-server.js +5 -5
  26. package/node_modules/ws/lib/websocket.js +6 -6
  27. package/node_modules/ws/package.json +4 -3
  28. package/node_modules/ws/wrapper.mjs +14 -1
  29. package/package.json +1 -1
  30. package/webui/assets/index-BkP41fNe.js +37 -0
  31. package/webui/assets/{index-B6wi2XZx.css → index-DsGxgscs.css} +1 -1
  32. package/webui/index.html +2 -2
  33. package/更新日志.txt +42 -2
  34. package/node_modules/file-type/index.d.ts +0 -98
  35. package/node_modules/file-type/index.js +0 -110
  36. package/webui/assets/index-DwQjH3d6.js +0 -37
@@ -4,278 +4,91 @@ Primary entry point, Node.js specific entry point is index.js
4
4
 
5
5
  import * as Token from 'token-types';
6
6
  import * as strtok3 from 'strtok3/core';
7
- import {ZipHandler, GzipHandler} from '@tokenizer/inflate';
8
- import {getUintBE} from 'uint8array-extras';
7
+ import {GzipHandler} from '@tokenizer/inflate';
8
+ import {concatUint8Arrays} from 'uint8array-extras';
9
9
  import {
10
10
  stringToBytes,
11
11
  tarHeaderChecksumMatches,
12
12
  uint32SyncSafeToken,
13
- } from './util.js';
13
+ } from './tokens.js';
14
14
  import {extensions, mimeTypes} from './supported.js';
15
+ import {
16
+ maximumUntrustedSkipSizeInBytes,
17
+ ParserHardLimitError,
18
+ safeIgnore,
19
+ checkBytes,
20
+ hasUnknownFileSize,
21
+ } from './parser.js';
22
+ import {detectZip} from './detectors/zip.js';
23
+ import {detectEbml} from './detectors/ebml.js';
24
+ import {detectPng} from './detectors/png.js';
25
+ import {detectAsf} from './detectors/asf.js';
15
26
 
16
27
  export const reasonableDetectionSizeInBytes = 4100; // A fair amount of file-types are detectable within this range.
17
- // Keep defensive limits small enough to avoid accidental memory spikes from untrusted inputs.
18
28
  const maximumMpegOffsetTolerance = reasonableDetectionSizeInBytes - 2;
19
- const maximumZipEntrySizeInBytes = 1024 * 1024;
20
- const maximumZipEntryCount = 1024;
21
- const maximumZipBufferedReadSizeInBytes = (2 ** 31) - 1;
22
- const maximumUntrustedSkipSizeInBytes = 16 * 1024 * 1024;
23
- const maximumZipTextEntrySizeInBytes = maximumZipEntrySizeInBytes;
24
29
  const maximumNestedGzipDetectionSizeInBytes = maximumUntrustedSkipSizeInBytes;
25
30
  const maximumNestedGzipProbeDepth = 1;
31
+ const unknownSizeGzipProbeTimeoutInMilliseconds = 100;
26
32
  const maximumId3HeaderSizeInBytes = maximumUntrustedSkipSizeInBytes;
27
- const maximumEbmlDocumentTypeSizeInBytes = 64;
28
- const maximumEbmlElementPayloadSizeInBytes = maximumUntrustedSkipSizeInBytes;
29
- const maximumEbmlElementCount = 256;
30
- const maximumPngChunkCount = 512;
31
- const maximumAsfHeaderObjectCount = 512;
32
33
  const maximumTiffTagCount = 512;
33
34
  const maximumDetectionReentryCount = 256;
34
- const maximumPngChunkSizeInBytes = maximumUntrustedSkipSizeInBytes;
35
+ const maximumTiffStreamIfdOffsetInBytes = 1024 * 1024;
35
36
  const maximumTiffIfdOffsetInBytes = maximumUntrustedSkipSizeInBytes;
36
- const recoverableZipErrorMessages = new Set([
37
- 'Unexpected signature',
38
- 'Encrypted ZIP',
39
- 'Expected Central-File-Header signature',
40
- ]);
41
- const recoverableZipErrorMessagePrefixes = [
42
- 'ZIP entry count exceeds ',
43
- 'Unsupported ZIP compression method:',
44
- 'ZIP entry compressed data exceeds ',
45
- 'ZIP entry decompressed data exceeds ',
46
- ];
47
- const recoverableZipErrorCodes = new Set([
48
- 'Z_BUF_ERROR',
49
- 'Z_DATA_ERROR',
50
- 'ERR_INVALID_STATE',
51
- ]);
52
-
53
- class ParserHardLimitError extends Error {}
54
-
55
- function getSafeBound(value, maximum, reason) {
56
- if (
57
- !Number.isFinite(value)
58
- || value < 0
59
- || value > maximum
60
- ) {
61
- throw new ParserHardLimitError(`${reason} has invalid size ${value} (maximum ${maximum} bytes)`);
62
- }
63
-
64
- return value;
65
- }
66
-
67
- async function safeIgnore(tokenizer, length, {maximumLength = maximumUntrustedSkipSizeInBytes, reason = 'skip'} = {}) {
68
- const safeLength = getSafeBound(length, maximumLength, reason);
69
- await tokenizer.ignore(safeLength);
70
- }
71
-
72
- async function safeReadBuffer(tokenizer, buffer, options, {maximumLength = buffer.length, reason = 'read'} = {}) {
73
- const length = options?.length ?? buffer.length;
74
- const safeLength = getSafeBound(length, maximumLength, reason);
75
- return tokenizer.readBuffer(buffer, {
76
- ...options,
77
- length: safeLength,
78
- });
79
- }
80
-
81
- async function decompressDeflateRawWithLimit(data, {maximumLength = maximumZipEntrySizeInBytes} = {}) {
82
- const input = new ReadableStream({
83
- start(controller) {
84
- controller.enqueue(data);
85
- controller.close();
86
- },
87
- });
88
- const output = input.pipeThrough(new DecompressionStream('deflate-raw'));
89
- const reader = output.getReader();
90
- const chunks = [];
91
- let totalLength = 0;
92
-
93
- try {
94
- for (;;) {
95
- const {done, value} = await reader.read();
96
- if (done) {
97
- break;
98
- }
99
-
100
- totalLength += value.length;
101
- if (totalLength > maximumLength) {
102
- await reader.cancel();
103
- throw new Error(`ZIP entry decompressed data exceeds ${maximumLength} bytes`);
104
- }
105
37
 
106
- chunks.push(value);
107
- }
108
- } finally {
109
- reader.releaseLock();
110
- }
111
-
112
- const uncompressedData = new Uint8Array(totalLength);
113
- let offset = 0;
114
- for (const chunk of chunks) {
115
- uncompressedData.set(chunk, offset);
116
- offset += chunk.length;
38
+ export function normalizeSampleSize(sampleSize) {
39
+ // `sampleSize` is an explicit caller-controlled tuning knob, not untrusted file input.
40
+ // Preserve valid caller-requested probe depth here; applications must bound attacker-derived option values themselves.
41
+ if (!Number.isFinite(sampleSize)) {
42
+ return reasonableDetectionSizeInBytes;
117
43
  }
118
44
 
119
- return uncompressedData;
45
+ return Math.max(1, Math.trunc(sampleSize));
120
46
  }
121
47
 
122
- const zipDataDescriptorSignature = 0x08_07_4B_50;
123
- const zipDataDescriptorLengthInBytes = 16;
124
- const zipDataDescriptorOverlapLengthInBytes = zipDataDescriptorLengthInBytes - 1;
125
-
126
- function findZipDataDescriptorOffset(buffer, bytesConsumed) {
127
- if (buffer.length < zipDataDescriptorLengthInBytes) {
128
- return -1;
129
- }
130
-
131
- const lastPossibleDescriptorOffset = buffer.length - zipDataDescriptorLengthInBytes;
132
- for (let index = 0; index <= lastPossibleDescriptorOffset; index++) {
133
- if (
134
- Token.UINT32_LE.get(buffer, index) === zipDataDescriptorSignature
135
- && Token.UINT32_LE.get(buffer, index + 8) === bytesConsumed + index
136
- ) {
137
- return index;
138
- }
48
+ function normalizeMpegOffsetTolerance(mpegOffsetTolerance) {
49
+ // This value controls scan depth and therefore worst-case CPU work.
50
+ if (!Number.isFinite(mpegOffsetTolerance)) {
51
+ return 0;
139
52
  }
140
53
 
141
- return -1;
54
+ return Math.max(0, Math.min(maximumMpegOffsetTolerance, Math.trunc(mpegOffsetTolerance)));
142
55
  }
143
56
 
144
- function mergeByteChunks(chunks, totalLength) {
145
- const merged = new Uint8Array(totalLength);
146
- let offset = 0;
147
-
148
- for (const chunk of chunks) {
149
- merged.set(chunk, offset);
150
- offset += chunk.length;
57
+ function getKnownFileSizeOrMaximum(fileSize) {
58
+ if (!Number.isFinite(fileSize)) {
59
+ return Number.MAX_SAFE_INTEGER;
151
60
  }
152
61
 
153
- return merged;
62
+ return Math.max(0, fileSize);
154
63
  }
155
64
 
156
- async function readZipDataDescriptorEntryWithLimit(zipHandler, {shouldBuffer, maximumLength = maximumZipEntrySizeInBytes} = {}) {
157
- const {syncBuffer} = zipHandler;
158
- const {length: syncBufferLength} = syncBuffer;
159
- const chunks = [];
160
- let bytesConsumed = 0;
161
-
162
- for (;;) {
163
- const length = await zipHandler.tokenizer.peekBuffer(syncBuffer, {mayBeLess: true});
164
- const dataDescriptorOffset = findZipDataDescriptorOffset(syncBuffer.subarray(0, length), bytesConsumed);
165
- const retainedLength = dataDescriptorOffset >= 0
166
- ? 0
167
- : (
168
- length === syncBufferLength
169
- ? Math.min(zipDataDescriptorOverlapLengthInBytes, length - 1)
170
- : 0
171
- );
172
- const chunkLength = dataDescriptorOffset >= 0 ? dataDescriptorOffset : length - retainedLength;
173
-
174
- if (chunkLength === 0) {
175
- break;
176
- }
177
-
178
- bytesConsumed += chunkLength;
179
- if (bytesConsumed > maximumLength) {
180
- throw new Error(`ZIP entry compressed data exceeds ${maximumLength} bytes`);
181
- }
182
-
183
- if (shouldBuffer) {
184
- const data = new Uint8Array(chunkLength);
185
- await zipHandler.tokenizer.readBuffer(data);
186
- chunks.push(data);
187
- } else {
188
- await zipHandler.tokenizer.ignore(chunkLength);
189
- }
190
-
191
- if (dataDescriptorOffset >= 0) {
192
- break;
193
- }
194
- }
195
-
196
- if (!shouldBuffer) {
197
- return;
198
- }
199
-
200
- return mergeByteChunks(chunks, bytesConsumed);
65
+ // Wrap stream in an identity TransformStream to avoid BYOB readers.
66
+ // Node.js has a bug where calling controller.close() inside a BYOB stream's
67
+ // pull() callback does not resolve pending reader.read() calls, causing
68
+ // permanent hangs on streams shorter than the requested read size.
69
+ // Using a default (non-BYOB) reader via TransformStream avoids this.
70
+ function toDefaultStream(stream) {
71
+ return stream.pipeThrough(new TransformStream());
201
72
  }
202
73
 
203
- async function readZipEntryData(zipHandler, zipHeader, {shouldBuffer} = {}) {
204
- if (
205
- zipHeader.dataDescriptor
206
- && zipHeader.compressedSize === 0
207
- ) {
208
- return readZipDataDescriptorEntryWithLimit(zipHandler, {shouldBuffer});
209
- }
210
-
211
- if (!shouldBuffer) {
212
- await zipHandler.tokenizer.ignore(zipHeader.compressedSize);
213
- return;
214
- }
215
-
216
- const maximumLength = getMaximumZipBufferedReadLength(zipHandler.tokenizer);
217
- if (
218
- !Number.isFinite(zipHeader.compressedSize)
219
- || zipHeader.compressedSize < 0
220
- || zipHeader.compressedSize > maximumLength
221
- ) {
222
- throw new Error(`ZIP entry compressed data exceeds ${maximumLength} bytes`);
74
+ function readWithSignal(reader, signal) {
75
+ if (signal === undefined) {
76
+ return reader.read();
223
77
  }
224
78
 
225
- const fileData = new Uint8Array(zipHeader.compressedSize);
226
- await zipHandler.tokenizer.readBuffer(fileData);
227
- return fileData;
79
+ signal.throwIfAborted();
80
+
81
+ return Promise.race([
82
+ reader.read(),
83
+ new Promise((_resolve, reject) => {
84
+ signal.addEventListener('abort', () => {
85
+ reject(signal.reason);
86
+ reader.cancel(signal.reason).catch(() => {});
87
+ }, {once: true});
88
+ }),
89
+ ]);
228
90
  }
229
91
 
230
- // Override the default inflate to enforce decompression size limits, since @tokenizer/inflate does not expose a configuration hook for this.
231
- ZipHandler.prototype.inflate = async function (zipHeader, fileData, callback) {
232
- if (zipHeader.compressedMethod === 0) {
233
- return callback(fileData);
234
- }
235
-
236
- if (zipHeader.compressedMethod !== 8) {
237
- throw new Error(`Unsupported ZIP compression method: ${zipHeader.compressedMethod}`);
238
- }
239
-
240
- const uncompressedData = await decompressDeflateRawWithLimit(fileData, {maximumLength: maximumZipEntrySizeInBytes});
241
- return callback(uncompressedData);
242
- };
243
-
244
- ZipHandler.prototype.unzip = async function (fileCallback) {
245
- let stop = false;
246
- let zipEntryCount = 0;
247
- do {
248
- const zipHeader = await this.readLocalFileHeader();
249
- if (!zipHeader) {
250
- break;
251
- }
252
-
253
- zipEntryCount++;
254
- if (zipEntryCount > maximumZipEntryCount) {
255
- throw new Error(`ZIP entry count exceeds ${maximumZipEntryCount}`);
256
- }
257
-
258
- const next = fileCallback(zipHeader);
259
- stop = Boolean(next.stop);
260
- await this.tokenizer.ignore(zipHeader.extraFieldLength);
261
- const fileData = await readZipEntryData(this, zipHeader, {
262
- shouldBuffer: Boolean(next.handler),
263
- });
264
-
265
- if (next.handler) {
266
- await this.inflate(zipHeader, fileData, next.handler);
267
- }
268
-
269
- if (zipHeader.dataDescriptor) {
270
- const dataDescriptor = new Uint8Array(zipDataDescriptorLengthInBytes);
271
- await this.tokenizer.readBuffer(dataDescriptor);
272
- if (Token.UINT32_LE.get(dataDescriptor, 0) !== zipDataDescriptorSignature) {
273
- throw new Error(`Expected data-descriptor-signature at position ${this.tokenizer.position - dataDescriptor.length}`);
274
- }
275
- }
276
- } while (!stop);
277
- };
278
-
279
92
  function createByteLimitedReadableStream(stream, maximumBytes) {
280
93
  const reader = stream.getReader();
281
94
  let emittedBytes = 0;
@@ -342,348 +155,6 @@ export async function fileTypeFromBlob(blob, options) {
342
155
  return new FileTypeParser(options).fromBlob(blob);
343
156
  }
344
157
 
345
- function getFileTypeFromMimeType(mimeType) {
346
- mimeType = mimeType.toLowerCase();
347
- switch (mimeType) {
348
- case 'application/epub+zip':
349
- return {
350
- ext: 'epub',
351
- mime: mimeType,
352
- };
353
- case 'application/vnd.oasis.opendocument.text':
354
- return {
355
- ext: 'odt',
356
- mime: mimeType,
357
- };
358
- case 'application/vnd.oasis.opendocument.text-template':
359
- return {
360
- ext: 'ott',
361
- mime: mimeType,
362
- };
363
- case 'application/vnd.oasis.opendocument.spreadsheet':
364
- return {
365
- ext: 'ods',
366
- mime: mimeType,
367
- };
368
- case 'application/vnd.oasis.opendocument.spreadsheet-template':
369
- return {
370
- ext: 'ots',
371
- mime: mimeType,
372
- };
373
- case 'application/vnd.oasis.opendocument.presentation':
374
- return {
375
- ext: 'odp',
376
- mime: mimeType,
377
- };
378
- case 'application/vnd.oasis.opendocument.presentation-template':
379
- return {
380
- ext: 'otp',
381
- mime: mimeType,
382
- };
383
- case 'application/vnd.oasis.opendocument.graphics':
384
- return {
385
- ext: 'odg',
386
- mime: mimeType,
387
- };
388
- case 'application/vnd.oasis.opendocument.graphics-template':
389
- return {
390
- ext: 'otg',
391
- mime: mimeType,
392
- };
393
- case 'application/vnd.openxmlformats-officedocument.presentationml.slideshow':
394
- return {
395
- ext: 'ppsx',
396
- mime: mimeType,
397
- };
398
- case 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':
399
- return {
400
- ext: 'xlsx',
401
- mime: mimeType,
402
- };
403
- case 'application/vnd.ms-excel.sheet.macroenabled':
404
- return {
405
- ext: 'xlsm',
406
- mime: 'application/vnd.ms-excel.sheet.macroenabled.12',
407
- };
408
- case 'application/vnd.openxmlformats-officedocument.spreadsheetml.template':
409
- return {
410
- ext: 'xltx',
411
- mime: mimeType,
412
- };
413
- case 'application/vnd.ms-excel.template.macroenabled':
414
- return {
415
- ext: 'xltm',
416
- mime: 'application/vnd.ms-excel.template.macroenabled.12',
417
- };
418
- case 'application/vnd.ms-powerpoint.slideshow.macroenabled':
419
- return {
420
- ext: 'ppsm',
421
- mime: 'application/vnd.ms-powerpoint.slideshow.macroenabled.12',
422
- };
423
- case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
424
- return {
425
- ext: 'docx',
426
- mime: mimeType,
427
- };
428
- case 'application/vnd.ms-word.document.macroenabled':
429
- return {
430
- ext: 'docm',
431
- mime: 'application/vnd.ms-word.document.macroenabled.12',
432
- };
433
- case 'application/vnd.openxmlformats-officedocument.wordprocessingml.template':
434
- return {
435
- ext: 'dotx',
436
- mime: mimeType,
437
- };
438
- case 'application/vnd.ms-word.template.macroenabledtemplate':
439
- return {
440
- ext: 'dotm',
441
- mime: 'application/vnd.ms-word.template.macroenabled.12',
442
- };
443
- case 'application/vnd.openxmlformats-officedocument.presentationml.template':
444
- return {
445
- ext: 'potx',
446
- mime: mimeType,
447
- };
448
- case 'application/vnd.ms-powerpoint.template.macroenabled':
449
- return {
450
- ext: 'potm',
451
- mime: 'application/vnd.ms-powerpoint.template.macroenabled.12',
452
- };
453
- case 'application/vnd.openxmlformats-officedocument.presentationml.presentation':
454
- return {
455
- ext: 'pptx',
456
- mime: mimeType,
457
- };
458
- case 'application/vnd.ms-powerpoint.presentation.macroenabled':
459
- return {
460
- ext: 'pptm',
461
- mime: 'application/vnd.ms-powerpoint.presentation.macroenabled.12',
462
- };
463
- case 'application/vnd.ms-visio.drawing':
464
- return {
465
- ext: 'vsdx',
466
- mime: 'application/vnd.visio',
467
- };
468
- case 'application/vnd.ms-package.3dmanufacturing-3dmodel+xml':
469
- return {
470
- ext: '3mf',
471
- mime: 'model/3mf',
472
- };
473
- default:
474
- }
475
- }
476
-
477
- function _check(buffer, headers, options) {
478
- options = {
479
- offset: 0,
480
- ...options,
481
- };
482
-
483
- for (const [index, header] of headers.entries()) {
484
- // If a bitmask is set
485
- if (options.mask) {
486
- // If header doesn't equal `buf` with bits masked off
487
- if (header !== (options.mask[index] & buffer[index + options.offset])) {
488
- return false;
489
- }
490
- } else if (header !== buffer[index + options.offset]) {
491
- return false;
492
- }
493
- }
494
-
495
- return true;
496
- }
497
-
498
- export function normalizeSampleSize(sampleSize) {
499
- // Accept odd caller input, but preserve valid caller-requested probe depth.
500
- if (!Number.isFinite(sampleSize)) {
501
- return reasonableDetectionSizeInBytes;
502
- }
503
-
504
- return Math.max(1, Math.trunc(sampleSize));
505
- }
506
-
507
- function normalizeMpegOffsetTolerance(mpegOffsetTolerance) {
508
- // This value controls scan depth and therefore worst-case CPU work.
509
- if (!Number.isFinite(mpegOffsetTolerance)) {
510
- return 0;
511
- }
512
-
513
- return Math.max(0, Math.min(maximumMpegOffsetTolerance, Math.trunc(mpegOffsetTolerance)));
514
- }
515
-
516
- function getKnownFileSizeOrMaximum(fileSize) {
517
- if (!Number.isFinite(fileSize)) {
518
- return Number.MAX_SAFE_INTEGER;
519
- }
520
-
521
- return Math.max(0, fileSize);
522
- }
523
-
524
- function hasUnknownFileSize(tokenizer) {
525
- const fileSize = tokenizer.fileInfo.size;
526
- return (
527
- !Number.isFinite(fileSize)
528
- || fileSize === Number.MAX_SAFE_INTEGER
529
- );
530
- }
531
-
532
- function hasExceededUnknownSizeScanBudget(tokenizer, startOffset, maximumBytes) {
533
- return (
534
- hasUnknownFileSize(tokenizer)
535
- && tokenizer.position - startOffset > maximumBytes
536
- );
537
- }
538
-
539
- function getMaximumZipBufferedReadLength(tokenizer) {
540
- const fileSize = tokenizer.fileInfo.size;
541
- const remainingBytes = Number.isFinite(fileSize)
542
- ? Math.max(0, fileSize - tokenizer.position)
543
- : Number.MAX_SAFE_INTEGER;
544
-
545
- return Math.min(remainingBytes, maximumZipBufferedReadSizeInBytes);
546
- }
547
-
548
- function isRecoverableZipError(error) {
549
- if (error instanceof strtok3.EndOfStreamError) {
550
- return true;
551
- }
552
-
553
- if (error instanceof ParserHardLimitError) {
554
- return true;
555
- }
556
-
557
- if (!(error instanceof Error)) {
558
- return false;
559
- }
560
-
561
- if (recoverableZipErrorMessages.has(error.message)) {
562
- return true;
563
- }
564
-
565
- if (recoverableZipErrorCodes.has(error.code)) {
566
- return true;
567
- }
568
-
569
- for (const prefix of recoverableZipErrorMessagePrefixes) {
570
- if (error.message.startsWith(prefix)) {
571
- return true;
572
- }
573
- }
574
-
575
- return false;
576
- }
577
-
578
- function canReadZipEntryForDetection(zipHeader, maximumSize = maximumZipEntrySizeInBytes) {
579
- const sizes = [zipHeader.compressedSize, zipHeader.uncompressedSize];
580
- for (const size of sizes) {
581
- if (
582
- !Number.isFinite(size)
583
- || size < 0
584
- || size > maximumSize
585
- ) {
586
- return false;
587
- }
588
- }
589
-
590
- return true;
591
- }
592
-
593
- function createOpenXmlZipDetectionState() {
594
- return {
595
- hasContentTypesEntry: false,
596
- hasParsedContentTypesEntry: false,
597
- isParsingContentTypes: false,
598
- hasUnparseableContentTypes: false,
599
- hasWordDirectory: false,
600
- hasPresentationDirectory: false,
601
- hasSpreadsheetDirectory: false,
602
- hasThreeDimensionalModelEntry: false,
603
- };
604
- }
605
-
606
- function updateOpenXmlZipDetectionStateFromFilename(openXmlState, filename) {
607
- if (filename.startsWith('word/')) {
608
- openXmlState.hasWordDirectory = true;
609
- }
610
-
611
- if (filename.startsWith('ppt/')) {
612
- openXmlState.hasPresentationDirectory = true;
613
- }
614
-
615
- if (filename.startsWith('xl/')) {
616
- openXmlState.hasSpreadsheetDirectory = true;
617
- }
618
-
619
- if (
620
- filename.startsWith('3D/')
621
- && filename.endsWith('.model')
622
- ) {
623
- openXmlState.hasThreeDimensionalModelEntry = true;
624
- }
625
- }
626
-
627
- function getOpenXmlFileTypeFromZipEntries(openXmlState) {
628
- // Only use directory-name heuristic when [Content_Types].xml was present in the archive
629
- // but its handler was skipped (not invoked, not currently running, and not already resolved).
630
- // This avoids guessing from directory names when content-type parsing already gave a definitive answer or failed.
631
- if (
632
- !openXmlState.hasContentTypesEntry
633
- || openXmlState.hasUnparseableContentTypes
634
- || openXmlState.isParsingContentTypes
635
- || openXmlState.hasParsedContentTypesEntry
636
- ) {
637
- return;
638
- }
639
-
640
- if (openXmlState.hasWordDirectory) {
641
- return {
642
- ext: 'docx',
643
- mime: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
644
- };
645
- }
646
-
647
- if (openXmlState.hasPresentationDirectory) {
648
- return {
649
- ext: 'pptx',
650
- mime: 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
651
- };
652
- }
653
-
654
- if (openXmlState.hasSpreadsheetDirectory) {
655
- return {
656
- ext: 'xlsx',
657
- mime: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
658
- };
659
- }
660
-
661
- if (openXmlState.hasThreeDimensionalModelEntry) {
662
- return {
663
- ext: '3mf',
664
- mime: 'model/3mf',
665
- };
666
- }
667
- }
668
-
669
- function getOpenXmlMimeTypeFromContentTypesXml(xmlContent) {
670
- // We only need the `ContentType="...main+xml"` value, so a small string scan is enough and avoids full XML parsing.
671
- const endPosition = xmlContent.indexOf('.main+xml"');
672
- if (endPosition === -1) {
673
- const mimeType = 'application/vnd.ms-package.3dmanufacturing-3dmodel+xml';
674
- if (xmlContent.includes(`ContentType="${mimeType}"`)) {
675
- return mimeType;
676
- }
677
-
678
- return;
679
- }
680
-
681
- const truncatedContent = xmlContent.slice(0, endPosition);
682
- const firstQuotePosition = truncatedContent.lastIndexOf('"');
683
- // If no quote is found, `lastIndexOf` returns -1 and this intentionally falls back to the full truncated prefix.
684
- return truncatedContent.slice(firstQuotePosition + 1);
685
- }
686
-
687
158
  export async function fileTypeFromTokenizer(tokenizer, options) {
688
159
  return new FileTypeParser(options).fromTokenizer(tokenizer);
689
160
  }
@@ -715,7 +186,11 @@ export class FileTypeParser {
715
186
  };
716
187
  }
717
188
 
718
- async fromTokenizer(tokenizer, detectionReentryCount = 0) {
189
+ createTokenizerFromWebStream(stream) {
190
+ return strtok3.fromWebStream(toDefaultStream(stream), this.getTokenizerOptions());
191
+ }
192
+
193
+ async parseTokenizer(tokenizer, detectionReentryCount = 0) {
719
194
  this.detectionReentryCount = detectionReentryCount;
720
195
  const initialPosition = tokenizer.position;
721
196
  // Iterate through all file-type detectors
@@ -745,6 +220,14 @@ export class FileTypeParser {
745
220
  }
746
221
  }
747
222
 
223
+ async fromTokenizer(tokenizer) {
224
+ try {
225
+ return await this.parseTokenizer(tokenizer);
226
+ } finally {
227
+ await tokenizer.close();
228
+ }
229
+ }
230
+
748
231
  async fromBuffer(input) {
749
232
  if (!(input instanceof Uint8Array || input instanceof ArrayBuffer)) {
750
233
  throw new TypeError(`Expected the \`input\` argument to be of type \`Uint8Array\` or \`ArrayBuffer\`, got \`${typeof input}\``);
@@ -760,58 +243,107 @@ export class FileTypeParser {
760
243
  }
761
244
 
762
245
  async fromBlob(blob) {
246
+ this.options.signal?.throwIfAborted();
763
247
  const tokenizer = strtok3.fromBlob(blob, this.getTokenizerOptions());
764
- try {
765
- return await this.fromTokenizer(tokenizer);
766
- } finally {
767
- await tokenizer.close();
768
- }
248
+ return this.fromTokenizer(tokenizer);
769
249
  }
770
250
 
771
251
  async fromStream(stream) {
772
- const tokenizer = strtok3.fromWebStream(stream, this.getTokenizerOptions());
773
- try {
774
- return await this.fromTokenizer(tokenizer);
775
- } finally {
776
- await tokenizer.close();
252
+ this.options.signal?.throwIfAborted();
253
+ const tokenizer = this.createTokenizerFromWebStream(stream);
254
+ return this.fromTokenizer(tokenizer);
255
+ }
256
+
257
+ async fromFile(path) {
258
+ this.options.signal?.throwIfAborted();
259
+ // TODO: Remove this when `strtok3.fromFile()` safely rejects non-regular filesystem objects without a pathname race.
260
+ const [{default: fsPromises}, {FileTokenizer}] = await Promise.all([
261
+ import('node:fs/promises'),
262
+ import('strtok3'),
263
+ ]);
264
+ const fileHandle = await fsPromises.open(path, fsPromises.constants.O_RDONLY | fsPromises.constants.O_NONBLOCK);
265
+ const fileStat = await fileHandle.stat();
266
+ if (!fileStat.isFile()) {
267
+ await fileHandle.close();
268
+ return;
777
269
  }
270
+
271
+ const tokenizer = new FileTokenizer(fileHandle, {
272
+ ...this.getTokenizerOptions(),
273
+ fileInfo: {path, size: fileStat.size},
274
+ });
275
+ return this.fromTokenizer(tokenizer);
778
276
  }
779
277
 
780
278
  async toDetectionStream(stream, options) {
279
+ this.options.signal?.throwIfAborted();
781
280
  const sampleSize = normalizeSampleSize(options?.sampleSize ?? reasonableDetectionSizeInBytes);
782
281
  let detectedFileType;
783
- let firstChunk;
282
+ let streamEnded = false;
784
283
 
785
- const reader = stream.getReader({mode: 'byob'});
786
- try {
787
- // Read the first chunk from the stream
788
- const {value: chunk, done} = await reader.read(new Uint8Array(sampleSize));
789
- firstChunk = chunk;
790
- if (!done && chunk) {
791
- try {
792
- // Attempt to detect the file type from the chunk
793
- detectedFileType = await this.fromBuffer(chunk.subarray(0, sampleSize));
794
- } catch (error) {
795
- if (!(error instanceof strtok3.EndOfStreamError)) {
796
- throw error; // Re-throw non-EndOfStreamError
797
- }
284
+ const reader = stream.getReader();
285
+ const chunks = [];
286
+ let totalSize = 0;
798
287
 
799
- detectedFileType = undefined;
288
+ try {
289
+ while (totalSize < sampleSize) {
290
+ const {value, done} = await readWithSignal(reader, this.options.signal);
291
+ if (done || !value) {
292
+ streamEnded = true;
293
+ break;
800
294
  }
295
+
296
+ chunks.push(value);
297
+ totalSize += value.length;
801
298
  }
802
299
 
803
- firstChunk = chunk;
300
+ if (
301
+ !streamEnded
302
+ && totalSize === sampleSize
303
+ ) {
304
+ const {value, done} = await readWithSignal(reader, this.options.signal);
305
+ if (done || !value) {
306
+ streamEnded = true;
307
+ } else {
308
+ chunks.push(value);
309
+ totalSize += value.length;
310
+ }
311
+ }
804
312
  } finally {
805
- reader.releaseLock(); // Ensure the reader is released
313
+ reader.releaseLock();
806
314
  }
807
315
 
808
- // Create a new ReadableStream to manage locking issues
316
+ if (totalSize > 0) {
317
+ const sample = chunks.length === 1 ? chunks[0] : concatUint8Arrays(chunks);
318
+ try {
319
+ detectedFileType = await this.fromBuffer(sample.subarray(0, sampleSize));
320
+ } catch (error) {
321
+ if (!(error instanceof strtok3.EndOfStreamError)) {
322
+ throw error;
323
+ }
324
+
325
+ detectedFileType = undefined;
326
+ }
327
+
328
+ if (
329
+ !streamEnded
330
+ && detectedFileType?.ext === 'pages'
331
+ ) {
332
+ detectedFileType = {
333
+ ext: 'zip',
334
+ mime: 'application/zip',
335
+ };
336
+ }
337
+ }
338
+
339
+ // Prepend collected chunks and pipe the rest through
809
340
  const transformStream = new TransformStream({
810
- async start(controller) {
811
- controller.enqueue(firstChunk); // Enqueue the initial chunk
341
+ start(controller) {
342
+ for (const chunk of chunks) {
343
+ controller.enqueue(chunk);
344
+ }
812
345
  },
813
346
  transform(chunk, controller) {
814
- // Pass through the chunks without modification
815
347
  controller.enqueue(chunk);
816
348
  },
817
349
  });
@@ -822,8 +354,72 @@ export class FileTypeParser {
822
354
  return newStream;
823
355
  }
824
356
 
357
+ async detectGzip(tokenizer) {
358
+ if (this.gzipProbeDepth >= maximumNestedGzipProbeDepth) {
359
+ return {
360
+ ext: 'gz',
361
+ mime: 'application/gzip',
362
+ };
363
+ }
364
+
365
+ const gzipHandler = new GzipHandler(tokenizer);
366
+ const limitedInflatedStream = createByteLimitedReadableStream(gzipHandler.inflate(), maximumNestedGzipDetectionSizeInBytes);
367
+ const hasUnknownSize = hasUnknownFileSize(tokenizer);
368
+ let timeout;
369
+ let probeSignal;
370
+ let probeParser;
371
+ let compressedFileType;
372
+
373
+ if (hasUnknownSize) {
374
+ const timeoutController = new AbortController();
375
+ timeout = setTimeout(() => {
376
+ timeoutController.abort(new DOMException(`Operation timed out after ${unknownSizeGzipProbeTimeoutInMilliseconds} ms`, 'TimeoutError'));
377
+ }, unknownSizeGzipProbeTimeoutInMilliseconds);
378
+ probeSignal = this.options.signal === undefined
379
+ ? timeoutController.signal
380
+ : AbortSignal.any([this.options.signal, timeoutController.signal]);
381
+ probeParser = new FileTypeParser({
382
+ ...this.options,
383
+ signal: probeSignal,
384
+ });
385
+ probeParser.gzipProbeDepth = this.gzipProbeDepth + 1;
386
+ } else {
387
+ this.gzipProbeDepth++;
388
+ }
389
+
390
+ try {
391
+ compressedFileType = await (probeParser ?? this).fromStream(limitedInflatedStream);
392
+ } catch (error) {
393
+ if (
394
+ error?.name === 'AbortError'
395
+ && probeSignal?.reason?.name !== 'TimeoutError'
396
+ ) {
397
+ throw error;
398
+ }
399
+
400
+ // Timeout, decompression, or inner-detection failures are expected for non-tar gzip files.
401
+ } finally {
402
+ clearTimeout(timeout);
403
+ if (!hasUnknownSize) {
404
+ this.gzipProbeDepth--;
405
+ }
406
+ }
407
+
408
+ if (compressedFileType?.ext === 'tar') {
409
+ return {
410
+ ext: 'tar.gz',
411
+ mime: 'application/gzip',
412
+ };
413
+ }
414
+
415
+ return {
416
+ ext: 'gz',
417
+ mime: 'application/gzip',
418
+ };
419
+ }
420
+
825
421
  check(header, options) {
826
- return _check(this.buffer, header, options);
422
+ return checkBytes(this.buffer, header, options);
827
423
  }
828
424
 
829
425
  checkString(header, options) {
@@ -841,6 +437,13 @@ export class FileTypeParser {
841
437
 
842
438
  this.tokenizer = tokenizer;
843
439
 
440
+ if (hasUnknownFileSize(tokenizer)) {
441
+ await tokenizer.peekBuffer(this.buffer, {length: 3, mayBeLess: true});
442
+ if (this.check([0x1F, 0x8B, 0x8])) {
443
+ return this.detectGzip(tokenizer);
444
+ }
445
+ }
446
+
844
447
  await tokenizer.peekBuffer(this.buffer, {length: 32, mayBeLess: true});
845
448
 
846
449
  // -- 2-byte signatures --
@@ -944,41 +547,7 @@ export class FileTypeParser {
944
547
  }
945
548
 
946
549
  if (this.check([0x1F, 0x8B, 0x8])) {
947
- if (this.gzipProbeDepth >= maximumNestedGzipProbeDepth) {
948
- return {
949
- ext: 'gz',
950
- mime: 'application/gzip',
951
- };
952
- }
953
-
954
- const gzipHandler = new GzipHandler(tokenizer);
955
- const limitedInflatedStream = createByteLimitedReadableStream(gzipHandler.inflate(), maximumNestedGzipDetectionSizeInBytes);
956
- let compressedFileType;
957
- try {
958
- this.gzipProbeDepth++;
959
- compressedFileType = await this.fromStream(limitedInflatedStream);
960
- } catch (error) {
961
- if (error?.name === 'AbortError') {
962
- throw error;
963
- }
964
-
965
- // Decompression or inner-detection failures are expected for non-tar gzip files.
966
- } finally {
967
- this.gzipProbeDepth--;
968
- }
969
-
970
- // We only need enough inflated bytes to confidently decide whether this is tar.gz.
971
- if (compressedFileType?.ext === 'tar') {
972
- return {
973
- ext: 'tar.gz',
974
- mime: 'application/gzip',
975
- };
976
- }
977
-
978
- return {
979
- ext: 'gz',
980
- mime: 'application/gzip',
981
- };
550
+ return this.detectGzip(tokenizer);
982
551
  }
983
552
 
984
553
  if (this.check([0x42, 0x5A, 0x68])) {
@@ -997,11 +566,14 @@ export class FileTypeParser {
997
566
  const isUnknownFileSize = hasUnknownFileSize(tokenizer);
998
567
  if (
999
568
  !Number.isFinite(id3HeaderLength)
1000
- || id3HeaderLength < 0
569
+ || id3HeaderLength < 0
1001
570
  // Keep ID3 probing bounded for unknown-size streams to avoid attacker-controlled large skips.
1002
571
  || (
1003
572
  isUnknownFileSize
1004
- && id3HeaderLength > maximumId3HeaderSizeInBytes
573
+ && (
574
+ id3HeaderLength > maximumId3HeaderSizeInBytes
575
+ || (tokenizer.position + id3HeaderLength) > maximumId3HeaderSizeInBytes
576
+ )
1005
577
  )
1006
578
  ) {
1007
579
  return;
@@ -1036,7 +608,7 @@ export class FileTypeParser {
1036
608
  }
1037
609
 
1038
610
  this.detectionReentryCount++;
1039
- return this.fromTokenizer(tokenizer, this.detectionReentryCount); // Skip ID3 header, recursion
611
+ return this.parseTokenizer(tokenizer, this.detectionReentryCount); // Skip ID3 header, recursion
1040
612
  }
1041
613
 
1042
614
  // Musepack, SV7
@@ -1120,108 +692,7 @@ export class FileTypeParser {
1120
692
  // Zip-based file formats
1121
693
  // Need to be before the `zip` check
1122
694
  if (this.check([0x50, 0x4B, 0x3, 0x4])) { // Local file header signature
1123
- let fileType;
1124
- const openXmlState = createOpenXmlZipDetectionState();
1125
-
1126
- try {
1127
- await new ZipHandler(tokenizer).unzip(zipHeader => {
1128
- updateOpenXmlZipDetectionStateFromFilename(openXmlState, zipHeader.filename);
1129
-
1130
- const isOpenXmlContentTypesEntry = zipHeader.filename === '[Content_Types].xml';
1131
- const openXmlFileTypeFromEntries = getOpenXmlFileTypeFromZipEntries(openXmlState);
1132
- if (
1133
- !isOpenXmlContentTypesEntry
1134
- && openXmlFileTypeFromEntries
1135
- ) {
1136
- fileType = openXmlFileTypeFromEntries;
1137
- return {
1138
- stop: true,
1139
- };
1140
- }
1141
-
1142
- switch (zipHeader.filename) {
1143
- case 'META-INF/mozilla.rsa':
1144
- fileType = {
1145
- ext: 'xpi',
1146
- mime: 'application/x-xpinstall',
1147
- };
1148
- return {
1149
- stop: true,
1150
- };
1151
- case 'META-INF/MANIFEST.MF':
1152
- fileType = {
1153
- ext: 'jar',
1154
- mime: 'application/java-archive',
1155
- };
1156
- return {
1157
- stop: true,
1158
- };
1159
- case 'mimetype':
1160
- if (!canReadZipEntryForDetection(zipHeader, maximumZipTextEntrySizeInBytes)) {
1161
- return {};
1162
- }
1163
-
1164
- return {
1165
- async handler(fileData) {
1166
- // Use TextDecoder to decode the UTF-8 encoded data
1167
- const mimeType = new TextDecoder('utf-8').decode(fileData).trim();
1168
- fileType = getFileTypeFromMimeType(mimeType);
1169
- },
1170
- stop: true,
1171
- };
1172
-
1173
- case '[Content_Types].xml': {
1174
- openXmlState.hasContentTypesEntry = true;
1175
-
1176
- if (!canReadZipEntryForDetection(zipHeader, maximumZipTextEntrySizeInBytes)) {
1177
- openXmlState.hasUnparseableContentTypes = true;
1178
- return {};
1179
- }
1180
-
1181
- openXmlState.isParsingContentTypes = true;
1182
- return {
1183
- async handler(fileData) {
1184
- // Use TextDecoder to decode the UTF-8 encoded data
1185
- const xmlContent = new TextDecoder('utf-8').decode(fileData);
1186
- const mimeType = getOpenXmlMimeTypeFromContentTypesXml(xmlContent);
1187
- if (mimeType) {
1188
- fileType = getFileTypeFromMimeType(mimeType);
1189
- }
1190
-
1191
- openXmlState.hasParsedContentTypesEntry = true;
1192
- openXmlState.isParsingContentTypes = false;
1193
- },
1194
- stop: true,
1195
- };
1196
- }
1197
-
1198
- default:
1199
- if (/classes\d*\.dex/.test(zipHeader.filename)) {
1200
- fileType = {
1201
- ext: 'apk',
1202
- mime: 'application/vnd.android.package-archive',
1203
- };
1204
- return {stop: true};
1205
- }
1206
-
1207
- return {};
1208
- }
1209
- });
1210
- } catch (error) {
1211
- if (!isRecoverableZipError(error)) {
1212
- throw error;
1213
- }
1214
-
1215
- if (openXmlState.isParsingContentTypes) {
1216
- openXmlState.isParsingContentTypes = false;
1217
- openXmlState.hasUnparseableContentTypes = true;
1218
- }
1219
- }
1220
-
1221
- return fileType ?? getOpenXmlFileTypeFromZipEntries(openXmlState) ?? {
1222
- ext: 'zip',
1223
- mime: 'application/zip',
1224
- };
695
+ return detectZip(tokenizer);
1225
696
  }
1226
697
 
1227
698
  if (this.checkString('OggS')) {
@@ -1231,7 +702,7 @@ export class FileTypeParser {
1231
702
  await tokenizer.readBuffer(type);
1232
703
 
1233
704
  // Needs to be before `ogg` check
1234
- if (_check(type, [0x4F, 0x70, 0x75, 0x73, 0x48, 0x65, 0x61, 0x64])) {
705
+ if (checkBytes(type, [0x4F, 0x70, 0x75, 0x73, 0x48, 0x65, 0x61, 0x64])) {
1235
706
  return {
1236
707
  ext: 'opus',
1237
708
  mime: 'audio/ogg; codecs=opus',
@@ -1239,7 +710,7 @@ export class FileTypeParser {
1239
710
  }
1240
711
 
1241
712
  // If ' theora' in header.
1242
- if (_check(type, [0x80, 0x74, 0x68, 0x65, 0x6F, 0x72, 0x61])) {
713
+ if (checkBytes(type, [0x80, 0x74, 0x68, 0x65, 0x6F, 0x72, 0x61])) {
1243
714
  return {
1244
715
  ext: 'ogv',
1245
716
  mime: 'video/ogg',
@@ -1247,7 +718,7 @@ export class FileTypeParser {
1247
718
  }
1248
719
 
1249
720
  // If '\x01video' in header.
1250
- if (_check(type, [0x01, 0x76, 0x69, 0x64, 0x65, 0x6F, 0x00])) {
721
+ if (checkBytes(type, [0x01, 0x76, 0x69, 0x64, 0x65, 0x6F, 0x00])) {
1251
722
  return {
1252
723
  ext: 'ogm',
1253
724
  mime: 'video/ogg',
@@ -1255,7 +726,7 @@ export class FileTypeParser {
1255
726
  }
1256
727
 
1257
728
  // If ' FLAC' in header https://xiph.org/flac/faq.html
1258
- if (_check(type, [0x7F, 0x46, 0x4C, 0x41, 0x43])) {
729
+ if (checkBytes(type, [0x7F, 0x46, 0x4C, 0x41, 0x43])) {
1259
730
  return {
1260
731
  ext: 'oga',
1261
732
  mime: 'audio/ogg',
@@ -1263,7 +734,7 @@ export class FileTypeParser {
1263
734
  }
1264
735
 
1265
736
  // 'Speex ' in header https://en.wikipedia.org/wiki/Speex
1266
- if (_check(type, [0x53, 0x70, 0x65, 0x65, 0x78, 0x20, 0x20])) {
737
+ if (checkBytes(type, [0x53, 0x70, 0x65, 0x65, 0x78, 0x20, 0x20])) {
1267
738
  return {
1268
739
  ext: 'spx',
1269
740
  mime: 'audio/ogg',
@@ -1271,7 +742,7 @@ export class FileTypeParser {
1271
742
  }
1272
743
 
1273
744
  // If '\x01vorbis' in header
1274
- if (_check(type, [0x01, 0x76, 0x6F, 0x72, 0x62, 0x69, 0x73])) {
745
+ if (checkBytes(type, [0x01, 0x76, 0x6F, 0x72, 0x62, 0x69, 0x73])) {
1275
746
  return {
1276
747
  ext: 'ogg',
1277
748
  mime: 'audio/ogg',
@@ -1347,7 +818,7 @@ export class FileTypeParser {
1347
818
  if (this.checkString('LZIP')) {
1348
819
  return {
1349
820
  ext: 'lz',
1350
- mime: 'application/x-lzip',
821
+ mime: 'application/lzip',
1351
822
  };
1352
823
  }
1353
824
 
@@ -1412,105 +883,7 @@ export class FileTypeParser {
1412
883
 
1413
884
  // https://github.com/file/file/blob/master/magic/Magdir/matroska
1414
885
  if (this.check([0x1A, 0x45, 0xDF, 0xA3])) { // Root element: EBML
1415
- async function readField() {
1416
- const msb = await tokenizer.peekNumber(Token.UINT8);
1417
- let mask = 0x80;
1418
- let ic = 0; // 0 = A, 1 = B, 2 = C, 3 = D
1419
-
1420
- while ((msb & mask) === 0 && mask !== 0) {
1421
- ++ic;
1422
- mask >>= 1;
1423
- }
1424
-
1425
- const id = new Uint8Array(ic + 1);
1426
- await safeReadBuffer(tokenizer, id, undefined, {
1427
- maximumLength: id.length,
1428
- reason: 'EBML field',
1429
- });
1430
- return id;
1431
- }
1432
-
1433
- async function readElement() {
1434
- const idField = await readField();
1435
- const lengthField = await readField();
1436
-
1437
- lengthField[0] ^= 0x80 >> (lengthField.length - 1);
1438
- const nrLength = Math.min(6, lengthField.length); // JavaScript can max read 6 bytes integer
1439
-
1440
- const idView = new DataView(idField.buffer);
1441
- const lengthView = new DataView(lengthField.buffer, lengthField.length - nrLength, nrLength);
1442
-
1443
- return {
1444
- id: getUintBE(idView),
1445
- len: getUintBE(lengthView),
1446
- };
1447
- }
1448
-
1449
- async function readChildren(children) {
1450
- let ebmlElementCount = 0;
1451
- while (children > 0) {
1452
- ebmlElementCount++;
1453
- if (ebmlElementCount > maximumEbmlElementCount) {
1454
- return;
1455
- }
1456
-
1457
- const previousPosition = tokenizer.position;
1458
- const element = await readElement();
1459
-
1460
- if (element.id === 0x42_82) {
1461
- // `DocType` is a short string ("webm", "matroska", ...), reject implausible lengths to avoid large allocations.
1462
- if (element.len > maximumEbmlDocumentTypeSizeInBytes) {
1463
- return;
1464
- }
1465
-
1466
- const documentTypeLength = getSafeBound(element.len, maximumEbmlDocumentTypeSizeInBytes, 'EBML DocType');
1467
- const rawValue = await tokenizer.readToken(new Token.StringType(documentTypeLength));
1468
- return rawValue.replaceAll(/\00.*$/g, ''); // Return DocType
1469
- }
1470
-
1471
- if (
1472
- hasUnknownFileSize(tokenizer)
1473
- && (
1474
- !Number.isFinite(element.len)
1475
- || element.len < 0
1476
- || element.len > maximumEbmlElementPayloadSizeInBytes
1477
- )
1478
- ) {
1479
- return;
1480
- }
1481
-
1482
- await safeIgnore(tokenizer, element.len, {
1483
- maximumLength: hasUnknownFileSize(tokenizer) ? maximumEbmlElementPayloadSizeInBytes : tokenizer.fileInfo.size,
1484
- reason: 'EBML payload',
1485
- }); // ignore payload
1486
- --children;
1487
-
1488
- // Safeguard against malformed files: bail if the position did not advance.
1489
- if (tokenizer.position <= previousPosition) {
1490
- return;
1491
- }
1492
- }
1493
- }
1494
-
1495
- const rootElement = await readElement();
1496
- const documentType = await readChildren(rootElement.len);
1497
-
1498
- switch (documentType) {
1499
- case 'webm':
1500
- return {
1501
- ext: 'webm',
1502
- mime: 'video/webm',
1503
- };
1504
-
1505
- case 'matroska':
1506
- return {
1507
- ext: 'mkv',
1508
- mime: 'video/matroska',
1509
- };
1510
-
1511
- default:
1512
- return;
1513
- }
886
+ return detectEbml(tokenizer);
1514
887
  }
1515
888
 
1516
889
  if (this.checkString('SQLi')) {
@@ -1608,7 +981,7 @@ export class FileTypeParser {
1608
981
  if (this.check([0x04, 0x22, 0x4D, 0x18])) {
1609
982
  return {
1610
983
  ext: 'lz4',
1611
- mime: 'application/x-lz4', // Invented by us
984
+ mime: 'application/x-lz4', // Informal, used by freedesktop.org shared-mime-info
1612
985
  };
1613
986
  }
1614
987
 
@@ -1643,7 +1016,7 @@ export class FileTypeParser {
1643
1016
  };
1644
1017
  }
1645
1018
 
1646
- if (this.checkString('{\\rtf')) {
1019
+ if (this.checkString(String.raw`{\rtf`)) {
1647
1020
  return {
1648
1021
  ext: 'rtf',
1649
1022
  mime: 'application/rtf',
@@ -1744,7 +1117,7 @@ export class FileTypeParser {
1744
1117
  if (this.checkString('DRACO')) {
1745
1118
  return {
1746
1119
  ext: 'drc',
1747
- mime: 'application/vnd.google.draco', // Invented by us
1120
+ mime: 'application/x-ft-draco',
1748
1121
  };
1749
1122
  }
1750
1123
 
@@ -1790,7 +1163,7 @@ export class FileTypeParser {
1790
1163
 
1791
1164
  if (this.checkString('AC')) {
1792
1165
  const version = new Token.StringType(4, 'latin1').get(this.buffer, 2);
1793
- if (version.match('^d*') && version >= 1000 && version <= 1050) {
1166
+ if (/^\d+$/v.test(version) && version >= 1000 && version <= 1050) {
1794
1167
  return {
1795
1168
  ext: 'dwg',
1796
1169
  mime: 'image/vnd.dwg',
@@ -1845,93 +1218,7 @@ export class FileTypeParser {
1845
1218
  // -- 8-byte signatures --
1846
1219
 
1847
1220
  if (this.check([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A])) {
1848
- const pngFileType = {
1849
- ext: 'png',
1850
- mime: 'image/png',
1851
- };
1852
-
1853
- const apngFileType = {
1854
- ext: 'apng',
1855
- mime: 'image/apng',
1856
- };
1857
-
1858
- // APNG format (https://wiki.mozilla.org/APNG_Specification)
1859
- // 1. Find the first IDAT (image data) chunk (49 44 41 54)
1860
- // 2. Check if there is an "acTL" chunk before the IDAT one (61 63 54 4C)
1861
-
1862
- // Offset calculated as follows:
1863
- // - 8 bytes: PNG signature
1864
- // - 4 (length) + 4 (chunk type) + 13 (chunk data) + 4 (CRC): IHDR chunk
1865
-
1866
- await tokenizer.ignore(8); // ignore PNG signature
1867
-
1868
- async function readChunkHeader() {
1869
- return {
1870
- length: await tokenizer.readToken(Token.INT32_BE),
1871
- type: await tokenizer.readToken(new Token.StringType(4, 'latin1')),
1872
- };
1873
- }
1874
-
1875
- const isUnknownPngStream = hasUnknownFileSize(tokenizer);
1876
- const pngScanStart = tokenizer.position;
1877
- let pngChunkCount = 0;
1878
- do {
1879
- pngChunkCount++;
1880
- if (pngChunkCount > maximumPngChunkCount) {
1881
- break;
1882
- }
1883
-
1884
- if (hasExceededUnknownSizeScanBudget(tokenizer, pngScanStart, maximumPngChunkSizeInBytes)) {
1885
- break;
1886
- }
1887
-
1888
- const previousPosition = tokenizer.position;
1889
- const chunk = await readChunkHeader();
1890
- if (chunk.length < 0) {
1891
- return; // Invalid chunk length
1892
- }
1893
-
1894
- switch (chunk.type) {
1895
- case 'IDAT':
1896
- return pngFileType;
1897
- case 'acTL':
1898
- return apngFileType;
1899
- default:
1900
- if (
1901
- isUnknownPngStream
1902
- && chunk.length > maximumPngChunkSizeInBytes
1903
- ) {
1904
- // Avoid huge attacker-controlled skips when probing unknown-size streams.
1905
- return;
1906
- }
1907
-
1908
- try {
1909
- await safeIgnore(tokenizer, chunk.length + 4, {
1910
- maximumLength: isUnknownPngStream ? maximumPngChunkSizeInBytes + 4 : tokenizer.fileInfo.size,
1911
- reason: 'PNG chunk payload',
1912
- }); // Ignore chunk-data + CRC
1913
- } catch (error) {
1914
- if (
1915
- !isUnknownPngStream
1916
- && (
1917
- error instanceof ParserHardLimitError
1918
- || error instanceof strtok3.EndOfStreamError
1919
- )
1920
- ) {
1921
- return pngFileType;
1922
- }
1923
-
1924
- throw error;
1925
- }
1926
- }
1927
-
1928
- // Safeguard against malformed files: bail if the position did not advance.
1929
- if (tokenizer.position <= previousPosition) {
1930
- break;
1931
- }
1932
- } while (tokenizer.position + 8 < tokenizer.fileInfo.size);
1933
-
1934
- return pngFileType;
1221
+ return detectPng(tokenizer);
1935
1222
  }
1936
1223
 
1937
1224
  if (this.check([0x41, 0x52, 0x52, 0x4F, 0x57, 0x31, 0x00, 0x00])) {
@@ -2089,108 +1376,7 @@ export class FileTypeParser {
2089
1376
 
2090
1377
  // ASF_Header_Object first 80 bytes
2091
1378
  if (this.check([0x30, 0x26, 0xB2, 0x75, 0x8E, 0x66, 0xCF, 0x11, 0xA6, 0xD9])) {
2092
- let isMalformedAsf = false;
2093
- try {
2094
- async function readHeader() {
2095
- const guid = new Uint8Array(16);
2096
- await safeReadBuffer(tokenizer, guid, undefined, {
2097
- maximumLength: guid.length,
2098
- reason: 'ASF header GUID',
2099
- });
2100
- return {
2101
- id: guid,
2102
- size: Number(await tokenizer.readToken(Token.UINT64_LE)),
2103
- };
2104
- }
2105
-
2106
- await safeIgnore(tokenizer, 30, {
2107
- maximumLength: 30,
2108
- reason: 'ASF header prelude',
2109
- });
2110
- const isUnknownFileSize = hasUnknownFileSize(tokenizer);
2111
- const asfHeaderScanStart = tokenizer.position;
2112
- let asfHeaderObjectCount = 0;
2113
- while (tokenizer.position + 24 < tokenizer.fileInfo.size) {
2114
- asfHeaderObjectCount++;
2115
- if (asfHeaderObjectCount > maximumAsfHeaderObjectCount) {
2116
- break;
2117
- }
2118
-
2119
- if (hasExceededUnknownSizeScanBudget(tokenizer, asfHeaderScanStart, maximumUntrustedSkipSizeInBytes)) {
2120
- break;
2121
- }
2122
-
2123
- const previousPosition = tokenizer.position;
2124
- const header = await readHeader();
2125
- let payload = header.size - 24;
2126
- if (
2127
- !Number.isFinite(payload)
2128
- || payload < 0
2129
- ) {
2130
- isMalformedAsf = true;
2131
- break;
2132
- }
2133
-
2134
- if (_check(header.id, [0x91, 0x07, 0xDC, 0xB7, 0xB7, 0xA9, 0xCF, 0x11, 0x8E, 0xE6, 0x00, 0xC0, 0x0C, 0x20, 0x53, 0x65])) {
2135
- // Sync on Stream-Properties-Object (B7DC0791-A9B7-11CF-8EE6-00C00C205365)
2136
- const typeId = new Uint8Array(16);
2137
- payload -= await safeReadBuffer(tokenizer, typeId, undefined, {
2138
- maximumLength: typeId.length,
2139
- reason: 'ASF stream type GUID',
2140
- });
2141
-
2142
- if (_check(typeId, [0x40, 0x9E, 0x69, 0xF8, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
2143
- // Found audio:
2144
- return {
2145
- ext: 'asf',
2146
- mime: 'audio/x-ms-asf',
2147
- };
2148
- }
2149
-
2150
- if (_check(typeId, [0xC0, 0xEF, 0x19, 0xBC, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
2151
- // Found video:
2152
- return {
2153
- ext: 'asf',
2154
- mime: 'video/x-ms-asf',
2155
- };
2156
- }
2157
-
2158
- break;
2159
- }
2160
-
2161
- await safeIgnore(tokenizer, payload, {
2162
- maximumLength: isUnknownFileSize ? maximumUntrustedSkipSizeInBytes : tokenizer.fileInfo.size,
2163
- reason: 'ASF header payload',
2164
- });
2165
-
2166
- // Safeguard against malformed files: break if the position did not advance.
2167
- if (tokenizer.position <= previousPosition) {
2168
- isMalformedAsf = true;
2169
- break;
2170
- }
2171
- }
2172
- } catch (error) {
2173
- if (
2174
- error instanceof strtok3.EndOfStreamError
2175
- || error instanceof ParserHardLimitError
2176
- ) {
2177
- if (hasUnknownFileSize(tokenizer)) {
2178
- isMalformedAsf = true;
2179
- }
2180
- } else {
2181
- throw error;
2182
- }
2183
- }
2184
-
2185
- if (isMalformedAsf) {
2186
- return;
2187
- }
2188
-
2189
- // Default to ASF generic extension
2190
- return {
2191
- ext: 'asf',
2192
- mime: 'application/vnd.ms-asf',
2193
- };
1379
+ return detectAsf(tokenizer);
2194
1380
  }
2195
1381
 
2196
1382
  if (this.check([0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A])) {
@@ -2404,21 +1590,21 @@ export class FileTypeParser {
2404
1590
  if (this.check([0x4C, 0x00, 0x00, 0x00, 0x01, 0x14, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46])) {
2405
1591
  return {
2406
1592
  ext: 'lnk',
2407
- mime: 'application/x.ms.shortcut', // Invented by us
1593
+ mime: 'application/x-ms-shortcut', // Informal, used by freedesktop.org shared-mime-info
2408
1594
  };
2409
1595
  }
2410
1596
 
2411
1597
  if (this.check([0x62, 0x6F, 0x6F, 0x6B, 0x00, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x72, 0x6B, 0x00, 0x00, 0x00, 0x00])) {
2412
1598
  return {
2413
1599
  ext: 'alias',
2414
- mime: 'application/x.apple.alias', // Invented by us
1600
+ mime: 'application/x-ft-apple.alias',
2415
1601
  };
2416
1602
  }
2417
1603
 
2418
1604
  if (this.checkString('Kaydara FBX Binary \u0000')) {
2419
1605
  return {
2420
1606
  ext: 'fbx',
2421
- mime: 'application/x.autodesk.fbx', // Invented by us
1607
+ mime: 'application/x-ft-fbx',
2422
1608
  };
2423
1609
  }
2424
1610
 
@@ -2625,6 +1811,13 @@ export class FileTypeParser {
2625
1811
  }
2626
1812
  }
2627
1813
 
1814
+ if (
1815
+ hasUnknownFileSize(this.tokenizer)
1816
+ && ifdOffset > maximumTiffStreamIfdOffsetInBytes
1817
+ ) {
1818
+ return tiffFileType;
1819
+ }
1820
+
2628
1821
  const maximumTiffOffset = hasUnknownFileSize(this.tokenizer) ? maximumTiffIfdOffsetInBytes : this.tokenizer.fileInfo.size;
2629
1822
 
2630
1823
  try {
@@ -2713,3 +1906,7 @@ export class FileTypeParser {
2713
1906
 
2714
1907
  export const supportedExtensions = new Set(extensions);
2715
1908
  export const supportedMimeTypes = new Set(mimeTypes);
1909
+
1910
+ export async function fileTypeFromFile(path, options) {
1911
+ return (new FileTypeParser(options)).fromFile(path);
1912
+ }