file-type 21.3.3 → 22.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,643 @@
1
+ import * as Token from 'token-types';
2
+ import * as strtok3 from 'strtok3/core';
3
+ import {ZipHandler} from '@tokenizer/inflate';
4
+ import {
5
+ maximumUntrustedSkipSizeInBytes,
6
+ ParserHardLimitError,
7
+ safeIgnore,
8
+ hasUnknownFileSize,
9
+ hasExceededUnknownSizeScanBudget,
10
+ } from '../parser.js';
11
+
12
// Ceilings enforced while scanning a ZIP archive for type detection.
const maximumZipEntryCount = 1024;
const maximumZipEntrySizeInBytes = 1024 ** 2; // 1 MiB
const maximumZipTextEntrySizeInBytes = maximumZipEntrySizeInBytes;
const maximumZipBufferedReadSizeInBytes = 0x7F_FF_FF_FF; // (2 ** 31) - 1

// Exact error messages that `isRecoverableZipError` treats as recoverable.
const recoverableZipErrorMessages = new Set([
	'Unexpected signature',
	'Encrypted ZIP',
	'Expected Central-File-Header signature',
]);

// Leading parts of recoverable error messages whose tail carries a variable value.
const recoverableZipErrorMessagePrefixes = [
	'ZIP entry count exceeds ',
	'Unsupported ZIP compression method:',
	'ZIP entry compressed data exceeds ',
	'ZIP entry decompressed data exceeds ',
	'Expected data-descriptor-signature at position ',
];

// `error.code` values that `isRecoverableZipError` treats as recoverable.
const recoverableZipErrorCodes = new Set([
	'Z_BUF_ERROR',
	'Z_DATA_ERROR',
	'ERR_INVALID_STATE',
]);
34
+
35
+ async function decompressDeflateRawWithLimit(data, {maximumLength = maximumZipEntrySizeInBytes} = {}) {
36
+ const input = new ReadableStream({
37
+ start(controller) {
38
+ controller.enqueue(data);
39
+ controller.close();
40
+ },
41
+ });
42
+ const output = input.pipeThrough(new DecompressionStream('deflate-raw'));
43
+ const reader = output.getReader();
44
+ const chunks = [];
45
+ let totalLength = 0;
46
+
47
+ try {
48
+ for (;;) {
49
+ const {done, value} = await reader.read();
50
+ if (done) {
51
+ break;
52
+ }
53
+
54
+ totalLength += value.length;
55
+ if (totalLength > maximumLength) {
56
+ await reader.cancel();
57
+ throw new Error(`ZIP entry decompressed data exceeds ${maximumLength} bytes`);
58
+ }
59
+
60
+ chunks.push(value);
61
+ }
62
+ } finally {
63
+ reader.releaseLock();
64
+ }
65
+
66
+ const uncompressedData = new Uint8Array(totalLength);
67
+ let offset = 0;
68
+ for (const chunk of chunks) {
69
+ uncompressedData.set(chunk, offset);
70
+ offset += chunk.length;
71
+ }
72
+
73
+ return uncompressedData;
74
+ }
75
+
76
/**
Copy `chunks` back-to-back into a new array of `totalLength` bytes.

@param {Iterable<Uint8Array>} chunks - Byte chunks, in output order.
@param {number} totalLength - Length of the array to allocate.
@returns {Uint8Array} The newly allocated array holding the chunks one after another.
*/
function mergeByteChunks(chunks, totalLength) {
	const combined = new Uint8Array(totalLength);
	let writePosition = 0;

	for (const part of chunks) {
		combined.set(part, writePosition);
		writePosition += part.length;
	}

	return combined;
}
87
+
88
/**
Largest number of bytes a single buffered ZIP read may request.

@param {object} tokenizer - Provides `fileInfo.size` and `position`.
@returns {number} The bytes left in the file (when the size is finite), capped at `maximumZipBufferedReadSizeInBytes`.
*/
function getMaximumZipBufferedReadLength(tokenizer) {
	const {size} = tokenizer.fileInfo;

	// Without a finite file size there is nothing to subtract from, so only the fixed cap applies.
	if (!Number.isFinite(size)) {
		return Math.min(Number.MAX_SAFE_INTEGER, maximumZipBufferedReadSizeInBytes);
	}

	const bytesLeft = Math.max(0, size - tokenizer.position);
	return Math.min(bytesLeft, maximumZipBufferedReadSizeInBytes);
}
96
+
97
/**
Decide whether a failure raised while scanning a ZIP archive may be tolerated.

@param {unknown} error - The caught value.
@returns {boolean} `true` for end-of-stream and parser hard-limit errors, and for `Error` instances whose message, code or message prefix is listed in the recoverable tables.
*/
function isRecoverableZipError(error) {
	if (
		error instanceof strtok3.EndOfStreamError
		|| error instanceof ParserHardLimitError
	) {
		return true;
	}

	// Anything that is not an `Error` has no trustworthy `message`/`code` to inspect.
	if (!(error instanceof Error)) {
		return false;
	}

	return recoverableZipErrorMessages.has(error.message)
		|| recoverableZipErrorCodes.has(error.code)
		|| recoverableZipErrorMessagePrefixes.some(prefix => error.message.startsWith(prefix));
}
126
+
127
/**
Check that both sizes declared in a ZIP local header are usable for detection.

@param {{compressedSize: number, uncompressedSize: number}} zipHeader - Header whose declared sizes are validated.
@param {number} [maximumSize] - Upper bound for either size; defaults to `maximumZipEntrySizeInBytes`.
@returns {boolean} `true` only when both sizes are finite, non-negative and not above `maximumSize`.
*/
function canReadZipEntryForDetection(zipHeader, maximumSize = maximumZipEntrySizeInBytes) {
	const declaredSizes = [zipHeader.compressedSize, zipHeader.uncompressedSize];

	return declaredSizes.every(declaredSize =>
		Number.isFinite(declaredSize)
		&& declaredSize >= 0
		&& !(declaredSize > maximumSize));
}
141
+
142
+ // -- iWork helpers --
143
+
144
/**
Create a fresh set of iWork marker flags, all initially `false`.

@returns {{hasDocumentEntry: boolean, hasMasterSlideEntry: boolean, hasTablesEntry: boolean, hasCalculationEngineEntry: boolean}}
*/
function createIWorkZipDetectionState() {
	const flagNames = [
		'hasDocumentEntry',
		'hasMasterSlideEntry',
		'hasTablesEntry',
		'hasCalculationEngineEntry',
	];

	return Object.fromEntries(flagNames.map(flagName => [flagName, false]));
}
152
+
153
/**
Raise the iWork marker flags that `filename` matches. Flags are only ever set to `true`, never reset.

@param {object} iWorkState - State object mutated in place.
@param {string} filename - Entry name from the ZIP header.
*/
function updateIWorkZipDetectionStateFromFilename(iWorkState, filename) {
	const markers = [
		['hasDocumentEntry', name => name === 'Index/Document.iwa'],
		['hasMasterSlideEntry', name => name.startsWith('Index/MasterSlide')],
		['hasTablesEntry', name => name.startsWith('Index/Tables/')],
		['hasCalculationEngineEntry', name => name === 'Index/CalculationEngine.iwa'],
	];

	for (const [flagName, matches] of markers) {
		if (matches(filename)) {
			iWorkState[flagName] = true;
		}
	}
}
170
+
171
/**
Map the collected iWork marker flags to a file type.

@param {object} iWorkState - Flags gathered from the archive's entry names.
@returns {{ext: string, mime: string} | undefined} `undefined` without a document entry; otherwise Keynote when a master slide was seen, Numbers when a tables entry was seen, and Pages in all remaining cases.
*/
function getIWorkFileTypeFromZipEntries(iWorkState) {
	const {hasDocumentEntry, hasMasterSlideEntry, hasTablesEntry} = iWorkState;

	if (!hasDocumentEntry) {
		return;
	}

	// Master slides take precedence over tables when both markers are present.
	if (hasMasterSlideEntry) {
		return {ext: 'key', mime: 'application/vnd.apple.keynote'};
	}

	return hasTablesEntry
		? {ext: 'numbers', mime: 'application/vnd.apple.numbers'}
		: {ext: 'pages', mime: 'application/vnd.apple.pages'};
}
186
+
187
+ // -- OpenXML helpers --
188
+
189
// Lower-cased MIME type -> [extension, reported MIME type].
// When the second element is absent, the lower-cased input MIME type is reported unchanged.
const fileTypeByZipMimeType = new Map([
	['application/epub+zip', ['epub']],
	['application/vnd.oasis.opendocument.text', ['odt']],
	['application/vnd.oasis.opendocument.text-template', ['ott']],
	['application/vnd.oasis.opendocument.spreadsheet', ['ods']],
	['application/vnd.oasis.opendocument.spreadsheet-template', ['ots']],
	['application/vnd.oasis.opendocument.presentation', ['odp']],
	['application/vnd.oasis.opendocument.presentation-template', ['otp']],
	['application/vnd.oasis.opendocument.graphics', ['odg']],
	['application/vnd.oasis.opendocument.graphics-template', ['otg']],
	['application/vnd.openxmlformats-officedocument.presentationml.slideshow', ['ppsx']],
	['application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', ['xlsx']],
	['application/vnd.ms-excel.sheet.macroenabled', ['xlsm', 'application/vnd.ms-excel.sheet.macroenabled.12']],
	['application/vnd.openxmlformats-officedocument.spreadsheetml.template', ['xltx']],
	['application/vnd.ms-excel.template.macroenabled', ['xltm', 'application/vnd.ms-excel.template.macroenabled.12']],
	['application/vnd.ms-powerpoint.slideshow.macroenabled', ['ppsm', 'application/vnd.ms-powerpoint.slideshow.macroenabled.12']],
	['application/vnd.openxmlformats-officedocument.wordprocessingml.document', ['docx']],
	['application/vnd.ms-word.document.macroenabled', ['docm', 'application/vnd.ms-word.document.macroenabled.12']],
	['application/vnd.openxmlformats-officedocument.wordprocessingml.template', ['dotx']],
	['application/vnd.ms-word.template.macroenabledtemplate', ['dotm', 'application/vnd.ms-word.template.macroenabled.12']],
	['application/vnd.openxmlformats-officedocument.presentationml.template', ['potx']],
	['application/vnd.ms-powerpoint.template.macroenabled', ['potm', 'application/vnd.ms-powerpoint.template.macroenabled.12']],
	['application/vnd.openxmlformats-officedocument.presentationml.presentation', ['pptx']],
	['application/vnd.ms-powerpoint.presentation.macroenabled', ['pptm', 'application/vnd.ms-powerpoint.presentation.macroenabled.12']],
	['application/vnd.ms-visio.drawing', ['vsdx', 'application/vnd.visio']],
	['application/vnd.ms-package.3dmanufacturing-3dmodel+xml', ['3mf', 'model/3mf']],
]);

/**
Translate a MIME type found inside a ZIP container into a file type.

@param {string} mimeType - MIME type; matched case-insensitively.
@returns {{ext: string, mime: string} | undefined} A new object for known MIME types, otherwise `undefined`.
*/
function getFileTypeFromMimeType(mimeType) {
	const normalizedMimeType = mimeType.toLowerCase();
	const knownType = fileTypeByZipMimeType.get(normalizedMimeType);
	if (!knownType) {
		return;
	}

	const [ext, reportedMimeType = normalizedMimeType] = knownType;
	return {ext, mime: reportedMimeType};
}
245
+
246
/**
Create a fresh set of OpenXML detection flags, all initially `false`.

@returns {Record<string, boolean>} State object later mutated by the ZIP entry callback.
*/
function createOpenXmlZipDetectionState() {
	const flagNames = [
		'hasContentTypesEntry',
		'hasParsedContentTypesEntry',
		'isParsingContentTypes',
		'hasUnparseableContentTypes',
		'hasWordDirectory',
		'hasPresentationDirectory',
		'hasSpreadsheetDirectory',
		'hasThreeDimensionalModelEntry',
	];

	return Object.fromEntries(flagNames.map(flagName => [flagName, false]));
}
258
+
259
/**
Raise the OpenXML directory flags that `filename` matches. Flags are only ever set to `true`, never reset.

@param {object} openXmlState - State object mutated in place.
@param {string} filename - Entry name from the ZIP header.
*/
function updateOpenXmlZipDetectionStateFromFilename(openXmlState, filename) {
	const directoryFlags = [
		['word/', 'hasWordDirectory'],
		['ppt/', 'hasPresentationDirectory'],
		['xl/', 'hasSpreadsheetDirectory'],
	];

	for (const [directoryPrefix, flagName] of directoryFlags) {
		if (filename.startsWith(directoryPrefix)) {
			openXmlState[flagName] = true;
		}
	}

	// The 3D model marker needs both the directory and the file extension to match.
	const isThreeDimensionalModel = filename.startsWith('3D/') && filename.endsWith('.model');
	if (isThreeDimensionalModel) {
		openXmlState.hasThreeDimensionalModelEntry = true;
	}
}
279
+
280
/**
Guess an OpenXML file type purely from the directory flags collected so far.

@param {object} openXmlState - Flags gathered from the archive's entry names.
@returns {{ext: string, mime: string} | undefined} The first match in the order Word, PowerPoint, Excel, 3MF; `undefined` when no flag is set.
*/
function getOpenXmlFileTypeFromDirectoryNames(openXmlState) {
	// Ordered by precedence: the first raised flag wins.
	const candidates = [
		['hasWordDirectory', 'docx', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
		['hasPresentationDirectory', 'pptx', 'application/vnd.openxmlformats-officedocument.presentationml.presentation'],
		['hasSpreadsheetDirectory', 'xlsx', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'],
		['hasThreeDimensionalModelEntry', '3mf', 'model/3mf'],
	];

	for (const [flagName, ext, mime] of candidates) {
		if (openXmlState[flagName]) {
			return {ext, mime};
		}
	}
}
309
+
310
/**
Apply the directory-name heuristic, but only while `[Content_Types].xml` has been seen and left unread.

@param {object} openXmlState - Flags gathered from the archive's entries.
@returns {{ext: string, mime: string} | undefined} The directory-based guess, or `undefined` when the heuristic must not be used.
*/
function getOpenXmlFileTypeFromZipEntries(openXmlState) {
	// Guessing is allowed only when the content-types entry exists and its handler was skipped:
	// it must not be unparseable, not be mid-parse, and not already be parsed. In every other
	// situation content-type parsing either answered definitively or failed, so no guess is made.
	const mayGuessFromDirectoryNames = openXmlState.hasContentTypesEntry
		&& !openXmlState.hasUnparseableContentTypes
		&& !openXmlState.isParsingContentTypes
		&& !openXmlState.hasParsedContentTypesEntry;

	return mayGuessFromDirectoryNames
		? getOpenXmlFileTypeFromDirectoryNames(openXmlState)
		: undefined;
}
325
+
326
/**
Extract the main content type from the text of `[Content_Types].xml` with a plain string scan (no XML parsing).

@param {string} xmlContent - Decoded XML text.
@returns {string | undefined} The text between the nearest preceding `"` and the first `.main+xml"`; the 3MF model MIME type when no `.main+xml"` exists but a matching `ContentType` attribute does; otherwise `undefined`.
*/
function getOpenXmlMimeTypeFromContentTypesXml(xmlContent) {
	const suffixPosition = xmlContent.indexOf('.main+xml"');

	if (suffixPosition !== -1) {
		// Walk back to the quote that opens the attribute value. When there is none,
		// `lastIndexOf` yields -1 and the value deliberately starts at the beginning of the text.
		const valueStart = xmlContent.lastIndexOf('"', suffixPosition - 1) + 1;
		return xmlContent.slice(valueStart, suffixPosition);
	}

	const threeDimensionalModelMimeType = 'application/vnd.ms-package.3dmanufacturing-3dmodel+xml';
	return xmlContent.includes(`ContentType="${threeDimensionalModelMimeType}"`)
		? threeDimensionalModelMimeType
		: undefined;
}
343
+
344
// Signature value expected at the start of a data descriptor, read as a little-endian uint32.
const zipDataDescriptorSignature = 0x08_07_4B_50;
// Number of bytes this module reads for one data descriptor.
const zipDataDescriptorLengthInBytes = 16;
// Largest tail that can hold the beginning of a descriptor without holding all of it.
const zipDataDescriptorOverlapLengthInBytes = zipDataDescriptorLengthInBytes - 1;

/**
Find the first offset in `buffer` that looks like the data descriptor ending the current entry.

A position qualifies when it holds the descriptor signature and the little-endian uint32 stored
8 bytes later equals the number of entry bytes that would precede the descriptor
(`bytesConsumed + offset`). Only positions where a full descriptor fits inside `buffer` are examined.

@param {Uint8Array} buffer - Bytes to scan.
@param {number} bytesConsumed - Entry bytes already consumed before `buffer[0]`.
@returns {number} Offset of the descriptor within `buffer`, or `-1` when none is found.
*/
function findZipDataDescriptorOffset(buffer, bytesConsumed) {
	if (buffer.length < zipDataDescriptorLengthInBytes) {
		return -1;
	}

	// One view for the whole scan: reading through a per-call helper would allocate a new
	// `DataView` for every candidate offset.
	const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
	const lastPossibleDescriptorOffset = buffer.length - zipDataDescriptorLengthInBytes;

	for (let index = 0; index <= lastPossibleDescriptorOffset; index++) {
		if (
			view.getUint32(index, true) === zipDataDescriptorSignature
			&& view.getUint32(index + 8, true) === bytesConsumed + index
		) {
			return index;
		}
	}

	return -1;
}
365
+
366
/**
Consume the data of an entry whose end is found by scanning for its data descriptor.

The tokenizer is peeked through `zipHandler.syncBuffer` repeatedly; bytes in front of the
descriptor are either collected or skipped. The descriptor itself is left unread.

@param {object} zipHandler - Supplies `tokenizer` and `syncBuffer`; its `knownSizeDescriptorScannedBytes` counter is increased when the file size is known.
@param {{shouldBuffer?: boolean, maximumLength?: number}} [options] - `maximumLength` defaults to `maximumZipEntrySizeInBytes`.
@returns {Promise<Uint8Array | undefined>} The entry bytes when `shouldBuffer` is truthy, otherwise `undefined`.
@throws {Error} When more than `maximumLength` bytes would be consumed.
*/
async function readZipDataDescriptorEntryWithLimit(zipHandler, {shouldBuffer, maximumLength = maximumZipEntrySizeInBytes} = {}) {
	const {syncBuffer, tokenizer} = zipHandler;
	const scanWindowLength = syncBuffer.length;
	const bufferedChunks = [];
	let bytesConsumed = 0;
	let isDescriptorFound = false;

	while (!isDescriptorFound) {
		const peekedLength = await tokenizer.peekBuffer(syncBuffer, {mayBeLess: true});
		const descriptorOffset = findZipDataDescriptorOffset(syncBuffer.subarray(0, peekedLength), bytesConsumed);
		isDescriptorFound = descriptorOffset >= 0;

		let chunkLength;
		if (isDescriptorFound) {
			// Everything in front of the descriptor belongs to the entry.
			chunkLength = descriptorOffset;
		} else if (peekedLength === scanWindowLength) {
			// The window was filled completely: leave its tail unconsumed so those bytes are
			// peeked again together with the data that follows.
			chunkLength = peekedLength - Math.min(zipDataDescriptorOverlapLengthInBytes, peekedLength - 1);
		} else {
			// A short peek: consume all of it.
			chunkLength = peekedLength;
		}

		if (chunkLength === 0) {
			break;
		}

		bytesConsumed += chunkLength;
		if (bytesConsumed > maximumLength) {
			throw new Error(`ZIP entry compressed data exceeds ${maximumLength} bytes`);
		}

		if (shouldBuffer) {
			const chunk = new Uint8Array(chunkLength);
			await tokenizer.readBuffer(chunk);
			bufferedChunks.push(chunk);
		} else {
			await tokenizer.ignore(chunkLength);
		}
	}

	if (!hasUnknownFileSize(zipHandler.tokenizer)) {
		zipHandler.knownSizeDescriptorScannedBytes += bytesConsumed;
	}

	return shouldBuffer
		? mergeByteChunks(bufferedChunks, bytesConsumed)
		: undefined;
}
416
+
417
/**
Compute how many more bytes a descriptor scan may consume, never returning less than 0.

@param {object} zipHandler - Supplies `tokenizer` and `knownSizeDescriptorScannedBytes`.
@param {number} startOffset - Tokenizer position at which the ZIP scan began.
@returns {number} For unknown file sizes: `maximumUntrustedSkipSizeInBytes` minus the distance travelled since `startOffset`. Otherwise: `maximumZipEntrySizeInBytes` minus the bytes already spent on descriptor scans.
*/
function getRemainingZipScanBudget(zipHandler, startOffset) {
	const remainingBudget = hasUnknownFileSize(zipHandler.tokenizer)
		? maximumUntrustedSkipSizeInBytes - (zipHandler.tokenizer.position - startOffset)
		: maximumZipEntrySizeInBytes - zipHandler.knownSizeDescriptorScannedBytes;

	return Math.max(0, remainingBudget);
}
424
+
425
/**
Read or skip the data of one ZIP entry.

@param {object} zipHandler - Supplies the tokenizer the data is read from.
@param {object} zipHeader - Local file header; `dataDescriptor` and `compressedSize` are consulted.
@param {{shouldBuffer?: boolean, maximumDescriptorLength?: number}} [options] - `maximumDescriptorLength` (default `maximumZipEntrySizeInBytes`) bounds the descriptor scan.
@returns {Promise<Uint8Array | undefined>} The entry bytes when buffering is requested, otherwise `undefined`.
@throws {Error} When a buffered read is requested for a size that is not finite, is negative, or exceeds the buffered-read limit.
*/
async function readZipEntryData(zipHandler, zipHeader, {shouldBuffer, maximumDescriptorLength = maximumZipEntrySizeInBytes} = {}) {
	const {tokenizer} = zipHandler;
	const {compressedSize} = zipHeader;

	// A data-descriptor entry with a declared size of 0: the end of the data has to be located by scanning.
	if (zipHeader.dataDescriptor && compressedSize === 0) {
		return readZipDataDescriptorEntryWithLimit(zipHandler, {
			shouldBuffer,
			maximumLength: maximumDescriptorLength,
		});
	}

	if (shouldBuffer) {
		const maximumLength = getMaximumZipBufferedReadLength(tokenizer);
		const isReadableSize = Number.isFinite(compressedSize)
			&& compressedSize >= 0
			&& !(compressedSize > maximumLength);
		if (!isReadableSize) {
			throw new Error(`ZIP entry compressed data exceeds ${maximumLength} bytes`);
		}

		const fileData = new Uint8Array(compressedSize);
		await tokenizer.readBuffer(fileData);
		return fileData;
	}

	// Nobody wants the bytes: skip them, bounded by the entry limit for unknown-size input
	// and by the file size otherwise.
	await safeIgnore(tokenizer, compressedSize, {
		maximumLength: hasUnknownFileSize(tokenizer) ? maximumZipEntrySizeInBytes : tokenizer.fileInfo.size,
		reason: 'ZIP entry compressed data',
	});
}
457
+
458
// Replaces the default inflate so that decompression is size-limited; @tokenizer/inflate offers no configuration hook for this.
/**
Hand the (decompressed) entry data to `callback`.

@param {object} zipHeader - Local file header; `compressedMethod` selects the handling (0 passes the data through, 8 inflates it).
@param {Uint8Array} fileData - Raw entry bytes.
@param {Function} callback - Receives the resulting bytes; its return value is returned.
@throws {Error} For any other compression method.
*/
ZipHandler.prototype.inflate = async function (zipHeader, fileData, callback) {
	const {compressedMethod} = zipHeader;

	switch (compressedMethod) {
		case 0: {
			return callback(fileData);
		}

		case 8: {
			const inflatedData = await decompressDeflateRawWithLimit(fileData, {maximumLength: maximumZipEntrySizeInBytes});
			return callback(inflatedData);
		}

		default: {
			throw new Error(`Unsupported ZIP compression method: ${compressedMethod}`);
		}
	}
};
471
+
472
/**
Walk the local file headers of the archive, letting `fileCallback` decide per entry what to do.

@param {Function} fileCallback - Called with each local file header; returns an object whose optional `handler` receives the entry data and whose truthy `stop` ends the walk after the current entry.
@throws {ParserHardLimitError} When the unknown-size scan budget is exceeded.
@throws {Error} When more than `maximumZipEntryCount` entries are met, or a flagged data descriptor lacks its signature.
*/
ZipHandler.prototype.unzip = async function (fileCallback) {
	const zipScanStart = this.tokenizer.position;
	this.knownSizeDescriptorScannedBytes = 0;
	let zipEntryCount = 0;

	// Shared guard, applied before reading a header and again after finishing an entry.
	const assertWithinScanBudget = () => {
		if (hasExceededUnknownSizeScanBudget(this.tokenizer, zipScanStart, maximumUntrustedSkipSizeInBytes)) {
			throw new ParserHardLimitError(`ZIP stream probing exceeds ${maximumUntrustedSkipSizeInBytes} bytes`);
		}
	};

	for (;;) {
		assertWithinScanBudget();

		const zipHeader = await this.readLocalFileHeader();
		if (!zipHeader) {
			return;
		}

		zipEntryCount += 1;
		if (zipEntryCount > maximumZipEntryCount) {
			throw new Error(`ZIP entry count exceeds ${maximumZipEntryCount}`);
		}

		const next = fileCallback(zipHeader);
		const shouldStop = Boolean(next.stop);

		await this.tokenizer.ignore(zipHeader.extraFieldLength);
		const fileData = await readZipEntryData(this, zipHeader, {
			shouldBuffer: Boolean(next.handler),
			maximumDescriptorLength: Math.min(maximumZipEntrySizeInBytes, getRemainingZipScanBudget(this, zipScanStart)),
		});

		if (next.handler) {
			await this.inflate(zipHeader, fileData, next.handler);
		}

		if (zipHeader.dataDescriptor) {
			const dataDescriptor = new Uint8Array(zipDataDescriptorLengthInBytes);
			await this.tokenizer.readBuffer(dataDescriptor);

			const signature = Token.UINT32_LE.get(dataDescriptor, 0);
			if (signature !== zipDataDescriptorSignature) {
				throw new Error(`Expected data-descriptor-signature at position ${this.tokenizer.position - dataDescriptor.length}`);
			}
		}

		assertWithinScanBudget();

		if (shouldStop) {
			return;
		}
	}
};
517
+
518
+ export async function detectZip(tokenizer) {
519
+ let fileType;
520
+ const openXmlState = createOpenXmlZipDetectionState();
521
+ const iWorkState = createIWorkZipDetectionState();
522
+
523
+ try {
524
+ await new ZipHandler(tokenizer).unzip(zipHeader => {
525
+ updateOpenXmlZipDetectionStateFromFilename(openXmlState, zipHeader.filename);
526
+ updateIWorkZipDetectionStateFromFilename(iWorkState, zipHeader.filename);
527
+
528
+ // Early exit for Keynote or Numbers when markers are definitive
529
+ if (iWorkState.hasDocumentEntry && (iWorkState.hasMasterSlideEntry || iWorkState.hasTablesEntry)) {
530
+ fileType = getIWorkFileTypeFromZipEntries(iWorkState);
531
+ return {stop: true};
532
+ }
533
+
534
+ const isOpenXmlContentTypesEntry = zipHeader.filename === '[Content_Types].xml';
535
+ const openXmlFileTypeFromEntries = getOpenXmlFileTypeFromZipEntries(openXmlState);
536
+ if (
537
+ !isOpenXmlContentTypesEntry
538
+ && openXmlFileTypeFromEntries
539
+ ) {
540
+ fileType = openXmlFileTypeFromEntries;
541
+ return {
542
+ stop: true,
543
+ };
544
+ }
545
+
546
+ switch (zipHeader.filename) {
547
+ case 'META-INF/mozilla.rsa':
548
+ fileType = {
549
+ ext: 'xpi',
550
+ mime: 'application/x-xpinstall',
551
+ };
552
+ return {
553
+ stop: true,
554
+ };
555
+ case 'META-INF/MANIFEST.MF':
556
+ fileType = {
557
+ ext: 'jar',
558
+ mime: 'application/java-archive',
559
+ };
560
+ return {
561
+ stop: true,
562
+ };
563
+ case 'mimetype':
564
+ if (!canReadZipEntryForDetection(zipHeader, maximumZipTextEntrySizeInBytes)) {
565
+ return {};
566
+ }
567
+
568
+ return {
569
+ async handler(fileData) {
570
+ // Use TextDecoder to decode the UTF-8 encoded data
571
+ const mimeType = new TextDecoder('utf-8').decode(fileData).trim();
572
+ fileType = getFileTypeFromMimeType(mimeType);
573
+ },
574
+ stop: true,
575
+ };
576
+
577
+ case '[Content_Types].xml': {
578
+ openXmlState.hasContentTypesEntry = true;
579
+
580
+ if (!canReadZipEntryForDetection(zipHeader, maximumZipTextEntrySizeInBytes)) {
581
+ openXmlState.hasUnparseableContentTypes = true;
582
+ return {};
583
+ }
584
+
585
+ openXmlState.isParsingContentTypes = true;
586
+ return {
587
+ async handler(fileData) {
588
+ // Use TextDecoder to decode the UTF-8 encoded data
589
+ const xmlContent = new TextDecoder('utf-8').decode(fileData);
590
+ const mimeType = getOpenXmlMimeTypeFromContentTypesXml(xmlContent);
591
+ if (mimeType) {
592
+ fileType = getFileTypeFromMimeType(mimeType);
593
+ }
594
+
595
+ openXmlState.hasParsedContentTypesEntry = true;
596
+ openXmlState.isParsingContentTypes = false;
597
+ },
598
+ stop: true,
599
+ };
600
+ }
601
+
602
+ default:
603
+ if (/classes\d*\.dex/v.test(zipHeader.filename)) {
604
+ fileType = {
605
+ ext: 'apk',
606
+ mime: 'application/vnd.android.package-archive',
607
+ };
608
+ return {stop: true};
609
+ }
610
+
611
+ return {};
612
+ }
613
+ });
614
+ } catch (error) {
615
+ if (!isRecoverableZipError(error)) {
616
+ throw error;
617
+ }
618
+
619
+ if (openXmlState.isParsingContentTypes) {
620
+ openXmlState.isParsingContentTypes = false;
621
+ openXmlState.hasUnparseableContentTypes = true;
622
+ }
623
+
624
+ // When the stream was truncated before reaching [Content_Types].xml, use directory names as a fallback.
625
+ // This handles LibreOffice-created OOXML files where [Content_Types].xml appears after content entries.
626
+ if (!fileType && error instanceof strtok3.EndOfStreamError && !openXmlState.hasContentTypesEntry) {
627
+ fileType = getOpenXmlFileTypeFromDirectoryNames(openXmlState);
628
+ }
629
+ }
630
+
631
+ const iWorkFileType = hasUnknownFileSize(tokenizer)
632
+ && iWorkState.hasDocumentEntry
633
+ && !iWorkState.hasMasterSlideEntry
634
+ && !iWorkState.hasTablesEntry
635
+ && !iWorkState.hasCalculationEngineEntry
636
+ ? undefined
637
+ : getIWorkFileTypeFromZipEntries(iWorkState);
638
+
639
+ return fileType ?? getOpenXmlFileTypeFromZipEntries(openXmlState) ?? iWorkFileType ?? {
640
+ ext: 'zip',
641
+ mime: 'application/zip',
642
+ };
643
+ }