npm - file-type - Versions diffs - 21.3.3 → 22.0.0 - Mend

file-type 21.3.3 → 22.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/package.json +36 -53
package/readme.md +35 -105
package/source/detectors/asf.js +127 -0
package/source/detectors/ebml.js +120 -0
package/source/detectors/png.js +123 -0
package/source/detectors/zip.js +643 -0
package/{core.d.ts → source/index.d.ts} +49 -22
package/{core.js → source/index.js} +242 -1122
package/source/index.test-d.ts +53 -0
package/source/parser.js +65 -0
package/{supported.js → source/supported.js} +14 -6
package/{util.js → source/tokens.js} +2 -2
package/index.d.ts +0 -98
package/index.js +0 -126

package/source/detectors/zip.js ADDED Viewed

@@ -0,0 +1,643 @@
+import * as Token from 'token-types';
+import * as strtok3 from 'strtok3/core';
+import {ZipHandler} from '@tokenizer/inflate';
+import {
+	maximumUntrustedSkipSizeInBytes,
+	ParserHardLimitError,
+	safeIgnore,
+	hasUnknownFileSize,
+	hasExceededUnknownSizeScanBudget,
+} from '../parser.js';
// Per-entry byte ceiling used by the ZIP detector for compressed and decompressed entry data (1 MiB).
const maximumZipEntrySizeInBytes = 1024 ** 2;

// Number of local file headers the scan may visit before it gives up.
const maximumZipEntryCount = 2 ** 10;

// Ceiling for a single buffered read: 2^31 - 1 bytes.
const maximumZipBufferedReadSizeInBytes = 0x7F_FF_FF_FF;

// Text entries (`mimetype`, `[Content_Types].xml`) share the generic per-entry ceiling.
const maximumZipTextEntrySizeInBytes = maximumZipEntrySizeInBytes;

// Exact error messages that `isRecoverableZipError` accepts.
const recoverableZipErrorMessages = new Set(['Unexpected signature', 'Encrypted ZIP', 'Expected Central-File-Header signature']);

// Error message prefixes that `isRecoverableZipError` accepts (the remainder of the message carries a number or position).
const recoverableZipErrorMessagePrefixes = [
	'ZIP entry count exceeds ',
	'Unsupported ZIP compression method:',
	'ZIP entry compressed data exceeds ',
	'ZIP entry decompressed data exceeds ',
	'Expected data-descriptor-signature at position ',
];

// Values of `error.code` that `isRecoverableZipError` accepts.
const recoverableZipErrorCodes = new Set(['Z_BUF_ERROR', 'Z_DATA_ERROR', 'ERR_INVALID_STATE']);
+async function decompressDeflateRawWithLimit(data, {maximumLength = maximumZipEntrySizeInBytes} = {}) {
+	const input = new ReadableStream({
+		start(controller) {
+			controller.enqueue(data);
+			controller.close();
+		},
+	});
+	const output = input.pipeThrough(new DecompressionStream('deflate-raw'));
+	const reader = output.getReader();
+	const chunks = [];
+	let totalLength = 0;
+	try {
+		for (;;) {
+			const {done, value} = await reader.read();
+			if (done) {
+				break;
+			}
+			totalLength += value.length;
+			if (totalLength > maximumLength) {
+				await reader.cancel();
+				throw new Error(`ZIP entry decompressed data exceeds ${maximumLength} bytes`);
+			}
+			chunks.push(value);
+		}
+	} finally {
+		reader.releaseLock();
+	}
+	const uncompressedData = new Uint8Array(totalLength);
+	let offset = 0;
+	for (const chunk of chunks) {
+		uncompressedData.set(chunk, offset);
+		offset += chunk.length;
+	}
+	return uncompressedData;
+}
+function mergeByteChunks(chunks, totalLength) {
+	const merged = new Uint8Array(totalLength);
+	let offset = 0;
+	for (const chunk of chunks) {
+		merged.set(chunk, offset);
+		offset += chunk.length;
+	}
+	return merged;
+}
/**
 * Work out how many bytes may be buffered in a single read from the tokenizer's current position.
 *
 * @param {object} tokenizer - Object exposing `fileInfo.size` and `position`.
 * @returns {number} The bytes left in the file when `fileInfo.size` is a finite number (never negative), otherwise `Number.MAX_SAFE_INTEGER`; in both cases capped at `maximumZipBufferedReadSizeInBytes`.
 */
function getMaximumZipBufferedReadLength(tokenizer) {
	const {size} = tokenizer.fileInfo;
	let bytesLeft = Number.MAX_SAFE_INTEGER;
	if (Number.isFinite(size)) {
		// A position past the declared size clamps to zero rather than going negative.
		bytesLeft = Math.max(0, size - tokenizer.position);
	}

	return Math.min(bytesLeft, maximumZipBufferedReadSizeInBytes);
}
/**
 * Decide whether an error raised while walking a ZIP archive should be tolerated.
 *
 * An error is recoverable when it is an `strtok3.EndOfStreamError` or a
 * `ParserHardLimitError`, or when it is an `Error` whose `message` or `code`
 * appears in one of the `recoverableZip*` tables.
 *
 * @param {unknown} error - The caught value.
 * @returns {boolean} `true` when the error is recoverable.
 */
function isRecoverableZipError(error) {
	if (error instanceof strtok3.EndOfStreamError || error instanceof ParserHardLimitError) {
		return true;
	}

	// Anything that is not an `Error` has no message or code to inspect.
	if (!(error instanceof Error)) {
		return false;
	}

	return recoverableZipErrorMessages.has(error.message)
		|| recoverableZipErrorCodes.has(error.code)
		|| recoverableZipErrorMessagePrefixes.some(prefix => error.message.startsWith(prefix));
}
/**
 * Check that the sizes declared in a local file header are sane enough to read the entry.
 *
 * @param {object} zipHeader - Header exposing `compressedSize` and `uncompressedSize`.
 * @param {number} [maximumSize] - Largest size accepted for either field. Defaults to `maximumZipEntrySizeInBytes`.
 * @returns {boolean} `true` only when both sizes are finite numbers within `0..maximumSize`.
 */
function canReadZipEntryForDetection(zipHeader, maximumSize = maximumZipEntrySizeInBytes) {
	const declaredSizes = [zipHeader.compressedSize, zipHeader.uncompressedSize];
	return declaredSizes.every(size => Number.isFinite(size) && size >= 0 && size <= maximumSize);
}
+// -- iWork helpers --
/**
 * Create a fresh record of which iWork marker entries have been seen in the archive.
 *
 * @returns {{hasDocumentEntry: boolean, hasMasterSlideEntry: boolean, hasTablesEntry: boolean, hasCalculationEngineEntry: boolean}} A new object with every flag set to `false`.
 */
function createIWorkZipDetectionState() {
	const flagNames = [
		'hasDocumentEntry',
		'hasMasterSlideEntry',
		'hasTablesEntry',
		'hasCalculationEngineEntry',
	];
	return Object.fromEntries(flagNames.map(flagName => [flagName, false]));
}
/**
 * Record any iWork marker that the given entry name represents.
 *
 * Flags are only ever switched on; a non-matching name leaves the state untouched.
 *
 * @param {object} iWorkState - State object to mutate.
 * @param {string} filename - Entry name from the local file header.
 */
function updateIWorkZipDetectionStateFromFilename(iWorkState, filename) {
	const markers = [
		['hasDocumentEntry', filename === 'Index/Document.iwa'],
		['hasMasterSlideEntry', filename.startsWith('Index/MasterSlide')],
		['hasTablesEntry', filename.startsWith('Index/Tables/')],
		['hasCalculationEngineEntry', filename === 'Index/CalculationEngine.iwa'],
	];
	for (const [flagName, isMatch] of markers) {
		if (isMatch) {
			iWorkState[flagName] = true;
		}
	}
}
/**
 * Map the iWork markers collected so far to a file type.
 *
 * @param {object} iWorkState - State produced by `createIWorkZipDetectionState`.
 * @returns {{ext: string, mime: string} | undefined} Keynote when a master-slide entry was seen, Numbers when a tables entry was seen, Pages otherwise; `undefined` when no document entry was seen.
 */
function getIWorkFileTypeFromZipEntries(iWorkState) {
	const {hasDocumentEntry, hasMasterSlideEntry, hasTablesEntry} = iWorkState;
	if (!hasDocumentEntry) {
		return undefined;
	}

	// Keynote takes precedence over Numbers; Pages is what remains.
	let application = 'pages';
	if (hasMasterSlideEntry) {
		application = 'keynote';
	} else if (hasTablesEntry) {
		application = 'numbers';
	}

	const extension = application === 'keynote' ? 'key' : application;
	return {ext: extension, mime: `application/vnd.apple.${application}`};
}
+// -- MIME type and OpenXML helpers --
// Lookup from a lower-cased container MIME type to its extension.
// `mime` is present only when the reported MIME type differs from the lookup key.
const fileTypeByContainerMimeType = new Map([
	['application/epub+zip', {ext: 'epub'}],
	['application/vnd.oasis.opendocument.text', {ext: 'odt'}],
	['application/vnd.oasis.opendocument.text-template', {ext: 'ott'}],
	['application/vnd.oasis.opendocument.spreadsheet', {ext: 'ods'}],
	['application/vnd.oasis.opendocument.spreadsheet-template', {ext: 'ots'}],
	['application/vnd.oasis.opendocument.presentation', {ext: 'odp'}],
	['application/vnd.oasis.opendocument.presentation-template', {ext: 'otp'}],
	['application/vnd.oasis.opendocument.graphics', {ext: 'odg'}],
	['application/vnd.oasis.opendocument.graphics-template', {ext: 'otg'}],
	['application/vnd.openxmlformats-officedocument.presentationml.slideshow', {ext: 'ppsx'}],
	['application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', {ext: 'xlsx'}],
	['application/vnd.ms-excel.sheet.macroenabled', {ext: 'xlsm', mime: 'application/vnd.ms-excel.sheet.macroenabled.12'}],
	['application/vnd.openxmlformats-officedocument.spreadsheetml.template', {ext: 'xltx'}],
	['application/vnd.ms-excel.template.macroenabled', {ext: 'xltm', mime: 'application/vnd.ms-excel.template.macroenabled.12'}],
	['application/vnd.ms-powerpoint.slideshow.macroenabled', {ext: 'ppsm', mime: 'application/vnd.ms-powerpoint.slideshow.macroenabled.12'}],
	['application/vnd.openxmlformats-officedocument.wordprocessingml.document', {ext: 'docx'}],
	['application/vnd.ms-word.document.macroenabled', {ext: 'docm', mime: 'application/vnd.ms-word.document.macroenabled.12'}],
	['application/vnd.openxmlformats-officedocument.wordprocessingml.template', {ext: 'dotx'}],
	['application/vnd.ms-word.template.macroenabledtemplate', {ext: 'dotm', mime: 'application/vnd.ms-word.template.macroenabled.12'}],
	['application/vnd.openxmlformats-officedocument.presentationml.template', {ext: 'potx'}],
	['application/vnd.ms-powerpoint.template.macroenabled', {ext: 'potm', mime: 'application/vnd.ms-powerpoint.template.macroenabled.12'}],
	['application/vnd.openxmlformats-officedocument.presentationml.presentation', {ext: 'pptx'}],
	['application/vnd.ms-powerpoint.presentation.macroenabled', {ext: 'pptm', mime: 'application/vnd.ms-powerpoint.presentation.macroenabled.12'}],
	['application/vnd.ms-visio.drawing', {ext: 'vsdx', mime: 'application/vnd.visio'}],
	['application/vnd.ms-package.3dmanufacturing-3dmodel+xml', {ext: '3mf', mime: 'model/3mf'}],
]);

/**
 * Translate a MIME type read from inside a ZIP container into a file type.
 *
 * The comparison is case-insensitive. When the table gives no explicit `mime`,
 * the lower-cased input is reported back as the MIME type.
 *
 * @param {string} mimeType - MIME type string found in the archive.
 * @returns {{ext: string, mime: string} | undefined} A new object per call, or `undefined` for an unknown MIME type.
 */
function getFileTypeFromMimeType(mimeType) {
	const normalizedMimeType = mimeType.toLowerCase();
	const knownFileType = fileTypeByContainerMimeType.get(normalizedMimeType);
	if (knownFileType === undefined) {
		return undefined;
	}

	return {
		ext: knownFileType.ext,
		mime: knownFileType.mime ?? normalizedMimeType,
	};
}
/**
 * Create a fresh record of what has been learned about a possible OpenXML archive.
 *
 * @returns {object} A new object with the content-types progress flags and the directory/entry marker flags all set to `false`.
 */
function createOpenXmlZipDetectionState() {
	const flagNames = [
		// Progress of `[Content_Types].xml` handling.
		'hasContentTypesEntry',
		'hasParsedContentTypesEntry',
		'isParsingContentTypes',
		'hasUnparseableContentTypes',
		// Markers derived from entry names.
		'hasWordDirectory',
		'hasPresentationDirectory',
		'hasSpreadsheetDirectory',
		'hasThreeDimensionalModelEntry',
	];
	return Object.fromEntries(flagNames.map(flagName => [flagName, false]));
}
/**
 * Record any OpenXML marker that the given entry name represents.
 *
 * Flags are only ever switched on; a non-matching name leaves the state untouched.
 *
 * @param {object} openXmlState - State object to mutate.
 * @param {string} filename - Entry name from the local file header.
 */
function updateOpenXmlZipDetectionStateFromFilename(openXmlState, filename) {
	const directoryMarkers = [
		['word/', 'hasWordDirectory'],
		['ppt/', 'hasPresentationDirectory'],
		['xl/', 'hasSpreadsheetDirectory'],
	];
	for (const [directoryPrefix, flagName] of directoryMarkers) {
		if (filename.startsWith(directoryPrefix)) {
			openXmlState[flagName] = true;
		}
	}

	// A 3D model needs both the directory and the file extension to match.
	const isThreeDimensionalModel = filename.startsWith('3D/') && filename.endsWith('.model');
	if (isThreeDimensionalModel) {
		openXmlState.hasThreeDimensionalModelEntry = true;
	}
}
/**
 * Guess an OpenXML file type purely from the directory/entry markers seen so far.
 *
 * The markers are checked in a fixed order (Word, PowerPoint, Excel, 3MF) and the
 * first one that is set wins.
 *
 * @param {object} openXmlState - State produced by `createOpenXmlZipDetectionState`.
 * @returns {{ext: string, mime: string} | undefined} The guessed type, or `undefined` when no marker is set.
 */
function getOpenXmlFileTypeFromDirectoryNames(openXmlState) {
	const candidates = [
		['hasWordDirectory', 'docx', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
		['hasPresentationDirectory', 'pptx', 'application/vnd.openxmlformats-officedocument.presentationml.presentation'],
		['hasSpreadsheetDirectory', 'xlsx', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'],
		['hasThreeDimensionalModelEntry', '3mf', 'model/3mf'],
	];
	for (const [flagName, ext, mime] of candidates) {
		if (openXmlState[flagName]) {
			return {ext, mime};
		}
	}

	return undefined;
}
/**
 * Apply the directory-name heuristic, but only when it is safe to guess.
 *
 * Guessing is allowed solely when `[Content_Types].xml` was seen in the archive while
 * its handler was skipped: not flagged unparseable, not currently being parsed, and not
 * already parsed. In every other situation content-type parsing either gave (or is
 * about to give) a definitive answer, or failed, so no guess is made.
 *
 * @param {object} openXmlState - State produced by `createOpenXmlZipDetectionState`.
 * @returns {{ext: string, mime: string} | undefined} The result of `getOpenXmlFileTypeFromDirectoryNames`, or `undefined` when guessing is not allowed.
 */
function getOpenXmlFileTypeFromZipEntries(openXmlState) {
	const mayGuessFromDirectoryNames = openXmlState.hasContentTypesEntry
		&& !openXmlState.hasUnparseableContentTypes
		&& !openXmlState.isParsingContentTypes
		&& !openXmlState.hasParsedContentTypesEntry;
	return mayGuessFromDirectoryNames
		? getOpenXmlFileTypeFromDirectoryNames(openXmlState)
		: undefined;
}
/**
 * Pull the main document MIME type out of the text of `[Content_Types].xml`.
 *
 * Only the `ContentType="….main+xml"` value is needed, so a plain string scan is used
 * instead of a full XML parse. When no such value exists, the text is checked for the
 * 3MF model content type instead.
 *
 * @param {string} xmlContent - Decoded text of `[Content_Types].xml`.
 * @returns {string | undefined} The MIME type without its `.main+xml` suffix, the 3MF model MIME type, or `undefined` when neither is present.
 */
function getOpenXmlMimeTypeFromContentTypesXml(xmlContent) {
	const suffixPosition = xmlContent.indexOf('.main+xml"');
	if (suffixPosition !== -1) {
		// Walk back to the quote that opens the attribute value. If there is none,
		// `lastIndexOf` yields -1 and the whole prefix is intentionally returned.
		const openingQuotePosition = xmlContent.lastIndexOf('"', suffixPosition - 1);
		return xmlContent.slice(openingQuotePosition + 1, suffixPosition);
	}

	const threeDimensionalModelMimeType = 'application/vnd.ms-package.3dmanufacturing-3dmodel+xml';
	return xmlContent.includes(`ContentType="${threeDimensionalModelMimeType}"`)
		? threeDimensionalModelMimeType
		: undefined;
}
// Signature value that `findZipDataDescriptorOffset` looks for, read as a little-endian UINT32.
const zipDataDescriptorSignature = 0x08074B50;
// Number of bytes occupied by the descriptor form handled here.
const zipDataDescriptorLengthInBytes = 16;
// Bytes kept back between scans so a descriptor straddling two reads is still found.
const zipDataDescriptorOverlapLengthInBytes = zipDataDescriptorLengthInBytes - 1;

/**
 * Locate a data descriptor inside a window of entry data.
 *
 * A position counts as a match when the UINT32 at that position is the descriptor
 * signature and the UINT32 eight bytes later equals the number of entry bytes that
 * precede it (`bytesConsumed` plus the position inside the window).
 *
 * @param {Uint8Array} buffer - Window of bytes to scan.
 * @param {number} bytesConsumed - Entry bytes already consumed before this window.
 * @returns {number} Offset of the first match inside `buffer`, or `-1` when there is none or the window is shorter than a descriptor.
 */
function findZipDataDescriptorOffset(buffer, bytesConsumed) {
	// Number of start positions at which a whole descriptor still fits; zero or negative for short windows.
	const candidateCount = buffer.length - zipDataDescriptorLengthInBytes + 1;
	for (let offset = 0; offset < candidateCount; offset++) {
		const hasSignature = Token.UINT32_LE.get(buffer, offset) === zipDataDescriptorSignature;
		if (hasSignature && Token.UINT32_LE.get(buffer, offset + 8) === bytesConsumed + offset) {
			return offset;
		}
	}

	return -1;
}
/**
 * Consume the data of an entry whose size is only given by a trailing data descriptor.
 *
 * The tokenizer is peeked through `zipHandler.syncBuffer` one window at a time. Each
 * window is searched with `findZipDataDescriptorOffset`; everything before the
 * descriptor (or, when none is found, the window minus a small overlap) is then read or
 * skipped. The tokenizer is left positioned at the descriptor when one is found.
 *
 * @param {object} zipHandler - Handler exposing `syncBuffer`, `tokenizer` and `knownSizeDescriptorScannedBytes`.
 * @param {object} [options]
 * @param {boolean} [options.shouldBuffer] - Collect and return the entry bytes instead of skipping them.
 * @param {number} [options.maximumLength] - Maximum number of entry bytes tolerated. Defaults to `maximumZipEntrySizeInBytes`.
 * @returns {Promise<Uint8Array | undefined>} The entry bytes when `shouldBuffer` is set, otherwise `undefined`.
 * @throws {Error} `ZIP entry compressed data exceeds <maximumLength> bytes` when the limit is passed.
 */
async function readZipDataDescriptorEntryWithLimit(zipHandler, {shouldBuffer, maximumLength = maximumZipEntrySizeInBytes} = {}) {
	const {syncBuffer} = zipHandler;
	const windowCapacity = syncBuffer.length;
	const bufferedChunks = [];
	let bytesConsumed = 0;
	let isDescriptorFound = false;

	while (!isDescriptorFound) {
		const peekedLength = await zipHandler.tokenizer.peekBuffer(syncBuffer, {mayBeLess: true});
		const descriptorOffset = findZipDataDescriptorOffset(syncBuffer.subarray(0, peekedLength), bytesConsumed);
		isDescriptorFound = descriptorOffset >= 0;

		let chunkLength;
		if (isDescriptorFound) {
			// Everything before the descriptor belongs to the entry.
			chunkLength = descriptorOffset;
		} else if (peekedLength === windowCapacity) {
			// Full window without a match: hold back the tail so a descriptor that straddles two windows is still seen.
			chunkLength = peekedLength - Math.min(zipDataDescriptorOverlapLengthInBytes, peekedLength - 1);
		} else {
			// Short window: nothing is held back.
			chunkLength = peekedLength;
		}

		if (chunkLength === 0) {
			break;
		}

		bytesConsumed += chunkLength;
		if (bytesConsumed > maximumLength) {
			throw new Error(`ZIP entry compressed data exceeds ${maximumLength} bytes`);
		}

		if (shouldBuffer) {
			const chunk = new Uint8Array(chunkLength);
			await zipHandler.tokenizer.readBuffer(chunk);
			bufferedChunks.push(chunk);
		} else {
			await zipHandler.tokenizer.ignore(chunkLength);
		}
	}

	// The scanned-bytes counter is only advanced when `hasUnknownFileSize` reports false.
	if (!hasUnknownFileSize(zipHandler.tokenizer)) {
		zipHandler.knownSizeDescriptorScannedBytes += bytesConsumed;
	}

	return shouldBuffer ? mergeByteChunks(bufferedChunks, bytesConsumed) : undefined;
}
/**
 * Compute how many more bytes the descriptor scan may consume.
 *
 * When `hasUnknownFileSize` reports true, the budget is `maximumUntrustedSkipSizeInBytes`
 * minus the distance travelled since `startOffset`. Otherwise it is
 * `maximumZipEntrySizeInBytes` minus the bytes already counted in
 * `zipHandler.knownSizeDescriptorScannedBytes`.
 *
 * @param {object} zipHandler - Handler exposing `tokenizer` and `knownSizeDescriptorScannedBytes`.
 * @param {number} startOffset - Tokenizer position at which the ZIP scan started.
 * @returns {number} The remaining budget in bytes, never negative.
 */
function getRemainingZipScanBudget(zipHandler, startOffset) {
	const {tokenizer} = zipHandler;
	const remainingBytes = hasUnknownFileSize(tokenizer)
		? maximumUntrustedSkipSizeInBytes - (tokenizer.position - startOffset)
		: maximumZipEntrySizeInBytes - zipHandler.knownSizeDescriptorScannedBytes;
	return Math.max(0, remainingBytes);
}
/**
 * Read or skip the data of the entry described by `zipHeader`.
 *
 * Three paths exist:
 * - the header flags a data descriptor and declares a compressed size of zero: the
 *   data is consumed by `readZipDataDescriptorEntryWithLimit`;
 * - the caller wants the bytes: the declared size is validated against
 *   `getMaximumZipBufferedReadLength` and the data is read into a new buffer;
 * - otherwise the data is skipped through `safeIgnore`.
 *
 * @param {object} zipHandler - Handler exposing `tokenizer`.
 * @param {object} zipHeader - Local file header exposing `dataDescriptor` and `compressedSize`.
 * @param {object} [options]
 * @param {boolean} [options.shouldBuffer] - Return the entry bytes instead of skipping them.
 * @param {number} [options.maximumDescriptorLength] - Byte limit for the data-descriptor path. Defaults to `maximumZipEntrySizeInBytes`.
 * @returns {Promise<Uint8Array | undefined>} The entry bytes when buffered, otherwise `undefined`.
 * @throws {Error} `ZIP entry compressed data exceeds <limit> bytes` when the declared size is not a finite number within `0..limit`.
 */
async function readZipEntryData(zipHandler, zipHeader, {shouldBuffer, maximumDescriptorLength = maximumZipEntrySizeInBytes} = {}) {
	const {tokenizer} = zipHandler;
	const {compressedSize} = zipHeader;

	const isSizeLeftToDescriptor = zipHeader.dataDescriptor && compressedSize === 0;
	if (isSizeLeftToDescriptor) {
		return readZipDataDescriptorEntryWithLimit(zipHandler, {
			shouldBuffer,
			maximumLength: maximumDescriptorLength,
		});
	}

	if (shouldBuffer) {
		const readLimit = getMaximumZipBufferedReadLength(tokenizer);
		const isDeclaredSizeReadable = Number.isFinite(compressedSize)
			&& compressedSize >= 0
			&& compressedSize <= readLimit;
		if (!isDeclaredSizeReadable) {
			throw new Error(`ZIP entry compressed data exceeds ${readLimit} bytes`);
		}

		const entryBytes = new Uint8Array(compressedSize);
		await tokenizer.readBuffer(entryBytes);
		return entryBytes;
	}

	// Skipping: the limit is the per-entry maximum when `hasUnknownFileSize` reports true, otherwise the file size itself.
	const skipLimit = hasUnknownFileSize(tokenizer)
		? maximumZipEntrySizeInBytes
		: tokenizer.fileInfo.size;
	await safeIgnore(tokenizer, compressedSize, {
		maximumLength: skipLimit,
		reason: 'ZIP entry compressed data',
	});
	return undefined;
}
/**
 * Replacement for the default `inflate` of `@tokenizer/inflate`'s `ZipHandler`.
 *
 * The library does not expose a configuration hook for a decompression size limit,
 * so the method is overridden here to route deflated entries through
 * `decompressDeflateRawWithLimit`.
 *
 * @param {object} zipHeader - Local file header exposing `compressedMethod`.
 * @param {Uint8Array} fileData - Entry bytes as stored in the archive.
 * @param {Function} callback - Receives the entry's uncompressed bytes.
 * @returns {Promise<unknown>} Whatever `callback` returns.
 * @throws {Error} `Unsupported ZIP compression method: <n>` for any method other than 0 or 8.
 */
ZipHandler.prototype.inflate = async function (zipHeader, fileData, callback) {
	const {compressedMethod} = zipHeader;
	switch (compressedMethod) {
		case 0: {
			// Method 0: the bytes are handed over as they are.
			return callback(fileData);
		}

		case 8: {
			// Method 8: inflate as raw DEFLATE under the per-entry size limit.
			const inflatedData = await decompressDeflateRawWithLimit(fileData, {maximumLength: maximumZipEntrySizeInBytes});
			return callback(inflatedData);
		}

		default: {
			throw new Error(`Unsupported ZIP compression method: ${compressedMethod}`);
		}
	}
};
/**
 * Replacement for `ZipHandler.prototype.unzip` that walks local file headers under
 * explicit limits.
 *
 * For every header, `fileCallback` decides what happens: a `handler` in its result
 * receives the entry's uncompressed bytes, and a truthy `stop` ends the walk after the
 * current entry. The walk also ends when `readLocalFileHeader` yields nothing.
 *
 * @param {Function} fileCallback - Called with each local file header; returns an object with optional `handler` and `stop`.
 * @returns {Promise<void>}
 * @throws {ParserHardLimitError} When `hasExceededUnknownSizeScanBudget` reports that the probe budget is spent.
 * @throws {Error} When more than `maximumZipEntryCount` entries are seen, or when the 16 bytes following a descriptor-flagged entry do not start with the descriptor signature.
 */
ZipHandler.prototype.unzip = async function (fileCallback) {
	const scanStartPosition = this.tokenizer.position;
	this.knownSizeDescriptorScannedBytes = 0;

	// Checked before and after every entry.
	const assertWithinProbeBudget = () => {
		if (hasExceededUnknownSizeScanBudget(this.tokenizer, scanStartPosition, maximumUntrustedSkipSizeInBytes)) {
			throw new ParserHardLimitError(`ZIP stream probing exceeds ${maximumUntrustedSkipSizeInBytes} bytes`);
		}
	};

	let entryCount = 0;
	for (;;) {
		assertWithinProbeBudget();

		const zipHeader = await this.readLocalFileHeader();
		if (!zipHeader) {
			return;
		}

		entryCount++;
		if (entryCount > maximumZipEntryCount) {
			throw new Error(`ZIP entry count exceeds ${maximumZipEntryCount}`);
		}

		const {stop, handler} = fileCallback(zipHeader);
		await this.tokenizer.ignore(zipHeader.extraFieldLength);

		const entryData = await readZipEntryData(this, zipHeader, {
			shouldBuffer: Boolean(handler),
			maximumDescriptorLength: Math.min(maximumZipEntrySizeInBytes, getRemainingZipScanBudget(this, scanStartPosition)),
		});
		if (handler) {
			await this.inflate(zipHeader, entryData, handler);
		}

		if (zipHeader.dataDescriptor) {
			// Consume the trailing descriptor and verify its signature.
			const descriptorBytes = new Uint8Array(zipDataDescriptorLengthInBytes);
			await this.tokenizer.readBuffer(descriptorBytes);
			const signature = Token.UINT32_LE.get(descriptorBytes, 0);
			if (signature !== zipDataDescriptorSignature) {
				throw new Error(`Expected data-descriptor-signature at position ${this.tokenizer.position - descriptorBytes.length}`);
			}
		}

		assertWithinProbeBudget();

		if (stop) {
			return;
		}
	}
};
+export async function detectZip(tokenizer) {
+	let fileType;
+	const openXmlState = createOpenXmlZipDetectionState();
+	const iWorkState = createIWorkZipDetectionState();
+	try {
+		await new ZipHandler(tokenizer).unzip(zipHeader => {
+			updateOpenXmlZipDetectionStateFromFilename(openXmlState, zipHeader.filename);
+			updateIWorkZipDetectionStateFromFilename(iWorkState, zipHeader.filename);
+			// Early exit for Keynote or Numbers when markers are definitive
+			if (iWorkState.hasDocumentEntry && (iWorkState.hasMasterSlideEntry || iWorkState.hasTablesEntry)) {
+				fileType = getIWorkFileTypeFromZipEntries(iWorkState);
+				return {stop: true};
+			}
+			const isOpenXmlContentTypesEntry = zipHeader.filename === '[Content_Types].xml';
+			const openXmlFileTypeFromEntries = getOpenXmlFileTypeFromZipEntries(openXmlState);
+			if (
+				!isOpenXmlContentTypesEntry
+				&& openXmlFileTypeFromEntries
+			) {
+				fileType = openXmlFileTypeFromEntries;
+				return {
+					stop: true,
+				};
+			}
+			switch (zipHeader.filename) {
+				case 'META-INF/mozilla.rsa':
+					fileType = {
+						ext: 'xpi',
+						mime: 'application/x-xpinstall',
+					};
+					return {
+						stop: true,
+					};
+				case 'META-INF/MANIFEST.MF':
+					fileType = {
+						ext: 'jar',
+						mime: 'application/java-archive',
+					};
+					return {
+						stop: true,
+					};
+				case 'mimetype':
+					if (!canReadZipEntryForDetection(zipHeader, maximumZipTextEntrySizeInBytes)) {
+						return {};
+					}
+					return {
+						async handler(fileData) {
+							// Use TextDecoder to decode the UTF-8 encoded data
+							const mimeType = new TextDecoder('utf-8').decode(fileData).trim();
+							fileType = getFileTypeFromMimeType(mimeType);
+						},
+						stop: true,
+					};
+				case '[Content_Types].xml': {
+					openXmlState.hasContentTypesEntry = true;
+					if (!canReadZipEntryForDetection(zipHeader, maximumZipTextEntrySizeInBytes)) {
+						openXmlState.hasUnparseableContentTypes = true;
+						return {};
+					}
+					openXmlState.isParsingContentTypes = true;
+					return {
+						async handler(fileData) {
+							// Use TextDecoder to decode the UTF-8 encoded data
+							const xmlContent = new TextDecoder('utf-8').decode(fileData);
+							const mimeType = getOpenXmlMimeTypeFromContentTypesXml(xmlContent);
+							if (mimeType) {
+								fileType = getFileTypeFromMimeType(mimeType);
+							}
+							openXmlState.hasParsedContentTypesEntry = true;
+							openXmlState.isParsingContentTypes = false;
+						},
+						stop: true,
+					};
+				}
+				default:
+					if (/classes\d*\.dex/v.test(zipHeader.filename)) {
+						fileType = {
+							ext: 'apk',
+							mime: 'application/vnd.android.package-archive',
+						};
+						return {stop: true};
+					}
+					return {};
+			}
+		});
+	} catch (error) {
+		if (!isRecoverableZipError(error)) {
+			throw error;
+		}
+		if (openXmlState.isParsingContentTypes) {
+			openXmlState.isParsingContentTypes = false;
+			openXmlState.hasUnparseableContentTypes = true;
+		}
+		// When the stream was truncated before reaching [Content_Types].xml, use directory names as a fallback.
+		// This handles LibreOffice-created OOXML files where [Content_Types].xml appears after content entries.
+		if (!fileType && error instanceof strtok3.EndOfStreamError && !openXmlState.hasContentTypesEntry) {
+			fileType = getOpenXmlFileTypeFromDirectoryNames(openXmlState);
+		}
+	}
+	const iWorkFileType = hasUnknownFileSize(tokenizer)
+		&& iWorkState.hasDocumentEntry
+		&& !iWorkState.hasMasterSlideEntry
+		&& !iWorkState.hasTablesEntry
+		&& !iWorkState.hasCalculationEngineEntry
+		? undefined
+		: getIWorkFileTypeFromZipEntries(iWorkState);
+	return fileType ?? getOpenXmlFileTypeFromZipEntries(openXmlState) ?? iWorkFileType ?? {
+		ext: 'zip',
+		mime: 'application/zip',
+	};
+}