mime-bytes 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.js ADDED
@@ -0,0 +1,11 @@
1
+ // Main export file for mime-bytes package
2
+ // Export the main detector class and convenience functions
3
+ export { FileTypeDetector, defaultDetector, detectFromStream, detectFromBuffer, detectFromExtension } from './file-type-detector';
4
+ // Export registry types and functions
5
+ export { FILE_TYPES, CONTENT_TYPE_MAPPINGS, getFileTypeByMagicBytes, getFileTypeByExtension, getFileTypesByCategory, getContentTypeByExtension, detectCharset } from './file-types-registry';
6
+ // Export peek stream functionality
7
+ export { peek, BufferPeekStream } from './peak';
8
+ // Export utility functions
9
+ export * from './utils/magic-bytes';
10
+ export * from './utils/mime-types';
11
+ export * from './utils/extensions';
package/esm/peak.js ADDED
@@ -0,0 +1,90 @@
1
+ // TypeScript implementation of peek stream for efficient file type detection
2
+ // Based on reference-packages/buffer-peak/peak.js
3
+ import { Transform } from 'stream';
4
/**
 * Pass-through Transform stream that exposes the first `peekBytes` bytes of
 * the data flowing through it via a single `'peek'` event.
 *
 * Data is held back until at least `peekBytes` bytes have arrived (or the
 * source ends); it is then forwarded unchanged, so downstream consumers still
 * receive the complete stream.
 *
 * Events:
 *  - `'peek'` (Buffer): emitted exactly once. Carries the first `peekBytes`
 *    bytes, or fewer (possibly zero) when the source ends early.
 */
export class BufferPeekStream extends Transform {
    peekBytes;
    buffer;
    bufferLength;
    peeked;
    /**
     * @param options - Transform options plus an optional `peekBytes` count.
     *   May be omitted entirely. A missing or falsy `peekBytes` falls back to 16.
     */
    constructor(options) {
        super(options);
        // Tolerate `new BufferPeekStream()`: reading a property of undefined would throw.
        const opts = options || {};
        this.peekBytes = opts.peekBytes || 16;
        this.buffer = Buffer.alloc(0);
        this.bufferLength = 0;
        this.peeked = false;
    }
    /**
     * Buffers incoming chunks until `peekBytes` bytes are available, emits
     * `'peek'`, then releases everything buffered. Later chunks pass straight through.
     */
    _transform(chunk, encoding, callback) {
        if (this.peeked) {
            // Peek already delivered: behave as a plain pass-through.
            callback(null, chunk);
            return;
        }
        const chunkBuffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk, encoding);
        this.buffer = Buffer.concat([this.buffer, chunkBuffer]);
        this.bufferLength += chunkBuffer.length;
        if (this.bufferLength >= this.peekBytes) {
            this.peeked = true;
            // subarray() is the non-deprecated equivalent of Buffer#slice (a view, no copy).
            this.emit('peek', this.buffer.subarray(0, this.peekBytes));
            // Forward all accumulated data, not only the peeked prefix.
            this.push(this.buffer);
            this.buffer = Buffer.alloc(0);
            this.bufferLength = 0;
        }
        callback();
    }
    /**
     * Called when the source ends. If the peek threshold was never reached,
     * emit `'peek'` with whatever arrived - including an empty buffer for an
     * empty source - so listeners waiting for the event are always notified.
     */
    _flush(callback) {
        if (!this.peeked) {
            this.peeked = true;
            const pending = this.buffer;
            this.emit('peek', pending);
            if (this.bufferLength > 0) {
                this.push(pending);
            }
            this.buffer = Buffer.alloc(0);
            this.bufferLength = 0;
        }
        callback();
    }
}
54
// Pipe `source` into a new BufferPeekStream and report its first bytes.
//
// Call forms:
//   peek(source, callback)        - peeks 16 bytes
//   peek(source, bytes, callback) - peeks `bytes` bytes
//
// The callback receives (err, buffer, stream): on the stream's 'peek' event
// it gets (null, peekedBuffer, stream); on the stream's 'error' event it gets
// (err, emptyBuffer, stream). Returns the result of source.pipe(stream).
export function peek(source, bytesOrCallback, callback) {
    const callbackOnly = typeof bytesOrCallback === 'function';
    const peekBytes = callbackOnly ? 16 : bytesOrCallback;
    const done = callbackOnly ? bytesOrCallback : callback;
    const peekStream = new BufferPeekStream({ peekBytes: peekBytes });
    if (done) {
        peekStream.once('peek', (head) => {
            done(null, head, peekStream);
        });
        peekStream.once('error', (failure) => {
            done(failure, Buffer.alloc(0), peekStream);
        });
    }
    return source.pipe(peekStream);
}
// Promise-based version: resolves with [buffer, stream]; rejects when either
// the peek stream or the source stream emits 'error'.
peek.promise = function (source, bytes = 16) {
    return new Promise((resolve, reject) => {
        peek(source, bytes, (err, buffer, stream) => {
            if (err) {
                reject(err);
                return;
            }
            resolve([buffer, stream]);
        });
        // pipe() does not forward source errors, so listen for them directly.
        source.once('error', reject);
    });
};
@@ -0,0 +1,114 @@
1
+ // Extension utility functions
2
// Normalize a file extension: lower-case it and drop one leading dot, if present.
// Example: ".PNG" -> "png".
export function normalizeExtension(extension) {
    const lowered = extension.toLowerCase();
    return lowered.startsWith('.') ? lowered.slice(1) : lowered;
}
6
// Extract the normalized (lower-case, dot-less) extension from a filename or path.
// Only the final path segment is inspected, so a dot inside a directory name
// (e.g. "/srv/app.d/README") is never mistaken for an extension separator.
// Both "/" and "\" are treated as path separators.
// Returns '' when the name has no dot or ends with a dot.
export function getExtension(filename) {
    const lastSeparator = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\'));
    const baseName = filename.slice(lastSeparator + 1);
    const lastDot = baseName.lastIndexOf('.');
    if (lastDot === -1 || lastDot === baseName.length - 1) {
        return '';
    }
    // The text after the last dot cannot itself start with a dot, so
    // lower-casing alone gives the same result as normalizeExtension().
    return baseName.slice(lastDot + 1).toLowerCase();
}
14
// Extensions commonly associated with compressed files, archives and packages.
const COMPRESSED_EXTENSIONS = new Set([
    'zip', 'rar', '7z', 'tar', 'gz', 'bz2', 'xz', 'lz', 'lzma', 'z',
    'tgz', 'tbz', 'tbz2', 'txz', 'tlz', 'arc', 'arj', 'cab', 'dmg',
    'iso', 'lha', 'lzh', 'pkg', 'deb', 'rpm', 'msi', 'jar', 'war',
    'ear', 'sar', 'aar', 'apk', 'ipa', 'xpi', 'egg', 'whl', 'gem'
]);
// Check whether an extension (any case, with or without a leading dot)
// is in the compressed-file list.
export function isCompressedExtension(extension) {
    return COMPRESSED_EXTENSIONS.has(normalizeExtension(extension));
}
24
// Extensions commonly associated with document files.
const DOCUMENT_EXTENSIONS = new Set([
    'pdf', 'doc', 'docx', 'odt', 'rtf', 'tex', 'wpd', 'txt', 'md',
    'xls', 'xlsx', 'ods', 'csv', 'ppt', 'pptx', 'odp', 'epub', 'mobi',
    'azw', 'azw3', 'fb2', 'lit', 'pdb', 'ps', 'eps', 'indd', 'xps'
]);
// Check whether an extension (any case, with or without a leading dot)
// is in the document list.
export function isDocumentExtension(extension) {
    return DOCUMENT_EXTENSIONS.has(normalizeExtension(extension));
}
33
// Extensions commonly associated with media (video and audio) files.
// 'ogg' is listed under both groups; the Set stores it once.
const MEDIA_EXTENSIONS = new Set([
    // Video
    'mp4', 'avi', 'mkv', 'mov', 'wmv', 'flv', 'webm', 'vob', 'ogv',
    'ogg', 'm4v', '3gp', '3g2', 'mpg', 'mpeg', 'mp2', 'mpe', 'mpv',
    'm2v', 'svi', 'mxf', 'roq', 'nsv', 'f4v', 'f4p', 'f4a', 'f4b',
    // Audio
    'mp3', 'wav', 'flac', 'aac', 'ogg', 'oga', 'wma', 'm4a', 'opus',
    'ape', 'wv', 'amr', 'ac3', 'dts', 'spx', 'mid', 'midi', 'kar',
    'aiff', 'aif', 'aifc', 'au', 'snd', 'voc', 'ra', 'rm', 'ram'
]);
// Check whether an extension (any case, with or without a leading dot)
// is in the media list.
export function isMediaExtension(extension) {
    return MEDIA_EXTENSIONS.has(normalizeExtension(extension));
}
47
// Extensions commonly associated with image files (raster, vector and camera raw).
const IMAGE_EXTENSIONS = new Set([
    'jpg', 'jpeg', 'png', 'gif', 'bmp', 'svg', 'webp', 'ico', 'tif',
    'tiff', 'psd', 'raw', 'heif', 'heic', 'indd', 'ai', 'eps', 'ps',
    'xcf', 'cdr', 'cmx', 'dib', 'jxr', 'hdp', 'wdp', 'cur', 'icns',
    'pbm', 'pgm', 'ppm', 'pnm', 'pcx', 'dcx', 'dds', 'dng', 'cr2',
    'cr3', 'crw', 'nef', 'nrw', 'orf', 'raf', 'rw2', 'rwl', 'srw',
    'arw', 'srf', 'sr2', 'bay', 'cap', 'iiq', 'eip', 'dcs', 'dcr',
    'drf', 'k25', 'kdc', 'mdc', 'mef', 'mos', 'mrw', 'pef', 'ptx',
    'pxn', 'r3d', 'x3f', 'qoi'
]);
// Check whether an extension (any case, with or without a leading dot)
// is in the image list.
export function isImageExtension(extension) {
    return IMAGE_EXTENSIONS.has(normalizeExtension(extension));
}
61
// Extensions commonly associated with executable files, scripts and binary modules.
const EXECUTABLE_EXTENSIONS = new Set([
    'exe', 'dll', 'so', 'dylib', 'app', 'deb', 'rpm', 'dmg', 'pkg',
    'msi', 'bat', 'cmd', 'sh', 'ps1', 'vbs', 'js', 'jar', 'class',
    'pyc', 'pyo', 'elf', 'o', 'out', 'bin', 'run', 'com', 'scr',
    'cpl', 'ocx', 'sys', 'drv', 'efi', 'mui', 'ax', 'ime', 'rs',
    'tsp', 'fon', 'wasm', 'ko', 'mod', 'prx', 'puff', 'axf', 'dex'
]);
// Check whether an extension (any case, with or without a leading dot)
// is in the executable list.
export function isExecutableExtension(extension) {
    return EXECUTABLE_EXTENSIONS.has(normalizeExtension(extension));
}
72
// Get category from extension.
// The broad predicate checks run first, in this order: image, media, document,
// archive, executable. Only when none of them match is the narrow table below
// consulted, so an extension present in both a broad list and the table
// resolves to the broad category. Returns 'other' when nothing matches.
export function getCategoryFromExtension(extension) {
    const ext = normalizeExtension(extension);
    const broadChecks = [
        [isImageExtension, 'image'],
        [isMediaExtension, 'media'],
        [isDocumentExtension, 'document'],
        [isCompressedExtension, 'archive'],
        [isExecutableExtension, 'executable']
    ];
    for (const [matches, label] of broadChecks) {
        if (matches(ext)) {
            return label;
        }
    }
    // Narrow categories, checked in declaration order.
    const narrowCategories = {
        font: ['ttf', 'otf', 'woff', 'woff2', 'eot', 'fon', 'fnt'],
        database: ['db', 'db3', 'sqlite', 'sqlite3', 'mdb', 'accdb', 'dbf'],
        code: ['js', 'ts', 'jsx', 'tsx', 'py', 'java', 'c', 'cpp', 'h', 'hpp', 'cs', 'php', 'rb', 'go', 'rs', 'swift', 'kt', 'scala', 'r', 'lua', 'pl', 'sh', 'bash', 'zsh', 'fish', 'ps1', 'psm1', 'psd1', 'bat', 'cmd'],
        config: ['json', 'xml', 'yaml', 'yml', 'toml', 'ini', 'cfg', 'conf', 'properties', 'env'],
        text: ['txt', 'md', 'markdown', 'rst', 'asciidoc', 'adoc', 'org', 'tex', 'log']
    };
    const hit = Object.entries(narrowCategories).find(([, members]) => members.includes(ext));
    return hit ? hit[0] : 'other';
}
100
// Common double extensions (e.g., .tar.gz), in their canonical spelling.
const DOUBLE_EXTENSIONS = [
    'tar.gz', 'tar.bz2', 'tar.xz', 'tar.lz', 'tar.lzma', 'tar.Z',
    'tar.br', 'tar.zst', 'user.js', 'min.js', 'min.css', 'd.ts'
];
// Get double extension if applicable.
// Matching is case-insensitive on BOTH sides: the filename and each list entry
// are lower-cased before comparison, so mixed-case entries such as 'tar.Z'
// can match. Returns the entry exactly as spelled in DOUBLE_EXTENSIONS,
// or null when the filename ends with none of them.
export function getDoubleExtension(filename) {
    const lower = filename.toLowerCase();
    const match = DOUBLE_EXTENSIONS.find((doubleExt) => lower.endsWith('.' + doubleExt.toLowerCase()));
    return match === undefined ? null : match;
}
@@ -0,0 +1,61 @@
1
+ // Magic bytes utility functions
2
// Convert an array of hex byte strings (e.g. '0xFF' or 'ff') into a Buffer.
// The wildcard token '?' becomes a 0 byte placeholder.
export function hexToBuffer(hexArray) {
    const toByte = (token) => (token === '?' ? 0 : parseInt(token.replace(/0x/i, ''), 16));
    return Buffer.from(hexArray.map((token) => toByte(token)));
}
10
// Render a buffer as a lower-case hex string with no separators or prefix.
export function bufferToHex(buffer) {
    const hex = buffer.toString('hex');
    return hex;
}
13
// Test whether `buffer` contains the byte `pattern` starting at `offset`.
// `pattern` holds hex byte strings ('0x89', '4E', ...); a '?' entry is a
// wildcard that matches any byte. Returns false for a missing or empty
// pattern and when the pattern would run past the end of the buffer.
export function compareBytes(buffer, pattern, offset = 0) {
    const unusable = !pattern || pattern.length === 0;
    if (unusable || offset + pattern.length > buffer.length) {
        return false;
    }
    let position = offset;
    for (const token of pattern) {
        // Wildcards are skipped; every other token must equal the byte at this position.
        if (token !== '?' && parseInt(token.replace(/0x/i, ''), 16) !== buffer[position]) {
            return false;
        }
        position += 1;
    }
    return true;
}
32
// Return the index of the first entry in `patterns` whose `pattern` matches
// `buffer` at that entry's `offset` (0 when the entry has none), or -1 when
// no entry matches.
export function findMagicBytes(buffer, patterns) {
    return patterns.findIndex(({ pattern, offset = 0 }) => compareBytes(buffer, pattern, offset));
}
41
// Extract up to `length` bytes from `buffer` starting at `offset`.
// When the requested range runs past the end, everything from `offset`
// to the end of the buffer is returned instead.
export function extractBytes(buffer, offset, length) {
    const end = offset + length;
    return end > buffer.length ? buffer.slice(offset) : buffer.slice(offset, end);
}
48
// Check if buffer contains text-like content.
// Samples at most the first 512 bytes and counts printable ASCII (32-126)
// plus tab, line feed and carriage return. The buffer is considered text
// when strictly more than 85% of the sample is printable. An empty buffer
// yields false (0 / 0 is NaN, which is not greater than 0.85).
export function isTextLike(buffer) {
    const sampleSize = Math.min(buffer.length, 512);
    const isPrintable = (byte) => byte === 9 || byte === 10 || byte === 13 || (byte >= 32 && byte <= 126);
    let printableCount = 0;
    for (let index = 0; index < sampleSize; index += 1) {
        if (isPrintable(buffer[index])) {
            printableCount += 1;
        }
    }
    const printableRatio = printableCount / sampleSize;
    return printableRatio > 0.85;
}
@@ -0,0 +1,90 @@
1
+ // MIME type utility functions
2
// Common MIME type categories (top-level media types recognised by getMimeCategory).
export const MIME_CATEGORIES = {
    IMAGE: 'image',
    VIDEO: 'video',
    AUDIO: 'audio',
    APPLICATION: 'application',
    TEXT: 'text',
    FONT: 'font'
};
// Extract category from MIME type.
// Takes the part before the first '/', trims it and lower-cases it, so the
// match is case-insensitive ('IMAGE/PNG' -> 'image'). Returns the lower-case
// category when it is one of MIME_CATEGORIES, otherwise null.
export function getMimeCategory(mimeType) {
    const category = mimeType.split('/')[0].trim().toLowerCase();
    return Object.values(MIME_CATEGORIES).includes(category) ? category : null;
}
19
// Check if MIME type is binary.
// A type is treated as text (returns false) when, after trimming and
// lower-casing, it starts with one of the prefixes below; every other type is
// reported as binary. Lower-casing makes the check case-insensitive, so
// 'TEXT/HTML' is handled the same as 'text/html'.
export function isBinaryMimeType(mimeType) {
    const textTypes = [
        'text/',
        'application/json',
        'application/xml',
        'application/javascript',
        'application/typescript',
        'application/x-sh',
        'application/x-csh',
        'application/x-python',
        'application/x-ruby',
        'application/x-perl'
    ];
    const normalized = mimeType.trim().toLowerCase();
    return !textTypes.some((type) => normalized.startsWith(type));
}
35
// Normalize MIME type: keep only the part before the first ';' (dropping
// parameters such as charset), trim surrounding whitespace and lower-case it.
export function normalizeMimeType(mimeType) {
    const [essence] = mimeType.split(';');
    return essence.trim().toLowerCase();
}
39
// Get file category from MIME type.
// The type is normalized first. A matching top-level prefix (image/, video/,
// audio/, font/, text/) decides the category directly; otherwise the type is
// scanned for keyword substrings, group by group in the order listed below.
// Returns 'other' when nothing matches.
export function getFileCategoryFromMime(mimeType) {
    const normalized = normalizeMimeType(mimeType);
    const prefixCategories = [
        ['image/', 'image'],
        ['video/', 'video'],
        ['audio/', 'audio'],
        ['font/', 'font'],
        ['text/', 'text']
    ];
    for (const [prefix, label] of prefixCategories) {
        if (normalized.startsWith(prefix)) {
            return label;
        }
    }
    // Special cases for application types, matched by substring.
    const keywordCategories = [
        ['archive', ['zip', 'compressed', 'archive']],
        ['document', ['pdf', 'document', 'msword', 'officedocument']],
        ['executable', ['executable', 'x-msdownload', 'x-elf', 'x-mach']],
        ['database', ['sqlite', 'database']]
    ];
    for (const [label, keywords] of keywordCategories) {
        if (keywords.some((keyword) => normalized.includes(keyword))) {
            return label;
        }
    }
    return 'other';
}
67
// Common MIME type aliases.
// Keys MUST be lower-case: lookups use the normalized (lower-cased) type, so a
// key containing upper-case characters could never match.
const MIME_ALIASES = {
    'application/x-javascript': 'application/javascript',
    'text/javascript': 'application/javascript',
    'application/x-mpegurl': 'application/vnd.apple.mpegurl',
    'audio/mp3': 'audio/mpeg',
    'audio/x-mp3': 'audio/mpeg',
    'audio/x-mpeg': 'audio/mpeg',
    'video/x-m4v': 'video/mp4',
    'audio/x-m4a': 'audio/mp4',
    'image/jpg': 'image/jpeg',
    'image/x-png': 'image/png',
    'image/x-icon': 'image/vnd.microsoft.icon',
    'text/xml': 'application/xml',
    'application/x-compressed': 'application/x-compress',
    'application/x-gzip': 'application/gzip',
    'application/x-bzip': 'application/x-bzip2',
    'application/x-tar': 'application/tar'
};
// Resolve MIME type aliases.
// Normalizes the input, then returns the mapped type when the normalized value
// is a key of MIME_ALIASES; otherwise returns the normalized value itself.
export function resolveMimeAlias(mimeType) {
    const normalized = normalizeMimeType(mimeType);
    // Own-property check: a plain `MIME_ALIASES[normalized]` lookup would return
    // inherited Object.prototype members for inputs such as 'constructor'.
    return Object.prototype.hasOwnProperty.call(MIME_ALIASES, normalized)
        ? MIME_ALIASES[normalized]
        : normalized;
}
@@ -0,0 +1,101 @@
1
+ import { Readable } from 'stream';
2
+ import { FileTypeDefinition, DetectionResult } from './file-types-registry';
3
/**
 * Options accepted by the FileTypeDetector constructor; every field is optional.
 * NOTE(review): field semantics are not visible in this declaration file.
 * Presumably `peekBytes` is the number of leading bytes inspected,
 * `checkMultipleOffsets` enables signature checks at offsets other than 0, and
 * `maxOffset` bounds those offsets - confirm against the implementation.
 */
+ export interface FileTypeDetectorOptions {
4
+ peekBytes?: number;
5
+ checkMultipleOffsets?: boolean;
6
+ maxOffset?: number;
7
+ }
8
/**
 * Type declaration of the FileTypeDetector class.
 * Public surface: asynchronous detection from a stream or buffer (both resolve
 * to a DetectionResult or null), synchronous lookup by extension, detection
 * with a filename fallback, a per-type buffer check, management of registered
 * file types (add / remove / list / by category), cache clearing and statistics.
 * The private members carry no type annotations in this declaration file.
 */
+ export declare class FileTypeDetector {
9
+ private fileTypes;
10
+ private options;
11
+ private magicBytesCache;
12
+ private extensionCache;
13
+ constructor(options?: FileTypeDetectorOptions);
14
+ /**
15
+ * Detect file type from a stream (PRIMARY METHOD - memory efficient)
16
+ * @param stream - Readable stream to detect from
17
+ * @returns Detection result or null if not detected
18
+ */
19
+ detectFromStream(stream: Readable): Promise<DetectionResult | null>;
20
+ /**
21
+ * Detect file type from an already-read buffer
22
+ * @param buffer - Buffer to detect from
23
+ * @returns Detection result or null if not detected
24
+ */
25
+ detectFromBuffer(buffer: Buffer): Promise<DetectionResult | null>;
26
+ /**
27
+ * Detect file type from extension only
28
+ * @param extension - File extension (with or without dot)
29
+ * @returns Array of possible detection results with lower confidence
30
+ */
31
+ detectFromExtension(extension: string): DetectionResult[];
32
+ /**
33
+ * Get all file types by category
34
+ * @param category - Category name (e.g., 'image', 'video', 'archive')
35
+ * @returns Array of file type definitions
36
+ */
37
+ getByCategory(category: string): FileTypeDefinition[];
38
+ /**
39
+ * Add a new file type dynamically
40
+ * @param fileType - File type definition to add
41
+ */
42
+ addFileType(fileType: FileTypeDefinition): void;
43
+ /**
44
+ * Remove a file type by name
45
+ * @param name - Name of the file type to remove
46
+ */
47
+ removeFileType(name: string): boolean;
48
+ /**
49
+ * Clear all caches
50
+ */
51
+ clearCache(): void;
52
+ /**
53
+ * Get all registered file types
54
+ * @returns Array of all file type definitions
55
+ */
56
+ getAllFileTypes(): FileTypeDefinition[];
57
+ /**
58
+ * Check magic bytes at a specific offset
59
+ * @private
60
+ */
61
+ private checkMagicBytesAtOffset;
62
+ /**
63
+ * Generate offsets to check based on buffer size
64
+ * @private
65
+ */
66
+ private generateOffsets;
67
+ /**
68
+ * Enhance detection result with additional information
69
+ * @private
70
+ */
71
+ private enhanceDetectionResult;
72
+ /**
73
+ * Detect file type with fallback to extension
74
+ * @param input - Readable stream or Buffer
75
+ * @param filename - Optional filename for extension fallback
76
+ * @returns Detection result with attached stream for reuse (if input was stream)
77
+ */
78
+ detectWithFallback(input: Readable | Buffer, filename?: string): Promise<(DetectionResult & {
79
+ _stream?: Readable;
80
+ }) | null>;
81
+ /**
82
+ * Check if a buffer matches a specific file type
83
+ * @param buffer - Buffer to check
84
+ * @param fileTypeName - Name of the file type to check against
85
+ * @returns True if matches, false otherwise
86
+ */
87
+ isFileType(buffer: Buffer, fileTypeName: string): boolean;
88
+ /**
89
+ * Get statistics about registered file types
90
+ * @returns Statistics object
91
+ */
92
+ getStatistics(): {
93
+ totalTypes: number;
94
+ byCategory: Record<string, number>;
95
+ byMimePrefix: Record<string, number>;
96
+ };
97
+ }
98
/**
 * Module-level exports: a FileTypeDetector instance (`defaultDetector`) and
 * three functions whose signatures mirror the FileTypeDetector methods of the
 * same names.
 * NOTE(review): presumably the functions delegate to `defaultDetector` -
 * the implementation is not visible here; confirm.
 */
+ export declare const defaultDetector: FileTypeDetector;
99
+ export declare function detectFromStream(stream: Readable): Promise<DetectionResult | null>;
100
+ export declare function detectFromBuffer(buffer: Buffer): Promise<DetectionResult | null>;
101
+ export declare function detectFromExtension(extension: string): DetectionResult[];