mime-bytes 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,381 @@
1
+ "use strict";
2
+ // Main file type detector class with stream-focused API
3
+ Object.defineProperty(exports, "__esModule", { value: true });
4
+ exports.defaultDetector = exports.FileTypeDetector = void 0;
5
+ exports.detectFromStream = detectFromStream;
6
+ exports.detectFromBuffer = detectFromBuffer;
7
+ exports.detectFromExtension = detectFromExtension;
8
+ const peak_1 = require("./peak");
9
+ const file_types_registry_1 = require("./file-types-registry");
10
+ const magic_bytes_1 = require("./utils/magic-bytes");
11
+ const extensions_1 = require("./utils/extensions");
12
+ const mime_types_1 = require("./utils/mime-types");
13
/**
 * Detects file types from magic bytes, with charset sniffing and
 * extension-based lookups as fallbacks.
 *
 * Each instance works on its own copy of the registry's FILE_TYPES array, so
 * addFileType/removeFileType never modify the shared registry array.
 */
class FileTypeDetector {
    /** Detector-local copy of the registry's file type definitions. */
    fileTypes;
    /** Normalized options: peekBytes, checkMultipleOffsets, maxOffset. */
    options;
    /** Cache map that this class creates and clears but never reads or writes. */
    magicBytesCache;
    /** Maps a normalized extension to the registry lookup result for it. */
    extensionCache;
    /**
     * @param options - Optional settings:
     *   - peekBytes: bytes to peek from a stream (default 32)
     *   - checkMultipleOffsets: set to false to test offset 0 only (default true)
     *   - maxOffset: highest offset produced by generateOffsets (default 12)
     */
    constructor(options = {}) {
        // Create a copy of FILE_TYPES to avoid modifying the global registry
        this.fileTypes = [...file_types_registry_1.FILE_TYPES];
        this.options = {
            // `||` is deliberate here: peeking 0 bytes could never detect anything.
            peekBytes: options.peekBytes || 32,
            checkMultipleOffsets: options.checkMultipleOffsets !== false,
            // `??` (not `||`) so that an explicit maxOffset of 0 is honored
            // instead of being replaced by the default.
            maxOffset: options.maxOffset ?? 12
        };
        this.magicBytesCache = new Map();
        this.extensionCache = new Map();
    }
    /**
     * Detect file type from a stream by peeking at its first bytes.
     *
     * When a type is detected, the stream returned by the peek helper is
     * attached to the result as `_stream`; callers should continue reading
     * from that stream instead of the original one.
     *
     * @param stream - Readable stream to detect from
     * @returns Detection result, or null if nothing was detected or an error
     *   occurred (errors are logged unless NODE_ENV is 'test')
     */
    async detectFromStream(stream) {
        try {
            const [buffer, peekStream] = await peak_1.peek.promise(stream, this.options.peekBytes);
            const result = await this.detectFromBuffer(buffer);
            if (result) {
                result._stream = peekStream;
            }
            return result;
        }
        catch (error) {
            // Best-effort API: report the failure (outside tests) and signal "unknown".
            if (process.env.NODE_ENV !== 'test') {
                console.error('Error detecting file type from stream:', error);
            }
            return null;
        }
    }
    /**
     * Detect file type from an already-read buffer.
     *
     * Magic bytes are tried first (confidence 1.0). If none match and the
     * charset detector reports anything other than 'binary', a generic
     * 'text' result with confidence 0.5 is returned.
     *
     * @param buffer - Buffer to detect from
     * @returns Detection result or null if not detected
     */
    async detectFromBuffer(buffer) {
        if (!buffer || buffer.length === 0) {
            return null;
        }
        const offsets = this.options.checkMultipleOffsets
            ? this.generateOffsets(buffer.length)
            : [0];
        for (const offset of offsets) {
            const fileType = this.checkMagicBytesAtOffset(buffer, offset);
            if (fileType) {
                return this.enhanceDetectionResult(fileType, buffer);
            }
        }
        // No magic bytes matched, but we can still detect charset for unknown files
        const charset = (0, file_types_registry_1.detectCharset)(buffer);
        if (charset !== 'binary') {
            return {
                name: 'text',
                mimeType: 'text/plain',
                extensions: ['txt'],
                charset,
                contentType: 'text/plain',
                confidence: 0.5 // Lower confidence since we only detected charset
            };
        }
        return null;
    }
    /**
     * Detect file type from extension only.
     *
     * NOTE(review): the lookup goes through the registry's
     * getFileTypeByExtension, not through this.fileTypes, so it presumably
     * does not see types added via addFileType - confirm against the registry.
     *
     * @param extension - File extension (with or without dot)
     * @returns Array of possible detection results (confidence 0.8, charset 'unknown')
     */
    detectFromExtension(extension) {
        const cleanExt = (0, extensions_1.normalizeExtension)(extension);
        // Consult the cache first; only hit the registry on a miss.
        let fileTypes = this.extensionCache.get(cleanExt);
        if (!this.extensionCache.has(cleanExt)) {
            fileTypes = (0, file_types_registry_1.getFileTypeByExtension)(cleanExt);
            this.extensionCache.set(cleanExt, fileTypes);
        }
        // Results are rebuilt on every call so callers never share result objects.
        return fileTypes.map(fileType => ({
            name: fileType.name,
            mimeType: (0, mime_types_1.resolveMimeAlias)(fileType.mimeType),
            extensions: fileType.extensions,
            charset: 'unknown', // Can't determine charset from extension alone
            contentType: (0, file_types_registry_1.getContentTypeByExtension)(cleanExt) || fileType.mimeType,
            confidence: 0.8 // Lower confidence for extension-only detection
        }));
    }
    /**
     * Get all file types by category.
     *
     * NOTE(review): delegates to the registry's getFileTypesByCategory rather
     * than filtering this.fileTypes - confirm whether that is intended.
     *
     * @param category - Category name (e.g., 'image', 'video', 'archive')
     * @returns Array of file type definitions
     */
    getByCategory(category) {
        return (0, file_types_registry_1.getFileTypesByCategory)(category);
    }
    /**
     * Add a new file type to this detector and clear its caches.
     * @param fileType - File type definition to add
     */
    addFileType(fileType) {
        this.fileTypes.push(fileType);
        this.clearCache();
    }
    /**
     * Remove the first file type with the given name and clear the caches.
     * @param name - Name of the file type to remove
     * @returns True if a file type was removed, false if none had that name
     */
    removeFileType(name) {
        const index = this.fileTypes.findIndex(ft => ft.name === name);
        if (index === -1) {
            return false;
        }
        this.fileTypes.splice(index, 1);
        this.clearCache();
        return true;
    }
    /**
     * Clear all caches
     */
    clearCache() {
        this.magicBytesCache.clear();
        this.extensionCache.clear();
    }
    /**
     * Get all registered file types
     * @returns A shallow copy of this detector's file type definitions
     */
    getAllFileTypes() {
        return [...this.fileTypes];
    }
    /**
     * Return the first registered file type whose magic bytes match at the
     * given offset, or null. A type with an explicit `offset` is only tested
     * at exactly that offset; a type without one is only tested at offset 0.
     * @private
     */
    checkMagicBytesAtOffset(buffer, offset) {
        for (const fileType of this.fileTypes) {
            // Skip if this file type requires a different offset
            if (fileType.offset !== undefined && fileType.offset !== offset)
                continue;
            // Skip if offset is not 0 and file type doesn't specify an offset
            if (offset > 0 && fileType.offset === undefined)
                continue;
            if ((0, magic_bytes_1.compareBytes)(buffer, fileType.magicBytes, offset)) {
                return fileType;
            }
        }
        return null;
    }
    /**
     * Generate the offsets to probe: 0, 4, 8, ... up to options.maxOffset and
     * below the buffer length.
     *
     * NOTE(review): because of the step of 4, a file type whose `offset` is
     * not a multiple of 4 (or exceeds maxOffset) is never matched by
     * detectFromBuffer - confirm this is intended.
     * @private
     */
    generateOffsets(bufferLength) {
        const offsets = [];
        for (let i = 0; i <= this.options.maxOffset && i < bufferLength; i += 4) {
            offsets.push(i);
        }
        return offsets;
    }
    /**
     * Build the detection result for a magic-bytes match, resolving charset
     * and content type.
     * @private
     */
    enhanceDetectionResult(fileType, buffer) {
        // Use charset from file type definition if available, otherwise detect it
        const charset = fileType.charset || (0, file_types_registry_1.detectCharset)(buffer);
        let contentType = fileType.contentType || fileType.mimeType;
        if (fileType.extensions.length > 0) {
            const primaryExt = fileType.extensions[0];
            // A charset-aware mapping wins over everything else.
            const charsetAwareContentType = (0, file_types_registry_1.getContentTypeForExtension)(primaryExt, charset);
            if (charsetAwareContentType) {
                contentType = charsetAwareContentType;
            }
            else if (!fileType.contentType) {
                // Only infer from the extension when the definition has no explicit content type.
                const inferredContentType = (0, file_types_registry_1.getContentTypeByExtension)(primaryExt);
                if (inferredContentType) {
                    contentType = inferredContentType;
                }
            }
        }
        return {
            name: fileType.name,
            mimeType: (0, mime_types_1.resolveMimeAlias)(fileType.mimeType),
            extensions: fileType.extensions,
            charset,
            contentType: (0, mime_types_1.resolveMimeAlias)(contentType),
            confidence: 1.0 // High confidence for magic bytes detection
        };
    }
    /**
     * Detect file type with fallback to extension.
     *
     * Order of attempts:
     *  1. Content detection via detectFromBuffer. A generic 'text' result or a
     *     ZIP result is refined with the filename's extension when a
     *     charset-aware content type exists for it.
     *  2. Charset-aware content type lookup for the filename's extension.
     *  3. Plain extension lookup via detectFromExtension.
     *
     * @param input - Readable stream or Buffer
     * @param filename - Optional filename for extension fallback
     * @returns Detection result (with `_stream` attached when the input was a
     *   stream), or null if nothing was detected or an error occurred
     */
    async detectWithFallback(input, filename) {
        // Text after the last dot; null when there is no filename or no dot.
        // A trailing dot yields '' which is still handed to the lookups.
        const extensionOf = (name) => {
            if (!name) {
                return null;
            }
            const lastDot = name.lastIndexOf('.');
            return lastDot === -1 ? null : name.substring(lastDot + 1);
        };
        try {
            let buffer;
            let peekStream;
            // Handle both Buffer and Readable inputs
            if (Buffer.isBuffer(input)) {
                buffer = input;
            }
            else {
                [buffer, peekStream] = await peak_1.peek.promise(input, this.options.peekBytes);
            }
            // Attach the peek stream for reuse when the input was a stream.
            const withStream = (result) => peekStream ? { ...result, _stream: peekStream } : result;
            const magicResult = await this.detectFromBuffer(buffer);
            const extension = extensionOf(filename);
            if (magicResult) {
                if (extension !== null) {
                    // Generic text: the extension may map to a more specific content type.
                    if (magicResult.name === 'text' && magicResult.charset) {
                        const contentType = (0, file_types_registry_1.getContentTypeForExtension)(extension, magicResult.charset);
                        if (contentType) {
                            // Charset sniffing and the extension agree on a mapping.
                            return withStream({ ...magicResult, contentType, confidence: 0.8 });
                        }
                    }
                    // ZIP containers: Office Open XML and similar formats are told apart by extension.
                    if (magicResult.name === 'zip' || magicResult.mimeType === 'application/zip') {
                        const contentType = (0, file_types_registry_1.getContentTypeForExtension)(extension, magicResult.charset || 'binary');
                        if (contentType && contentType !== 'application/zip') {
                            return withStream({ ...magicResult, contentType, confidence: 0.9 });
                        }
                    }
                }
                return withStream(magicResult);
            }
            // Nothing detected from content: fall back to the extension.
            if (extension !== null) {
                const charset = (0, file_types_registry_1.detectCharset)(buffer);
                const contentType = (0, file_types_registry_1.getContentTypeForExtension)(extension, charset);
                if (contentType) {
                    const lowerExt = extension.toLowerCase();
                    return withStream({
                        name: lowerExt,
                        mimeType: contentType,
                        extensions: [lowerExt],
                        charset,
                        contentType,
                        confidence: 0.7 // Higher confidence when charset matches
                    });
                }
                // No charset-aware mapping exists for this extension (checked just
                // above), so the plain extension result keeps its own content type.
                const extensionResults = this.detectFromExtension(extension);
                if (extensionResults.length > 0) {
                    return withStream({
                        ...extensionResults[0],
                        charset, // Use detected charset
                        confidence: 0.6 // Lower confidence for fallback
                    });
                }
            }
            // No detection possible
            return null;
        }
        catch (error) {
            // Best-effort API: report the failure (outside tests) and signal "unknown".
            if (process.env.NODE_ENV !== 'test') {
                console.error('Error in detectWithFallback:', error);
            }
            return null;
        }
    }
    /**
     * Check if a buffer matches a specific file type. The comparison is made
     * at the type's declared offset, or at 0 when it declares none.
     * @param buffer - Buffer to check
     * @param fileTypeName - Name of the file type to check against
     * @returns True if matches, false otherwise (including unknown names)
     */
    isFileType(buffer, fileTypeName) {
        const fileType = this.fileTypes.find(ft => ft.name === fileTypeName);
        if (!fileType)
            return false;
        const offset = fileType.offset || 0;
        return (0, magic_bytes_1.compareBytes)(buffer, fileType.magicBytes, offset);
    }
    /**
     * Get statistics about registered file types.
     * @returns Object with totalTypes, a count per category (types without a
     *   category are counted under 'other') and a count per MIME prefix (the
     *   part of mimeType before the first '/')
     */
    getStatistics() {
        const stats = {
            totalTypes: this.fileTypes.length,
            byCategory: {},
            byMimePrefix: {}
        };
        for (const fileType of this.fileTypes) {
            const category = fileType.category || 'other';
            stats.byCategory[category] = (stats.byCategory[category] || 0) + 1;
            const mimePrefix = fileType.mimeType.split('/')[0];
            stats.byMimePrefix[mimePrefix] = (stats.byMimePrefix[mimePrefix] || 0) + 1;
        }
        return stats;
    }
}
369
+ exports.FileTypeDetector = FileTypeDetector;
370
+ // Export a default instance for convenience
371
+ exports.defaultDetector = new FileTypeDetector();
372
+ // Convenience functions using the default detector
373
/**
 * Module-level shortcut: run stream detection on the shared default detector.
 * @param stream - Readable stream to inspect
 * @returns Whatever `defaultDetector.detectFromStream` resolves to
 */
async function detectFromStream(stream) {
    const detector = exports.defaultDetector;
    return detector.detectFromStream(stream);
}
376
/**
 * Module-level shortcut: run buffer detection on the shared default detector.
 * @param buffer - Buffer holding the bytes to inspect
 * @returns Whatever `defaultDetector.detectFromBuffer` resolves to
 */
async function detectFromBuffer(buffer) {
    const detector = exports.defaultDetector;
    return detector.detectFromBuffer(buffer);
}
379
/**
 * Module-level shortcut: run extension-only detection on the shared default
 * detector.
 * @param extension - File extension to look up
 * @returns Whatever `defaultDetector.detectFromExtension` returns
 */
function detectFromExtension(extension) {
    const detector = exports.defaultDetector;
    return detector.detectFromExtension(extension);
}
@@ -0,0 +1,28 @@
1
/**
 * One entry of the file type registry.
 */
export interface FileTypeDefinition {
    /** Name identifying the file type. */
    name: string;
    /** Magic byte values as strings (presumably hex - confirm against the byte comparison helper). */
    magicBytes: Array<string>;
    /** MIME type reported for this file type. */
    mimeType: string;
    /** File extensions associated with this file type. */
    extensions: Array<string>;
    /** Byte offset at which the magic bytes are expected. */
    offset?: number;
    /** Free-form description of the file type. */
    description?: string;
    /** Category name used to group file types. */
    category?: string;
    /** Charset declared for this file type. */
    charset?: string;
    /** Content type declared for this file type, in addition to `mimeType`. */
    contentType?: string;
}
12
/** The registered file type definitions. */
export declare const FILE_TYPES: Array<FileTypeDefinition>;

/**
 * A content type mapping tuple with two or three elements.
 * NOTE(review): presumably [extensions, contentType] with an optional third
 * charset element - confirm against the registry implementation.
 */
export type ContentTypeMapping =
    | [string[], string]
    | [string[], string, string];

/** The registered content type mappings. */
export declare const CONTENT_TYPE_MAPPINGS: Array<ContentTypeMapping>;

/**
 * Look up a file type definition from magic bytes and an optional offset.
 * @returns The matching definition, or null
 */
export declare function getFileTypeByMagicBytes(magicBytes: string, offset?: number): FileTypeDefinition | null;

/**
 * Look up file type definitions for a file extension.
 * @returns An array of definitions
 */
export declare function getFileTypeByExtension(extension: string): Array<FileTypeDefinition>;

/**
 * Look up file type definitions for a category name.
 * @returns An array of definitions
 */
export declare function getFileTypesByCategory(category: string): Array<FileTypeDefinition>;

/**
 * Look up a content type for a file extension.
 * @returns The content type, or null
 */
export declare function getContentTypeByExtension(extension: string): string | null;

/**
 * Determine the charset of a buffer.
 * @returns The charset as a string
 */
export declare function detectCharset(buffer: Buffer): string;

/**
 * Look up a content type for a file extension and an optional charset.
 * @returns The content type, or null
 */
export declare function getContentTypeForExtension(extension: string, charset?: string): string | null;
21
/**
 * Outcome of a file type detection.
 */
export interface DetectionResult {
    /** Name of the detected file type. */
    name: string;
    /** MIME type of the detected file type. */
    mimeType: string;
    /** File extensions associated with the detected file type. */
    extensions: Array<string>;
    /** Charset reported for the detected content. */
    charset: string;
    /** Content type reported for the detected content. */
    contentType: string;
    /** Numeric confidence attached to the detection. */
    confidence: number;
}