mime-bytes 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,374 @@
1
+ // Main file type detector class with stream-focused API
2
+ import { peek } from './peak';
3
+ import { FILE_TYPES, getFileTypeByExtension, getFileTypesByCategory, getContentTypeByExtension, detectCharset, getContentTypeForExtension } from './file-types-registry';
4
+ import { compareBytes } from './utils/magic-bytes';
5
+ import { normalizeExtension } from './utils/extensions';
6
+ import { resolveMimeAlias } from './utils/mime-types';
7
/**
 * Detects file types from streams, buffers and file extensions.
 *
 * Each instance works on its own copy of the FILE_TYPES registry, so
 * addFileType/removeFileType only affect the instance they are called on.
 */
export class FileTypeDetector {
    /** Per-instance copy of the registered file type definitions. */
    fileTypes;
    /** Resolved options: peekBytes, checkMultipleOffsets, maxOffset. */
    options;
    /** Cache for magic-byte lookups; within this class it is only ever cleared. */
    magicBytesCache;
    /** Maps a normalized extension to the registry entries returned for it. */
    extensionCache;
    /**
     * @param options - Optional settings
     * @param options.peekBytes - Number of bytes to peek from a stream (default 32)
     * @param options.checkMultipleOffsets - Set to false to only check offset 0 (default true)
     * @param options.maxOffset - Highest offset to probe (default 12); 0 is honoured
     */
    constructor(options = {}) {
        // Tolerate an explicit null as well as an omitted argument.
        const opts = options ?? {};
        // Create a copy of FILE_TYPES to avoid modifying the global registry
        this.fileTypes = [...FILE_TYPES];
        this.options = {
            // Peeking zero bytes can never detect anything, so a falsy value falls back to 32.
            peekBytes: opts.peekBytes || 32,
            checkMultipleOffsets: opts.checkMultipleOffsets !== false,
            // An explicit 0 is a valid request ("probe offset 0 only") and must not
            // be replaced by the default the way a plain `||` would.
            maxOffset: opts.maxOffset === 0 ? 0 : (opts.maxOffset || 12)
        };
        this.magicBytesCache = new Map();
        this.extensionCache = new Map();
    }
    /**
     * Detect file type from a stream (PRIMARY METHOD - memory efficient)
     *
     * Peeks options.peekBytes bytes through peek.promise and runs buffer
     * detection on them. On success the stream returned by the peek helper is
     * attached to the result as `_stream`; callers should continue with that
     * stream instead of the one they passed in.
     *
     * @param stream - Readable stream to detect from
     * @returns Detection result or null if not detected (also null on any error)
     */
    async detectFromStream(stream) {
        try {
            const [buffer, peekStream] = await peek.promise(stream, this.options.peekBytes);
            const result = await this.detectFromBuffer(buffer);
            if (result) {
                result._stream = peekStream;
            }
            return result;
        }
        catch (error) {
            // Detection is best-effort: report the failure (outside of tests)
            // and signal "not detected" instead of rejecting.
            if (process.env.NODE_ENV !== 'test') {
                console.error('Error detecting file type from stream:', error);
            }
            return null;
        }
    }
    /**
     * Detect file type from an already-read buffer
     *
     * Magic bytes are checked first (confidence 1.0). When nothing matches but
     * detectCharset reports something other than 'binary', a generic
     * text/plain result with confidence 0.5 is returned.
     *
     * @param buffer - Buffer to detect from
     * @returns Detection result or null if not detected
     */
    async detectFromBuffer(buffer) {
        if (!buffer || buffer.length === 0) {
            return null;
        }
        // Check multiple offsets if enabled
        const offsets = this.options.checkMultipleOffsets
            ? this.generateOffsets(buffer.length)
            : [0];
        for (const offset of offsets) {
            const fileType = this.checkMagicBytesAtOffset(buffer, offset);
            if (fileType) {
                return this.enhanceDetectionResult(fileType, buffer);
            }
        }
        // No magic bytes matched, but we can still detect charset for unknown files
        const charset = detectCharset(buffer);
        if (charset !== 'binary') {
            return {
                name: 'text',
                mimeType: 'text/plain',
                extensions: ['txt'],
                charset,
                contentType: 'text/plain',
                confidence: 0.5 // Lower confidence since we only detected charset
            };
        }
        return null;
    }
    /**
     * Detect file type from extension only
     *
     * The registry lookup for each normalized extension is performed once and
     * then served from extensionCache until the cache is cleared.
     *
     * @param extension - File extension (with or without dot)
     * @returns Array of possible detection results with lower confidence
     */
    detectFromExtension(extension) {
        const cleanExt = normalizeExtension(extension);
        let fileTypes = this.extensionCache.get(cleanExt);
        if (fileTypes === undefined) {
            fileTypes = getFileTypeByExtension(cleanExt);
            this.extensionCache.set(cleanExt, fileTypes);
        }
        // Cached and freshly looked-up entries share one mapping.
        return fileTypes.map(fileType => ({
            name: fileType.name,
            mimeType: resolveMimeAlias(fileType.mimeType),
            extensions: fileType.extensions,
            charset: 'unknown', // Can't determine charset from extension alone
            contentType: getContentTypeByExtension(cleanExt) || fileType.mimeType,
            confidence: 0.8 // Lower confidence for extension-only detection
        }));
    }
    /**
     * Get all file types by category
     *
     * NOTE(review): this delegates to the registry helper without passing
     * this.fileTypes, so types added or removed on this instance are presumably
     * not reflected here - confirm against the registry implementation.
     *
     * @param category - Category name (e.g., 'image', 'video', 'archive')
     * @returns Array of file type definitions
     */
    getByCategory(category) {
        return getFileTypesByCategory(category);
    }
    /**
     * Add a new file type dynamically
     * @param fileType - File type definition to add
     */
    addFileType(fileType) {
        this.fileTypes.push(fileType);
        // Clear caches when file types change
        this.clearCache();
    }
    /**
     * Remove a file type by name (only the first entry with that name)
     * @param name - Name of the file type to remove
     * @returns True if an entry was removed, false if none matched
     */
    removeFileType(name) {
        const index = this.fileTypes.findIndex(ft => ft.name === name);
        if (index === -1) {
            return false;
        }
        this.fileTypes.splice(index, 1);
        // Clear caches when file types change
        this.clearCache();
        return true;
    }
    /**
     * Clear all caches
     */
    clearCache() {
        this.magicBytesCache.clear();
        this.extensionCache.clear();
    }
    /**
     * Get all registered file types
     * @returns Shallow copy of the array of file type definitions
     */
    getAllFileTypes() {
        return [...this.fileTypes];
    }
    /**
     * Check magic bytes at a specific offset
     *
     * A file type takes part only when its declared offset equals the probed
     * offset; types without a declared offset are checked at offset 0 only.
     *
     * @private
     */
    checkMagicBytesAtOffset(buffer, offset) {
        for (const fileType of this.fileTypes) {
            // Skip if this file type requires a different offset
            if (fileType.offset !== undefined && fileType.offset !== offset)
                continue;
            // Skip if offset is not 0 and file type doesn't specify an offset
            if (offset > 0 && fileType.offset === undefined)
                continue;
            if (compareBytes(buffer, fileType.magicBytes, offset)) {
                return fileType;
            }
        }
        return null;
    }
    /**
     * Generate offsets to check based on buffer size
     *
     * Produces 0, 4, 8, ... up to options.maxOffset, stopping early once the
     * offset would fall outside the buffer.
     *
     * @private
     */
    generateOffsets(bufferLength) {
        const offsets = [];
        for (let i = 0; i <= this.options.maxOffset && i < bufferLength; i += 4) {
            offsets.push(i);
        }
        return offsets;
    }
    /**
     * Enhance detection result with additional information
     *
     * Resolves the charset (definition first, detection otherwise) and picks the
     * content type in this order: charset-aware lookup for the primary
     * extension, the definition's own contentType, plain extension lookup, and
     * finally the definition's mimeType.
     *
     * @private
     */
    enhanceDetectionResult(fileType, buffer) {
        const charset = fileType.charset || detectCharset(buffer);
        let contentType = fileType.contentType || fileType.mimeType;
        if (fileType.extensions.length > 0) {
            const primaryExt = fileType.extensions[0];
            const charsetAwareContentType = getContentTypeForExtension(primaryExt, charset);
            if (charsetAwareContentType) {
                contentType = charsetAwareContentType;
            }
            else if (!fileType.contentType) {
                // Fall back to regular content type lookup if no charset-specific match
                const inferredContentType = getContentTypeByExtension(primaryExt);
                if (inferredContentType) {
                    contentType = inferredContentType;
                }
            }
        }
        return {
            name: fileType.name,
            mimeType: resolveMimeAlias(fileType.mimeType),
            extensions: fileType.extensions,
            charset,
            contentType: resolveMimeAlias(contentType),
            confidence: 1.0 // High confidence for magic bytes detection
        };
    }
    /**
     * Return the text after the last '.' of a filename.
     * @returns The extension (possibly empty), or null when there is no filename or no dot
     * @private
     */
    extractExtension(filename) {
        if (!filename) {
            return null;
        }
        const lastDot = filename.lastIndexOf('.');
        return lastDot === -1 ? null : filename.substring(lastDot + 1);
    }
    /**
     * Return a copy of the result carrying `_stream` when a peek stream exists,
     * otherwise the result itself.
     * @private
     */
    attachStream(result, peekStream) {
        return peekStream ? { ...result, _stream: peekStream } : result;
    }
    /**
     * Detect file type with fallback to extension
     *
     * Content detection runs first. A generic 'text' or ZIP match is refined
     * with the filename's extension when a more specific content type is known.
     * When the content yields nothing, the extension alone is used (confidence
     * 0.7 for a charset-aware content type, 0.6 for a plain registry match).
     *
     * @param input - Readable stream or Buffer
     * @param filename - Optional filename for extension fallback
     * @returns Detection result with attached stream for reuse (if input was stream),
     *          or null when nothing was detected or an error occurred
     */
    async detectWithFallback(input, filename) {
        try {
            let buffer;
            let peekStream;
            // Handle both Buffer and Readable inputs
            if (Buffer.isBuffer(input)) {
                buffer = input;
            }
            else {
                [buffer, peekStream] = await peek.promise(input, this.options.peekBytes);
            }
            // Try magic bytes detection first
            const magicResult = await this.detectFromBuffer(buffer);
            const extension = this.extractExtension(filename);
            if (magicResult) {
                if (extension !== null) {
                    // Generic text files might have a more specific content type
                    if (magicResult.name === 'text' && magicResult.charset) {
                        const contentType = getContentTypeForExtension(extension, magicResult.charset);
                        if (contentType) {
                            return this.attachStream({
                                ...magicResult,
                                contentType,
                                confidence: 0.8 // Higher confidence since we have both magic bytes and extension
                            }, peekStream);
                        }
                    }
                    // ZIP files might be Office Open XML or other specific formats
                    if (magicResult.name === 'zip' || magicResult.mimeType === 'application/zip') {
                        const contentType = getContentTypeForExtension(extension, magicResult.charset || 'binary');
                        if (contentType && contentType !== 'application/zip') {
                            return this.attachStream({
                                ...magicResult,
                                contentType,
                                confidence: 0.9 // High confidence for known ZIP-based formats
                            }, peekStream);
                        }
                    }
                }
                return this.attachStream(magicResult, peekStream);
            }
            // Fallback to extension if filename provided
            if (extension === null) {
                return null;
            }
            const charset = detectCharset(buffer);
            // Try charset-aware content type lookup
            const contentType = getContentTypeForExtension(extension, charset);
            if (contentType) {
                const normalized = extension.toLowerCase();
                return this.attachStream({
                    name: normalized,
                    mimeType: contentType,
                    extensions: [normalized],
                    charset,
                    contentType,
                    confidence: 0.7 // Higher confidence when charset matches
                }, peekStream);
            }
            // Fall back to regular extension detection. The charset-aware lookup
            // above already came back empty for this extension/charset pair, so
            // it is not repeated here.
            const extensionResults = this.detectFromExtension(extension);
            if (extensionResults.length > 0) {
                return this.attachStream({
                    ...extensionResults[0],
                    charset, // Use detected charset
                    confidence: 0.6 // Lower confidence for fallback
                }, peekStream);
            }
            // No detection possible
            return null;
        }
        catch (error) {
            // Only log errors in non-test environments
            if (process.env.NODE_ENV !== 'test') {
                console.error('Error in detectWithFallback:', error);
            }
            return null;
        }
    }
    /**
     * Check if a buffer matches a specific file type
     * @param buffer - Buffer to check
     * @param fileTypeName - Name of the file type to check against
     * @returns True if matches, false otherwise (including unknown names and a missing buffer)
     */
    isFileType(buffer, fileTypeName) {
        if (!buffer) {
            return false;
        }
        const fileType = this.fileTypes.find(ft => ft.name === fileTypeName);
        if (!fileType)
            return false;
        const offset = fileType.offset || 0;
        return compareBytes(buffer, fileType.magicBytes, offset);
    }
    /**
     * Get statistics about registered file types
     *
     * Types without a category are counted under 'other'; types without a
     * string mimeType are counted under the 'unknown' MIME prefix.
     *
     * @returns Object with totalTypes, byCategory and byMimePrefix counts
     */
    getStatistics() {
        // Count in Maps so keys such as 'constructor' or '__proto__' cannot
        // collide with members inherited from Object.prototype.
        const categoryCounts = new Map();
        const prefixCounts = new Map();
        const bump = (counts, key) => counts.set(key, (counts.get(key) ?? 0) + 1);
        for (const fileType of this.fileTypes) {
            bump(categoryCounts, fileType.category || 'other');
            const mimePrefix = typeof fileType.mimeType === 'string'
                ? fileType.mimeType.split('/')[0]
                : 'unknown';
            bump(prefixCounts, mimePrefix);
        }
        return {
            totalTypes: this.fileTypes.length,
            byCategory: Object.fromEntries(categoryCounts),
            byMimePrefix: Object.fromEntries(prefixCounts)
        };
    }
}
363
// Module-level detector created with default options; the convenience
// functions exported from this module delegate to it.
const defaultDetector = new FileTypeDetector();
export { defaultDetector };
365
+ // Convenience functions using the default detector
366
/**
 * Detect a file type from a stream using the shared default detector.
 * @param stream - Readable stream to detect from
 * @returns Whatever defaultDetector.detectFromStream resolves to
 */
export async function detectFromStream(stream) {
    const detection = await defaultDetector.detectFromStream(stream);
    return detection;
}
369
/**
 * Detect a file type from a buffer using the shared default detector.
 * @param buffer - Buffer to detect from
 * @returns Whatever defaultDetector.detectFromBuffer resolves to
 */
export async function detectFromBuffer(buffer) {
    const detection = await defaultDetector.detectFromBuffer(buffer);
    return detection;
}
372
/**
 * Look up file types by extension using the shared default detector.
 * @param extension - File extension to look up
 * @returns Whatever defaultDetector.detectFromExtension returns
 */
export function detectFromExtension(extension) {
    const matches = defaultDetector.detectFromExtension(extension);
    return matches;
}