codesummary 1.1.1 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/scanner.js CHANGED
@@ -1,468 +1,387 @@
1
- import fs from 'fs-extra';
2
- import path from 'path';
3
- import chalk from 'chalk';
4
- import ErrorHandler from './errorHandler.js';
5
-
6
/** Upper bound (100MB) on a single file's size; larger files are skipped with a warning. */
const MAX_INDIVIDUAL_FILE_SIZE = 100 * 1024 * 1024;

/**
 * Directory names that are always skipped, compared lower-cased.
 * The dot-prefixed entries are normally caught earlier by the hidden-directory rule.
 */
const COMMON_SKIP_DIRS = new Set([
  'tmp', 'temp', 'cache', '.cache', 'logs', '.logs',
  'bower_components', 'vendor', '.vendor'
]);

/** Dot-files that pass the hidden-file filter. */
const ALLOWED_HIDDEN_FILES = new Set([
  '.gitignore', '.gitattributes', '.editorconfig',
  '.eslintrc.js', '.eslintrc.json', '.prettierrc',
  '.env.example', '.htaccess'
]);

/** Dot-directories that pass the hidden-directory filter. */
const ALLOWED_HIDDEN_DIRS = new Set(['.github', '.gitlab', '.circleci']);

/** Human readable names for known (lower-cased) file extensions. */
const EXTENSION_DESCRIPTIONS = {
  '.js': 'JavaScript',
  '.ts': 'TypeScript',
  '.jsx': 'React JSX',
  '.tsx': 'TypeScript JSX',
  '.json': 'JSON',
  '.xml': 'XML',
  '.html': 'HTML',
  '.css': 'CSS',
  '.scss': 'SCSS',
  '.sass': 'Sass',
  '.md': 'Markdown',
  '.txt': 'Text',
  '.py': 'Python',
  '.java': 'Java',
  '.cs': 'C#',
  '.cpp': 'C++',
  '.c': 'C',
  '.h': 'Header',
  '.yaml': 'YAML',
  '.yml': 'YAML',
  '.sh': 'Shell Script',
  '.bat': 'Batch File',
  '.ps1': 'PowerShell',
  '.php': 'PHP',
  '.rb': 'Ruby',
  '.go': 'Go',
  '.rs': 'Rust',
  '.swift': 'Swift',
  '.kt': 'Kotlin',
  '.scala': 'Scala',
  '.vue': 'Vue.js',
  '.svelte': 'Svelte',
  '.dockerfile': 'Dockerfile',
  '.sql': 'SQL',
  '.graphql': 'GraphQL'
};

/**
 * File Scanner for CodeSummary
 * Handles recursive directory traversal and file filtering
 */
export class Scanner {
  /**
   * @param {object} config - Scanner configuration
   * @param {string[]} [config.allowedExtensions] - Extensions (with leading dot) to include; compared lower-cased
   * @param {string[]} [config.excludeDirs] - Directory names to skip (exact, case-sensitive match)
   * @param {string[]} [config.excludeFiles] - File name patterns to skip (supports * wildcards)
   */
  constructor(config) {
    this.config = config;
    // Every list is optional: a missing list behaves like an empty one.
    this.allowedExtensions = new Set((config.allowedExtensions || []).map(ext => ext.toLowerCase()));
    this.excludeDirs = new Set(config.excludeDirs || []);
    this.excludeFiles = config.excludeFiles || [];
  }

  /**
   * Scan a directory recursively and return files grouped by extension
   * @param {string} rootPath - Root directory to scan
   * @returns {Promise<object>} Object with extensions as keys and file arrays as values
   * @throws {Error} If rootPath is not a non-empty string, does not exist, is not a
   *   directory or cannot be accessed; the underlying fs error is attached as `cause`
   */
  async scanDirectory(rootPath) {
    try {
      // For scanner paths, we only need basic validation (no aggressive sanitization)
      if (!rootPath || typeof rootPath !== 'string') {
        throw new Error('Invalid root path: must be a non-empty string');
      }

      // Just resolve the path and validate it exists
      const resolvedRoot = path.resolve(rootPath);
      const stats = await fs.stat(resolvedRoot);

      if (!stats.isDirectory()) {
        throw new Error(`Path is not a directory: ${resolvedRoot}`);
      }

      console.log(chalk.gray(`Scanning directory: ${resolvedRoot}`));

      const filesByExtension = {};
      const scannedFiles = new Set(); // Prevent duplicates
      const scanContext = {
        errors: [],
        warnings: [],
        skippedDirectories: 0,
        skippedFiles: 0,
        processedFiles: 0
      };

      await this.walkDirectory(resolvedRoot, resolvedRoot, filesByExtension, scannedFiles, scanContext);

      // Sort files within each extension group
      for (const files of Object.values(filesByExtension)) {
        files.sort((a, b) => a.relativePath.localeCompare(b.relativePath));
      }

      // Report scan summary with warnings/errors
      this.reportScanIssues(scanContext);

      return filesByExtension;
    } catch (error) {
      // Translate the common fs failures into friendlier messages, keeping the original as `cause`.
      if (error.code === 'ENOENT') {
        throw new Error(`Directory does not exist: ${rootPath}`, { cause: error });
      }
      // EPERM is treated like EACCES, matching walkDirectory and processFile.
      if (error.code === 'EACCES' || error.code === 'EPERM') {
        throw new Error(`Permission denied accessing directory: ${rootPath}`, { cause: error });
      }
      throw error;
    }
  }

  /**
   * Recursively walk through directory structure
   * @param {string} currentPath - Current directory being processed
   * @param {string} rootPath - Original root path for relative path calculation
   * @param {object} filesByExtension - Accumulator object for results
   * @param {Set} scannedFiles - Set to track processed files and avoid duplicates
   * @param {object} scanContext - Context object to track scan statistics
   * @returns {Promise<void>} Resolves when the subtree has been visited; read failures
   *   are recorded in scanContext instead of being thrown
   */
  async walkDirectory(currentPath, rootPath, filesByExtension, scannedFiles, scanContext) {
    try {
      const entries = await fs.readdir(currentPath, { withFileTypes: true });

      for (const entry of entries) {
        const fullPath = path.join(currentPath, entry.name);
        const relativePath = path.relative(rootPath, fullPath);

        if (entry.isDirectory()) {
          // Skip excluded directories and hidden directories (unless explicitly allowed)
          if (this.shouldSkipDirectory(entry.name, relativePath)) {
            scanContext.skippedDirectories++;
            continue;
          }

          // Recursively scan subdirectory
          await this.walkDirectory(fullPath, rootPath, filesByExtension, scannedFiles, scanContext);
        } else if (entry.isFile()) {
          // Process file if it matches criteria
          await this.processFile(fullPath, rootPath, filesByExtension, scannedFiles, scanContext);
        } else if (entry.isSymbolicLink()) {
          // Symbolic links are never followed, only reported
          scanContext.warnings.push(`Skipped symbolic link: ${relativePath}`);
        }
        // Skip other special files (devices, sockets, etc.)
      }
    } catch (error) {
      // Track errors in context for better reporting
      const relativePath = path.relative(rootPath, currentPath);

      if (error.code === 'EACCES' || error.code === 'EPERM') {
        scanContext.errors.push(`Permission denied: ${relativePath}`);
      } else if (error.code === 'ENOENT') {
        scanContext.warnings.push(`Directory no longer exists: ${relativePath}`);
      } else if (error.code === 'ENOTDIR') {
        scanContext.warnings.push(`Path is not a directory: ${relativePath}`);
      } else {
        scanContext.errors.push(`Cannot read directory ${relativePath}: ${error.message}`);
      }
    }
  }

  /**
   * Process a single file and add it to results if it matches criteria
   * @param {string} fullPath - Absolute path to the file
   * @param {string} rootPath - Root path for relative calculation
   * @param {object} filesByExtension - Results accumulator
   * @param {Set} scannedFiles - Set of already processed files
   * @param {object} scanContext - Context object to track scan statistics
   * @returns {Promise<void>} Resolves once the file was added or skipped; failures are
   *   recorded in scanContext instead of being thrown
   */
  async processFile(fullPath, rootPath, filesByExtension, scannedFiles, scanContext) {
    const relativePath = path.relative(rootPath, fullPath);

    try {
      // Avoid processing the same file twice
      if (scannedFiles.has(fullPath)) {
        return;
      }
      scannedFiles.add(fullPath);

      const extension = path.extname(relativePath).toLowerCase();

      // Skip files without extensions or not in allowed list
      // NOTE(review): path.extname() returns '' for names such as '.gitignore', so the
      // extension-less entries of the hidden-file allow-list are rejected here before the
      // hidden-file check below can accept them - confirm whether that is intended.
      if (!extension || !this.allowedExtensions.has(extension)) {
        scanContext.skippedFiles++;
        return;
      }

      // Skip hidden files (starting with .) unless explicitly needed
      const fileName = path.basename(relativePath);
      if (fileName.startsWith('.') && !this.isAllowedHiddenFile(fileName)) {
        scanContext.skippedFiles++;
        return;
      }

      // Check if file should be excluded by pattern (e.g., *-lock.json)
      if (this.shouldExcludeFile(fileName)) {
        scanContext.skippedFiles++;
        return;
      }

      // Stat the file: confirms it is a regular file and provides size / mtime
      const stats = await fs.stat(fullPath);
      if (!stats.isFile()) {
        scanContext.warnings.push(`Skipped non-regular file: ${relativePath}`);
        // Count it like every other skipped entry so the statistics add up
        scanContext.skippedFiles++;
        return;
      }

      // Check file size limits
      if (stats.size > MAX_INDIVIDUAL_FILE_SIZE) {
        scanContext.warnings.push(`Skipped large file (${Math.round(stats.size / 1024 / 1024)}MB): ${relativePath}`);
        scanContext.skippedFiles++;
        return;
      }

      // Add to results
      if (!filesByExtension[extension]) {
        filesByExtension[extension] = [];
      }

      filesByExtension[extension].push({
        relativePath: relativePath.replace(/\\/g, '/'), // Normalize path separators
        absolutePath: fullPath,
        size: stats.size,
        modified: stats.mtime
      });

      scanContext.processedFiles++;
    } catch (error) {
      // Handle file processing errors with appropriate context
      if (error.code === 'EACCES' || error.code === 'EPERM') {
        scanContext.errors.push(`Permission denied: ${relativePath}`);
      } else if (error.code === 'ENOENT') {
        // File might have been deleted during scan
        scanContext.warnings.push(`File no longer exists: ${relativePath}`);
      } else if (error.code === 'EISDIR') {
        scanContext.warnings.push(`Path is a directory, not a file: ${relativePath}`);
      } else {
        scanContext.errors.push(`Cannot process file ${relativePath}: ${error.message}`);
      }

      scanContext.skippedFiles++;
    }
  }

  /**
   * Determine if a directory should be skipped
   * @param {string} dirName - Directory name
   * @param {string} relativePath - Relative path from root (currently unused)
   * @returns {boolean} True if directory should be skipped
   */
  shouldSkipDirectory(dirName, relativePath) {
    // Skip directories in exclude list
    if (this.excludeDirs.has(dirName)) {
      return true;
    }

    // Skip hidden directories (starting with .) unless explicitly allowed
    if (dirName.startsWith('.') && !this.isAllowedHiddenDirectory(dirName)) {
      return true;
    }

    // Skip common build/cache directories that might not be in exclude list
    return COMMON_SKIP_DIRS.has(dirName.toLowerCase());
  }

  /**
   * Check if a file should be excluded based on patterns
   * @param {string} fileName - File name to check
   * @returns {boolean} True if file should be excluded
   */
  shouldExcludeFile(fileName) {
    return this.excludeFiles.some(pattern => this.matchesPattern(fileName, pattern));
  }

  /**
   * Simple glob pattern matching (case-insensitive, whole-name match)
   * @param {string} fileName - File name to test
   * @param {string} pattern - Pattern to match (supports * wildcards)
   * @returns {boolean} True if pattern matches
   */
  matchesPattern(fileName, pattern) {
    // Exact match
    if (pattern === fileName) {
      return true;
    }

    // Convert glob pattern to regex: escape every regex metacharacter except '*',
    // so characters such as '+', '(' or '[' are matched literally instead of being
    // interpreted (or throwing) as regex syntax, then turn '*' into '.*'.
    const regexPattern = pattern
      .replace(/[.+?^${}()|[\]\\]/g, '\\$&')
      .replace(/\*/g, '.*');

    return new RegExp(`^${regexPattern}$`, 'i').test(fileName);
  }

  /**
   * Check if a hidden file should be included
   * @param {string} fileName - File name
   * @returns {boolean} True if file should be included
   */
  isAllowedHiddenFile(fileName) {
    return ALLOWED_HIDDEN_FILES.has(fileName);
  }

  /**
   * Check if a hidden directory should be included
   * @param {string} dirName - Directory name
   * @returns {boolean} True if directory should be included
   */
  isAllowedHiddenDirectory(dirName) {
    return ALLOWED_HIDDEN_DIRS.has(dirName);
  }

  /**
   * Get file extension descriptions for user display
   * @param {object} filesByExtension - Files grouped by extension
   * @returns {Array} Array of extension info objects, sorted by extension
   */
  getExtensionInfo(filesByExtension) {
    return Object.keys(filesByExtension)
      .sort()
      .map(ext => ({
        extension: ext,
        description: EXTENSION_DESCRIPTIONS[ext] || 'Unknown',
        count: filesByExtension[ext].length,
        files: filesByExtension[ext]
      }));
  }

  /**
   * Calculate total statistics for scanned files
   * @param {object} filesByExtension - Files grouped by extension
   * @returns {object} Statistics object
   */
  calculateStatistics(filesByExtension) {
    let totalFiles = 0;
    let totalSize = 0;

    for (const files of Object.values(filesByExtension)) {
      totalFiles += files.length;
      totalSize += files.reduce((sum, file) => sum + file.size, 0);
    }

    return {
      totalFiles,
      totalSize,
      extensionCount: Object.keys(filesByExtension).length,
      averageFileSize: totalFiles > 0 ? Math.round(totalSize / totalFiles) : 0,
      totalSizeFormatted: this.formatFileSize(totalSize)
    };
  }

  /**
   * Format file size in human readable format
   * @param {number} bytes - Size in bytes
   * @returns {string} Formatted size string with one decimal, e.g. "1.5 KB" (largest unit is GB)
   */
  formatFileSize(bytes) {
    const units = ['B', 'KB', 'MB', 'GB'];
    let size = bytes;
    let unitIndex = 0;

    while (size >= 1024 && unitIndex < units.length - 1) {
      size /= 1024;
      unitIndex++;
    }

    return `${size.toFixed(1)} ${units[unitIndex]}`;
  }

  /**
   * Report scan issues and statistics
   * @param {object} scanContext - Context object with scan statistics
   */
  reportScanIssues(scanContext) {
    const { errors, warnings, skippedDirectories, skippedFiles, processedFiles } = scanContext;

    // Report critical errors (first five only)
    if (errors.length > 0) {
      console.log(chalk.red(`\n⚠️ ${errors.length} scan error(s):`));
      errors.slice(0, 5).forEach(error => {
        console.log(chalk.red(` • ${error}`));
      });
      if (errors.length > 5) {
        console.log(chalk.gray(` ... and ${errors.length - 5} more errors`));
      }
    }

    // Report warnings (less critical) - only shown when NODE_ENV is 'development'
    if (warnings.length > 0 && process.env.NODE_ENV === 'development') {
      console.log(chalk.yellow(`\n⚠️ ${warnings.length} scan warning(s):`));
      warnings.slice(0, 3).forEach(warning => {
        console.log(chalk.yellow(` • ${warning}`));
      });
      if (warnings.length > 3) {
        console.log(chalk.gray(` ... and ${warnings.length - 3} more warnings`));
      }
    }

    // Report summary statistics
    const totalIssues = errors.length + warnings.length;
    if (skippedFiles > 0 || skippedDirectories > 0 || totalIssues > 0) {
      console.log(chalk.gray(`\n📊 Scan Statistics:`));
      console.log(chalk.gray(` Processed: ${processedFiles} files`));
      if (skippedFiles > 0) {
        console.log(chalk.gray(` Skipped: ${skippedFiles} files`));
      }
      if (skippedDirectories > 0) {
        console.log(chalk.gray(` Skipped: ${skippedDirectories} directories`));
      }
      if (totalIssues > 0) {
        console.log(chalk.gray(` Issues: ${errors.length} errors, ${warnings.length} warnings`));
      }
    }

    // Warn if scan completeness is compromised
    if (errors.length > 0) {
      console.log(chalk.yellow(`\n⚠️ WARNING: Scan may be incomplete due to ${errors.length} access errors.`));
      console.log(chalk.gray(' Some files or directories could not be accessed.'));
    }
  }

  /**
   * Display scan results summary
   * @param {object} filesByExtension - Files grouped by extension
   */
  displayScanSummary(filesByExtension) {
    const stats = this.calculateStatistics(filesByExtension);
    const extensions = Object.keys(filesByExtension).sort();

    console.log(chalk.cyan('\n📊 Scan Summary:'));
    console.log(chalk.gray(` Extensions found: ${extensions.join(', ')}`));
    console.log(chalk.gray(` Total files: ${stats.totalFiles}`));
    console.log(chalk.gray(` Total size: ${stats.totalSizeFormatted}`));
    console.log();
  }
}
467
-
1
+ import fs from 'fs-extra';
2
+ import path from 'path';
3
+ import chalk from 'chalk';
4
+ import ErrorHandler from './errorHandler.js';
5
+ import { formatFileSize, getExtensionDescription, matchesGlobPattern } from './utils.js';
6
+
7
/** Upper bound (100MB) on a single file's size; larger files are skipped with a warning. */
const MAX_INDIVIDUAL_FILE_SIZE = 100 * 1024 * 1024;

/**
 * Directory names that are always skipped, compared lower-cased.
 * The dot-prefixed entries are normally caught earlier by the hidden-directory rule.
 */
const COMMON_SKIP_DIRS = new Set([
  'tmp', 'temp', 'cache', '.cache', 'logs', '.logs',
  'bower_components', 'vendor', '.vendor'
]);

/** Dot-files that pass the hidden-file filter. */
const ALLOWED_HIDDEN_FILES = new Set([
  '.gitignore', '.gitattributes', '.editorconfig',
  '.eslintrc.js', '.eslintrc.json', '.prettierrc',
  '.env.example', '.htaccess'
]);

/** Dot-directories that pass the hidden-directory filter. */
const ALLOWED_HIDDEN_DIRS = new Set(['.github', '.gitlab', '.circleci']);

/**
 * File Scanner for CodeSummary
 * Handles recursive directory traversal and file filtering
 */
export class Scanner {
  /**
   * @param {object} config - Scanner configuration
   * @param {string[]} [config.allowedExtensions] - Extensions (with leading dot) to include; compared lower-cased
   * @param {string[]} [config.excludeDirs] - Directory names to skip (exact, case-sensitive match)
   * @param {string[]} [config.excludeFiles] - File name patterns to skip (matched with matchesGlobPattern)
   */
  constructor(config) {
    this.config = config;
    // Every list is optional: a missing list behaves like an empty one.
    this.allowedExtensions = new Set((config.allowedExtensions || []).map(ext => ext.toLowerCase()));
    this.excludeDirs = new Set(config.excludeDirs || []);
    this.excludeFiles = config.excludeFiles || [];
  }

  /**
   * Scan a directory recursively and return files grouped by extension
   * @param {string} rootPath - Root directory to scan
   * @returns {Promise<object>} Object with extensions as keys and file arrays as values
   * @throws {Error} If rootPath is not a non-empty string, does not exist, is not a
   *   directory or cannot be accessed; the underlying fs error is attached as `cause`
   */
  async scanDirectory(rootPath) {
    try {
      // For scanner paths, we only need basic validation (no aggressive sanitization)
      if (!rootPath || typeof rootPath !== 'string') {
        throw new Error('Invalid root path: must be a non-empty string');
      }

      // Just resolve the path and validate it exists
      const resolvedRoot = path.resolve(rootPath);
      const stats = await fs.stat(resolvedRoot);

      if (!stats.isDirectory()) {
        throw new Error(`Path is not a directory: ${resolvedRoot}`);
      }

      console.log(chalk.gray(`Scanning directory: ${resolvedRoot}`));

      const filesByExtension = {};
      const scannedFiles = new Set(); // Prevent duplicates
      const scanContext = {
        errors: [],
        warnings: [],
        skippedDirectories: 0,
        skippedFiles: 0,
        processedFiles: 0
      };

      await this.walkDirectory(resolvedRoot, resolvedRoot, filesByExtension, scannedFiles, scanContext);

      // Sort files within each extension group
      for (const files of Object.values(filesByExtension)) {
        files.sort((a, b) => a.relativePath.localeCompare(b.relativePath));
      }

      // Report scan summary with warnings/errors
      this.reportScanIssues(scanContext);

      return filesByExtension;
    } catch (error) {
      // Translate the common fs failures into friendlier messages, keeping the original as `cause`.
      if (error.code === 'ENOENT') {
        throw new Error(`Directory does not exist: ${rootPath}`, { cause: error });
      }
      // EPERM is treated like EACCES, matching walkDirectory and processFile.
      if (error.code === 'EACCES' || error.code === 'EPERM') {
        throw new Error(`Permission denied accessing directory: ${rootPath}`, { cause: error });
      }
      throw error;
    }
  }

  /**
   * Recursively walk through directory structure
   * @param {string} currentPath - Current directory being processed
   * @param {string} rootPath - Original root path for relative path calculation
   * @param {object} filesByExtension - Accumulator object for results
   * @param {Set} scannedFiles - Set to track processed files and avoid duplicates
   * @param {object} scanContext - Context object to track scan statistics
   * @returns {Promise<void>} Resolves when the subtree has been visited; read failures
   *   are recorded in scanContext instead of being thrown
   */
  async walkDirectory(currentPath, rootPath, filesByExtension, scannedFiles, scanContext) {
    try {
      const entries = await fs.readdir(currentPath, { withFileTypes: true });

      for (const entry of entries) {
        const fullPath = path.join(currentPath, entry.name);
        const relativePath = path.relative(rootPath, fullPath);

        if (entry.isDirectory()) {
          // Skip excluded directories and hidden directories (unless explicitly allowed)
          if (this.shouldSkipDirectory(entry.name, relativePath)) {
            scanContext.skippedDirectories++;
            continue;
          }

          // Recursively scan subdirectory
          await this.walkDirectory(fullPath, rootPath, filesByExtension, scannedFiles, scanContext);
        } else if (entry.isFile()) {
          // Process file if it matches criteria
          await this.processFile(fullPath, rootPath, filesByExtension, scannedFiles, scanContext);
        } else if (entry.isSymbolicLink()) {
          // Symbolic links are never followed, only reported
          scanContext.warnings.push(`Skipped symbolic link: ${relativePath}`);
        }
        // Skip other special files (devices, sockets, etc.)
      }
    } catch (error) {
      // Track errors in context for better reporting
      const relativePath = path.relative(rootPath, currentPath);

      if (error.code === 'EACCES' || error.code === 'EPERM') {
        scanContext.errors.push(`Permission denied: ${relativePath}`);
      } else if (error.code === 'ENOENT') {
        scanContext.warnings.push(`Directory no longer exists: ${relativePath}`);
      } else if (error.code === 'ENOTDIR') {
        scanContext.warnings.push(`Path is not a directory: ${relativePath}`);
      } else {
        scanContext.errors.push(`Cannot read directory ${relativePath}: ${error.message}`);
      }
    }
  }

  /**
   * Process a single file and add it to results if it matches criteria
   * @param {string} fullPath - Absolute path to the file
   * @param {string} rootPath - Root path for relative calculation
   * @param {object} filesByExtension - Results accumulator
   * @param {Set} scannedFiles - Set of already processed files
   * @param {object} scanContext - Context object to track scan statistics
   * @returns {Promise<void>} Resolves once the file was added or skipped; failures are
   *   recorded in scanContext instead of being thrown
   */
  async processFile(fullPath, rootPath, filesByExtension, scannedFiles, scanContext) {
    const relativePath = path.relative(rootPath, fullPath);

    try {
      // Avoid processing the same file twice
      if (scannedFiles.has(fullPath)) {
        return;
      }
      scannedFiles.add(fullPath);

      const extension = path.extname(relativePath).toLowerCase();

      // Skip files without extensions or not in allowed list
      // NOTE(review): path.extname() returns '' for names such as '.gitignore', so the
      // extension-less entries of the hidden-file allow-list are rejected here before the
      // hidden-file check below can accept them - confirm whether that is intended.
      if (!extension || !this.allowedExtensions.has(extension)) {
        scanContext.skippedFiles++;
        return;
      }

      // Skip hidden files (starting with .) unless explicitly needed
      const fileName = path.basename(relativePath);
      if (fileName.startsWith('.') && !this.isAllowedHiddenFile(fileName)) {
        scanContext.skippedFiles++;
        return;
      }

      // Check if file should be excluded by pattern (e.g., *-lock.json)
      if (this.shouldExcludeFile(fileName)) {
        scanContext.skippedFiles++;
        return;
      }

      // Stat the file: confirms it is a regular file and provides size / mtime
      const stats = await fs.stat(fullPath);
      if (!stats.isFile()) {
        scanContext.warnings.push(`Skipped non-regular file: ${relativePath}`);
        // Count it like every other skipped entry so the statistics add up
        scanContext.skippedFiles++;
        return;
      }

      // Check file size limits
      if (stats.size > MAX_INDIVIDUAL_FILE_SIZE) {
        scanContext.warnings.push(`Skipped large file (${Math.round(stats.size / 1024 / 1024)}MB): ${relativePath}`);
        scanContext.skippedFiles++;
        return;
      }

      // Add to results
      if (!filesByExtension[extension]) {
        filesByExtension[extension] = [];
      }

      filesByExtension[extension].push({
        relativePath: relativePath.replace(/\\/g, '/'), // Normalize path separators
        absolutePath: fullPath,
        size: stats.size,
        modified: stats.mtime
      });

      scanContext.processedFiles++;
    } catch (error) {
      // Handle file processing errors with appropriate context
      if (error.code === 'EACCES' || error.code === 'EPERM') {
        scanContext.errors.push(`Permission denied: ${relativePath}`);
      } else if (error.code === 'ENOENT') {
        // File might have been deleted during scan
        scanContext.warnings.push(`File no longer exists: ${relativePath}`);
      } else if (error.code === 'EISDIR') {
        scanContext.warnings.push(`Path is a directory, not a file: ${relativePath}`);
      } else {
        scanContext.errors.push(`Cannot process file ${relativePath}: ${error.message}`);
      }

      scanContext.skippedFiles++;
    }
  }

  /**
   * Determine if a directory should be skipped
   * @param {string} dirName - Directory name
   * @param {string} relativePath - Relative path from root (currently unused)
   * @returns {boolean} True if directory should be skipped
   */
  shouldSkipDirectory(dirName, relativePath) {
    // Skip directories in exclude list
    if (this.excludeDirs.has(dirName)) {
      return true;
    }

    // Skip hidden directories (starting with .) unless explicitly allowed
    if (dirName.startsWith('.') && !this.isAllowedHiddenDirectory(dirName)) {
      return true;
    }

    // Skip common build/cache directories that might not be in exclude list
    return COMMON_SKIP_DIRS.has(dirName.toLowerCase());
  }

  /**
   * Check if a file should be excluded based on patterns
   * @param {string} fileName - File name to check
   * @returns {boolean} True if file should be excluded
   */
  shouldExcludeFile(fileName) {
    return this.excludeFiles.some(pattern => matchesGlobPattern(fileName, pattern));
  }

  /**
   * Check if a hidden file should be included
   * @param {string} fileName - File name
   * @returns {boolean} True if file should be included
   */
  isAllowedHiddenFile(fileName) {
    return ALLOWED_HIDDEN_FILES.has(fileName);
  }

  /**
   * Check if a hidden directory should be included
   * @param {string} dirName - Directory name
   * @returns {boolean} True if directory should be included
   */
  isAllowedHiddenDirectory(dirName) {
    return ALLOWED_HIDDEN_DIRS.has(dirName);
  }

  /**
   * Get file extension descriptions for user display
   * @param {object} filesByExtension - Files grouped by extension
   * @returns {Array} Array of extension info objects, sorted by extension
   */
  getExtensionInfo(filesByExtension) {
    return Object.keys(filesByExtension)
      .sort()
      .map(ext => ({
        extension: ext,
        description: getExtensionDescription(ext),
        count: filesByExtension[ext].length,
        files: filesByExtension[ext]
      }));
  }

  /**
   * Calculate total statistics for scanned files
   * @param {object} filesByExtension - Files grouped by extension
   * @returns {object} Statistics object
   */
  calculateStatistics(filesByExtension) {
    let totalFiles = 0;
    let totalSize = 0;

    for (const files of Object.values(filesByExtension)) {
      totalFiles += files.length;
      totalSize += files.reduce((sum, file) => sum + file.size, 0);
    }

    return {
      totalFiles,
      totalSize,
      extensionCount: Object.keys(filesByExtension).length,
      averageFileSize: totalFiles > 0 ? Math.round(totalSize / totalFiles) : 0,
      totalSizeFormatted: formatFileSize(totalSize)
    };
  }

  /**
   * Report scan issues and statistics
   * @param {object} scanContext - Context object with scan statistics
   */
  reportScanIssues(scanContext) {
    const { errors, warnings, skippedDirectories, skippedFiles, processedFiles } = scanContext;

    // Report critical errors (first five only)
    if (errors.length > 0) {
      console.log(chalk.red(`\n⚠️ ${errors.length} scan error(s):`));
      errors.slice(0, 5).forEach(error => {
        console.log(chalk.red(` • ${error}`));
      });
      if (errors.length > 5) {
        console.log(chalk.gray(` ... and ${errors.length - 5} more errors`));
      }
    }

    // Report warnings (less critical) - only shown when NODE_ENV is 'development'
    if (warnings.length > 0 && process.env.NODE_ENV === 'development') {
      console.log(chalk.yellow(`\n⚠️ ${warnings.length} scan warning(s):`));
      warnings.slice(0, 3).forEach(warning => {
        console.log(chalk.yellow(` • ${warning}`));
      });
      if (warnings.length > 3) {
        console.log(chalk.gray(` ... and ${warnings.length - 3} more warnings`));
      }
    }

    // Report summary statistics
    const totalIssues = errors.length + warnings.length;
    if (skippedFiles > 0 || skippedDirectories > 0 || totalIssues > 0) {
      console.log(chalk.gray(`\n📊 Scan Statistics:`));
      console.log(chalk.gray(` Processed: ${processedFiles} files`));
      if (skippedFiles > 0) {
        console.log(chalk.gray(` Skipped: ${skippedFiles} files`));
      }
      if (skippedDirectories > 0) {
        console.log(chalk.gray(` Skipped: ${skippedDirectories} directories`));
      }
      if (totalIssues > 0) {
        console.log(chalk.gray(` Issues: ${errors.length} errors, ${warnings.length} warnings`));
      }
    }

    // Warn if scan completeness is compromised
    if (errors.length > 0) {
      console.log(chalk.yellow(`\n⚠️ WARNING: Scan may be incomplete due to ${errors.length} access errors.`));
      console.log(chalk.gray(' Some files or directories could not be accessed.'));
    }
  }

  /**
   * Display scan results summary
   * @param {object} filesByExtension - Files grouped by extension
   */
  displayScanSummary(filesByExtension) {
    const stats = this.calculateStatistics(filesByExtension);
    const extensions = Object.keys(filesByExtension).sort();

    console.log(chalk.cyan('\n📊 Scan Summary:'));
    console.log(chalk.gray(` Extensions found: ${extensions.join(', ')}`));
    console.log(chalk.gray(` Total files: ${stats.totalFiles}`));
    console.log(chalk.gray(` Total size: ${stats.totalSizeFormatted}`));
    console.log();
  }
}
386
+
468
387
  export default Scanner;