@tb.p/dd 1.1.3 → 1.2.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -48,7 +48,11 @@ async function performFileOperations(options, dbManager) {
48
48
  }
49
49
 
50
50
  // Convert map values back to array, maintaining priority order
51
- allFiles.push(...Array.from(fileMap.values()).sort((a, b) => a.priority - b.priority));
51
+ // Use a loop instead of apply/spread to avoid stack overflow with very large arrays
52
+ const sortedFiles = Array.from(fileMap.values()).sort((a, b) => a.priority - b.priority);
53
+ for (const file of sortedFiles) {
54
+ allFiles.push(file);
55
+ }
52
56
 
53
57
  // Store files in database
54
58
  let successCount = 0;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tb.p/dd",
3
- "version": "1.1.3",
3
+ "version": "1.2.0",
4
4
  "description": "A comprehensive command-line tool for finding and removing duplicate files using content-based hashing",
5
5
  "type": "module",
6
6
  "main": "index.js",
@@ -18,6 +18,15 @@ class CandidateDetection {
18
18
  onProgress: null,
19
19
  ...options
20
20
  };
21
+
22
+ // Enforce minimum batch size to prevent stack overflow
23
+ // Batch sizes below 10 can cause excessive nested promises
24
+ if (this.options.batchSize < 10) {
25
+ if (this.options.verbose) {
26
+ console.log(`⚠️ Batch size ${this.options.batchSize} is too small, using minimum of 10 to prevent stack overflow`);
27
+ }
28
+ this.options.batchSize = 10;
29
+ }
21
30
  this.stats = {
22
31
  totalFiles: 0,
23
32
  uniqueFiles: 0,
@@ -514,9 +523,13 @@ export async function runCandidateDetection(options) {
514
523
  maxConcurrency: parseInt(options.maxConcurrency) || 5,
515
524
  verbose: options.verbose || false,
516
525
  onProgress: options.verbose ? (progress) => {
517
- if (progress.phase === 'hashing') {
518
- const percentage = (progress.percentage || 0).toFixed(1);
519
- console.log(`📊 Hashing progress: ${progress.processed}/${progress.total} files (${percentage}%) - ${progress.hashedFiles} hashed, ${progress.errors} errors`);
526
+ if (progress && progress.phase === 'hashing') {
527
+ const processed = progress.processed || 0;
528
+ const total = progress.total || 0;
529
+ const hashedFiles = progress.hashedFiles || 0;
530
+ const errors = progress.errors || 0;
531
+ const percentage = total > 0 ? ((processed / total) * 100).toFixed(1) : '0.0';
532
+ console.log(`📊 Hashing progress: ${processed}/${total} files (${percentage}%) - ${hashedFiles} hashed, ${errors} errors`);
520
533
  }
521
534
  } : null
522
535
  });
@@ -19,7 +19,13 @@ async function scanDirectory(dirPath, options = {}) {
19
19
 
20
20
  try {
21
21
  const filePaths = await new Promise((resolve, reject) => {
22
+ // Add timeout to prevent hanging on very large directories
23
+ const timeout = setTimeout(() => {
24
+ reject(new Error('Directory scan timeout - directory may be too large or contain circular references'));
25
+ }, 300000); // 5 minute timeout
26
+
22
27
  recursive(dirPath, (err, files) => {
28
+ clearTimeout(timeout);
23
29
  if (err) reject(err);
24
30
  else resolve(files);
25
31
  });
@@ -36,18 +42,35 @@ async function scanDirectory(dirPath, options = {}) {
36
42
  }
37
43
 
38
44
  async function getBatchFileInfo(filePaths, options = {}) {
39
- // OPTIMIZATION: Batch all stat() operations for parallel I/O
40
- const statPromises = filePaths.map(async (filePath) => {
41
- try {
42
- const normalizedPath = normalizePath(filePath);
43
- const stats = await fs.stat(normalizedPath);
44
- return { filePath: normalizedPath, stats, error: null };
45
- } catch (error) {
46
- return { filePath, stats: null, error };
47
- }
48
- });
45
+ // OPTIMIZATION: Batch stat() operations with controlled concurrency to prevent stack overflow
46
+ const BATCH_SIZE = 100; // Process files in chunks to prevent excessive Promise.all
47
+
48
+ // Safety check: prevent processing extremely large file lists that could cause stack overflow
49
+ if (filePaths.length > 100000) {
50
+ console.warn(`Warning: Very large file list detected (${filePaths.length} files). This may cause performance issues.`);
51
+ console.warn('Consider processing smaller directories or using file filters to reduce the scope.');
52
+ }
53
+
54
+ const statResults = [];
55
+
56
+ for (let i = 0; i < filePaths.length; i += BATCH_SIZE) {
57
+ const batch = filePaths.slice(i, i + BATCH_SIZE);
58
+ const statPromises = batch.map(async (filePath) => {
59
+ try {
60
+ const normalizedPath = normalizePath(filePath);
61
+ const stats = await fs.stat(normalizedPath);
62
+ return { filePath: normalizedPath, stats, error: null };
63
+ } catch (error) {
64
+ return { filePath, stats: null, error };
65
+ }
66
+ });
49
67
 
50
- const statResults = await Promise.all(statPromises);
68
+ const batchResults = await Promise.all(statPromises);
69
+ // Use a loop instead of apply/spread to avoid stack overflow with very large arrays
70
+ for (const result of batchResults) {
71
+ statResults.push(result);
72
+ }
73
+ }
51
74
  const files = [];
52
75
 
53
76
  for (const result of statResults) {
@@ -113,7 +136,10 @@ async function scanDirectories(options = {}) {
113
136
  for (const dirPath of targets) {
114
137
  try {
115
138
  const files = await scanDirectory(dirPath, options);
116
- allFiles.push(...files);
139
+ // Use a loop instead of apply/spread to avoid stack overflow with very large arrays
140
+ for (const file of files) {
141
+ allFiles.push(file);
142
+ }
117
143
  } catch (error) {
118
144
  console.warn(`Warning: Failed to scan directory ${dirPath}: ${error.message}`);
119
145
  }
@@ -94,6 +94,30 @@ function validateOptions(options) {
94
94
  process.exit(1);
95
95
  }
96
96
  }
97
+
98
+ // Validate batch size to prevent stack overflow
99
+ if (options.batchSize) {
100
+ const batchSize = parseInt(options.batchSize);
101
+ if (isNaN(batchSize) || batchSize < 1) {
102
+ console.error('Error: Batch size must be a positive number');
103
+ process.exit(1);
104
+ } else if (batchSize < 10) {
105
+ console.warn(`Warning: Batch size ${batchSize} is very small and may cause performance issues or stack overflow. Recommended minimum: 10`);
106
+ } else if (batchSize > 1000) {
107
+ console.warn(`Warning: Batch size ${batchSize} is very large and may cause memory issues. Recommended maximum: 1000`);
108
+ }
109
+ }
110
+
111
+ // Validate max concurrency
112
+ if (options.maxConcurrency) {
113
+ const maxConcurrency = parseInt(options.maxConcurrency);
114
+ if (isNaN(maxConcurrency) || maxConcurrency < 1) {
115
+ console.error('Error: Max concurrency must be a positive number');
116
+ process.exit(1);
117
+ } else if (maxConcurrency > 50) {
118
+ console.warn(`Warning: Max concurrency ${maxConcurrency} is very high and may cause system instability. Recommended maximum: 50`);
119
+ }
120
+ }
97
121
  }
98
122
 
99
123
  export {