@tb.p/dd 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/controllers/getExtensionsController.js +27 -0
- package/controllers/newController.js +72 -0
- package/controllers/resumeController.js +233 -0
- package/database/README.md +262 -0
- package/database/dbConnection.js +314 -0
- package/database/dbOperations.js +332 -0
- package/database/dbUtils.js +125 -0
- package/database/dbValidator.js +325 -0
- package/database/index.js +102 -0
- package/index.js +75 -0
- package/package.json +32 -0
- package/processors/optionETL.js +82 -0
- package/utils/README.md +261 -0
- package/utils/candidateDetection.js +541 -0
- package/utils/duplicateMover.js +140 -0
- package/utils/duplicateReporter.js +91 -0
- package/utils/fileHasher.js +195 -0
- package/utils/fileMover.js +180 -0
- package/utils/fileScanner.js +128 -0
- package/utils/fileSystemUtils.js +192 -0
- package/utils/index.js +5 -0
- package/validators/optionValidator.js +103 -0
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import path from 'path';
|
|
2
|
+
/**
 * Split a pipe-delimited value into trimmed, non-empty items.
 *
 * Strings behave exactly as before (`"a | b||c"` -> `['a', 'b', 'c']`).
 * Arrays are also accepted: each element is split on `|` and the results are
 * flattened, so a value that was already collected into a list does not crash
 * on `.split`. Other truthy scalars are coerced with `String()`.
 *
 * @param {string|string[]|null|undefined} pipeString - Raw pipe-separated value.
 * @returns {string[]} Items in input order; empty array for any falsy input.
 */
function parsePipeSeparated(pipeString) {
  if (!pipeString) return [];

  // Normalize to a list of chunks so strings and arrays share one code path.
  const chunks = Array.isArray(pipeString) ? pipeString : [pipeString];

  return chunks
    .flatMap(chunk => String(chunk).split('|'))
    .map(item => item.trim())
    .filter(item => item.length > 0);
}
|
|
6
|
+
|
|
7
|
+
/**
 * Normalize target paths: explicitly relative paths are kept verbatim, every
 * other path is made absolute with `path.resolve`.
 *
 * A path counts as explicitly relative when it starts with `./` or `../`.
 * On platforms whose separator is not `/` (Windows), the native forms
 * `.\` and `..\` are recognised too, so they are preserved rather than
 * resolved. Behaviour on POSIX is unchanged.
 *
 * @param {string[]} paths - Paths to normalize.
 * @returns {string[]} New array, same length and order as `paths`.
 */
function normalizePaths(paths) {
  const relativePrefixes = ['./', '../'];
  if (path.sep !== '/') {
    // Only add the native-separator forms where they really are separators;
    // on POSIX a backslash is an ordinary filename character.
    relativePrefixes.push(`.${path.sep}`, `..${path.sep}`);
  }

  return paths.map(p => {
    const isExplicitlyRelative = relativePrefixes.some(prefix => p.startsWith(prefix));
    return isExplicitlyRelative ? p : path.resolve(p);
  });
}
|
|
16
|
+
|
|
17
|
+
/**
 * Canonicalize file extensions: trim surrounding whitespace, strip any
 * leading dots and lower-case the result.
 *
 * Entries that end up empty (for example `"."` or `"  "`) are dropped, so the
 * result never contains an empty-string extension. Duplicates that only
 * differed by case or leading dots collapse to one entry; first-seen order is
 * kept.
 *
 * @param {string[]} extensions - Raw extensions such as `['.JS', 'md']`.
 * @returns {string[]} Unique, lower-case extensions without leading dots.
 */
function normalizeExtensions(extensions) {
  // A Set keeps insertion order, which gives ordered de-duplication for free.
  const unique = new Set();

  for (const ext of extensions) {
    const cleaned = ext.trim().replace(/^\.+/, '').toLowerCase();
    if (cleaned.length > 0) {
      unique.add(cleaned);
    }
  }

  return [...unique];
}
|
|
22
|
+
|
|
23
|
+
/**
 * Pick the operating mode from the option flags.
 *
 * Flags are checked in a fixed priority order and the first truthy one wins:
 * `getExtensions`, `detectCandidates`, `candidateStats`, `resume`, `save`.
 * A missing (`null`/`undefined`) options object is treated as "no flags set"
 * instead of throwing.
 *
 * @param {object|null|undefined} options - Parsed option flags.
 * @returns {'get-extensions'|'detect-candidates'|'candidate-stats'|'resume'|'save'|'unknown'}
 *   The selected mode, or `'unknown'` when no flag is set.
 */
function determineMode(options) {
  // Ordered [flag, mode] pairs; array order is the precedence.
  const modePriority = [
    ['getExtensions', 'get-extensions'],
    ['detectCandidates', 'detect-candidates'],
    ['candidateStats', 'candidate-stats'],
    ['resume', 'resume'],
    ['save', 'save']
  ];

  const flags = options || {};
  const match = modePriority.find(([flag]) => flags[flag]);
  return match ? match[1] : 'unknown';
}
|
|
31
|
+
|
|
32
|
+
/**
 * Turn the raw CLI options into the normalized options object used by the
 * rest of the tool.
 *
 * Always present on the result: `mode`, `targets`, `move`, `preservePaths`
 * and `_raw` (the untouched input). Optional keys (`extensions`, `resumeDb`,
 * `saveDb`, `detectCandidates`, `candidateStats`, `hashAlgorithm`,
 * `batchSize`, `maxConcurrency`, `verbose`) appear only when the matching raw
 * option is truthy. `database` is set only in the two candidate modes.
 *
 * @param {object} rawOptions - Options as received from the command line.
 * @returns {object} The processed options.
 */
function processOptions(rawOptions) {
  const normalizedTargets = normalizePaths(parsePipeSeparated(rawOptions.targets));

  // Extensions may be separated by commas or pipes.
  let extensions = [];
  if (rawOptions.extensions) {
    extensions = rawOptions.extensions
      .split(/[,|]/)
      .map(item => item.trim())
      .filter(item => item.length > 0);
  }
  const normalizedExtensions = normalizeExtensions(extensions);

  const mode = determineMode(rawOptions);

  // flatDuplicates forces flattening; otherwise paths are preserved unless
  // preservePaths was explicitly set to false.
  const preservePaths = !rawOptions.flatDuplicates && rawOptions.preservePaths !== false;

  const processedOptions = { mode, targets: normalizedTargets };

  if (extensions.length > 0) {
    processedOptions.extensions = normalizedExtensions;
  }
  if (rawOptions.resume) {
    processedOptions.resumeDb = path.resolve(rawOptions.resume);
  }
  if (rawOptions.save) {
    processedOptions.saveDb = path.resolve(rawOptions.save);
  }
  if (rawOptions.detectCandidates) {
    processedOptions.detectCandidates = true;
  }
  if (rawOptions.candidateStats) {
    processedOptions.candidateStats = true;
  }
  if (rawOptions.hashAlgorithm) {
    processedOptions.hashAlgorithm = rawOptions.hashAlgorithm;
  }
  if (rawOptions.batchSize) {
    processedOptions.batchSize = rawOptions.batchSize;
  }
  if (rawOptions.maxConcurrency) {
    processedOptions.maxConcurrency = rawOptions.maxConcurrency;
  }
  if (rawOptions.verbose) {
    processedOptions.verbose = true;
  }

  processedOptions.move = Boolean(rawOptions.move);
  processedOptions.preservePaths = preservePaths;
  processedOptions._raw = rawOptions;

  // Candidate operations need a database: the resume path wins over save.
  const isCandidateMode = mode === 'detect-candidates' || mode === 'candidate-stats';
  if (isCandidateMode) {
    const databaseSource = rawOptions.resume || rawOptions.save;
    if (databaseSource) {
      processedOptions.database = path.resolve(databaseSource);
    }
  }

  return processedOptions;
}
|
|
75
|
+
|
|
76
|
+
export {
|
|
77
|
+
processOptions,
|
|
78
|
+
parsePipeSeparated,
|
|
79
|
+
normalizePaths,
|
|
80
|
+
normalizeExtensions,
|
|
81
|
+
determineMode
|
|
82
|
+
};
|
package/utils/README.md
ADDED
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
# File System Utilities for @tb.p/dd
|
|
2
|
+
|
|
3
|
+
This directory contains comprehensive file system utilities for the @tb.p/dd file deduplication tool. The utilities are organized into specialized modules for different file operations.
|
|
4
|
+
|
|
5
|
+
## 📁 File Structure
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
utils/
|
|
9
|
+
├── index.js # Main exports and module aggregation
|
|
10
|
+
├── fileSystemUtils.js # Core file system operations
|
|
11
|
+
├── fileScanner.js # File scanning and discovery
|
|
12
|
+
├── fileMover.js # File moving and management
|
|
13
|
+
├── fileHasher.js # File hashing and content comparison
|
|
14
|
+
├── examples.js # Comprehensive usage examples
|
|
15
|
+
└── README.md # This documentation
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
## 🚀 Quick Start
|
|
19
|
+
|
|
20
|
+
```javascript
|
|
21
|
+
const {
|
|
22
|
+
scanDirectories,
|
|
23
|
+
findDuplicateFilesByHash,
|
|
24
|
+
moveDuplicateFiles,
|
|
25
|
+
formatFileSize
|
|
26
|
+
} = require('./utils');
|
|
27
|
+
|
|
28
|
+
// Scan directories for files
|
|
29
|
+
const files = await scanDirectories({
|
|
30
|
+
targets: ['./src', './docs'],
|
|
31
|
+
extensions: ['js', 'md'],
|
|
32
|
+
maxSize: 10 * 1024 * 1024 // 10MB
|
|
33
|
+
});
|
|
34
|
+
|
|
35
|
+
// Find duplicate files
|
|
36
|
+
const duplicates = await findDuplicateFilesByHash(files, {
|
|
37
|
+
algorithm: 'sha256'
|
|
38
|
+
});
|
|
39
|
+
|
|
40
|
+
// Move duplicates
|
|
41
|
+
await moveDuplicateFiles(duplicates, './duplicates');
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## 📚 Module Documentation
|
|
45
|
+
|
|
46
|
+
### fileSystemUtils.js
|
|
47
|
+
Core file system operations and utilities.
|
|
48
|
+
|
|
49
|
+
**Key Functions:**
|
|
50
|
+
- `pathExists(filePath)` - Check if path exists (async)
|
|
51
|
+
- `getFileStats(filePath)` - Get file statistics
|
|
52
|
+
- `createDirectory(dirPath)` - Create directory recursively
|
|
53
|
+
- `moveFile(src, dest, options)` - Move file with options
|
|
54
|
+
- `copyFile(src, dest, options)` - Copy file with options
|
|
55
|
+
- `formatFileSize(bytes)` - Format bytes to human readable size
|
|
56
|
+
|
|
57
|
+
**Example:**
|
|
58
|
+
```javascript
|
|
59
|
+
const { pathExists, createDirectory, moveFile } = require('./fileSystemUtils');
|
|
60
|
+
|
|
61
|
+
if (await pathExists('./source.txt')) {
|
|
62
|
+
await createDirectory('./dest');
|
|
63
|
+
await moveFile('./source.txt', './dest/source.txt', { overwrite: true });
|
|
64
|
+
}
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### fileScanner.js
|
|
68
|
+
Advanced file scanning and discovery operations.
|
|
69
|
+
|
|
70
|
+
**Key Functions:**
|
|
71
|
+
- `scanDirectory(dirPath, options)` - Scan single directory
|
|
72
|
+
- `scanDirectories(options)` - Scan multiple directories
|
|
73
|
+
- `findFilesByExtension(dirPaths, extensions, options)` - Find files by extension
|
|
74
|
+
- `findFilesByPattern(dirPaths, pattern, options)` - Find files by pattern
|
|
75
|
+
- `findDuplicatesBySize(files)` - Find duplicates by file size
|
|
76
|
+
- `getScanStatistics(files)` - Get comprehensive scan statistics
|
|
77
|
+
|
|
78
|
+
**Example:**
|
|
79
|
+
```javascript
|
|
80
|
+
const { scanDirectories, findFilesByExtension, getScanStatistics } = require('./fileScanner');
const { formatFileSize } = require('./fileSystemUtils'); // needed for the statistics log below
|
|
81
|
+
|
|
82
|
+
// Scan with options
|
|
83
|
+
const files = await scanDirectories({
|
|
84
|
+
targets: ['./src'],
|
|
85
|
+
extensions: ['js', 'ts'],
|
|
86
|
+
minSize: 1000,
|
|
87
|
+
maxSize: 1024 * 1024,
|
|
88
|
+
excludeDirs: ['node_modules'],
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
// Get statistics
|
|
92
|
+
const stats = getScanStatistics(files);
|
|
93
|
+
console.log(`Found ${stats.totalFiles} files, ${formatFileSize(stats.totalSize)} total`);
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### fileMover.js
|
|
97
|
+
File moving, copying, and management operations.
|
|
98
|
+
|
|
99
|
+
**Key Functions:**
|
|
100
|
+
- `moveSingleFile(source, destination, options)` - Move single file
|
|
101
|
+
- `moveMultipleFiles(filePairs, options)` - Move multiple files
|
|
102
|
+
- `moveFilesToDirectory(filePaths, destDir, options)` - Move files to directory
|
|
103
|
+
- `moveDuplicateFiles(duplicates, destDir, options)` - Move duplicate files
|
|
104
|
+
- `batchMoveFiles(filePairs, options)` - Batch move with progress tracking
|
|
105
|
+
- `createHardLink(source, destination, options)` - Create hard links
|
|
106
|
+
|
|
107
|
+
**Example:**
|
|
108
|
+
```javascript
|
|
109
|
+
const { moveDuplicateFiles, batchMoveFiles } = require('./fileMover');
|
|
110
|
+
|
|
111
|
+
// Move duplicates
|
|
112
|
+
const duplicates = [
|
|
113
|
+
{ path: './file1.txt', size: 1024 },
|
|
114
|
+
{ path: './file2.txt', size: 1024 }
|
|
115
|
+
];
|
|
116
|
+
|
|
117
|
+
await moveDuplicateFiles(duplicates, './duplicates', {
|
|
118
|
+
overwrite: false,
|
|
119
|
+
createBackup: true,
|
|
120
|
+
dryRun: false
|
|
121
|
+
});
|
|
122
|
+
|
|
123
|
+
// Batch move with progress
|
|
124
|
+
const filePairs = [
|
|
125
|
+
{ source: './src/file1.js', destination: './dest/file1.js' },
|
|
126
|
+
{ source: './src/file2.js', destination: './dest/file2.js' }
|
|
127
|
+
];
|
|
128
|
+
|
|
129
|
+
const results = await batchMoveFiles(filePairs, {
|
|
130
|
+
onProgress: (progress) => console.log(`Moving: ${progress.source}`)
|
|
131
|
+
});
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### fileHasher.js
|
|
135
|
+
File hashing and content comparison operations.
|
|
136
|
+
|
|
137
|
+
**Key Functions:**
|
|
138
|
+
- `calculateFileHash(filePath, options)` - Calculate file hash
|
|
139
|
+
- `calculateMultipleHashes(filePath, algorithms, options)` - Multiple algorithms
|
|
140
|
+
- `findDuplicateFilesByHash(files, options)` - Find duplicates by hash
|
|
141
|
+
- `compareFilesByHash(file1, file2, options)` - Compare two files
|
|
142
|
+
- `calculateHashesInParallel(filePaths, options)` - Parallel hashing
|
|
143
|
+
- `verifyFileIntegrity(filePath, expectedHash, options)` - Verify file integrity
|
|
144
|
+
|
|
145
|
+
**Example:**
|
|
146
|
+
```javascript
|
|
147
|
+
const { calculateFileHash, findDuplicateFilesByHash, compareFilesByHash } = require('./fileHasher');
|
|
148
|
+
|
|
149
|
+
// Calculate hash
|
|
150
|
+
const hashResult = await calculateFileHash('./file.txt', {
|
|
151
|
+
algorithm: 'sha256',
|
|
152
|
+
onProgress: (progress) => console.log(`${progress.percentage}%`)
|
|
153
|
+
});
|
|
154
|
+
|
|
155
|
+
// Find duplicates
|
|
156
|
+
const duplicates = await findDuplicateFilesByHash(files, {
|
|
157
|
+
algorithm: 'sha256',
|
|
158
|
+
maxConcurrency: 5
|
|
159
|
+
});
|
|
160
|
+
|
|
161
|
+
// Compare files
|
|
162
|
+
const areIdentical = await compareFilesByHash('./file1.txt', './file2.txt');
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
## ⚙️ Configuration Options
|
|
166
|
+
|
|
167
|
+
### Scan Options
|
|
168
|
+
```javascript
|
|
169
|
+
const scanOptions = {
|
|
170
|
+
extensions: ['js', 'md'], // Filter by extensions
|
|
171
|
+
minSize: 1000, // Minimum file size (bytes)
|
|
172
|
+
maxSize: 10 * 1024 * 1024, // Maximum file size (bytes)
|
|
173
|
+
includeHidden: false, // Include hidden files (option name and default reconstructed — verify against fileScanner.js)
|
|
174
|
+
excludeDirs: ['node_modules'], // Directories to exclude
|
|
175
|
+
excludeFiles: ['*.log'], // File patterns to exclude
|
|
176
|
+
followSymlinks: false, // Follow symbolic links
|
|
177
|
+
maxDepth: Infinity // Maximum directory depth
|
|
178
|
+
};
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
### Move Options
|
|
182
|
+
```javascript
|
|
183
|
+
const moveOptions = {
|
|
184
|
+
overwrite: false, // Overwrite existing files
|
|
185
|
+
createBackup: true, // Create backup before moving
|
|
186
|
+
backupSuffix: '.backup', // Backup file suffix
|
|
187
|
+
dryRun: false, // Simulate without actually moving
|
|
188
|
+
onProgress: (progress) => {}, // Progress callback
|
|
189
|
+
onError: (error, result) => {} // Error callback
|
|
190
|
+
};
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
### Hash Options
|
|
194
|
+
```javascript
|
|
195
|
+
const hashOptions = {
|
|
196
|
+
algorithm: 'sha256', // Hash algorithm
|
|
197
|
+
chunkSize: 64 * 1024, // Chunk size for streaming
|
|
198
|
+
includeMetadata: false, // Include file metadata
|
|
199
|
+
onProgress: (progress) => {}, // Progress callback
|
|
200
|
+
maxFileSize: Infinity, // Maximum file size to hash
|
|
201
|
+
maxConcurrency: 5 // Max parallel operations
|
|
202
|
+
};
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
## 🔧 Available Hash Algorithms
|
|
206
|
+
|
|
207
|
+
- `md5` - MD5 (fast, less secure)
|
|
208
|
+
- `sha1` - SHA-1 (fast; practical collision attacks exist, so do not rely on it for security)
|
|
209
|
+
- `sha256` - SHA-256 (recommended, good security)
|
|
210
|
+
- `sha512` - SHA-512 (slower, high security)
|
|
211
|
+
- `blake2b512` - BLAKE2b-512 (fast, high security)
|
|
212
|
+
- `blake2s256` - BLAKE2s-256 (fast, good security)
|
|
213
|
+
|
|
214
|
+
## 📊 Performance Tips
|
|
215
|
+
|
|
216
|
+
1. **Use size-based filtering first** - Filter by file size before hashing
|
|
217
|
+
2. **Limit file size** - Set `maxFileSize` to avoid hashing huge files
|
|
218
|
+
3. **Use parallel operations** - Set `maxConcurrency` for batch operations
|
|
219
|
+
4. **Stream large files** - Use appropriate `chunkSize` for streaming
|
|
220
|
+
5. **Exclude unnecessary directories** - Use `excludeDirs` to skip irrelevant paths
|
|
221
|
+
|
|
222
|
+
## 🚨 Error Handling
|
|
223
|
+
|
|
224
|
+
All utilities include comprehensive error handling:
|
|
225
|
+
|
|
226
|
+
```javascript
|
|
227
|
+
try {
|
|
228
|
+
const files = await scanDirectories({ ...options, targets: ['./src'] });
|
|
229
|
+
} catch (error) {
|
|
230
|
+
console.error('Scan failed:', error.message);
|
|
231
|
+
// Handle specific error types
|
|
232
|
+
if (error.code === 'ENOENT') {
|
|
233
|
+
console.error('Directory not found');
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
## 📝 Examples
|
|
239
|
+
|
|
240
|
+
See `examples.js` (note: this file is not among the files published in version 1.0.0 of the package) for comprehensive usage examples including:
|
|
241
|
+
- Basic file operations
|
|
242
|
+
- File scanning workflows
|
|
243
|
+
- Duplicate detection
|
|
244
|
+
- File moving strategies
|
|
245
|
+
- Complete deduplication workflow
|
|
246
|
+
|
|
247
|
+
## 🔗 Integration
|
|
248
|
+
|
|
249
|
+
The utilities are designed to work together seamlessly:
|
|
250
|
+
|
|
251
|
+
```javascript
|
|
252
|
+
// Complete deduplication workflow
|
|
253
|
+
const files = await scanDirectories({ ...scanOptions, targets });
|
|
254
|
+
const sizeDuplicates = findDuplicatesBySize(files);
|
|
255
|
+
const hashDuplicates = await findDuplicateFilesByHash(sizeDuplicates);
|
|
256
|
+
await moveDuplicateFiles(hashDuplicates, './duplicates');
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
## 📄 License
|
|
260
|
+
|
|
261
|
+
MIT License - See main project license for details.
|