@tb.p/dd 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,82 @@
1
+ import path from 'path';
2
/**
 * Split a pipe-delimited string into trimmed, non-empty tokens.
 * @param {string|null|undefined} pipeString - Raw "a|b|c" style input.
 * @returns {string[]} Trimmed tokens; empty array for falsy input.
 */
function parsePipeSeparated(pipeString) {
  if (!pipeString) {
    return [];
  }
  const tokens = [];
  for (const piece of pipeString.split('|')) {
    const trimmed = piece.trim();
    if (trimmed.length > 0) {
      tokens.push(trimmed);
    }
  }
  return tokens;
}
6
+
7
/**
 * Normalize a list of filesystem paths: explicitly relative paths
 * ("./x" or "../x") are kept exactly as written, everything else is
 * resolved to an absolute path against the current working directory.
 * @param {string[]} paths - Paths to normalize.
 * @returns {string[]} Normalized paths, same order and length.
 */
function normalizePaths(paths) {
  return paths.map((candidate) =>
    candidate.startsWith('./') || candidate.startsWith('../')
      ? candidate
      : path.resolve(candidate)
  );
}
16
+
17
/**
 * Canonicalize file extensions: strip any leading dots and lowercase,
 * so ".JS", "js" and "..Js" all normalize to "js".
 * @param {string[]} extensions - Raw extension strings.
 * @returns {string[]} Canonical lowercase extensions without dots.
 */
function normalizeExtensions(extensions) {
  const leadingDots = /^\.+/;
  return extensions.map((ext) => ext.replace(leadingDots, '').toLowerCase());
}
22
+
23
/**
 * Derive the operating mode from raw option flags. Flags are checked in
 * priority order; the first truthy flag wins, mirroring the original
 * if-chain exactly.
 * @param {object} options - Raw CLI/config options.
 * @returns {string} One of 'get-extensions', 'detect-candidates',
 *   'candidate-stats', 'resume', 'save', or 'unknown'.
 */
function determineMode(options) {
  const modePriority = [
    ['getExtensions', 'get-extensions'],
    ['detectCandidates', 'detect-candidates'],
    ['candidateStats', 'candidate-stats'],
    ['resume', 'resume'],
    ['save', 'save'],
  ];
  for (const [flag, mode] of modePriority) {
    if (options[flag]) {
      return mode;
    }
  }
  return 'unknown';
}
31
+
32
/**
 * Transform raw CLI/config options into a canonical options object.
 *
 * Fixes over the previous version:
 * - `batchSize` / `maxConcurrency` now use `!= null` checks instead of
 *   truthiness, so an explicit numeric 0 is carried through rather than
 *   silently dropped.
 * - The candidate-mode `database` path reuses the already-resolved
 *   `resumeDb` / `saveDb` values instead of calling `path.resolve` twice.
 *
 * @param {object} rawOptions - Raw options (e.g. from a CLI parser).
 * @returns {object} Processed options: `mode`, normalized `targets`,
 *   optional `extensions`/`resumeDb`/`saveDb`/`database`, boolean
 *   `move`, `preservePaths`, and the original input under `_raw`.
 */
function processOptions(rawOptions) {
  // Targets arrive as a single pipe-separated string.
  const targets = parsePipeSeparated(rawOptions.targets);
  const normalizedTargets = normalizePaths(targets);

  // Extensions accept both comma and pipe separators.
  const extensions = rawOptions.extensions
    ? rawOptions.extensions
        .split(/[,|]/)
        .map((item) => item.trim())
        .filter((item) => item.length > 0)
    : [];
  const normalizedExtensions = normalizeExtensions(extensions);

  const mode = determineMode(rawOptions);

  // --flat-duplicates forces flattening; otherwise paths are preserved
  // unless the caller explicitly set preservePaths to false.
  const preservePaths = rawOptions.flatDuplicates
    ? false
    : rawOptions.preservePaths !== false;

  const processedOptions = {
    mode,
    targets: normalizedTargets,
    ...(extensions.length > 0 && { extensions: normalizedExtensions }),
    ...(rawOptions.resume && { resumeDb: path.resolve(rawOptions.resume) }),
    ...(rawOptions.save && { saveDb: path.resolve(rawOptions.save) }),
    ...(rawOptions.detectCandidates && { detectCandidates: true }),
    ...(rawOptions.candidateStats && { candidateStats: true }),
    ...(rawOptions.hashAlgorithm && { hashAlgorithm: rawOptions.hashAlgorithm }),
    // Nullish checks: a numeric 0 is an explicit value, not "unset".
    ...(rawOptions.batchSize != null && { batchSize: rawOptions.batchSize }),
    ...(rawOptions.maxConcurrency != null && { maxConcurrency: rawOptions.maxConcurrency }),
    ...(rawOptions.verbose && { verbose: true }),
    move: Boolean(rawOptions.move),
    preservePaths,
    _raw: rawOptions,
  };

  // Candidate operations need a database path; resume takes precedence
  // over save. Reuse the paths resolved above.
  if (mode === 'detect-candidates' || mode === 'candidate-stats') {
    if (processedOptions.resumeDb) {
      processedOptions.database = processedOptions.resumeDb;
    } else if (processedOptions.saveDb) {
      processedOptions.database = processedOptions.saveDb;
    }
  }

  return processedOptions;
}
75
+
76
+ export {
77
+ processOptions,
78
+ parsePipeSeparated,
79
+ normalizePaths,
80
+ normalizeExtensions,
81
+ determineMode
82
+ };
@@ -0,0 +1,261 @@
1
+ # File System Utilities for @tb.p/dd
2
+
3
+ This directory contains comprehensive file system utilities for the @tb.p/dd file deduplication tool. The utilities are organized into specialized modules for different file operations.
4
+
5
+ ## 📁 File Structure
6
+
7
+ ```
8
+ utils/
9
+ ├── index.js # Main exports and module aggregation
10
+ ├── fileSystemUtils.js # Core file system operations
11
+ ├── fileScanner.js # File scanning and discovery
12
+ ├── fileMover.js # File moving and management
13
+ ├── fileHasher.js # File hashing and content comparison
14
+ ├── examples.js # Comprehensive usage examples
15
+ └── README.md # This documentation
16
+ ```
17
+
18
+ ## 🚀 Quick Start
19
+
20
+ ```javascript
21
+ const {
22
+ scanDirectories,
23
+ findDuplicateFilesByHash,
24
+ moveDuplicateFiles,
25
+ formatFileSize
26
+ } = require('./utils');
27
+
28
+ // Scan directories for files
29
+ const files = await scanDirectories({
30
+ targets: ['./src', './docs'],
31
+ extensions: ['js', 'md'],
32
+ maxSize: 10 * 1024 * 1024 // 10MB
33
+ });
34
+
35
+ // Find duplicate files
36
+ const duplicates = await findDuplicateFilesByHash(files, {
37
+ algorithm: 'sha256'
38
+ });
39
+
40
+ // Move duplicates
41
+ await moveDuplicateFiles(duplicates, './duplicates');
42
+ ```
43
+
44
+ ## 📚 Module Documentation
45
+
46
+ ### fileSystemUtils.js
47
+ Core file system operations and utilities.
48
+
49
+ **Key Functions:**
50
+ - `pathExists(filePath)` - Check if path exists (async)
51
+ - `getFileStats(filePath)` - Get file statistics
52
+ - `createDirectory(dirPath)` - Create directory recursively
53
+ - `moveFile(src, dest, options)` - Move file with options
54
+ - `copyFile(src, dest, options)` - Copy file with options
55
+ - `formatFileSize(bytes)` - Format bytes to human readable size
56
+
57
+ **Example:**
58
+ ```javascript
59
+ const { pathExists, createDirectory, moveFile } = require('./fileSystemUtils');
60
+
61
+ if (await pathExists('./source.txt')) {
62
+ await createDirectory('./dest');
63
+ await moveFile('./source.txt', './dest/source.txt', { overwrite: true });
64
+ }
65
+ ```
66
+
67
+ ### fileScanner.js
68
+ Advanced file scanning and discovery operations.
69
+
70
+ **Key Functions:**
71
+ - `scanDirectory(dirPath, options)` - Scan single directory
72
+ - `scanDirectories(options)` - Scan multiple directories
73
+ - `findFilesByExtension(dirPaths, extensions, options)` - Find files by extension
74
+ - `findFilesByPattern(dirPaths, pattern, options)` - Find files by pattern
75
+ - `findDuplicatesBySize(files)` - Find duplicates by file size
76
+ - `getScanStatistics(files)` - Get comprehensive scan statistics
77
+
78
+ **Example:**
79
+ ```javascript
80
+ const { scanDirectories, findFilesByExtension, getScanStatistics } = require('./fileScanner');
+ const { formatFileSize } = require('./fileSystemUtils');
81
+
82
+ // Scan with options
83
+ const files = await scanDirectories({
84
+ targets: ['./src'],
85
+ extensions: ['js', 'ts'],
86
+ minSize: 1000,
87
+ maxSize: 1024 * 1024,
88
+ excludeDirs: ['node_modules'],
89
+ });
90
+
91
+ // Get statistics
92
+ const stats = getScanStatistics(files);
93
+ console.log(`Found ${stats.totalFiles} files, ${formatFileSize(stats.totalSize)} total`);
94
+ ```
95
+
96
+ ### fileMover.js
97
+ File moving, copying, and management operations.
98
+
99
+ **Key Functions:**
100
+ - `moveSingleFile(source, destination, options)` - Move single file
101
+ - `moveMultipleFiles(filePairs, options)` - Move multiple files
102
+ - `moveFilesToDirectory(filePaths, destDir, options)` - Move files to directory
103
+ - `moveDuplicateFiles(duplicates, destDir, options)` - Move duplicate files
104
+ - `batchMoveFiles(filePairs, options)` - Batch move with progress tracking
105
+ - `createHardLink(source, destination, options)` - Create hard links
106
+
107
+ **Example:**
108
+ ```javascript
109
+ const { moveDuplicateFiles, batchMoveFiles } = require('./fileMover');
110
+
111
+ // Move duplicates
112
+ const duplicates = [
113
+ { path: './file1.txt', size: 1024 },
114
+ { path: './file2.txt', size: 1024 }
115
+ ];
116
+
117
+ await moveDuplicateFiles(duplicates, './duplicates', {
118
+ overwrite: false,
119
+ createBackup: true,
120
+ dryRun: false
121
+ });
122
+
123
+ // Batch move with progress
124
+ const filePairs = [
125
+ { source: './src/file1.js', destination: './dest/file1.js' },
126
+ { source: './src/file2.js', destination: './dest/file2.js' }
127
+ ];
128
+
129
+ const results = await batchMoveFiles(filePairs, {
130
+ onProgress: (progress) => console.log(`Moving: ${progress.source}`)
131
+ });
132
+ ```
133
+
134
+ ### fileHasher.js
135
+ File hashing and content comparison operations.
136
+
137
+ **Key Functions:**
138
+ - `calculateFileHash(filePath, options)` - Calculate file hash
139
+ - `calculateMultipleHashes(filePath, algorithms, options)` - Multiple algorithms
140
+ - `findDuplicateFilesByHash(files, options)` - Find duplicates by hash
141
+ - `compareFilesByHash(file1, file2, options)` - Compare two files
142
+ - `calculateHashesInParallel(filePaths, options)` - Parallel hashing
143
+ - `verifyFileIntegrity(filePath, expectedHash, options)` - Verify file integrity
144
+
145
+ **Example:**
146
+ ```javascript
147
+ const { calculateFileHash, findDuplicateFilesByHash, compareFilesByHash } = require('./fileHasher');
148
+
149
+ // Calculate hash
150
+ const hashResult = await calculateFileHash('./file.txt', {
151
+ algorithm: 'sha256',
152
+ onProgress: (progress) => console.log(`${progress.percentage}%`)
153
+ });
154
+
155
+ // Find duplicates
156
+ const duplicates = await findDuplicateFilesByHash(files, {
157
+ algorithm: 'sha256',
158
+ maxConcurrency: 5
159
+ });
160
+
161
+ // Compare files
162
+ const areIdentical = await compareFilesByHash('./file1.txt', './file2.txt');
163
+ ```
164
+
165
+ ## ⚙️ Configuration Options
166
+
167
+ ### Scan Options
168
+ ```javascript
169
+ const scanOptions = {
170
+ extensions: ['js', 'md'], // Filter by extensions
171
+ minSize: 1000, // Minimum file size (bytes)
172
+ maxSize: 10 * 1024 * 1024, // Maximum file size (bytes)
173
+ includeHidden: false, // Include hidden files
174
+ excludeDirs: ['node_modules'], // Directories to exclude
175
+ excludeFiles: ['*.log'], // File patterns to exclude
176
+ followSymlinks: false, // Follow symbolic links
177
+ maxDepth: Infinity // Maximum directory depth
178
+ };
179
+ ```
180
+
181
+ ### Move Options
182
+ ```javascript
183
+ const moveOptions = {
184
+ overwrite: false, // Overwrite existing files
185
+ createBackup: true, // Create backup before moving
186
+ backupSuffix: '.backup', // Backup file suffix
187
+ dryRun: false, // Simulate without actually moving
188
+ onProgress: (progress) => {}, // Progress callback
189
+ onError: (error, result) => {} // Error callback
190
+ };
191
+ ```
192
+
193
+ ### Hash Options
194
+ ```javascript
195
+ const hashOptions = {
196
+ algorithm: 'sha256', // Hash algorithm
197
+ chunkSize: 64 * 1024, // Chunk size for streaming
198
+ includeMetadata: false, // Include file metadata
199
+ onProgress: (progress) => {}, // Progress callback
200
+ maxFileSize: Infinity, // Maximum file size to hash
201
+ maxConcurrency: 5 // Max parallel operations
202
+ };
203
+ ```
204
+
205
+ ## 🔧 Available Hash Algorithms
206
+
207
+ - `md5` - MD5 (fast, less secure)
208
+ - `sha1` - SHA-1 (fast, moderate security)
209
+ - `sha256` - SHA-256 (recommended, good security)
210
+ - `sha512` - SHA-512 (slower, high security)
211
+ - `blake2b512` - BLAKE2b-512 (fast, high security)
212
+ - `blake2s256` - BLAKE2s-256 (fast, good security)
213
+
214
+ ## 📊 Performance Tips
215
+
216
+ 1. **Use size-based filtering first** - Filter by file size before hashing
217
+ 2. **Limit file size** - Set `maxFileSize` to avoid hashing huge files
218
+ 3. **Use parallel operations** - Set `maxConcurrency` for batch operations
219
+ 4. **Stream large files** - Use appropriate `chunkSize` for streaming
220
+ 5. **Exclude unnecessary directories** - Use `excludeDirs` to skip irrelevant paths
221
+
222
+ ## 🚨 Error Handling
223
+
224
+ All utilities include comprehensive error handling:
225
+
226
+ ```javascript
227
+ try {
228
+ const files = await scanDirectories({ ...options, targets: ['./src'] });
229
+ } catch (error) {
230
+ console.error('Scan failed:', error.message);
231
+ // Handle specific error types
232
+ if (error.code === 'ENOENT') {
233
+ console.error('Directory not found');
234
+ }
235
+ }
236
+ ```
237
+
238
+ ## 📝 Examples
239
+
240
+ See `examples.js` for comprehensive usage examples including:
241
+ - Basic file operations
242
+ - File scanning workflows
243
+ - Duplicate detection
244
+ - File moving strategies
245
+ - Complete deduplication workflow
246
+
247
+ ## 🔗 Integration
248
+
249
+ The utilities are designed to work together seamlessly:
250
+
251
+ ```javascript
252
+ // Complete deduplication workflow
253
+ const files = await scanDirectories({ ...scanOptions, targets });
254
+ const sizeDuplicates = findDuplicatesBySize(files);
255
+ const hashDuplicates = await findDuplicateFilesByHash(sizeDuplicates);
256
+ await moveDuplicateFiles(hashDuplicates, './duplicates');
257
+ ```
258
+
259
+ ## 📄 License
260
+
261
+ MIT License - See main project license for details.