@tb.p/dd 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,91 @@
1
+ import { createConnection } from '../database/dbConnection.js';
2
+
3
/**
 * Display duplicate files to the user
 * @param {Object} options - Options containing database path (saveDb, resumeDb, or database)
 * @returns {Promise<void>}
 */
export async function displayDuplicates(options) {
  // Any of the three option fields may carry the database path; first one wins
  const databasePath = options.saveDb || options.resumeDb || options.database;

  if (!databasePath) {
    console.log('ā„¹ļø No database path available for duplicate reporting');
    return;
  }

  let db;
  try {
    console.log('\nšŸ“‹ Generating duplicate file report...');

    db = createConnection(databasePath);
    await db.connect();

    // Loaded lazily so this module can be imported without the operations layer
    const operations = await import('../database/dbOperations.js');
    const dbOps = operations.createOperations(db);

    const duplicateGroups = await dbOps.getDuplicateGroups();

    if (!duplicateGroups || duplicateGroups.length === 0) {
      // FIX: this early return previously leaked the open connection;
      // the `finally` block below now guarantees cleanup on every exit path.
      console.log('āœ… No duplicate files found!');
      return;
    }

    console.log(`\nšŸ” Found ${duplicateGroups.length} duplicate groups:\n`);

    let totalDuplicates = 0;
    let totalWastedSpace = 0;

    duplicateGroups.forEach((group, index) => {
      const filePaths = group.file_paths.split(';');
      const duplicateCount = group.duplicate_count;
      const fileSize = group.file_size;
      // Space reclaimable by deleting everything except one copy
      const wastedSpace = (duplicateCount - 1) * fileSize;

      totalDuplicates += duplicateCount;
      totalWastedSpace += wastedSpace;

      console.log(`šŸ“ Duplicate Group ${index + 1} (${duplicateCount} files, ${formatFileSize(fileSize)} each):`);
      console.log(` Hash: ${group.file_hash}`);
      console.log(` Wasted space: ${formatFileSize(wastedSpace)}`);
      console.log(` Files:`);

      filePaths.forEach((filePath, fileIndex) => {
        // The first path in each group is treated as the original to keep
        const icon = fileIndex === 0 ? '🟢' : 'šŸ”“';
        const label = fileIndex === 0 ? '[ORIGINAL]' : '[DUPLICATE]';
        console.log(` ${icon} ${label} ${filePath}`);
      });

      console.log('');
    });

    console.log(`šŸ“Š Summary:`);
    console.log(` - Duplicate groups: ${duplicateGroups.length}`);
    console.log(` - Total duplicate files: ${totalDuplicates - duplicateGroups.length}`);
    console.log(` - Total wasted space: ${formatFileSize(totalWastedSpace)}`);
  } catch (error) {
    console.error(`āŒ Error generating duplicate report: ${error.message}`);
  } finally {
    // FIX: close in `finally` so every exit path (early returns and errors included)
    // releases the connection; previously close was duplicated in try and catch.
    if (db) {
      try {
        await db.close();
      } catch {
        // best-effort close; the outcome of the report has already been logged
      }
    }
  }
}
74
+
75
/**
 * Format file size in human readable format
 * @param {number} bytes - Size in bytes
 * @returns {string} - Formatted size (e.g. "1.50 KB"), capped at TB
 */
function formatFileSize(bytes) {
  const units = ['B', 'KB', 'MB', 'GB', 'TB'];

  // Find the largest unit for which the scaled value stays below 1024,
  // never going past the last unit in the table.
  let exponent = 0;
  while (bytes / 1024 ** exponent >= 1024 && exponent < units.length - 1) {
    exponent += 1;
  }

  return `${(bytes / 1024 ** exponent).toFixed(2)} ${units[exponent]}`;
}
@@ -0,0 +1,195 @@
1
+ import fs from 'fs';
2
+ import crypto from 'crypto';
3
+ import { blake3 } from 'hash-wasm';
4
+ import { pathExists, getFileSize } from './fileSystemUtils.js';
5
+
6
+ /**
7
+ * File Hasher Utilities for @tb.p/dd
8
+ * Advanced file hashing and content comparison operations
9
+ */
10
+
11
/**
 * Hash algorithm identifiers accepted by the hashing functions.
 * Frozen so the shared constant cannot be mutated by consumers.
 */
const HASH_ALGORITHMS = Object.freeze({
  MD5: 'md5',
  SHA1: 'sha1',
  SHA256: 'sha256',
  SHA512: 'sha512',
  BLAKE3: 'blake3'
});
21
+
22
+ /**
23
+ * Hash options
24
+ * @typedef {Object} HashOptions
25
+ * @property {string} [algorithm] - Hash algorithm to use
26
+ * @property {number} [chunkSize] - Chunk size for streaming (bytes)
27
+ * @property {boolean} [includeMetadata] - Include file metadata in hash
28
+ * @property {Function} [onProgress] - Progress callback function
29
+ * @property {number} [maxFileSize] - Maximum file size to hash (bytes)
30
+ */
31
+
32
+ /**
33
+ * Hash result
34
+ * @typedef {Object} HashResult
35
+ * @property {string} hash - Calculated hash
36
+ * @property {string} algorithm - Algorithm used
37
+ * @property {number} size - File size in bytes
38
+ * @property {number} [timeMs] - Time taken in milliseconds
39
+ * @property {string} [error] - Error message if failed
40
+ */
41
+
42
/**
 * Calculate file hash using streaming
 * @param {string} filePath - Path to file
 * @param {HashOptions} options - Hash options
 * @returns {Promise<HashResult>} - Hash result
 * @throws {Error} If the file is missing, exceeds maxFileSize, or cannot be read
 */
async function calculateFileHash(filePath, options = {}) {
  const {
    algorithm = HASH_ALGORITHMS.BLAKE3,
    chunkSize = 64 * 1024, // 64KB chunks
    includeMetadata = false,
    onProgress = null,
    maxFileSize = Infinity
  } = options;

  const startTime = Date.now();
  const result = {
    hash: null,
    algorithm,
    size: 0,
    timeMs: 0,
    error: null
  };

  try {
    // Check if file exists
    if (!await pathExists(filePath)) {
      throw new Error(`File does not exist: ${filePath}`);
    }

    // Get file size
    result.size = await getFileSize(filePath);

    // Check file size limit
    if (result.size > maxFileSize) {
      throw new Error(`File too large: ${result.size} bytes (max: ${maxFileSize})`);
    }

    // BLAKE3 is provided by hash-wasm; everything else goes through node:crypto
    if (algorithm === HASH_ALGORITHMS.BLAKE3) {
      return await calculateBlake3Hash(filePath, result, includeMetadata, onProgress, startTime);
    }

    const hash = crypto.createHash(algorithm);

    // Mix file metadata into the digest when requested.
    // FIX: use the async stat instead of fs.statSync so the event loop is not blocked.
    if (includeMetadata) {
      const stats = await fs.promises.stat(filePath);
      hash.update(JSON.stringify({
        size: stats.size,
        mtime: stats.mtime.getTime(),
        ctime: stats.ctime.getTime(),
        mode: stats.mode
      }));
    }

    // Stream the file through the hash in chunkSize pieces
    const stream = fs.createReadStream(filePath, { highWaterMark: chunkSize });
    let bytesProcessed = 0;

    return await new Promise((resolve, reject) => {
      stream.on('data', (chunk) => {
        hash.update(chunk);
        bytesProcessed += chunk.length;

        if (onProgress) {
          onProgress({
            bytesProcessed,
            totalBytes: result.size,
            // FIX: guard against division by zero for empty files (was NaN)
            percentage: result.size === 0 ? 100 : (bytesProcessed / result.size) * 100
          });
        }
      });

      stream.on('end', () => {
        result.hash = hash.digest('hex');
        result.timeMs = Date.now() - startTime;
        resolve(result);
      });

      stream.on('error', (error) => {
        result.error = error.message;
        reject(error);
      });
    });
  } catch (error) {
    // Record failure details before rethrowing so callers holding `result` can inspect them
    result.error = error.message;
    result.timeMs = Date.now() - startTime;
    throw error;
  }
}
135
+
136
/**
 * Calculate BLAKE3 hash for a file
 * @param {string} filePath - Path to file
 * @param {Object} result - Result object to update (hash, timeMs, error are written)
 * @param {boolean} includeMetadata - Include file metadata in hash
 * @param {Function} onProgress - Progress callback function (currently unused for BLAKE3)
 * @param {number} startTime - Start time for timing
 * @returns {Promise<Object>} - The same result object, with hash and timeMs filled in
 * @throws {Error} If the file cannot be read
 */
async function calculateBlake3Hash(filePath, result, includeMetadata, onProgress, startTime) {
  try {
    // NOTE: hash-wasm's one-shot blake3() needs the whole file in memory;
    // very large files are better served by the streaming crypto path.
    const fileBuffer = await fs.promises.readFile(filePath);

    if (includeMetadata) {
      // FIX: use the async stat instead of fs.statSync so the event loop is not blocked
      const stats = await fs.promises.stat(filePath);
      const metadata = JSON.stringify({
        size: stats.size,
        mtime: stats.mtime.getTime(),
        ctime: stats.ctime.getTime(),
        mode: stats.mode
      });

      // Metadata is prepended to the content so both contribute to the digest
      const combinedBuffer = Buffer.concat([
        Buffer.from(metadata),
        fileBuffer
      ]);

      result.hash = await blake3(combinedBuffer);
    } else {
      result.hash = await blake3(fileBuffer);
    }

    result.timeMs = Date.now() - startTime;
    return result;
  } catch (error) {
    // Record failure details before rethrowing so callers holding `result` can inspect them
    result.error = error.message;
    result.timeMs = Date.now() - startTime;
    throw error;
  }
}
179
+
180
/**
 * Calculate hash for file content only (no metadata)
 * @param {string} filePath - Path to file
 * @param {string} algorithm - Hash algorithm
 * @returns {Promise<string>} - File content hash
 */
async function calculateContentHash(filePath, algorithm = HASH_ALGORITHMS.BLAKE3) {
  // Delegate to the full hasher with metadata mixing explicitly disabled
  const { hash } = await calculateFileHash(filePath, { algorithm, includeMetadata: false });
  return hash;
}
190
+
191
+ export {
192
+ HASH_ALGORITHMS,
193
+ calculateFileHash,
194
+ calculateContentHash
195
+ };
@@ -0,0 +1,180 @@
1
+ import path from 'path';
2
+ import {
3
+ pathExists,
4
+ createDirectory,
5
+ moveFile,
6
+ getFileStats,
7
+ joinPaths
8
+ } from './fileSystemUtils.js';
9
+
10
+ /**
11
+ * Move operation result
12
+ * @typedef {Object} MoveResult
13
+ * @property {boolean} success - Whether operation was successful
14
+ * @property {string} sourcePath - Original source path
15
+ * @property {string} destinationPath - Final destination path
16
+ * @property {string} [error] - Error message if failed
17
+ * @property {number} [size] - File size in bytes
18
+ */
19
+
20
/**
 * Move a single file
 * @param {string} sourcePath - Source file path
 * @param {string} destinationPath - Destination file path
 * @param {Object} options - Move options
 * @returns {Promise<MoveResult>} - Move result
 */
async function moveSingleFile(sourcePath, destinationPath, options = {}) {
  const {
    overwrite = false,
    dryRun = false,
    onProgress = null,
    onError = null
  } = options;

  const result = {
    success: false,
    sourcePath,
    destinationPath,
    error: null,
    size: 0
  };

  try {
    // The source must exist before anything else is attempted
    if (!await pathExists(sourcePath)) {
      throw new Error(`Source file does not exist: ${sourcePath}`);
    }

    // Record the size of the file being moved
    const { size } = await getFileStats(sourcePath);
    result.size = size;

    // Refuse to clobber an existing destination unless overwrite was requested
    if (!overwrite && await pathExists(destinationPath)) {
      throw new Error(`Destination file already exists: ${destinationPath}`);
    }

    // Dry runs report the intended move without touching the filesystem
    if (dryRun) {
      console.log(`[DRY RUN] Would move: ${sourcePath} -> ${destinationPath}`);
      result.success = true;
      return result;
    }

    // Make sure the destination directory exists, then perform the move
    await createDirectory(path.dirname(destinationPath));
    await moveFile(sourcePath, destinationPath, { overwrite });

    result.success = true;
    onProgress?.(result);
  } catch (error) {
    // Failures are captured in the result rather than thrown to the caller
    result.error = error.message;
    result.success = false;
    onError?.(result);
  }

  return result;
}
88
+
89
/**
 * Move multiple files
 * @param {Array<{source: string, destination: string}>} filePairs - Array of source/destination pairs
 * @param {Object} options - Move options
 * @returns {Promise<MoveResult[]>} - Array of move results
 */
async function moveMultipleFiles(filePairs, options = {}) {
  // Files are moved one at a time, in order, so failures stay easy to attribute
  const results = [];

  for (const { source, destination } of filePairs) {
    results.push(await moveSingleFile(source, destination, options));
  }

  return results;
}
105
+
106
/**
 * Move duplicate files to a duplicates directory
 * @param {Array<{path: string, size: number, hash?: string}>} duplicateFiles - Duplicate files info
 * @param {string} duplicatesDir - Directory for duplicates
 * @param {Object} options - Move options
 * @returns {Promise<MoveResult[]>} - Array of move results
 */
async function moveDuplicateFiles(duplicateFiles, duplicatesDir, options = {}) {
  const filePairs = duplicateFiles.map((file, index) => {
    const fileName = path.basename(file.path);
    const { name, ext } = path.parse(fileName);

    // Every file after the first gets a unique "_duplicate_N" suffix
    const duplicateName = index > 0 ? `${name}_duplicate_${index}${ext}` : fileName;

    return {
      source: file.path,
      destination: joinPaths(duplicatesDir, duplicateName)
    };
  });

  return moveMultipleFiles(filePairs, options);
}
136
+
137
/**
 * Move duplicate files to a duplicates directory preserving relative path structure
 * @param {Array<{path: string, size: number, hash?: string}>} duplicateFiles - Duplicate files info
 * @param {string} duplicatesDir - Directory for duplicates
 * @param {string} sourceRoot - Root directory to calculate relative paths from
 * @param {Object} options - Move options
 * @returns {Promise<MoveResult[]>} - Array of move results
 */
async function moveDuplicateFilesWithPathPreservation(duplicateFiles, duplicatesDir, sourceRoot, options = {}) {
  const filePairs = duplicateFiles.map((file, index) => {
    const fileName = path.basename(file.path);
    const { name, ext } = path.parse(fileName);

    // Mirror the file's directory layout (relative to sourceRoot) under duplicatesDir
    const relativeDir = path.dirname(path.relative(sourceRoot, file.path));

    // Every file after the first gets a unique "_duplicate_N" suffix
    const duplicateName = index > 0 ? `${name}_duplicate_${index}${ext}` : fileName;

    return {
      source: file.path,
      destination: joinPaths(joinPaths(duplicatesDir, relativeDir), duplicateName)
    };
  });

  return moveMultipleFiles(filePairs, options);
}
176
+
177
+ export {
178
+ moveDuplicateFiles,
179
+ moveDuplicateFilesWithPathPreservation
180
+ };
@@ -0,0 +1,128 @@
1
+ import { promises as fs } from 'fs';
2
+ import path from 'path';
3
+ import recursive from 'recursive-readdir';
4
+ import {
5
+ pathExists,
6
+ isDirectory,
7
+ getFileExtension,
8
+ normalizePath
9
+ } from './fileSystemUtils.js';
10
+
11
/**
 * Recursively scan one directory and return info for every readable file.
 * Throws if the path is missing or not a directory; read errors inside the
 * tree are logged as warnings and yield an empty result.
 * @param {string} dirPath - Directory to scan
 * @param {Object} options - Scan options (forwarded to the file filter)
 * @returns {Promise<Object[]>} - File info objects
 */
async function scanDirectory(dirPath, options = {}) {
  if (!(await pathExists(dirPath))) {
    throw new Error(`Directory does not exist: ${dirPath}`);
  }

  if (!(await isDirectory(dirPath))) {
    throw new Error(`Path is not a directory: ${dirPath}`);
  }

  try {
    // recursive-readdir is callback-based; adapt it to a promise
    const filePaths = await new Promise((resolve, reject) => {
      recursive(dirPath, (err, found) => (err ? reject(err) : resolve(found)));
    });

    // Stat every entry in one parallel batch rather than file-by-file
    return await getBatchFileInfo(filePaths, options);
  } catch (error) {
    console.warn(`Warning: Error reading directory ${dirPath}: ${error.message}`);
    return [];
  }
}
37
+
38
/**
 * Build file info objects for many paths at once.
 * All stat() calls run in parallel; entries that cannot be stat'ed are
 * logged as warnings and skipped, and the extension filter is applied.
 * @param {string[]} filePaths - Paths to describe
 * @param {Object} options - Filter options (see shouldIncludeFile)
 * @returns {Promise<Object[]>} - File info objects that passed the filter
 */
async function getBatchFileInfo(filePaths, options = {}) {
  // Stat all paths in parallel; failures are captured instead of rejecting the batch
  const statResults = await Promise.all(
    filePaths.map(async (filePath) => {
      try {
        const normalizedPath = normalizePath(filePath);
        return { filePath: normalizedPath, stats: await fs.stat(normalizedPath), error: null };
      } catch (error) {
        return { filePath, stats: null, error };
      }
    })
  );

  const files = [];

  for (const { filePath, stats, error } of statResults) {
    if (error) {
      console.warn(`Warning: Could not process file ${filePath}: ${error.message}`);
      continue;
    }

    const fileInfo = {
      path: path.resolve(filePath),
      name: path.basename(filePath),
      extension: getFileExtension(filePath),
      size: stats.size,
      hash: null, // computed later when duplicate detection needs it
      active: true, // files start out active
    };

    if (shouldIncludeFile(fileInfo, options)) {
      files.push(fileInfo);
    }
  }

  return files;
}
78
+
79
/**
 * Build the file info object for a single path.
 * @param {string} filePath - Path to describe
 * @param {Object} options - Unused; kept for interface symmetry with the batch variant
 * @returns {Promise<Object>} - File info (path, name, extension, size, hash, active)
 */
async function getFileInfo(filePath, options = {}) {
  const normalizedPath = normalizePath(filePath);
  const stats = await fs.stat(normalizedPath);

  return {
    path: path.resolve(normalizedPath),
    name: path.basename(normalizedPath),
    extension: getFileExtension(normalizedPath),
    size: stats.size,
    hash: null, // computed later when needed
    active: true, // files start out active
  };
}
94
+
95
/**
 * Decide whether a scanned file passes the configured filters.
 * @param {Object} fileInfo - File info object (extension is consulted)
 * @param {Object} options - May carry an `extensions` allow-list
 * @returns {boolean} - True when the file should be kept
 */
function shouldIncludeFile(fileInfo, options) {
  const { extensions = null } = options;

  // An empty or missing extension filter admits every file
  if (!extensions || extensions.length === 0) {
    return true;
  }

  return extensions.includes(fileInfo.extension);
}
108
+
109
/**
 * Scan every target directory and concatenate the results.
 * A target that fails to scan is skipped with a warning rather than
 * aborting the whole run.
 * @param {Object} options - Options carrying `targets` (directory paths) plus filter options
 * @returns {Promise<Object[]>} - Combined file info from all scannable targets
 */
async function scanDirectories(options = {}) {
  const { targets = [] } = options;
  const allFiles = [];

  // Directories are scanned sequentially, in the order given
  for (const dirPath of targets) {
    try {
      allFiles.push(...(await scanDirectory(dirPath, options)));
    } catch (error) {
      console.warn(`Warning: Failed to scan directory ${dirPath}: ${error.message}`);
    }
  }

  return allFiles;
}
124
+
125
+ export {
126
+ scanDirectory,
127
+ scanDirectories
128
+ };