@tb.p/dd 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/controllers/getExtensionsController.js +27 -0
- package/controllers/newController.js +72 -0
- package/controllers/resumeController.js +233 -0
- package/database/README.md +262 -0
- package/database/dbConnection.js +314 -0
- package/database/dbOperations.js +332 -0
- package/database/dbUtils.js +125 -0
- package/database/dbValidator.js +325 -0
- package/database/index.js +102 -0
- package/index.js +75 -0
- package/package.json +32 -0
- package/processors/optionETL.js +82 -0
- package/utils/README.md +261 -0
- package/utils/candidateDetection.js +541 -0
- package/utils/duplicateMover.js +140 -0
- package/utils/duplicateReporter.js +91 -0
- package/utils/fileHasher.js +195 -0
- package/utils/fileMover.js +180 -0
- package/utils/fileScanner.js +128 -0
- package/utils/fileSystemUtils.js +192 -0
- package/utils/index.js +5 -0
- package/validators/optionValidator.js +103 -0
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import { createConnection } from '../database/dbConnection.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Display duplicate files to the user.
 *
 * Opens the tracking database named by options (saveDb, resumeDb, or database,
 * in that order of precedence), prints every duplicate group with per-file
 * ORIGINAL/DUPLICATE labels, then a summary of wasted space.
 *
 * Fix: the connection is now closed in a finally block, so the early return on
 * "no duplicates found" no longer leaks the open connection.
 *
 * @param {Object} options - Options containing database path
 * @returns {Promise<void>}
 */
export async function displayDuplicates(options) {
  const databasePath = options.saveDb || options.resumeDb || options.database;

  if (!databasePath) {
    console.log('ā¹ļø No database path available for duplicate reporting');
    return;
  }

  let db;
  try {
    console.log('\nš Generating duplicate file report...');

    db = createConnection(databasePath);
    await db.connect();

    // Loaded lazily so the reporter does not pull in DB operation code until needed.
    const operations = await import('../database/dbOperations.js');
    const dbOps = operations.createOperations(db);

    const duplicateGroups = await dbOps.getDuplicateGroups();

    if (!duplicateGroups || duplicateGroups.length === 0) {
      console.log('ā No duplicate files found!');
      return;
    }

    console.log(`\nš Found ${duplicateGroups.length} duplicate groups:\n`);

    let totalDuplicates = 0;
    let totalWastedSpace = 0;

    duplicateGroups.forEach((group, index) => {
      // file_paths is a ';'-joined list; the first entry is treated as the original.
      const filePaths = group.file_paths.split(';');
      const duplicateCount = group.duplicate_count;
      const fileSize = group.file_size;
      // Every copy beyond the first wastes fileSize bytes.
      const wastedSpace = (duplicateCount - 1) * fileSize;

      totalDuplicates += duplicateCount;
      totalWastedSpace += wastedSpace;

      console.log(`š Duplicate Group ${index + 1} (${duplicateCount} files, ${formatFileSize(fileSize)} each):`);
      console.log(` Hash: ${group.file_hash}`);
      console.log(` Wasted space: ${formatFileSize(wastedSpace)}`);
      console.log(` Files:`);

      filePaths.forEach((filePath, fileIndex) => {
        const icon = fileIndex === 0 ? 'š¢' : 'š“';
        const label = fileIndex === 0 ? '[ORIGINAL]' : '[DUPLICATE]';
        console.log(` ${icon} ${label} ${filePath}`);
      });

      console.log('');
    });

    console.log(`š Summary:`);
    console.log(` - Duplicate groups: ${duplicateGroups.length}`);
    // Subtract one "original" per group so only redundant copies are counted.
    console.log(` - Total duplicate files: ${totalDuplicates - duplicateGroups.length}`);
    console.log(` - Total wasted space: ${formatFileSize(totalWastedSpace)}`);
  } catch (error) {
    console.error(`ā Error generating duplicate report: ${error.message}`);
  } finally {
    // Always release the connection, including on the early "no duplicates" return.
    if (db) {
      try {
        await db.close();
      } catch {
        // The report (or its error) has already been emitted; ignore close failures.
      }
    }
  }
}
|
|
74
|
+
|
|
75
|
+
/**
 * Format file size in human readable format
 * @param {number} bytes - Size in bytes
 * @returns {string} - Formatted size, e.g. "1.50 KB"
 */
function formatFileSize(bytes) {
  const units = ['B', 'KB', 'MB', 'GB', 'TB'];

  // Divide down by 1024 until the value fits the unit, capping at TB.
  let remaining = bytes;
  let chosen = 0;
  for (let i = 0; i < units.length - 1 && remaining >= 1024; i += 1) {
    remaining /= 1024;
    chosen = i + 1;
  }

  return `${remaining.toFixed(2)} ${units[chosen]}`;
}
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import crypto from 'crypto';
|
|
3
|
+
import { blake3 } from 'hash-wasm';
|
|
4
|
+
import { pathExists, getFileSize } from './fileSystemUtils.js';
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* File Hasher Utilities for @tb.p/dd
|
|
8
|
+
* Advanced file hashing and content comparison operations
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
/**
 * Hash algorithm types.
 *
 * Frozen so the shared lookup table cannot be mutated by consumers. The
 * values (except BLAKE3) are the identifiers passed to crypto.createHash();
 * BLAKE3 is handled by the hash-wasm implementation instead.
 */
const HASH_ALGORITHMS = Object.freeze({
  MD5: 'md5',
  SHA1: 'sha1',
  SHA256: 'sha256',
  SHA512: 'sha512',
  BLAKE3: 'blake3'
});
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* Hash options
|
|
24
|
+
* @typedef {Object} HashOptions
|
|
25
|
+
* @property {string} [algorithm] - Hash algorithm to use
|
|
26
|
+
* @property {number} [chunkSize] - Chunk size for streaming (bytes)
|
|
27
|
+
* @property {boolean} [includeMetadata] - Include file metadata in hash
|
|
28
|
+
* @property {Function} [onProgress] - Progress callback function
|
|
29
|
+
* @property {number} [maxFileSize] - Maximum file size to hash (bytes)
|
|
30
|
+
*/
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* Hash result
|
|
34
|
+
* @typedef {Object} HashResult
|
|
35
|
+
* @property {string} hash - Calculated hash
|
|
36
|
+
* @property {string} algorithm - Algorithm used
|
|
37
|
+
* @property {number} size - File size in bytes
|
|
38
|
+
* @property {number} [timeMs] - Time taken in milliseconds
|
|
39
|
+
* @property {string} [error] - Error message if failed
|
|
40
|
+
*/
|
|
41
|
+
|
|
42
|
+
/**
 * Calculate file hash using streaming.
 *
 * For BLAKE3 the work is delegated to calculateBlake3Hash(), which reads the
 * whole file into memory; chunkSize and onProgress only apply to the crypto
 * algorithms (md5/sha1/sha256/sha512), which are streamed.
 *
 * Fixes: uses async fs.promises.stat instead of blocking statSync; the stream
 * 'error' path now records timeMs like every other error path; the stream
 * promise is awaited so stream failures flow through the same catch block.
 *
 * @param {string} filePath - Path to file
 * @param {HashOptions} options - Hash options
 * @returns {Promise<HashResult>} - Hash result
 * @throws {Error} If the file is missing, exceeds maxFileSize, or hashing fails
 */
async function calculateFileHash(filePath, options = {}) {
  const {
    algorithm = HASH_ALGORITHMS.BLAKE3,
    chunkSize = 64 * 1024, // 64KB chunks
    includeMetadata = false,
    onProgress = null,
    maxFileSize = Infinity
  } = options;

  const startTime = Date.now();
  const result = {
    hash: null,
    algorithm,
    size: 0,
    timeMs: 0,
    error: null
  };

  try {
    // Check if file exists
    if (!await pathExists(filePath)) {
      throw new Error(`File does not exist: ${filePath}`);
    }

    // Get file size
    result.size = await getFileSize(filePath);

    // Check file size limit
    if (result.size > maxFileSize) {
      throw new Error(`File too large: ${result.size} bytes (max: ${maxFileSize})`);
    }

    // BLAKE3 goes through the hash-wasm path; everything else uses node:crypto.
    if (algorithm === HASH_ALGORITHMS.BLAKE3) {
      return await calculateBlake3Hash(filePath, result, includeMetadata, onProgress, startTime);
    }

    const hash = crypto.createHash(algorithm);

    // Add metadata to hash if requested (async stat: don't block the event loop).
    if (includeMetadata) {
      const stats = await fs.promises.stat(filePath);
      hash.update(JSON.stringify({
        size: stats.size,
        mtime: stats.mtime.getTime(),
        ctime: stats.ctime.getTime(),
        mode: stats.mode
      }));
    }

    // Stream file and calculate hash.
    const stream = fs.createReadStream(filePath, { highWaterMark: chunkSize });
    let bytesProcessed = 0;

    // `await` (not just `return`) so a stream rejection is normalized by the
    // outer catch exactly like the synchronous failures above.
    return await new Promise((resolve, reject) => {
      stream.on('data', (chunk) => {
        hash.update(chunk);
        bytesProcessed += chunk.length;

        if (onProgress) {
          onProgress({
            bytesProcessed,
            totalBytes: result.size,
            percentage: (bytesProcessed / result.size) * 100
          });
        }
      });

      stream.on('end', () => {
        result.hash = hash.digest('hex');
        result.timeMs = Date.now() - startTime;
        resolve(result);
      });

      stream.on('error', (error) => {
        result.error = error.message;
        result.timeMs = Date.now() - startTime; // consistent with other error paths
        reject(error);
      });
    });
  } catch (error) {
    result.error = error.message;
    result.timeMs = Date.now() - startTime;
    throw error;
  }
}
|
|
135
|
+
|
|
136
|
+
/**
 * Calculate BLAKE3 hash for a file.
 * @param {string} filePath - Path to file
 * @param {Object} result - Result object to update (hash, timeMs, error)
 * @param {boolean} includeMetadata - Include file metadata in hash
 * @param {Function} onProgress - Progress callback function (not used on this path)
 * @param {number} startTime - Start time for timing
 * @returns {Promise<Object>} - The same result object, with hash and timeMs set
 * @throws {Error} If reading or hashing the file fails
 */
async function calculateBlake3Hash(filePath, result, includeMetadata, onProgress, startTime) {
  try {
    // BLAKE3 is computed in one shot here, so load the full file contents.
    const contents = await fs.promises.readFile(filePath);

    let input = contents;
    if (includeMetadata) {
      // Prefix the content with a metadata fingerprint before hashing.
      const stats = fs.statSync(filePath);
      const metadata = JSON.stringify({
        size: stats.size,
        mtime: stats.mtime.getTime(),
        ctime: stats.ctime.getTime(),
        mode: stats.mode
      });
      input = Buffer.concat([Buffer.from(metadata), contents]);
    }

    result.hash = await blake3(input);
    result.timeMs = Date.now() - startTime;
    return result;
  } catch (error) {
    result.error = error.message;
    result.timeMs = Date.now() - startTime;
    throw error;
  }
}
|
|
179
|
+
|
|
180
|
+
/**
 * Calculate hash for file content only (no metadata).
 * @param {string} filePath - Path to file
 * @param {string} algorithm - Hash algorithm
 * @returns {Promise<string>} - File content hash
 */
async function calculateContentHash(filePath, algorithm = HASH_ALGORITHMS.BLAKE3) {
  // Delegate to the full hasher with metadata disabled and unwrap the hash.
  const { hash } = await calculateFileHash(filePath, { algorithm, includeMetadata: false });
  return hash;
}
|
|
190
|
+
|
|
191
|
+
// Public API of the file hasher: the algorithm name constants and the two
// hashing entry points (full result object vs. content-hash string).
export {
  HASH_ALGORITHMS,
  calculateFileHash,
  calculateContentHash
};
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
import path from 'path';
|
|
2
|
+
import {
|
|
3
|
+
pathExists,
|
|
4
|
+
createDirectory,
|
|
5
|
+
moveFile,
|
|
6
|
+
getFileStats,
|
|
7
|
+
joinPaths
|
|
8
|
+
} from './fileSystemUtils.js';
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* Move operation result
|
|
12
|
+
* @typedef {Object} MoveResult
|
|
13
|
+
* @property {boolean} success - Whether operation was successful
|
|
14
|
+
* @property {string} sourcePath - Original source path
|
|
15
|
+
* @property {string} destinationPath - Final destination path
|
|
16
|
+
* @property {string} [error] - Error message if failed
|
|
17
|
+
* @property {number} [size] - File size in bytes
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
/**
 * Move a single file.
 *
 * Failures never throw: they are reported via the returned MoveResult
 * (success=false, error set) and the optional onError callback.
 *
 * @param {string} sourcePath - Source file path
 * @param {string} destinationPath - Destination file path
 * @param {Object} options - Move options (overwrite, dryRun, onProgress, onError)
 * @returns {Promise<MoveResult>} - Move result
 */
async function moveSingleFile(sourcePath, destinationPath, options = {}) {
  const {
    overwrite = false,
    dryRun = false,
    onProgress = null,
    onError = null
  } = options;

  const outcome = {
    success: false,
    sourcePath,
    destinationPath,
    error: null,
    size: 0
  };

  try {
    // A missing source is a hard failure, surfaced through the result object.
    const sourceExists = await pathExists(sourcePath);
    if (!sourceExists) {
      throw new Error(`Source file does not exist: ${sourcePath}`);
    }

    // Record the size up front so callers can report bytes moved.
    outcome.size = (await getFileStats(sourcePath)).size;

    // Refuse to clobber an existing destination unless overwrite was requested.
    // (Short-circuit: the destination is only stat'ed when overwrite is false.)
    if (!overwrite && await pathExists(destinationPath)) {
      throw new Error(`Destination file already exists: ${destinationPath}`);
    }

    if (dryRun) {
      console.log(`[DRY RUN] Would move: ${sourcePath} -> ${destinationPath}`);
      outcome.success = true;
      return outcome;
    }

    // Make sure the target directory exists before moving.
    await createDirectory(path.dirname(destinationPath));
    await moveFile(sourcePath, destinationPath, { overwrite });

    outcome.success = true;
    if (onProgress) {
      onProgress(outcome);
    }
  } catch (error) {
    outcome.error = error.message;
    outcome.success = false;
    if (onError) {
      onError(outcome);
    }
  }

  return outcome;
}
|
|
88
|
+
|
|
89
|
+
/**
 * Move multiple files, one at a time, in the order given.
 * @param {Array<{source: string, destination: string}>} filePairs - Array of source/destination pairs
 * @param {Object} options - Move options (forwarded to moveSingleFile)
 * @returns {Promise<MoveResult[]>} - One result per pair, in input order
 */
async function moveMultipleFiles(filePairs, options = {}) {
  const moveResults = [];

  // Sequential on purpose: each move completes before the next starts.
  for (let i = 0; i < filePairs.length; i += 1) {
    const { source, destination } = filePairs[i];
    moveResults.push(await moveSingleFile(source, destination, options));
  }

  return moveResults;
}
|
|
105
|
+
|
|
106
|
+
/**
 * Move duplicate files to a duplicates directory.
 *
 * The first file keeps its original name; later files are renamed
 * "<name>_duplicate_<index><ext>" to avoid collisions in the flat target dir.
 *
 * @param {Array<{path: string, size: number, hash?: string}>} duplicateFiles - Duplicate files info
 * @param {string} duplicatesDir - Directory for duplicates
 * @param {Object} options - Move options
 * @returns {Promise<MoveResult[]>} - Array of move results
 */
async function moveDuplicateFiles(duplicateFiles, duplicatesDir, options = {}) {
  const pairs = duplicateFiles.map((file, index) => {
    const baseName = path.basename(file.path);
    const { name, ext } = path.parse(baseName);
    const targetName = index > 0 ? `${name}_duplicate_${index}${ext}` : baseName;
    return {
      source: file.path,
      destination: joinPaths(duplicatesDir, targetName)
    };
  });

  return await moveMultipleFiles(pairs, options);
}
|
|
136
|
+
|
|
137
|
+
/**
 * Move duplicate files to a duplicates directory preserving relative path structure.
 *
 * Each file lands under duplicatesDir at the same relative location it had
 * under sourceRoot. The first file keeps its name; later files are renamed
 * "<name>_duplicate_<index><ext>".
 *
 * @param {Array<{path: string, size: number, hash?: string}>} duplicateFiles - Duplicate files info
 * @param {string} duplicatesDir - Directory for duplicates
 * @param {string} sourceRoot - Root directory to calculate relative paths from
 * @param {Object} options - Move options
 * @returns {Promise<MoveResult[]>} - Array of move results
 */
async function moveDuplicateFilesWithPathPreservation(duplicateFiles, duplicatesDir, sourceRoot, options = {}) {
  const pairs = duplicateFiles.map((file, index) => {
    const baseName = path.basename(file.path);
    const { name, ext } = path.parse(baseName);

    // Mirror the file's directory layout (relative to sourceRoot) under duplicatesDir.
    const relativeDir = path.dirname(path.relative(sourceRoot, file.path));
    const targetDir = joinPaths(duplicatesDir, relativeDir);

    const targetName = index > 0 ? `${name}_duplicate_${index}${ext}` : baseName;
    return {
      source: file.path,
      destination: joinPaths(targetDir, targetName)
    };
  });

  return await moveMultipleFiles(pairs, options);
}
|
|
176
|
+
|
|
177
|
+
// Public API of the file mover: flat and path-preserving duplicate moves.
export {
  moveDuplicateFiles,
  moveDuplicateFilesWithPathPreservation
};
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import { promises as fs } from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import recursive from 'recursive-readdir';
|
|
4
|
+
import {
|
|
5
|
+
pathExists,
|
|
6
|
+
isDirectory,
|
|
7
|
+
getFileExtension,
|
|
8
|
+
normalizePath
|
|
9
|
+
} from './fileSystemUtils.js';
|
|
10
|
+
|
|
11
|
+
/**
 * Recursively scan a directory and return file-info records for its files.
 *
 * Throws when dirPath is missing or not a directory; read errors during the
 * walk itself are downgraded to a console warning and an empty result.
 *
 * @param {string} dirPath - Directory to scan
 * @param {Object} options - Scan options (e.g. extensions filter)
 * @returns {Promise<Object[]>} - File info objects that pass the filter
 */
async function scanDirectory(dirPath, options = {}) {
  const exists = await pathExists(dirPath);
  if (!exists) {
    throw new Error(`Directory does not exist: ${dirPath}`);
  }

  const dir = await isDirectory(dirPath);
  if (!dir) {
    throw new Error(`Path is not a directory: ${dirPath}`);
  }

  try {
    // Adapt recursive-readdir's callback API to a promise.
    const discovered = await new Promise((resolve, reject) => {
      recursive(dirPath, (err, found) => (err ? reject(err) : resolve(found)));
    });

    // Batch stat() operations so metadata lookups run in parallel.
    return await getBatchFileInfo(discovered, options);
  } catch (error) {
    console.warn(`Warning: Error reading directory ${dirPath}: ${error.message}`);
    return [];
  }
}
|
|
37
|
+
|
|
38
|
+
/**
 * Build file-info records for a list of paths with batched stat() calls.
 *
 * All stat() operations are issued in parallel; per-file failures are logged
 * as warnings and that file is skipped rather than failing the whole batch.
 *
 * @param {string[]} filePaths - Paths to describe
 * @param {Object} options - Filter options passed to shouldIncludeFile
 * @returns {Promise<Object[]>} - Records that pass the include filter
 */
async function getBatchFileInfo(filePaths, options = {}) {
  // Fire every stat() at once; capture errors per file instead of rejecting.
  const statResults = await Promise.all(
    filePaths.map(async (rawPath) => {
      try {
        const normalized = normalizePath(rawPath);
        return { filePath: normalized, stats: await fs.stat(normalized), error: null };
      } catch (error) {
        return { filePath: rawPath, stats: null, error };
      }
    })
  );

  const included = [];
  for (const entry of statResults) {
    if (entry.error) {
      console.warn(`Warning: Could not process file ${entry.filePath}: ${entry.error.message}`);
      continue;
    }

    const info = {
      path: path.resolve(entry.filePath),
      name: path.basename(entry.filePath),
      extension: getFileExtension(entry.filePath),
      size: entry.stats.size,
      hash: null, // Will be calculated separately if needed
      active: true, // File is active by default
    };

    if (shouldIncludeFile(info, options)) {
      included.push(info);
    }
  }

  return included;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Build a file-info record for a single path.
 *
 * NOTE(review): the options parameter is currently unused; kept for interface
 * compatibility with callers.
 *
 * @param {string} filePath - Path to describe
 * @param {Object} options - Unused
 * @returns {Promise<Object>} - File info record
 * @throws If the path cannot be stat()ed
 */
async function getFileInfo(filePath, options = {}) {
  const normalized = normalizePath(filePath);
  const stats = await fs.stat(normalized);

  return {
    path: path.resolve(normalized),
    name: path.basename(normalized),
    extension: getFileExtension(normalized),
    size: stats.size,
    hash: null, // Will be calculated separately if needed
    active: true, // File is active by default
  };
}
|
|
94
|
+
|
|
95
|
+
/**
 * Decide whether a file-info record passes the scan filter.
 *
 * With no extensions filter (absent, null, or empty array) every file is
 * included; otherwise the file's extension must match exactly.
 *
 * @param {Object} fileInfo - Record with an `extension` property
 * @param {Object} options - May carry an `extensions` allow-list
 * @returns {boolean} - True when the file should be kept
 */
function shouldIncludeFile(fileInfo, options) {
  const { extensions = null } = options;

  const filterActive = Boolean(extensions) && extensions.length > 0;
  return !filterActive || extensions.includes(fileInfo.extension);
}
|
|
108
|
+
|
|
109
|
+
/**
 * Scan every target directory and merge the results.
 *
 * Directories that fail to scan are logged as warnings and skipped; the
 * remaining targets still contribute their files.
 *
 * @param {Object} options - Carries `targets` (directory paths) plus filter options
 * @returns {Promise<Object[]>} - Combined file info records
 */
async function scanDirectories(options = {}) {
  const { targets = [] } = options;
  const collected = [];

  for (const target of targets) {
    try {
      collected.push(...await scanDirectory(target, options));
    } catch (error) {
      console.warn(`Warning: Failed to scan directory ${target}: ${error.message}`);
    }
  }

  return collected;
}
|
|
124
|
+
|
|
125
|
+
// Public API of the file scanner: single- and multi-directory scans.
export {
  scanDirectory,
  scanDirectories
};
|