@tb.p/dd 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/controllers/getExtensionsController.js +27 -0
- package/controllers/newController.js +72 -0
- package/controllers/resumeController.js +233 -0
- package/database/README.md +262 -0
- package/database/dbConnection.js +314 -0
- package/database/dbOperations.js +332 -0
- package/database/dbUtils.js +125 -0
- package/database/dbValidator.js +325 -0
- package/database/index.js +102 -0
- package/index.js +75 -0
- package/package.json +32 -0
- package/processors/optionETL.js +82 -0
- package/utils/README.md +261 -0
- package/utils/candidateDetection.js +541 -0
- package/utils/duplicateMover.js +140 -0
- package/utils/duplicateReporter.js +91 -0
- package/utils/fileHasher.js +195 -0
- package/utils/fileMover.js +180 -0
- package/utils/fileScanner.js +128 -0
- package/utils/fileSystemUtils.js +192 -0
- package/utils/index.js +5 -0
- package/validators/optionValidator.js +103 -0
package/utils/candidateDetection.js
@@ -0,0 +1,541 @@
+import { calculateContentHash } from './fileHasher.js';
+import { pathExists, getFileSize } from './fileSystemUtils.js';
+import { createConnection } from '../database/dbConnection.js';
+import { createOperations } from '../database/dbOperations.js';
+
+/**
+ * Candidate Detection for @tb.p/dd
+ * Groups files by size and hashes candidates for duplicate detection
+ */
+class CandidateDetection {
+  constructor(dbOperations, options = {}) {
+    this.db = dbOperations;
+    this.options = {
+      hashAlgorithm: 'blake3',
+      batchSize: 100,
+      maxConcurrency: 5,
+      verbose: false,
+      onProgress: null,
+      ...options
+    };
+    this.stats = {
+      totalFiles: 0,
+      uniqueFiles: 0,
+      candidatesFound: 0,
+      hashedFiles: 0,
+      skippedFiles: 0,
+      errors: 0,
+      startTime: null,
+      endTime: null
+    };
+  }
+
+  /**
+   * Detect candidates and process them
+   */
+  async detectAndProcessCandidates(options = {}) {
+    const config = { ...this.options, ...options };
+
+    if (config.verbose) {
+      console.log('🔍 Starting candidate detection and processing...');
+    }
+
+    this.stats.startTime = Date.now();
+
+    try {
+      // Find candidates
+      const detectionResult = await this.findCandidates();
+
+      if (!detectionResult.success) {
+        return {
+          success: false,
+          error: `Candidate detection failed: ${detectionResult.error}`,
+          stats: this.stats
+        };
+      }
+
+      if (config.verbose) {
+        console.log(`📋 Found ${detectionResult.totalCandidates} candidates in ${detectionResult.candidateGroups} size groups`);
+      }
+
+      // Process candidates if any found
+      if (detectionResult.totalCandidates > 0) {
+        const processResult = await this.processCandidates();
+
+        if (!processResult.success) {
+          return {
+            success: false,
+            error: `Candidate processing failed: ${processResult.error}`,
+            stats: this.stats
+          };
+        }
+
+        if (config.verbose) {
+          console.log(`✅ Processed ${processResult.hashedFiles} files successfully`);
+          if (processResult.skippedFiles > 0) {
+            console.log(`⏭️ ${processResult.skippedFiles} files already had hashes, skipped`);
+          }
+          if (processResult.errors > 0) {
+            console.log(`⚠️ ${processResult.errors} files had errors during processing`);
+          }
+        }
+
+        return {
+          success: true,
+          detection: detectionResult,
+          processing: processResult,
+          stats: this.stats
+        };
+      } else {
+        if (config.verbose) {
+          console.log('ℹ️ No candidates found for processing');
+        }
+
+        return {
+          success: true,
+          detection: detectionResult,
+          processing: { hashedFiles: 0, errors: 0 },
+          stats: this.stats
+        };
+      }
+
+    } catch (error) {
+      return {
+        success: false,
+        error: error.message,
+        stats: this.stats
+      };
+    }
+  }
+
+  /**
+   * Find all files with matching sizes (candidates for hashing)
+   */
+  async findCandidates() {
+    this.stats.totalFiles = 0;
+    this.stats.uniqueFiles = 0;
+    this.stats.candidatesFound = 0;
+
+    try {
+      // First check if there are any unhashed active files at all
+      const totalUnhashed = await this.db.db.queryOne('SELECT COUNT(*) as total FROM copies WHERE file_hash IS NULL AND active = 1');
+
+      if (totalUnhashed.total === 0) {
+        console.log('ℹ️ No unhashed active files found - all active files already processed');
+        this.stats.endTime = Date.now();
+        return {
+          success: true,
+          stats: this.stats,
+          candidateGroups: 0,
+          totalCandidates: 0,
+          message: 'No unhashed active files found'
+        };
+      }
+
+      // Get all files grouped by size
+      const sizeGroups = await this.getFilesBySize();
+
+      // Filter groups that have more than one file (candidates)
+      const candidateGroups = sizeGroups.filter(group => group.count > 1);
+
+      this.stats.candidatesFound = candidateGroups.reduce((total, group) => total + group.count, 0);
+      this.stats.totalFiles = sizeGroups.reduce((total, group) => total + group.count, 0);
+      this.stats.uniqueFiles = sizeGroups.length;
+
+      if (this.options.onProgress) {
+        this.options.onProgress({
+          phase: 'candidate_detection',
+          candidatesFound: this.stats.candidatesFound,
+          totalFiles: this.stats.totalFiles,
+          candidateGroups: candidateGroups.length
+        });
+      }
+
+      return {
+        success: true,
+        stats: this.stats,
+        candidateGroups: candidateGroups.length,
+        totalCandidates: this.stats.candidatesFound
+      };
+
+    } catch (error) {
+      this.stats.endTime = Date.now();
+      return {
+        success: false,
+        error: error.message,
+        stats: this.stats
+      };
+    }
+  }
+
+  /**
+   * Process candidates by calculating hashes and updating database
+   */
+  async processCandidates() {
+    try {
+      // First check if there are any unhashed files at all
+      const totalUnhashed = await this.db.db.queryOne('SELECT COUNT(*) as total FROM copies WHERE file_hash IS NULL AND active = 1');
+
+      if (totalUnhashed.total === 0) {
+        console.log('ℹ️ No unhashed active files found - all active files already processed');
+        this.stats.endTime = Date.now();
+        return {
+          success: true,
+          message: 'No unhashed active files found - all active files already processed',
+          stats: this.stats,
+          hashedFiles: 0,
+          errors: 0
+        };
+      }
+
+      // Get all files that need hashing (candidates with matching sizes)
+      const candidates = await this.getCandidatesForHashing();
+
+      if (candidates.length === 0) {
+        return {
+          success: true,
+          message: 'No candidates found for hashing',
+          stats: this.stats,
+          hashedFiles: 0,
+          errors: 0
+        };
+      }
+
+      // Process candidates in batches
+      const results = await this.processCandidatesInBatches(candidates);
+
+      this.stats.endTime = Date.now();
+      this.stats.hashedFiles = results.hashedFiles;
+      this.stats.skippedFiles = results.skippedFiles;
+      this.stats.errors = results.errors;
+
+      return {
+        success: true,
+        stats: this.stats,
+        hashedFiles: results.hashedFiles,
+        skippedFiles: results.skippedFiles,
+        errors: results.errors,
+        processingTime: this.stats.endTime - this.stats.startTime
+      };
+
+    } catch (error) {
+      this.stats.endTime = Date.now();
+      return {
+        success: false,
+        error: error.message,
+        stats: this.stats
+      };
+    }
+  }
+
+  /**
+   * Get files grouped by size from database
+   */
+  async getFilesBySize() {
+    const sql = `
+      SELECT
+        file_size,
+        COUNT(id) as count,
+        GROUP_CONCAT(id, ',') as file_ids
+      FROM copies
+      WHERE file_hash IS NULL AND active = 1
+      GROUP BY file_size
+      ORDER BY file_size DESC
+    `;
+
+    const results = await this.db.db.query(sql);
+
+    return results.map(row => {
+      const count = parseInt(row.count) || 0;
+
+      // Debug logging for negative counts
+      if (count < 0) {
+        console.error('Negative count detected in getFilesBySize:');
+        console.error(' Row data:', row);
+        console.error(' Parsed count:', count);
+        console.error(' Raw count value:', row.count);
+        console.error(' Type of count:', typeof row.count);
+      }
+
+      return {
+        size: row.file_size,
+        count: Math.max(0, count), // Ensure count is never negative
+        fileIds: row.file_ids ? row.file_ids.split(',').map(id => parseInt(id)) : []
+      };
+    });
+  }
+
+  /**
+   * Get candidate files that need hashing (files with matching sizes)
+   */
+  async getCandidatesForHashing() {
+    const sql = `
+      SELECT c1.*
+      FROM copies c1
+      WHERE c1.file_hash IS NULL
+        AND EXISTS (
+          SELECT 1
+          FROM copies c2
+          WHERE c2.file_size = c1.file_size
+            AND c2.id != c1.id
+            AND c2.file_hash IS NULL
+        )
+      ORDER BY c1.file_size DESC, c1.file_path
+    `;
+
+    return await this.db.db.query(sql);
+  }
+
+  /**
+   * Process candidates in batches with controlled concurrency
+   */
+  async processCandidatesInBatches(candidates) {
+    const results = {
+      hashedFiles: 0,
+      skippedFiles: 0,
+      errors: 0
+    };
+
+    // Process in batches
+    for (let i = 0; i < candidates.length; i += this.options.batchSize) {
+      const batch = candidates.slice(i, i + this.options.batchSize);
+
+      // Process batch with controlled concurrency
+      const batchResults = await this.processBatch(batch);
+
+      results.hashedFiles += batchResults.hashedFiles;
+      results.skippedFiles += batchResults.skippedFiles;
+      results.errors += batchResults.errors;
+
+      // Report progress
+      if (this.options.onProgress) {
+        this.options.onProgress({
+          phase: 'hashing',
+          processed: i + batch.length,
+          total: candidates.length,
+          hashedFiles: results.hashedFiles,
+          errors: results.errors,
+          percentage: ((i + batch.length) / candidates.length) * 100
+        });
+      }
+    }
+
+    return results;
+  }
+
+  /**
+   * Process a batch of candidates with controlled concurrency
+   */
+  async processBatch(batch) {
+    const results = {
+      hashedFiles: 0,
+      skippedFiles: 0,
+      errors: 0
+    };
+
+    // Process with controlled concurrency
+    const chunks = [];
+    for (let i = 0; i < batch.length; i += this.options.maxConcurrency) {
+      chunks.push(batch.slice(i, i + this.options.maxConcurrency));
+    }
+
+    for (const chunk of chunks) {
+      const chunkPromises = chunk.map(candidate => this.processCandidate(candidate));
+      const chunkResults = await Promise.all(chunkPromises);
+
+      chunkResults.forEach(result => {
+        if (result.success) {
+          if (result.skipped) {
+            results.skippedFiles++;
+          } else {
+            results.hashedFiles++;
+          }
+        } else {
+          results.errors++;
+        }
+      });
+    }
+
+    return results;
+  }
+
+  /**
+   * Process a single candidate file
+   */
+  async processCandidate(candidate) {
+    try {
+      // Report current file being hashed
+      if (this.options.onProgress) {
+        this.options.onProgress({
+          phase: 'hashing',
+          currentFile: candidate.file_path
+        });
+      }
+
+      // Skip if file already has a hash
+      if (candidate.file_hash !== null && candidate.file_hash !== undefined) {
+        return {
+          success: true,
+          candidateId: candidate.id,
+          filePath: candidate.file_path,
+          message: 'File already hashed, skipping',
+          skipped: true
+        };
+      }
+
+      // Verify file still exists
+      if (!(await pathExists(candidate.file_path))) {
+        return {
+          success: false,
+          error: `File not found: ${candidate.file_path}`,
+          candidateId: candidate.id
+        };
+      }
+
+      // Verify file size hasn't changed
+      const currentSize = await getFileSize(candidate.file_path);
+      if (currentSize !== candidate.file_size) {
+        return {
+          success: false,
+          error: `File size changed: ${candidate.file_path} (expected: ${candidate.file_size}, actual: ${currentSize})`,
+          candidateId: candidate.id
+        };
+      }
+
+      // Calculate hash
+      const hash = await calculateContentHash(candidate.file_path, this.options.hashAlgorithm);
+
+      // Update database
+      const updateResult = await this.db.updateFileHash(candidate.id, hash);
+
+      if (!updateResult.success) {
+        return {
+          success: false,
+          error: `Database update failed: ${updateResult.error}`,
+          candidateId: candidate.id
+        };
+      }
+
+      return {
+        success: true,
+        candidateId: candidate.id,
+        filePath: candidate.file_path,
+        hash: hash,
+        size: candidate.file_size
+      };
+
+    } catch (error) {
+      return {
+        success: false,
+        error: error.message,
+        candidateId: candidate.id
+      };
+    }
+  }
+
+  /**
+   * Get detailed candidate information
+   */
+  async getCandidateDetails() {
+    try {
+      const sizeGroups = await this.getFilesBySize();
+      const candidateGroups = sizeGroups.filter(group => group.count > 1);
+
+      const details = {
+        totalSizeGroups: sizeGroups.length,
+        candidateGroups: candidateGroups.length,
+        totalCandidates: candidateGroups.reduce((total, group) => total + group.count, 0),
+        sizeDistribution: candidateGroups.map(group => ({
+          size: group.size,
+          count: group.count,
+          sizeFormatted: this.formatFileSize(group.size)
+        })).sort((a, b) => b.size - a.size)
+      };
+
+      return {
+        success: true,
+        details
+      };
+
+    } catch (error) {
+      return {
+        success: false,
+        error: error.message
+      };
+    }
+  }
+
+  /**
+   * Format file size in human readable format
+   */
+  formatFileSize(bytes) {
+    const units = ['B', 'KB', 'MB', 'GB', 'TB'];
+    let size = bytes;
+    let unitIndex = 0;
+
+    while (size >= 1024 && unitIndex < units.length - 1) {
+      size /= 1024;
+      unitIndex++;
+    }
+
+    return `${size.toFixed(2)} ${units[unitIndex]}`;
+  }
+}
+
+/**
+ * Run candidate detection with database connection
+ */
+export async function runCandidateDetection(options) {
+  const databasePath = options.saveDb || options.resumeDb || options.database;
+
+  if (!databasePath) {
+    console.log('ℹ️ No database path available for candidate detection');
+    return;
+  }
+
+  let db;
+  try {
+    console.log('\n🔍 Starting candidate detection...');
+
+    db = createConnection(databasePath);
+    await db.connect();
+
+    const dbOps = createOperations(db);
+
+    const detector = new CandidateDetection(dbOps, {
+      hashAlgorithm: options.hashAlgorithm || 'blake3',
+      batchSize: parseInt(options.batchSize) || 100,
+      maxConcurrency: parseInt(options.maxConcurrency) || 5,
+      verbose: options.verbose || false
+    });
+
+    const result = await detector.detectAndProcessCandidates();
+
+    if (result.success) {
+      console.log('\n✅ Candidate detection completed successfully!');
+      console.log(`📊 Statistics:`);
+      console.log(` - Total files: ${result.stats.totalFiles}`);
+      console.log(` - Unique files: ${result.stats.uniqueFiles}`);
+      console.log(` - Candidates found: ${result.stats.candidatesFound}`);
+      console.log(` - Files hashed: ${result.stats.hashedFiles}`);
+      console.log(` - Files skipped: ${result.stats.skippedFiles}`);
+      console.log(` - Errors: ${result.stats.errors}`);
+      if (result.processing && result.processing.processingTime) {
+        console.log(` - Processing time: ${(result.processing.processingTime / 1000).toFixed(2)}s`);
+      }
+    } else {
+      console.error(`❌ Candidate detection failed: ${result.error}`);
+      console.log('Continuing without candidate detection...');
+    }
+
+    await db.close();
+  } catch (error) {
+    console.error(`❌ Candidate detection error: ${error.message}`);
+    console.log('Continuing without candidate detection...');
+    if (db) {
+      await db.close();
+    }
+  }
+}
+
+export { CandidateDetection };
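
Read on its own, candidateDetection.js exposes two entry points: the CandidateDetection class, for callers that already hold a database connection, and runCandidateDetection(options), which opens and closes its own. A minimal driver sketch follows; the database path, the script location, and the progress handler are illustrative, not part of the published package, while the option names and callback fields match what the code above actually reads:

    import { createConnection } from './database/dbConnection.js';
    import { createOperations } from './database/dbOperations.js';
    import { CandidateDetection, runCandidateDetection } from './utils/candidateDetection.js';

    // One-shot helper: opens the database itself, detects and hashes candidates, then closes it.
    await runCandidateDetection({
      saveDb: './scan-results.db', // illustrative path to a previously built scan database
      batchSize: 50,               // candidates hashed per batch
      maxConcurrency: 4,           // files hashed in parallel within a batch
      verbose: true                // per-phase console output
    });

    // Lower-level use with an existing connection and a progress callback.
    const db = createConnection('./scan-results.db');
    await db.connect();
    const detector = new CandidateDetection(createOperations(db), {
      onProgress: (p) => {
        // `percentage` is only supplied by the per-batch progress calls,
        // not by the per-file `currentFile` notifications.
        if (p.phase === 'hashing' && p.percentage !== undefined) {
          console.log(`hashing: ${p.percentage.toFixed(1)}%`);
        }
      }
    });
    const result = await detector.detectAndProcessCandidates();
    console.log(result.success ? result.stats : result.error);
    await db.close();
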
package/utils/duplicateMover.js
@@ -0,0 +1,140 @@
+import path from 'path';
+import { createDatabaseManager } from '../database/index.js';
+import { moveDuplicateFiles, moveDuplicateFilesWithPathPreservation } from './fileMover.js';
+import { createDirectory } from './fileSystemUtils.js';
+
+/**
+ * Move duplicated files to a duplicates directory
+ * @param {Object} options - Processed options containing database path and preservePaths setting
+ * @returns {Promise<void>}
+ */
+async function moveDuplicates(options) {
+  const preservePaths = options.preservePaths !== false;
+  // Determine database path
+  const dbPath = options.saveDb || options.resumeDb;
+  if (!dbPath) {
+    console.error('Error: No database path available for move operation');
+    return;
+  }
+
+  console.log('Starting duplicate file move operation...');
+  console.log(`Path preservation: ${preservePaths ? 'enabled' : 'disabled'}`);
+
+  let dbManager;
+  try {
+    // Initialize database connection
+    dbManager = createDatabaseManager(dbPath);
+    await dbManager.initialize();
+
+    // Get duplicate groups from database
+    const duplicateGroups = await dbManager.getOperations().getDuplicateGroups();
+
+    if (duplicateGroups.length === 0) {
+      console.log('No duplicate files found to move.');
+      return;
+    }
+
+    console.log(`Found ${duplicateGroups.length} duplicate groups`);
+
+    // Create duplicates directories in each target
+    const duplicatesDirs = {};
+    for (const target of options.targets) {
+      const duplicatesDir = path.join(target, '!@duplicates');
+      await createDirectory(duplicatesDir);
+      duplicatesDirs[target] = duplicatesDir;
+    }
+
+    let totalMoved = 0;
+    let totalErrors = 0;
+
+    // Process each duplicate group
+    for (const group of duplicateGroups) {
+      const filePaths = group.file_paths.split(';');
+      const duplicateFiles = filePaths.map(filePath => ({
+        path: filePath,
+        size: group.file_size,
+        hash: group.file_hash
+      }));
+
+      // Sort by priority (keep the first file, move the rest)
+      // The first file in the group should be kept, others moved
+      const filesToMove = duplicateFiles.slice(1); // Skip the first file (keep it)
+
+      if (filesToMove.length > 0) {
+        // Group files by their target directory
+        const filesByTarget = {};
+        for (const file of filesToMove) {
+          const fileDir = path.dirname(file.path);
+          // Find which target directory this file belongs to
+          let targetDir = null;
+          for (const target of options.targets) {
+            // Resolve both paths to absolute for proper comparison
+            const absoluteTarget = path.resolve(target);
+            if (path.resolve(fileDir).startsWith(absoluteTarget)) {
+              targetDir = target;
+              break;
+            }
+          }
+
+          if (!targetDir) {
+            // If file doesn't belong to any target, use the first target
+            targetDir = options.targets[0];
+          }
+
+          if (!filesByTarget[targetDir]) {
+            filesByTarget[targetDir] = [];
+          }
+          filesByTarget[targetDir].push(file);
+        }
+
+        // Move files to their respective target duplicates directories
+        for (const [targetDir, files] of Object.entries(filesByTarget)) {
+          try {
+            const moveOptions = {
+              dryRun: false,
+              overwrite: false,
+              onProgress: (progress) => {
+                if (options.verbose) {
+                  console.log(`Moving: ${progress.source} -> ${progress.destination}`);
+                }
+              },
+              onError: (error, result) => {
+                console.error(`Error moving file ${result.sourcePath}: ${error.message}`);
+                totalErrors++;
+              }
+            };
+
+            const moveResults = preservePaths
+              ? await moveDuplicateFilesWithPathPreservation(files, duplicatesDirs[targetDir], targetDir, moveOptions)
+              : await moveDuplicateFiles(files, duplicatesDirs[targetDir], moveOptions);
+
+            const successfulMoves = moveResults.filter(result => result.success);
+            totalMoved += successfulMoves.length;
+            totalErrors += moveResults.length - successfulMoves.length;
+
+          } catch (error) {
+            console.error(`Error processing duplicate group for target ${targetDir}: ${error.message}`);
+            totalErrors += files.length;
+          }
+        }
+      }
+    }
+
+    console.log(`Move operation completed:`);
+    console.log(` - Files moved: ${totalMoved}`);
+    console.log(` - Errors: ${totalErrors}`);
+    console.log(` - Duplicates directories created:`);
+    for (const [target, duplicatesDir] of Object.entries(duplicatesDirs)) {
+      console.log(` - ${target}/!@duplicates`);
+    }
+
+  } catch (error) {
+    console.error(`Error during move operation: ${error.message}`);
+  } finally {
+    if (dbManager) {
+      await dbManager.close();
+    }
+  }
+}
+
+export { moveDuplicates };
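
moveDuplicates is driven the same way. The only options it reads are the database path (saveDb or resumeDb), targets, preservePaths, and verbose; the keeper policy is fixed by the code above, with the first file in each group staying in place and the rest moved into a !@duplicates directory under their target. A minimal sketch with illustrative paths (the import location assumes the same package layout as above):

    import { moveDuplicates } from './utils/duplicateMover.js';

    await moveDuplicates({
      resumeDb: './scan-results.db',        // illustrative path to the database holding duplicate groups
      targets: ['/data/photos', '/backup'], // scan roots; each gets its own !@duplicates directory
      preservePaths: true,                  // the default; route moves through the path-preserving mover
      verbose: false                        // per-file move logging off
    });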