@tb.p/dd 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,125 @@
1
+ import fs from 'fs';
2
+ import path from 'path';
3
+ import { createConnection } from './dbConnection.js';
4
+ import { createValidator } from './dbValidator.js';
5
+ import { createOperations } from './dbOperations.js';
6
+
7
/**
 * Convenience wrapper that owns one database connection together with the
 * validator and operations helpers built on top of it.
 */
class DatabaseUtils {
  /**
   * @param {string} dbPath - Path to database file
   */
  constructor(dbPath) {
    this.dbPath = dbPath;
    this.connection = null;
    this.validator = null;
    this.operations = null;
  }

  /**
   * Open the connection and build the validator/operations helpers on it.
   * @returns {Promise<void>}
   */
  async initialize() {
    this.connection = createConnection(this.dbPath);
    await this.connection.connect();

    this.validator = createValidator(this.connection);
    this.operations = createOperations(this.connection);
  }

  /**
   * Close the connection (if one is held) and drop all helper references.
   * @returns {Promise<void>}
   */
  async close() {
    if (!this.connection) {
      return;
    }

    await this.connection.close();
    Object.assign(this, { connection: null, validator: null, operations: null });
  }

  /**
   * Get database operations instance
   * @returns {DatabaseOperations}
   * @throws {Error} When initialize() has not been called yet
   */
  getOperations() {
    if (this.operations) {
      return this.operations;
    }
    throw new Error('Database not initialized. Call initialize() first.');
  }

  /**
   * Initialize a new database with configuration from processed options.
   *
   * Never rejects: any failure is reported as `{ success: false, error }`.
   *
   * @param {Object} processedOptions - Processed CLI options
   * @returns {Promise<Object>} Initialization result
   */
  async initializeNewDatabase(processedOptions) {
    // Every setting is written to the meta table as text.
    const asText = (flag) => (flag ? 'true' : 'false');

    try {
      await this.initialize();

      let extensionList = '';
      const targetList = processedOptions.targets.join('|');
      if (processedOptions.extensions) {
        extensionList = processedOptions.extensions.join(',');
      }

      // Key order matters only in that it is the order rows are written.
      const config = {
        targets: targetList,
        extensions: extensionList,
        min_size: processedOptions.minSize || '0',
        max_size: processedOptions.maxSize || '0',
        keep_strategy: processedOptions.keepStrategy || 'priority',
        hash_algorithm: processedOptions.hashAlgorithm || 'blake3',
        // These switches count as enabled unless explicitly set to false.
        recursive: asText(processedOptions.recursive !== false),
        exclude_system: asText(processedOptions.excludeSystem !== false),
        exclude_hidden: asText(processedOptions.excludeHidden !== false),
        batch_size: processedOptions.batchSize || '100',
        max_concurrency: processedOptions.maxConcurrency || '5',
        move: asText(processedOptions.move),
        preserve_paths: asText(processedOptions.preservePaths !== false),
        created_at: new Date().toISOString(),
        version: '1.0.0'
      };

      // Persist the configuration, one meta row per setting.
      for (const settingName of Object.keys(config)) {
        await this.operations.setMeta(settingName, config[settingName]);
      }

      // Check the database before reporting success.
      const validation = await this.validator.validateAll();
      if (validation.valid) {
        const dbInfo = await this.connection.getInfo();
        return {
          success: true,
          message: 'Database initialized successfully',
          config,
          dbInfo,
          validation
        };
      }

      return {
        success: false,
        error: 'Database validation failed',
        validation
      };
    } catch (failure) {
      return {
        success: false,
        error: failure.message
      };
    }
  }
}
115
+
116
/**
 * Factory for DatabaseUtils.
 * @param {string} dbPath - Path to database file
 * @returns {DatabaseUtils} A new, not yet initialized, utils instance
 */
function createUtils(dbPath) {
  const utils = new DatabaseUtils(dbPath);
  return utils;
}
124
+
125
+ export { DatabaseUtils, createUtils };
@@ -0,0 +1,325 @@
1
+ import fs from 'fs';
2
+
3
/**
 * Runs file-level, schema, data and file-system checks against the
 * deduplication database.
 *
 * Every validate* method returns (or fills in) a result object shaped as
 * `{ valid: boolean, errors: string[], warnings: string[] }`. Problems that
 * make the database unusable go to `errors` and flip `valid` to false;
 * everything else is reported as a warning.
 */
class DatabaseValidator {
  /**
   * @param {Object} dbConnection - Connection wrapper. This class calls its
   *   `exists()`, `getFileSize()` and `query(sql)` methods and reads `dbPath`.
   */
  constructor(dbConnection) {
    this.db = dbConnection;
  }

  /**
   * Check that the database file exists and is readable.
   * A read-only or empty file is only reported as a warning.
   * @returns {{valid: boolean, errors: string[], warnings: string[]}}
   */
  validateDatabaseFile() {
    const result = {
      valid: true,
      errors: [],
      warnings: []
    };

    if (!this.db.exists()) {
      result.valid = false;
      result.errors.push('Database file does not exist');
      return result;
    }

    try {
      fs.accessSync(this.db.dbPath, fs.constants.R_OK);
    } catch (error) {
      result.valid = false;
      result.errors.push('Database file is not readable');
      return result;
    }

    try {
      fs.accessSync(this.db.dbPath, fs.constants.W_OK);
    } catch (error) {
      // Not fatal: the database can still be inspected.
      result.warnings.push('Database file is not writable (read-only mode)');
    }

    const fileSize = this.db.getFileSize();
    if (fileSize === 0) {
      result.warnings.push('Database file is empty');
    }

    return result;
  }

  /**
   * Check that the `meta` and `copies` tables exist with the expected
   * columns, and warn about any missing `idx_copies_*` index.
   * @returns {Promise<{valid: boolean, errors: string[], warnings: string[]}>}
   */
  async validateSchema() {
    const result = {
      valid: true,
      errors: [],
      warnings: []
    };

    try {
      const tables = await this.db.query(`
        SELECT name FROM sqlite_master
        WHERE type='table' AND name IN ('meta', 'copies')
        ORDER BY name
      `);

      const tableNames = tables.map(t => t.name);
      const expectedTables = ['meta', 'copies'];

      for (const expectedTable of expectedTables) {
        if (!tableNames.includes(expectedTable)) {
          result.valid = false;
          result.errors.push(`Missing table: ${expectedTable}`);
        }
      }

      for (const table of tables) {
        const tableValidation = await this.validateTableStructure(table.name);
        if (!tableValidation.valid) {
          result.valid = false;
          result.errors.push(...tableValidation.errors);
        }
        result.warnings.push(...tableValidation.warnings);
      }

      const indexes = await this.db.query(`
        SELECT name FROM sqlite_master
        WHERE type='index' AND name LIKE 'idx_%'
      `);

      const expectedIndexes = [
        'idx_copies_file_hash',
        'idx_copies_file_path',
        'idx_copies_file_name',
        'idx_copies_dir_group',
        'idx_copies_extension'
      ];

      // A missing index is reported as a warning and does not invalidate the schema.
      for (const expectedIndex of expectedIndexes) {
        if (!indexes.some(idx => idx.name === expectedIndex)) {
          result.warnings.push(`Missing index: ${expectedIndex}`);
        }
      }

    } catch (error) {
      result.valid = false;
      result.errors.push(`Schema validation failed: ${error.message}`);
    }

    return result;
  }

  /**
   * Validate the columns of a single table.
   *
   * NOTE: `tableName` is interpolated into the PRAGMA statement as-is, so
   * callers must pass a trusted table name (validateSchema only passes names
   * it read back from sqlite_master for 'meta' and 'copies').
   *
   * @param {string} tableName - Name of the table to inspect
   * @returns {Promise<{valid: boolean, errors: string[], warnings: string[]}>}
   */
  async validateTableStructure(tableName) {
    const result = {
      valid: true,
      errors: [],
      warnings: []
    };

    try {
      const columns = await this.db.query(`PRAGMA table_info(${tableName})`);

      if (columns.length === 0) {
        result.valid = false;
        result.errors.push(`Table ${tableName} has no columns`);
        return result;
      }

      switch (tableName) {
        case 'meta':
          this.validateMetaTable(columns, result);
          break;
        case 'copies':
          this.validateCopiesTable(columns, result);
          break;
        default:
          result.warnings.push(`Unknown table: ${tableName}`);
      }

    } catch (error) {
      result.valid = false;
      result.errors.push(`Failed to validate table ${tableName}: ${error.message}`);
    }

    return result;
  }

  /**
   * Compare the `meta` table columns with the expected key/value layout.
   * @param {Object[]} columns - Rows returned by PRAGMA table_info
   * @param {Object} result - Result object to append errors/warnings to
   */
  validateMetaTable(columns, result) {
    const expectedColumns = [
      { name: 'key', type: 'TEXT', pk: true },
      { name: 'value', type: 'TEXT', pk: false }
    ];

    this.validateColumns(columns, expectedColumns, 'meta', result);
  }

  /**
   * Compare the `copies` table columns with the expected layout.
   * @param {Object[]} columns - Rows returned by PRAGMA table_info
   * @param {Object} result - Result object to append errors/warnings to
   */
  validateCopiesTable(columns, result) {
    const expectedColumns = [
      { name: 'id', type: 'INTEGER', pk: true },
      { name: 'dir_group', type: 'TEXT', pk: false },
      { name: 'file_path', type: 'TEXT', pk: false },
      { name: 'file_name', type: 'TEXT', pk: false },
      { name: 'file_extension', type: 'TEXT', pk: false },
      { name: 'file_size', type: 'INTEGER', pk: false },
      { name: 'file_hash', type: 'TEXT', pk: false },
      { name: 'active', type: 'BOOLEAN', pk: false },
      { name: 'priority', type: 'INTEGER', pk: false }
    ];

    this.validateColumns(columns, expectedColumns, 'copies', result);
  }

  /**
   * Check each expected column against the actual table definition.
   * A missing column is an error; a type or primary-key difference is a
   * warning. Extra columns in the table are ignored.
   *
   * @param {Object[]} actualColumns - Rows with `name`, `type` and `pk`
   * @param {Object[]} expectedColumns - `{ name, type, pk: boolean }` entries
   * @param {string} tableName - Table name used in the messages
   * @param {Object} result - Result object mutated in place
   */
  validateColumns(actualColumns, expectedColumns, tableName, result) {
    for (const expected of expectedColumns) {
      const actual = actualColumns.find(c => c.name === expected.name);

      if (!actual) {
        result.valid = false;
        result.errors.push(`Table ${tableName} missing column: ${expected.name}`);
        continue;
      }

      if (actual.type.toUpperCase() !== expected.type.toUpperCase()) {
        result.warnings.push(`Table ${tableName} column ${expected.name} has type ${actual.type}, expected ${expected.type}`);
      }

      // PRAGMA table_info reports `pk` as an integer (0 when the column is
      // not part of the primary key, otherwise its 1-based position in it),
      // while the expectation is a boolean. Coerce before comparing; a
      // strict comparison of 0/1 against false/true never matches and would
      // flag every column.
      if (Boolean(actual.pk) !== expected.pk) {
        result.warnings.push(`Table ${tableName} column ${expected.name} primary key mismatch`);
      }
    }
  }

  /**
   * Look for suspicious rows in `copies`: negative sizes (warning) and
   * empty or NULL paths/names (errors).
   * @returns {Promise<{valid: boolean, errors: string[], warnings: string[]}>}
   */
  async validateDataIntegrity() {
    const result = {
      valid: true,
      errors: [],
      warnings: []
    };

    try {
      const negativeSizes = await this.db.query(`
        SELECT id, file_path, file_size
        FROM copies
        WHERE file_size < 0
      `);

      if (negativeSizes.length > 0) {
        result.warnings.push(`Found ${negativeSizes.length} files with negative sizes`);
      }

      const emptyPaths = await this.db.query(`
        SELECT id, file_path
        FROM copies
        WHERE file_path = '' OR file_path IS NULL
      `);

      if (emptyPaths.length > 0) {
        result.valid = false;
        result.errors.push(`Found ${emptyPaths.length} files with empty paths`);
      }

      const emptyNames = await this.db.query(`
        SELECT id, file_name
        FROM copies
        WHERE file_name = '' OR file_name IS NULL
      `);

      if (emptyNames.length > 0) {
        result.valid = false;
        result.errors.push(`Found ${emptyNames.length} files with empty names`);
      }

    } catch (error) {
      result.valid = false;
      result.errors.push(`Data integrity validation failed: ${error.message}`);
    }

    return result;
  }

  /**
   * Check that every path recorded in `copies` still exists on disk.
   * Missing files produce warnings only; the result also carries a `stats`
   * object with total/accessible/missing counts.
   * @returns {Promise<Object>} Result object, with `stats` when the query succeeded
   */
  async validateFileSystemConsistency() {
    const result = {
      valid: true,
      errors: [],
      warnings: []
    };

    try {
      const allFiles = await this.db.query('SELECT id, file_path FROM copies');
      let missingFiles = 0;
      let accessibleFiles = 0;

      for (const file of allFiles) {
        try {
          if (fs.existsSync(file.file_path)) {
            accessibleFiles++;
          } else {
            missingFiles++;
            result.warnings.push(`File not found: ${file.file_path}`);
          }
        } catch (error) {
          result.warnings.push(`Error checking file ${file.file_path}: ${error.message}`);
        }
      }

      if (missingFiles > 0) {
        result.warnings.push(`Found ${missingFiles} missing files out of ${allFiles.length} total files`);
      }

      result.stats = {
        totalFiles: allFiles.length,
        accessibleFiles,
        missingFiles
      };

    } catch (error) {
      result.valid = false;
      result.errors.push(`File system validation failed: ${error.message}`);
    }

    return result;
  }

  /**
   * Run all checks (file, schema, integrity, file system) in that order and
   * merge their outcomes. The individual results are kept under `details`.
   * Later checks still run when an earlier one fails.
   * @returns {Promise<Object>} `{ valid, errors, warnings, details }`
   */
  async validateAll() {
    const result = {
      valid: true,
      errors: [],
      warnings: [],
      details: {}
    };

    // Fold one partial result into the aggregate.
    const merge = (section, partial) => {
      result.details[section] = partial;
      if (!partial.valid) {
        result.valid = false;
      }
      result.errors.push(...partial.errors);
      result.warnings.push(...partial.warnings);
    };

    merge('file', this.validateDatabaseFile());
    merge('schema', await this.validateSchema());
    merge('integrity', await this.validateDataIntegrity());
    merge('fileSystem', await this.validateFileSystemConsistency());

    return result;
  }
}
320
+
321
/**
 * Factory for DatabaseValidator.
 * @param {Object} dbConnection - Connection the validator will query
 * @returns {DatabaseValidator} Validator bound to the given connection
 */
function createValidator(dbConnection) {
  const validator = new DatabaseValidator(dbConnection);
  return validator;
}
324
+
325
+ export { DatabaseValidator, createValidator };
@@ -0,0 +1,102 @@
1
+ /**
2
+ * Database module for file deduplication tool
3
+ * Provides unified access to all database functionality
4
+ */
5
+
6
+ import { createConnection } from './dbConnection.js';
7
+ import { createValidator } from './dbValidator.js';
8
+ import { createOperations } from './dbOperations.js';
9
+ import { createUtils } from './dbUtils.js';
10
+
11
/**
 * Create a complete database manager instance.
 *
 * The returned object starts empty; call `initialize()` before using any of
 * the getters. Note that `initialize()` opens the manager's own connection
 * and additionally initializes a utils instance for the same path, which
 * opens its own connection as well.
 *
 * @param {string} dbPath - Path to database file
 * @returns {Object} Database manager with all components
 */
function createDatabaseManager(dbPath) {
  const NOT_INITIALIZED = 'Database not initialized. Call initialize() first.';

  return {
    // Core components
    connection: null,
    validator: null,
    operations: null,
    utils: null,

    /**
     * Initialize all components.
     * If any step fails, everything opened so far is closed again and the
     * manager is reset before the original error is rethrown, so a failed
     * initialize() never leaves an open connection behind.
     */
    async initialize() {
      try {
        this.connection = createConnection(dbPath);
        await this.connection.connect();
        this.validator = createValidator(this.connection);
        this.operations = createOperations(this.connection);
        this.utils = createUtils(dbPath);
        await this.utils.initialize();
      } catch (error) {
        try {
          await this.close();
        } catch (closeError) {
          // Deliberately ignored: the initialization failure is the error
          // the caller needs to see, and close() has already reset state.
        }
        throw error;
      }
    },

    /**
     * Close all connections.
     * The utils instance is closed and all fields are reset even when
     * closing the main connection throws; the error is still propagated.
     */
    async close() {
      try {
        if (this.connection) {
          await this.connection.close();
        }
      } finally {
        try {
          if (this.utils) {
            await this.utils.close();
          }
        } finally {
          this.connection = null;
          this.validator = null;
          this.operations = null;
          this.utils = null;
        }
      }
    },

    // Get individual components; each getter throws until initialize() succeeded.
    getConnection() {
      if (!this.connection) {
        throw new Error(NOT_INITIALIZED);
      }
      return this.connection;
    },

    getValidator() {
      if (!this.validator) {
        throw new Error(NOT_INITIALIZED);
      }
      return this.validator;
    },

    getOperations() {
      if (!this.operations) {
        throw new Error(NOT_INITIALIZED);
      }
      return this.operations;
    },

    getUtils() {
      if (!this.utils) {
        throw new Error(NOT_INITIALIZED);
      }
      return this.utils;
    }
  };
}
78
+
79
/**
 * One-shot helper: create a utils instance for `dbPath`, let it set up a
 * new database from the processed options, and always close it afterwards.
 * @param {string} dbPath - Path to database file
 * @param {Object} processedOptions - Processed CLI options
 * @returns {Promise<Object>} Initialization result
 */
async function initializeNewDatabase(dbPath, processedOptions) {
  const dbUtils = createUtils(dbPath);

  try {
    // `return await` so the result is settled before the finally block runs.
    return await dbUtils.initializeNewDatabase(processedOptions);
  } finally {
    await dbUtils.close();
  }
}
94
+
95
+ export {
96
+ createConnection,
97
+ createValidator,
98
+ createOperations,
99
+ createUtils,
100
+ createDatabaseManager,
101
+ initializeNewDatabase
102
+ };
package/index.js ADDED
@@ -0,0 +1,75 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { Command } from 'commander';
4
+ import { getExtensionsController } from './controllers/getExtensionsController.js';
5
+ import { newController } from './controllers/newController.js';
6
+ import { resumeController } from './controllers/resumeController.js';
7
+ import { runCandidateDetection } from './utils/candidateDetection.js';
8
+ import { validateOptions } from './validators/optionValidator.js';
9
+ import { processOptions } from './processors/optionETL.js';
10
+ import { moveDuplicates } from './utils/duplicateMover.js';
11
+ import { displayDuplicates } from './utils/duplicateReporter.js';
12
+
13
const program = new Command();

// Command-line options as [flags, description, defaultValue?] tuples,
// registered below in exactly this order.
const cliOptions = [
  ['-t, --targets <string>', 'Pipe-separated paths to directories', process.cwd()],
  ['-e, --extensions <string>', 'Pipe-separated file extensions'],
  ['-g, --get-extensions', 'Get file extensions in target directories'],
  ['-r, --resume <string>', 'Path to database file to resume from'],
  ['-s, --save <string>', 'Path to database file to save to'],
  ['-m, --move', 'Move duplicated files (only available with --save or --resume)'],
  ['--preserve-paths', 'Preserve directory structure when moving duplicates (default: true)'],
  ['--flat-duplicates', 'Move duplicates to flat structure (overrides --preserve-paths)'],
  ['--hash-algorithm <string>', 'Hash algorithm to use (blake3, md5, sha1, sha256)', 'blake3'],
  ['-b, --batch-size <number>', 'Batch size for processing candidates', '100'],
  ['-c, --max-concurrency <number>', 'Maximum concurrent hash operations', '5'],
  ['-v, --verbose', 'Show detailed progress information']
];

program
  .name('@tb.p/dd')
  .description('File Deduplication Tool')
  .version('1.0.0');

for (const optionSpec of cliOptions) {
  // Spread so options without a default are registered with two arguments only.
  program.option(...optionSpec);
}

program.parse();

// Raw CLI values -> processed options -> validation, in that order.
const rawOptions = program.opts();
const processedOptions = processOptions(rawOptions);
validateOptions(processedOptions);
39
+
40
/**
 * Dispatch to the controller matching `processedOptions.mode`.
 *
 * - 'get-extensions': run the extensions controller.
 * - 'save' / 'resume': prepare the database (new or existing), then detect
 *   candidates, report duplicates and, when `move` is set, move them.
 * - anything else: print a usage error and exit with code 1.
 *
 * Any error thrown or rejected by a step is printed as `Error: <message>`
 * and the process exits with code 1.
 *
 * @returns {Promise<void>}
 */
async function runController() {
  // Shared tail of the 'save' and 'resume' modes.
  const detectReportAndMove = async () => {
    await runCandidateDetection(processedOptions);
    await displayDuplicates(processedOptions);
    if (processedOptions.move) {
      await moveDuplicates(processedOptions);
    }
  };

  try {
    switch (processedOptions.mode) {
      case 'get-extensions':
        // Awaited so that a rejection (if the controller is async) lands in
        // the catch below instead of becoming an unhandled rejection.
        await getExtensionsController(processedOptions);
        break;

      case 'save':
        await newController(processedOptions);
        await detectReportAndMove();
        break;

      case 'resume':
        await resumeController(processedOptions);
        await detectReportAndMove();
        break;

      default:
        console.error('Error: You must specify a mode: --get-extensions, --resume <db>, or --save <db>');
        process.exit(1);
    }
  } catch (error) {
    console.error('Error:', error.message);
    process.exit(1);
  }
}
73
+
74
+
75
+ runController();
package/package.json ADDED
@@ -0,0 +1,32 @@
1
+ {
2
+ "name": "@tb.p/dd",
3
+ "version": "1.0.0",
4
+ "description": "A comprehensive command-line tool for finding and removing duplicate files using content-based hashing",
5
+ "type": "module",
6
+ "main": "index.js",
7
+ "bin": {
8
+ "@tb.p/dd": "./index.js"
9
+ },
10
+ "scripts": {
11
+ "start": "node index.js",
12
+ "test": "echo \"Error: no test specified\" && exit 1"
13
+ },
14
+ "keywords": [
15
+ "duplicate",
16
+ "files",
17
+ "deduplication",
18
+ "hash",
19
+ "cli",
20
+ "file-management"
21
+ ],
22
+ "author": "@tb.p",
23
+ "license": "MIT",
24
+ "dependencies": {
25
+ "commander": "^11.1.0",
26
+ "hash-wasm": "^4.12.0",
27
+ "sqlite3": "^5.1.6"
28
+ },
29
+ "engines": {
30
+ "node": ">=14.0.0"
31
+ }
32
+ }