@tb.p/dd 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,27 @@
1
+ import { scanDirectories } from '../utils/index.js';
2
+
3
/**
 * CLI entry point: prints the sorted, comma-separated extension list
 * produced by getExtensions() to stdout (nothing when the list is empty).
 * On failure, logs the error message and exits the process with code 1.
 *
 * @param {Object} options - Scan options forwarded to getExtensions
 */
async function getExtensionsController(options) {
  try {
    const found = await getExtensions(options);
    if (found.length === 0) return;
    process.stdout.write(`${found.join(',')}\n`);
  } catch (error) {
    console.error('Error:', error.message);
    process.exit(1);
  }
}
14
+
15
/**
 * Collect the unique file extensions across all scanned files.
 * Files with a falsy extension are skipped.
 *
 * @param {Object} options - Options forwarded to scanDirectories
 * @returns {Promise<string[]>} Sorted array of unique extensions
 */
async function getExtensions(options) {
  const files = await scanDirectories(options);
  const unique = new Set(
    files.filter((file) => file.extension).map((file) => file.extension)
  );
  return [...unique].sort();
}
26
+
27
+ export { getExtensionsController };
@@ -0,0 +1,72 @@
1
+ import { scanDirectory } from '../utils/index.js';
2
+ import { createDatabaseManager } from '../database/index.js';
3
+
4
/**
 * Entry point for the "new" command: opens (and initializes) the target
 * database, runs the scan-and-store pipeline, and guarantees the database
 * handle is closed afterwards.
 *
 * @param {Object} processedOptions - Parsed CLI options; must include saveDb
 */
async function newController(processedOptions) {
  const { saveDb, verbose = false } = processedOptions;

  if (verbose) {
    console.log(`Using database: ${saveDb}`);
  }

  const db = createDatabaseManager(saveDb);
  await db.initialize();

  try {
    await performFileOperations(processedOptions, db);
  } finally {
    // Always release the database handle, even if the pipeline throws.
    await db.close();
  }
}
20
+
21
/**
 * Scan every target directory, deduplicate the discovered files by a
 * path+name key (keeping the copy from the earliest-listed target, i.e.
 * the lowest priority number), and store the surviving entries in the
 * database via addCopy(). Per-directory scan failures are logged as
 * warnings and do not abort the run.
 *
 * @param {Object} options - Must carry targets (string[]); other fields are
 *   passed through to scanDirectory
 * @param {Object} dbManager - Database manager providing getOperations()
 */
async function performFileOperations(options, dbManager) {
  const { targets = [] } = options;
  // key -> best file record seen so far; lower priority number wins
  const fileMap = new Map();

  for (const [priority, dirPath] of targets.entries()) {
    try {
      const scanned = await scanDirectory(dirPath, options);

      for (const file of scanned) {
        const fileKey = `${file.path}_${file.name}`;
        const existing = fileMap.get(fileKey);

        // Keep the first occurrence, or replace when this target ranks higher
        // (earlier in --targets means a smaller priority number).
        if (existing === undefined || existing.priority > priority) {
          fileMap.set(fileKey, {
            ...file,
            priority,
            dirGroup: dirPath,
            active: true
          });
        }
      }
    } catch (error) {
      console.warn(`Warning: Failed to scan directory ${dirPath}: ${error.message}`);
    }
  }

  // Priority-ordered list of the deduplicated survivors.
  const allFiles = [...fileMap.values()].sort((a, b) => a.priority - b.priority);

  let successCount = 0;
  let errorCount = 0;

  for (const file of allFiles) {
    const result = await dbManager.getOperations().addCopy(file);
    if (result.success) {
      successCount++;
    } else {
      errorCount++;
      console.error(`Failed to add file ${file.path}: ${result.error}`);
    }
  }

  console.log(`Database operations: ${successCount} successful, ${errorCount} failed`);

  console.log(`Scanned and stored ${allFiles.length} files (deduplicated with priority)`);
}
71
+
72
+ export { newController };
@@ -0,0 +1,233 @@
1
+ import fs from 'fs';
2
+ import { createDatabaseManager } from '../database/index.js';
3
+ import { scanDirectory } from '../utils/index.js';
4
+
5
/**
 * Resume Controller for @tb.p/dd
 * Handles resuming from existing database - sets all rows to active=false,
 * scans for files with matching extensions, sets existing matches to active=true,
 * adds new files normally
 */
async function resumeController(processedOptions) {
  const { resumeDb, verbose = false } = processedOptions;

  // A missing database file is fatal: there is nothing to resume from.
  if (!fs.existsSync(resumeDb)) {
    console.error(`Error: Database file does not exist: ${resumeDb}`);
    process.exit(1);
  }

  if (verbose) {
    console.log(`🔄 Resuming from database: ${resumeDb}`);
  }

  const db = createDatabaseManager(resumeDb);
  await db.initialize();

  try {
    // Recover the configuration stored in the meta table, then let any
    // CLI flags supplied this run take precedence over it.
    const originalParams = await loadOriginalParameters(db);
    const mergedParams = mergeParameters(originalParams, processedOptions);

    if (verbose) {
      console.log('📋 Loaded original parameters from database');
      console.log('🔧 Applied CLI overrides');
    }

    // Deactivate everything, then re-activate (or insert) whatever the
    // fresh scan finds.
    await setAllRowsInactive(db, verbose);
    await scanAndSetActiveFiles(db, mergedParams, verbose);

    if (verbose) {
      const stats = await db.getOperations().getResumeStatistics();
      console.log(`📊 Database statistics after resume:`);
      console.log(` - Total files: ${stats.totalFiles}`);
      console.log(` - Active files: ${stats.activeFiles}`);
      console.log(` - Unhashed active files: ${stats.unhashedActiveFiles}`);
      console.log(` - Size duplicate groups: ${stats.sizeDuplicateGroups}`);
      console.log(` - Hash duplicate groups: ${stats.hashDuplicateGroups}`);
    }

    // Only run the (compatibility) new-files pass when targets changed.
    if (hasNewTargets(originalParams, mergedParams)) {
      await addNewFiles(db, mergedParams, verbose);
    }

    if (verbose) {
      console.log('✅ Resume completed successfully');
    }

  } catch (error) {
    console.error(`❌ Resume failed: ${error.message}`);
    throw error;
  } finally {
    await db.close();
  }
}
72
+
73
/**
 * Load original parameters from database meta table and convert the
 * stored strings back to their runtime types.
 *
 * Numeric fields fall back to their defaults when the stored value is
 * missing or not parseable; booleans were persisted as the strings
 * 'true'/'false'.
 *
 * @param {Object} dbManager - Database manager instance
 * @returns {Promise<Object>} Original parameters
 */
async function loadOriginalParameters(dbManager) {
  const meta = await dbManager.getOperations().getAllMeta();

  return {
    // targets are stored '|'-joined, extensions ','-joined (see newController/README)
    targets: meta.targets ? meta.targets.split('|') : [],
    extensions: meta.extensions ? meta.extensions.split(',') : [],
    // Number.parseInt with explicit radix 10 — never rely on implicit radix
    // detection; NaN (missing/garbage value) falls through to the default.
    minSize: Number.parseInt(meta.min_size, 10) || 0,
    maxSize: Number.parseInt(meta.max_size, 10) || 0,
    keepStrategy: meta.keep_strategy || 'priority',
    hashAlgorithm: meta.hash_algorithm || 'blake3',
    recursive: meta.recursive === 'true',
    excludeSystem: meta.exclude_system === 'true',
    excludeHidden: meta.exclude_hidden === 'true',
    batchSize: Number.parseInt(meta.batch_size, 10) || 100,
    maxConcurrency: Number.parseInt(meta.max_concurrency, 10) || 5,
    move: meta.move === 'true',
    // preservePaths defaults to true unless explicitly stored as 'false'
    preservePaths: meta.preserve_paths !== 'false'
  };
}
98
+
99
/**
 * Merge original parameters with CLI overrides.
 *
 * targets/extensions/keepStrategy/hashAlgorithm/batchSize/maxConcurrency
 * only override when truthy; minSize/maxSize/move/preservePaths/verbose
 * override whenever they are defined (so 0/false are valid overrides).
 *
 * @param {Object} original - Original parameters from database
 * @param {Object} cli - CLI parameters
 * @returns {Object} Merged parameters (inputs are not mutated)
 */
function mergeParameters(original, cli) {
  const merged = { ...original };

  if (cli.targets) merged.targets = cli.targets;
  if (cli.extensions) merged.extensions = cli.extensions;
  if (cli.minSize !== undefined) merged.minSize = cli.minSize;
  if (cli.maxSize !== undefined) merged.maxSize = cli.maxSize;
  if (cli.keepStrategy) merged.keepStrategy = cli.keepStrategy;
  if (cli.hashAlgorithm) merged.hashAlgorithm = cli.hashAlgorithm;
  if (cli.batchSize) merged.batchSize = cli.batchSize;
  if (cli.maxConcurrency) merged.maxConcurrency = cli.maxConcurrency;
  if (cli.move !== undefined) merged.move = cli.move;
  if (cli.preservePaths !== undefined) merged.preservePaths = cli.preservePaths;
  if (cli.verbose !== undefined) merged.verbose = cli.verbose;

  return merged;
}
121
+
122
/**
 * Set all rows to active=false ahead of a resume re-scan.
 * Throws when the underlying operation reports failure.
 *
 * @param {Object} dbManager - Database manager instance
 * @param {boolean} verbose - Whether to show verbose output
 */
async function setAllRowsInactive(dbManager, verbose) {
  // Log only in verbose mode; a no-op logger keeps the flow unconditional.
  const log = verbose ? console.log : () => {};

  log('🔄 Setting all rows to inactive...');

  const outcome = await dbManager.getOperations().setAllActive(false);
  if (!outcome.success) {
    throw new Error(`Failed to set all rows inactive: ${outcome.error}`);
  }

  log(`✅ Set ${outcome.changes} rows to inactive`);
}
141
+
142
/**
 * Scan directory targets and reconcile results with the database:
 * files whose paths already exist are set back to active=true, unseen
 * files are inserted as new rows tagged with their source target and
 * priority. A single unreadable target logs a warning (verbose only)
 * and does not abort the resume.
 *
 * @param {Object} dbManager - Database manager instance
 * @param {Object} params - Processing parameters (targets plus scan filters)
 * @param {boolean} verbose - Whether to show verbose output
 */
async function scanAndSetActiveFiles(dbManager, params, verbose) {
  if (verbose) {
    console.log('🔍 Scanning directory for files with matching extensions...');
  }

  let activatedCount = 0;
  let newFileCount = 0;

  // OPTIMIZATION: Load all existing file paths once (fixes N+1 query problem)
  const existingFilePaths = await dbManager.getOperations().getExistingFilePaths();

  // entries() yields the index directly. The previous version called
  // params.targets.indexOf(target) for every new file, which is O(n) per
  // lookup and returns the wrong index for duplicate target entries.
  for (const [priority, target] of params.targets.entries()) {
    try {
      const files = await scanDirectory(target, params);

      for (const file of files) {
        // Check if file already exists using Set lookup (O(1) vs O(n))
        if (existingFilePaths.has(file.path)) {
          // Set existing file to active=true
          const result = await dbManager.getOperations().setActiveByPath(file.path, true);
          if (result.success) {
            activatedCount++;
          }
        } else {
          // Add new file normally, tagged with its source directory and priority
          const result = await dbManager.getOperations().addCopy({
            ...file,
            dirGroup: target,
            priority
          });

          if (result.success) {
            newFileCount++;
          }
        }
      }
    } catch (error) {
      if (verbose) {
        console.warn(`⚠️ Warning: Failed to scan directory ${target}: ${error.message}`);
      }
    }
  }

  if (verbose) {
    console.log(`✅ Activated ${activatedCount} existing files`);
    console.log(`📄 Added ${newFileCount} new files`);
  }
}
196
+
197
/**
 * Check if there are new targets to scan. Duplicate entries collapse
 * (set semantics) and ordering is ignored; returns false when either
 * side has no targets list at all.
 *
 * @param {Object} original - Original parameters
 * @param {Object} merged - Merged parameters
 * @returns {boolean} Whether there are new targets
 */
function hasNewTargets(original, merged) {
  if (!original.targets || !merged.targets) return false;

  const before = new Set(original.targets);
  const after = new Set(merged.targets);

  if (before.size !== after.size) return true;

  for (const target of before) {
    if (!after.has(target)) return true;
  }
  return false;
}
213
+
214
/**
 * Add new files from changed targets (simplified version).
 *
 * Intentionally a no-op beyond logging: new-file insertion now happens
 * inside scanAndSetActiveFiles; this function is kept for compatibility.
 *
 * @param {Object} dbManager - Database manager instance (unused)
 * @param {Object} params - Processing parameters (unused)
 * @param {boolean} verbose - Whether to show verbose output
 */
async function addNewFiles(dbManager, params, verbose) {
  if (!verbose) return;
  console.log('📁 Adding new files from changed targets...');
  console.log('ℹ️ New files are handled during directory scanning');
}
230
+
231
+
232
+
233
+ export { resumeController };
@@ -0,0 +1,262 @@
1
+ # Database Module
2
+
3
+ This module provides comprehensive database functionality for the file deduplication tool, including initialization, validation, CRUD operations, and utilities.
4
+
5
+ ## Overview
6
+
7
+ The database module uses SQLite to store deduplication data, enabling resume functionality and persistent tracking of duplicate files across multiple runs.
8
+
9
+ ## Files
10
+
11
+ - `dbConnection.js` - Database connection and initialization
12
+ - `dbValidator.js` - Database validation and integrity checks
13
+ - `dbOperations.js` - CRUD operations for all tables
14
+ - `dbUtils.js` - High-level utilities and helper functions
15
+ - `index.js` - Unified module exports
16
+ - `example.js` - Usage examples
17
+
18
+ ## Quick Start
19
+
20
+ ```javascript
21
+ const { createDatabaseManager } = require('./database');
22
+
23
+ // Create and initialize database
24
+ const db = createDatabaseManager('./dedupe.sqlite');
25
+ await db.initialize();
26
+
27
+ // Use database operations
28
+ await db.getOperations().setMeta('targets', '/path/to/files');
29
+ await db.getOperations().addCopy({
30
+ path: '/path/to/files/image.jpg',
31
+ name: 'image.jpg',
32
+ extension: 'jpg',
33
+ size: 1024000,
34
+ dirGroup: '/path/to/files'
35
+ });
36
+
37
+ // Close when done
38
+ await db.close();
39
+ ```
40
+
41
+ ## Components
42
+
43
+ ### DatabaseConnection
44
+
45
+ Handles SQLite database connection, table creation, and basic operations.
46
+
47
+ ```javascript
48
+ const { createConnection } = require('./database');
49
+ const db = createConnection('./dedupe.sqlite');
50
+ await db.connect();
51
+ ```
52
+
53
+ ### DatabaseValidator
54
+
55
+ Validates database structure, data integrity, and file system consistency.
56
+
57
+ ```javascript
58
+ const { createValidator } = require('./database');
59
+ const validator = createValidator(db);
60
+ const validation = await validator.validateAll();
61
+ ```
62
+
63
+ ### DatabaseOperations
64
+
65
+ Provides CRUD operations for meta, data, and copies tables.
66
+
67
+ ```javascript
68
+ const { createOperations } = require('./database');
69
+ const ops = createOperations(db);
70
+
71
+ // Meta operations
72
+ await ops.setMeta('key', 'value');
73
+ const value = await ops.getMeta('key');
74
+
75
+ // Data operations
76
+ const dataResult = await ops.createData('hash123', 1024);
77
+ const data = await ops.getDataByHash('hash123');
78
+
79
+ // Copies operations
80
+ await ops.addCopy(fileInfo);
81
+ const copies = await ops.getUnhashedCopies();
82
+ ```
83
+
84
+ ### DatabaseUtils
85
+
86
+ High-level utilities for database management, export/import, and cleanup.
87
+
88
+ ```javascript
89
+ const { createUtils } = require('./database');
90
+ const utils = createUtils('./dedupe.sqlite');
91
+ await utils.initialize();
92
+
93
+ // Get database info
94
+ const info = await utils.getDatabaseInfo();
95
+
96
+ // Export to CSV
97
+ await utils.exportToCSV('./export.csv');
98
+
99
+ // Cleanup database
100
+ await utils.cleanupDatabase();
101
+ ```
102
+
103
+ ## Database Schema
104
+
105
+ ### Meta Table
106
+ Stores configuration parameters from deduplication runs.
107
+
108
+ | Column | Type | Description |
109
+ |--------|------|-------------|
110
+ | key | TEXT PRIMARY KEY | Configuration parameter name |
111
+ | value | TEXT | Configuration parameter value |
112
+
113
+ ### Copies Table
114
+ Stores file paths and their metadata for deduplication tracking.
115
+
116
+ | Column | Type | Description |
117
+ |--------|------|-------------|
118
+ | id | INTEGER PRIMARY KEY AUTOINCREMENT | Unique incrementing identifier |
119
+ | dir_group | TEXT | Directory group identifier (source directory from --targets) |
120
+ | file_path | TEXT NOT NULL | Full path to the file |
121
+ | file_name | TEXT NOT NULL | Just the filename |
122
+ | file_extension | TEXT | File extension (e.g., 'jpg', 'png', 'pdf') |
123
+ | file_size | INTEGER NOT NULL | File size in bytes |
124
+ | file_hash | TEXT | Content hash (BLAKE3, SHA-256, MD5, etc.) |
125
+ | active | BOOLEAN | Whether the file is active in processing |
126
+ | priority | INTEGER | Processing priority (lower numbers = higher priority, following --targets order) |
127
+
128
+ ## Usage Examples
129
+
130
+ ### Initialize New Database
131
+
132
+ ```javascript
133
+ const { createDatabaseManager } = require('./database');
134
+
135
+ const db = createDatabaseManager('./new-db.sqlite');
136
+ await db.initialize();
137
+
138
+ // Set configuration
139
+ await db.getOperations().setMeta('targets', '/path/to/files');
140
+ await db.getOperations().setMeta('extensions', 'jpg,png,gif');
141
+ await db.getOperations().setMeta('hash_algorithm', 'blake3');
142
+
143
+ await db.close();
144
+ ```
145
+
146
+ ### Resume From Existing Database
147
+
148
+ ```javascript
149
+ const { createDatabaseManager } = require('./database');
150
+
151
+ const db = createDatabaseManager('./existing-db.sqlite');
152
+ await db.initialize();
153
+
154
+ // Validate database
155
+ const validation = await db.getValidator().validateAll();
156
+ if (!validation.valid) {
157
+ console.error('Database validation failed:', validation.errors);
158
+ process.exit(1);
159
+ }
160
+
161
+ // Get existing configuration
162
+ const config = await db.getOperations().getAllMeta();
163
+ console.log('Configuration:', config);
164
+
165
+ // Continue processing...
166
+ await db.close();
167
+ ```
168
+
169
+ ### Process Files
170
+
171
+ ```javascript
172
+ const { createDatabaseManager } = require('./database');
173
+
174
+ const db = createDatabaseManager('./dedupe.sqlite');
175
+ await db.initialize();
176
+
177
+ // Add files to database
178
+ const files = [
179
+ { path: '/path/file1.jpg', name: 'file1.jpg', extension: 'jpg', size: 1024, dirGroup: '/path' },
180
+ { path: '/path/file2.jpg', name: 'file2.jpg', extension: 'jpg', size: 1024, dirGroup: '/path' }
181
+ ];
182
+
183
+ for (const file of files) {
184
+ await db.getOperations().addCopy(file);
185
+ }
186
+
187
+ // Calculate hashes and update files
188
+ const unhashedFiles = await db.getOperations().getUnhashedCopies();
189
+ for (const file of unhashedFiles) {
190
+ const hash = calculateHash(file.file_path); // Your hash function
191
+ await db.getOperations().updateFileHash(file.id, hash);
192
+ }
193
+
194
+ // Find duplicates
195
+ const duplicates = await db.getOperations().getDuplicateGroups();
196
+ console.log('Found', duplicates.length, 'duplicate groups');
197
+
198
+ await db.close();
199
+ ```
200
+
201
+ ### Export and Cleanup
202
+
203
+ ```javascript
204
+ const { createDatabaseManager } = require('./database');
205
+
206
+ const db = createDatabaseManager('./dedupe.sqlite');
207
+ await db.initialize();
208
+
209
+ // Export to CSV
210
+ await db.getUtils().exportToCSV('./duplicates.csv');
211
+
212
+ // Get statistics
213
+ const stats = await db.getOperations().getStatistics();
214
+ console.log('Statistics:', stats);
215
+
216
+ // Cleanup orphaned data
217
+ await db.getUtils().cleanupDatabase();
218
+
219
+ await db.close();
220
+ ```
221
+
222
+ ## Error Handling
223
+
224
+ All database operations return results with success/error information:
225
+
226
+ ```javascript
227
+ const result = await db.getOperations().setMeta('key', 'value');
228
+ if (!result.success) {
229
+ console.error('Error:', result.error);
230
+ }
231
+ ```
232
+
233
+ ## Transactions
234
+
235
+ Use transactions for atomic operations:
236
+
237
+ ```javascript
238
+ await db.getConnection().beginTransaction();
239
+ try {
240
+ // Multiple operations
241
+ await db.getOperations().setMeta('key1', 'value1');
242
+ await db.getOperations().setMeta('key2', 'value2');
243
+ await db.getConnection().commit();
244
+ } catch (error) {
245
+ await db.getConnection().rollback();
246
+ throw error;
247
+ }
248
+ ```
249
+
250
+ ## Performance Considerations
251
+
252
+ - Use indexes for better query performance
253
+ - Batch operations when possible
254
+ - Use transactions for multiple related operations
255
+ - Clean up orphaned data regularly
256
+ - Consider database size limits for very large file sets
257
+
258
+ ## Dependencies
259
+
260
+ - `sqlite3` - SQLite database driver
261
+ - `fs` - File system operations
262
+ - `path` - Path utilities