@tb.p/dd 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/controllers/getExtensionsController.js +27 -0
- package/controllers/newController.js +72 -0
- package/controllers/resumeController.js +233 -0
- package/database/README.md +262 -0
- package/database/dbConnection.js +314 -0
- package/database/dbOperations.js +332 -0
- package/database/dbUtils.js +125 -0
- package/database/dbValidator.js +325 -0
- package/database/index.js +102 -0
- package/index.js +75 -0
- package/package.json +32 -0
- package/processors/optionETL.js +82 -0
- package/utils/README.md +261 -0
- package/utils/candidateDetection.js +541 -0
- package/utils/duplicateMover.js +140 -0
- package/utils/duplicateReporter.js +91 -0
- package/utils/fileHasher.js +195 -0
- package/utils/fileMover.js +180 -0
- package/utils/fileScanner.js +128 -0
- package/utils/fileSystemUtils.js +192 -0
- package/utils/index.js +5 -0
- package/validators/optionValidator.js +103 -0
package/controllers/getExtensionsController.js
@@ -0,0 +1,27 @@
+import { scanDirectories } from '../utils/index.js';
+
+async function getExtensionsController(options) {
+  try {
+    const extensions = await getExtensions(options);
+    if (extensions.length > 0) {
+      process.stdout.write(extensions.join(',') + '\n');
+    }
+  } catch (error) {
+    console.error('Error:', error.message);
+    process.exit(1);
+  }
+}
+
+async function getExtensions(options) {
+  const files = await scanDirectories(options);
+  const extensions = new Set();
+  for (const file of files) {
+    if (file.extension) {
+      extensions.add(file.extension);
+    }
+  }
+
+  return Array.from(extensions).sort();
+}
+
+export { getExtensionsController };
package/controllers/newController.js
@@ -0,0 +1,72 @@
+import { scanDirectory } from '../utils/index.js';
+import { createDatabaseManager } from '../database/index.js';
+
+async function newController(processedOptions) {
+  const { saveDb, verbose = false } = processedOptions;
+
+  if (verbose) {
+    console.log(`Using database: ${saveDb}`);
+  }
+
+  const dbManager = createDatabaseManager(saveDb);
+  await dbManager.initialize();
+
+  try {
+    await performFileOperations(processedOptions, dbManager);
+  } finally {
+    await dbManager.close();
+  }
+}
+
+async function performFileOperations(options, dbManager) {
+  const { targets = [] } = options;
+  const allFiles = [];
+  const fileMap = new Map(); // For deduplication with priority
+
+  for (let i = 0; i < targets.length; i++) {
+    const dirPath = targets[i];
+    const priority = i; // Lower index = higher priority
+
+    try {
+      const files = await scanDirectory(dirPath, options);
+
+      for (const file of files) {
+        const fileKey = `${file.path}_${file.name}`; // Create unique key
+
+        if (!fileMap.has(fileKey) || fileMap.get(fileKey).priority > priority) {
+          fileMap.set(fileKey, {
+            ...file,
+            priority,
+            dirGroup: dirPath,
+            active: true
+          });
+        }
+      }
+    } catch (error) {
+      console.warn(`Warning: Failed to scan directory ${dirPath}: ${error.message}`);
+    }
+  }
+
+  // Convert map values back to array, maintaining priority order
+  allFiles.push(...Array.from(fileMap.values()).sort((a, b) => a.priority - b.priority));
+
+  // Store files in database
+  let successCount = 0;
+  let errorCount = 0;
+
+  for (const file of allFiles) {
+    const result = await dbManager.getOperations().addCopy(file);
+    if (result.success) {
+      successCount++;
+    } else {
+      errorCount++;
+      console.error(`Failed to add file ${file.path}: ${result.error}`);
+    }
+  }
+
+  console.log(`Database operations: ${successCount} successful, ${errorCount} failed`);
+
+  console.log(`Scanned and stored ${allFiles.length} files (deduplicated with priority)`);
+}
+
+export { newController };
package/controllers/resumeController.js
@@ -0,0 +1,233 @@
+import fs from 'fs';
+import { createDatabaseManager } from '../database/index.js';
+import { scanDirectory } from '../utils/index.js';
+
+/**
+ * Resume Controller for @tb.p/dd
+ * Handles resuming from existing database - sets all rows to active=false,
+ * scans for files with matching extensions, sets existing matches to active=true,
+ * adds new files normally
+ */
+async function resumeController(processedOptions) {
+  const { resumeDb, verbose = false } = processedOptions;
+
+  if (!fs.existsSync(resumeDb)) {
+    console.error(`Error: Database file does not exist: ${resumeDb}`);
+    process.exit(1);
+  }
+
+  if (verbose) {
+    console.log(`🔄 Resuming from database: ${resumeDb}`);
+  }
+
+  // Initialize database manager
+  const dbManager = createDatabaseManager(resumeDb);
+  await dbManager.initialize();
+
+  try {
+    // 1. Load original parameters from meta table
+    const originalParams = await loadOriginalParameters(dbManager);
+
+    // 2. Merge with CLI overrides
+    const mergedParams = mergeParameters(originalParams, processedOptions);
+
+    if (verbose) {
+      console.log('📋 Loaded original parameters from database');
+      console.log('🔧 Applied CLI overrides');
+    }
+
+    // 3. Set all rows to active=false
+    await setAllRowsInactive(dbManager, verbose);
+
+    // 4. Scan directory for files with matching extensions
+    await scanAndSetActiveFiles(dbManager, mergedParams, verbose);
+
+    // 5. Show final statistics using the new two-step approach
+    if (verbose) {
+      const stats = await dbManager.getOperations().getResumeStatistics();
+      console.log(`📊 Database statistics after resume:`);
+      console.log(`   - Total files: ${stats.totalFiles}`);
+      console.log(`   - Active files: ${stats.activeFiles}`);
+      console.log(`   - Unhashed active files: ${stats.unhashedActiveFiles}`);
+      console.log(`   - Size duplicate groups: ${stats.sizeDuplicateGroups}`);
+      console.log(`   - Hash duplicate groups: ${stats.hashDuplicateGroups}`);
+    }
+
+    // 6. Add any new files if targets changed
+    if (hasNewTargets(originalParams, mergedParams)) {
+      await addNewFiles(dbManager, mergedParams, verbose);
+    }
+
+    if (verbose) {
+      console.log('✅ Resume completed successfully');
+    }
+
+  } catch (error) {
+    console.error(`❌ Resume failed: ${error.message}`);
+    throw error;
+  } finally {
+    await dbManager.close();
+  }
+}
+
+/**
+ * Load original parameters from database meta table
+ * @param {Object} dbManager - Database manager instance
+ * @returns {Promise<Object>} Original parameters
+ */
+async function loadOriginalParameters(dbManager) {
+  const meta = await dbManager.getOperations().getAllMeta();
+
+  // Convert stored strings back to appropriate types
+  return {
+    targets: meta.targets ? meta.targets.split('|') : [],
+    extensions: meta.extensions ? meta.extensions.split(',') : [],
+    minSize: parseInt(meta.min_size) || 0,
+    maxSize: parseInt(meta.max_size) || 0,
+    keepStrategy: meta.keep_strategy || 'priority',
+    hashAlgorithm: meta.hash_algorithm || 'blake3',
+    recursive: meta.recursive === 'true',
+    excludeSystem: meta.exclude_system === 'true',
+    excludeHidden: meta.exclude_hidden === 'true',
+    batchSize: parseInt(meta.batch_size) || 100,
+    maxConcurrency: parseInt(meta.max_concurrency) || 5,
+    move: meta.move === 'true',
+    preservePaths: meta.preserve_paths !== 'false'
+  };
+}
+
+/**
+ * Merge original parameters with CLI overrides
+ * @param {Object} original - Original parameters from database
+ * @param {Object} cli - CLI parameters
+ * @returns {Object} Merged parameters
+ */
+function mergeParameters(original, cli) {
+  return {
+    ...original,
+    ...(cli.targets && { targets: cli.targets }),
+    ...(cli.extensions && { extensions: cli.extensions }),
+    ...(cli.minSize !== undefined && { minSize: cli.minSize }),
+    ...(cli.maxSize !== undefined && { maxSize: cli.maxSize }),
+    ...(cli.keepStrategy && { keepStrategy: cli.keepStrategy }),
+    ...(cli.hashAlgorithm && { hashAlgorithm: cli.hashAlgorithm }),
+    ...(cli.batchSize && { batchSize: cli.batchSize }),
+    ...(cli.maxConcurrency && { maxConcurrency: cli.maxConcurrency }),
+    ...(cli.move !== undefined && { move: cli.move }),
+    ...(cli.preservePaths !== undefined && { preservePaths: cli.preservePaths }),
+    ...(cli.verbose !== undefined && { verbose: cli.verbose })
+  };
+}
+
+/**
+ * Set all rows to active=false
+ * @param {Object} dbManager - Database manager instance
+ * @param {boolean} verbose - Whether to show verbose output
+ */
+async function setAllRowsInactive(dbManager, verbose) {
+  if (verbose) {
+    console.log('🔄 Setting all rows to inactive...');
+  }
+
+  const result = await dbManager.getOperations().setAllActive(false);
+  if (!result.success) {
+    throw new Error(`Failed to set all rows inactive: ${result.error}`);
+  }
+
+  if (verbose) {
+    console.log(`✅ Set ${result.changes} rows to inactive`);
+  }
+}
+
+/**
+ * Scan directory for files with matching extensions and set existing matches to active=true
+ * @param {Object} dbManager - Database manager instance
+ * @param {Object} params - Processing parameters
+ * @param {boolean} verbose - Whether to show verbose output
+ */
+async function scanAndSetActiveFiles(dbManager, params, verbose) {
+  if (verbose) {
+    console.log('🔍 Scanning directory for files with matching extensions...');
+  }
+
+  let activatedCount = 0;
+  let newFileCount = 0;
+
+  // OPTIMIZATION: Load all existing file paths once (fixes N+1 query problem)
+  const existingFilePaths = await dbManager.getOperations().getExistingFilePaths();
+
+  for (const target of params.targets) {
+    try {
+      const files = await scanDirectory(target, params);
+
+      for (const file of files) {
+        // Check if file already exists using Set lookup (O(1) vs O(n))
+        if (existingFilePaths.has(file.path)) {
+          // Set existing file to active=true
+          const result = await dbManager.getOperations().setActiveByPath(file.path, true);
+          if (result.success) {
+            activatedCount++;
+          }
+        } else {
+          // Add new file normally
+          const result = await dbManager.getOperations().addCopy({
+            ...file,
+            dirGroup: target,
+            priority: params.targets.indexOf(target)
+          });
+
+          if (result.success) {
+            newFileCount++;
+          }
+        }
+      }
+    } catch (error) {
+      if (verbose) {
+        console.warn(`⚠️ Warning: Failed to scan directory ${target}: ${error.message}`);
+      }
+    }
+  }
+
+  if (verbose) {
+    console.log(`✅ Activated ${activatedCount} existing files`);
+    console.log(`📄 Added ${newFileCount} new files`);
+  }
+}
+
+/**
+ * Check if there are new targets to scan
+ * @param {Object} original - Original parameters
+ * @param {Object} merged - Merged parameters
+ * @returns {boolean} Whether there are new targets
+ */
+function hasNewTargets(original, merged) {
+  if (!original.targets || !merged.targets) return false;
+
+  // Check if any targets are different
+  const originalSet = new Set(original.targets);
+  const mergedSet = new Set(merged.targets);
+
+  return originalSet.size !== mergedSet.size ||
+         [...originalSet].some(target => !mergedSet.has(target));
+}
+
+/**
+ * Add new files from changed targets (simplified version)
+ * @param {Object} dbManager - Database manager instance
+ * @param {Object} params - Processing parameters
+ * @param {boolean} verbose - Whether to show verbose output
+ */
+async function addNewFiles(dbManager, params, verbose) {
+  if (verbose) {
+    console.log('📁 Adding new files from changed targets...');
+  }
+
+  // This is now handled in scanAndSetActiveFiles, but keeping for compatibility
+  if (verbose) {
+    console.log('ℹ️ New files are handled during directory scanning');
+  }
+}
+
+
+
+export { resumeController };
package/database/README.md
@@ -0,0 +1,262 @@
+# Database Module
+
+This module provides comprehensive database functionality for the file deduplication tool, including initialization, validation, CRUD operations, and utilities.
+
+## Overview
+
+The database module uses SQLite to store deduplication data, enabling resume functionality and persistent tracking of duplicate files across multiple runs.
+
+## Files
+
+- `dbConnection.js` - Database connection and initialization
+- `dbValidator.js` - Database validation and integrity checks
+- `dbOperations.js` - CRUD operations for all tables
+- `dbUtils.js` - High-level utilities and helper functions
+- `index.js` - Unified module exports
+- `example.js` - Usage examples
+
+## Quick Start
+
+```javascript
+const { createDatabaseManager } = require('./database');
+
+// Create and initialize database
+const db = createDatabaseManager('./dedupe.sqlite');
+await db.initialize();
+
+// Use database operations
+await db.getOperations().setMeta('targets', '/path/to/files');
+await db.getOperations().addCopy({
+  path: '/path/to/files/image.jpg',
+  name: 'image.jpg',
+  extension: 'jpg',
+  size: 1024000,
+  dirGroup: '/path/to/files'
+});
+
+// Close when done
+await db.close();
+```
+
+## Components
+
+### DatabaseConnection
+
+Handles SQLite database connection, table creation, and basic operations.
+
+```javascript
+const { createConnection } = require('./database');
+const db = createConnection('./dedupe.sqlite');
+await db.connect();
+```
+
+### DatabaseValidator
+
+Validates database structure, data integrity, and file system consistency.
+
+```javascript
+const { createValidator } = require('./database');
+const validator = createValidator(db);
+const validation = await validator.validateAll();
+```
+
+### DatabaseOperations
+
+Provides CRUD operations for meta, data, and copies tables.
+
+```javascript
+const { createOperations } = require('./database');
+const ops = createOperations(db);
+
+// Meta operations
+await ops.setMeta('key', 'value');
+const value = await ops.getMeta('key');
+
+// Data operations
+const dataResult = await ops.createData('hash123', 1024);
+const data = await ops.getDataByHash('hash123');
+
+// Copies operations
+await ops.addCopy(fileInfo);
+const copies = await ops.getUnhashedCopies();
+```
+
+### DatabaseUtils
+
+High-level utilities for database management, export/import, and cleanup.
+
+```javascript
+const { createUtils } = require('./database');
+const utils = createUtils('./dedupe.sqlite');
+await utils.initialize();
+
+// Get database info
+const info = await utils.getDatabaseInfo();
+
+// Export to CSV
+await utils.exportToCSV('./export.csv');
+
+// Cleanup database
+await utils.cleanupDatabase();
+```
+
+## Database Schema
+
+### Meta Table
+Stores configuration parameters from deduplication runs.
+
+| Column | Type | Description |
+|--------|------|-------------|
+| key | TEXT PRIMARY KEY | Configuration parameter name |
+| value | TEXT | Configuration parameter value |
+
+### Copies Table
+Stores file paths and their metadata for deduplication tracking.
+
+| Column | Type | Description |
+|--------|------|-------------|
+| id | INTEGER PRIMARY KEY AUTOINCREMENT | Unique incrementing identifier |
+| dir_group | TEXT | Directory group identifier (source directory from --targets) |
+| file_path | TEXT NOT NULL | Full path to the file |
+| file_name | TEXT NOT NULL | Just the filename |
+| file_extension | TEXT | File extension (e.g., 'jpg', 'png', 'pdf') |
+| file_size | INTEGER NOT NULL | File size in bytes |
+| file_hash | TEXT | Content hash (BLAKE3, SHA-256, MD5, etc.) |
+| active | BOOLEAN | Whether the file is active in processing |
+| priority | INTEGER | Processing priority (lower numbers = higher priority, matching target order) |
+
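+For reference, the columns above imply DDL roughly like the following. This is a sketch, not the package's actual schema creation (which lives in `dbConnection.js`); the `data` table used by `createData()` is omitted because its columns are not documented here.
+
+```javascript
+// Sketch of the schema implied by the tables above -- assumed, not the
+// package's real DDL; see dbConnection.js for authoritative table creation.
+const CREATE_META = `
+  CREATE TABLE IF NOT EXISTS meta (
+    key   TEXT PRIMARY KEY,
+    value TEXT
+  )`;
+
+const CREATE_COPIES = `
+  CREATE TABLE IF NOT EXISTS copies (
+    id             INTEGER PRIMARY KEY AUTOINCREMENT,
+    dir_group      TEXT,
+    file_path      TEXT NOT NULL,
+    file_name      TEXT NOT NULL,
+    file_extension TEXT,
+    file_size      INTEGER NOT NULL,
+    file_hash      TEXT,
+    active         BOOLEAN,
+    priority       INTEGER
+  )`;
+```
+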
+## Usage Examples
+
+### Initialize New Database
+
+```javascript
+const { createDatabaseManager } = require('./database');
+
+const db = createDatabaseManager('./new-db.sqlite');
+await db.initialize();
+
+// Set configuration
+await db.getOperations().setMeta('targets', '/path/to/files');
+await db.getOperations().setMeta('extensions', 'jpg,png,gif');
+await db.getOperations().setMeta('hash_algorithm', 'blake3');
+
+await db.close();
+```
+
+### Resume From Existing Database
+
+```javascript
+const { createDatabaseManager } = require('./database');
+
+const db = createDatabaseManager('./existing-db.sqlite');
+await db.initialize();
+
+// Validate database
+const validation = await db.getValidator().validateAll();
+if (!validation.valid) {
+  console.error('Database validation failed:', validation.errors);
+  process.exit(1);
+}
+
+// Get existing configuration
+const config = await db.getOperations().getAllMeta();
+console.log('Configuration:', config);
+
+// Continue processing...
+await db.close();
+```
+
+### Process Files
+
+```javascript
+const { createDatabaseManager } = require('./database');
+
+const db = createDatabaseManager('./dedupe.sqlite');
+await db.initialize();
+
+// Add files to database
+const files = [
+  { path: '/path/file1.jpg', name: 'file1.jpg', extension: 'jpg', size: 1024, dirGroup: '/path' },
+  { path: '/path/file2.jpg', name: 'file2.jpg', extension: 'jpg', size: 1024, dirGroup: '/path' }
+];
+
+for (const file of files) {
+  await db.getOperations().addCopy(file);
+}
+
+// Calculate hashes and update files
+const unhashedFiles = await db.getOperations().getUnhashedCopies();
+for (const file of unhashedFiles) {
+  const hash = calculateHash(file.file_path); // Your hash function
+  await db.getOperations().updateFileHash(file.id, hash);
+}
+
+// Find duplicates
+const duplicates = await db.getOperations().getDuplicateGroups();
+console.log('Found', duplicates.length, 'duplicate groups');
+
+await db.close();
+```
+
+### Export and Cleanup
+
+```javascript
+const { createDatabaseManager } = require('./database');
+
+const db = createDatabaseManager('./dedupe.sqlite');
+await db.initialize();
+
+// Export to CSV
+await db.getUtils().exportToCSV('./duplicates.csv');
+
+// Get statistics
+const stats = await db.getOperations().getStatistics();
+console.log('Statistics:', stats);
+
+// Cleanup orphaned data
+await db.getUtils().cleanupDatabase();
+
+await db.close();
+```
+
+## Error Handling
+
+All database operations return results with success/error information:
+
+```javascript
+const result = await db.getOperations().setMeta('key', 'value');
+if (!result.success) {
+  console.error('Error:', result.error);
+}
+```
+
+## Transactions
+
+Use transactions for atomic operations:
+
+```javascript
+await db.getConnection().beginTransaction();
+try {
+  // Multiple operations
+  await db.getOperations().setMeta('key1', 'value1');
+  await db.getOperations().setMeta('key2', 'value2');
+  await db.getConnection().commit();
+} catch (error) {
+  await db.getConnection().rollback();
+  throw error;
+}
+```
+
+## Performance Considerations
+
+- Use indexes for better query performance
+- Batch operations when possible
+- Use transactions for multiple related operations (sketched below)
+- Clean up orphaned data regularly
+- Consider database size limits for very large file sets
+
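+The batching and transaction advice combines naturally. A minimal sketch using the API shown above (the batch size of 100 is an assumption, matching the `batch_size` default used elsewhere in the package):
+
+```javascript
+const { createDatabaseManager } = require('./database');
+
+// Insert copies in batches, one transaction per batch, so a failure
+// rolls back only the current batch rather than the whole run.
+async function addCopiesInBatches(db, files, batchSize = 100) {
+  for (let i = 0; i < files.length; i += batchSize) {
+    const batch = files.slice(i, i + batchSize);
+    await db.getConnection().beginTransaction();
+    try {
+      for (const file of batch) {
+        await db.getOperations().addCopy(file);
+      }
+      await db.getConnection().commit();
+    } catch (error) {
+      await db.getConnection().rollback();
+      throw error;
+    }
+  }
+}
+```
+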
+## Dependencies
+
+- `sqlite3` - SQLite database driver
+- `fs` - File system operations
+- `path` - Path utilities