@arela/uploader 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,516 @@
1
+ import cliProgress from 'cli-progress';
2
+ import { globbyStream } from 'globby';
3
+ import path from 'path';
4
+ import { Transform } from 'stream';
5
+ import { pipeline } from 'stream/promises';
6
+
7
+ import logger from '../services/LoggingService.js';
8
+
9
+ import appConfig from '../config/config.js';
10
+ import ErrorHandler from '../errors/ErrorHandler.js';
11
+ import { ConfigurationError } from '../errors/ErrorTypes.js';
12
+ import FileOperations from '../utils/FileOperations.js';
13
+
14
/**
 * Scan Command Handler
 * Handles the optimized arela scan command with streaming support.
 *
 * Flow of a scan (see execute): discover the directories to scan, register a
 * scan instance (table) for each of them, stream every file below each
 * directory to the scan API in batches, then mark each table as complete.
 */
export class ScanCommand {
  /** Compiled exclude-pattern regexes keyed by the raw pattern value. */
  #excludeRegexCache = new Map();

  /** Scan API client supplied by the caller, or null to create one per run. */
  #injectedScanApiService = null;

  /**
   * @param {Object} [deps] - Optional collaborators
   * @param {Object} [deps.scanApiService] - Pre-built scan API client exposing
   *   registerInstance, batchInsertStats and completeScan. When omitted,
   *   ScanApiService is imported and instantiated on every execute() call.
   */
  constructor(deps = {}) {
    this.errorHandler = new ErrorHandler(logger);
    this.#injectedScanApiService = deps?.scanApiService ?? null;
    this.scanApiService = this.#injectedScanApiService; // Otherwise initialized in execute
  }

  /**
   * Execute the scan command
   *
   * Never rejects: any failure is passed to the error handler and reported
   * through the returned object.
   *
   * @param {Object} options - Command options
   * @param {boolean} options.countFirst - Count files first and log the total
   * @returns {Promise<Object>} `{ success: true, tables, stats }` on success,
   *   `{ success: false, error, stats }` (zeroed stats) on failure
   */
  async execute(options = {}) {
    const startTime = Date.now();

    try {
      // Validate scan configuration
      appConfig.validateScanConfig();

      // Use the injected client when there is one, otherwise import
      // ScanApiService dynamically and build a fresh instance for this run
      this.scanApiService =
        this.#injectedScanApiService ?? (await this.#createScanApiService());

      const scanConfig = appConfig.getScanConfig();
      const basePath = appConfig.getBasePath();

      logger.info('šŸ” Starting arela scan command');
      logger.info(`šŸ“¦ Company: ${scanConfig.companySlug}`);
      logger.info(`šŸ–„ļø Server: ${scanConfig.serverId}`);
      logger.info(`šŸ“‚ Base Path: ${basePath}`);
      logger.info(`šŸ·ļø Label: ${scanConfig.basePathLabel}`);
      logger.info(`šŸ“Š Directory Level: ${scanConfig.directoryLevel}`);

      // Step 1: Discover directories at specified level
      logger.info('\nšŸ” Discovering directories...');
      const directories = await this.#discoverDirectories(
        basePath,
        scanConfig.directoryLevel,
      );
      logger.info(
        `šŸ“ Found ${directories.length} director${directories.length === 1 ? 'y' : 'ies'} to scan`,
      );

      // Step 2: Register instances for each directory
      logger.info('\nšŸ“ Registering scan instances...');
      const registrations = [];
      for (const dir of directories) {
        const dirLabel = dir.label
          ? `${scanConfig.basePathLabel}_${this.#toLabel(dir.label)}`
          : scanConfig.basePathLabel;

        const registration = await this.scanApiService.registerInstance({
          companySlug: scanConfig.companySlug,
          serverId: scanConfig.serverId,
          basePathLabel: dirLabel,
          basePathFull: dir.path,
        });

        registrations.push({ ...registration, directory: dir });

        if (registration.existed) {
          logger.info(` āœ“ ${dir.label || 'root'}: ${registration.tableName}`);
        } else {
          logger.success(
            ` āœ“ ${dir.label || 'root'}: ${registration.tableName} (new)`,
          );
        }
      }

      // Optional: Count files first. The total is only logged; the per-directory
      // scans below always run in throughput mode.
      // NOTE(review): the count walks basePath + sources, which can differ from
      // the discovered directories when directoryLevel > 0 — confirm intent.
      if (options?.countFirst) {
        logger.info('\nšŸ”¢ Counting files...');
        const totalFiles = await this.#countFiles(basePath, scanConfig);
        logger.info(`šŸ“Š Found ${totalFiles.toLocaleString()} files to scan`);
      }

      // Step 3: Stream files and upload stats for each directory
      logger.info('\nšŸš€ Starting file scan...');
      const totalStats = {
        filesScanned: 0,
        filesInserted: 0,
        filesSkipped: 0,
        totalSize: 0,
      };

      for (const reg of registrations) {
        logger.info(`\nšŸ“‚ Scanning: ${reg.directory.label || 'root'}`);
        const stats = await this.#streamScanDirectory(
          reg.directory.path,
          scanConfig,
          reg.tableName,
          null, // Don't use percentage for individual directories
        );

        // Step 4: Complete scan for this directory
        await this.scanApiService.completeScan({
          tableName: reg.tableName,
          totalFiles: stats.filesScanned,
          totalSizeBytes: stats.totalSize,
        });

        totalStats.filesScanned += stats.filesScanned;
        totalStats.filesInserted += stats.filesInserted;
        totalStats.filesSkipped += stats.filesSkipped;
        totalStats.totalSize += stats.totalSize;
      }

      // Keep the numeric duration for the rate: dividing by the formatted
      // string would yield Infinity/NaN for runs that round to "0.00"
      const durationSeconds = (Date.now() - startTime) / 1000;
      const duration = durationSeconds.toFixed(2);
      const filesPerSec = this.#formatRate(
        totalStats.filesScanned,
        durationSeconds,
        2,
      );

      logger.success('\nāœ… Scan completed successfully!');
      logger.info(`\nšŸ“Š Scan Statistics:`);
      logger.info(` Directories scanned: ${registrations.length}`);
      logger.info(
        ` Files scanned: ${totalStats.filesScanned.toLocaleString()}`,
      );
      logger.info(
        ` Files inserted: ${totalStats.filesInserted.toLocaleString()}`,
      );
      logger.info(
        ` Files skipped: ${totalStats.filesSkipped.toLocaleString()} (excluded patterns)`,
      );
      logger.info(` Total size: ${this.#formatBytes(totalStats.totalSize)}`);
      logger.info(` Duration: ${duration}s`);
      logger.info(` Throughput: ${filesPerSec} files/sec`);
      logger.info(`\nšŸ“‹ Tables created:`);
      for (const reg of registrations) {
        logger.info(` - ${reg.tableName}`);
      }

      return {
        success: true,
        tables: registrations.map((r) => r.tableName),
        stats: totalStats,
      };
    } catch (error) {
      this.errorHandler.handleError(error, 'scan');
      return {
        success: false,
        error: error.message,
        stats: {
          filesScanned: 0,
          filesInserted: 0,
          filesSkipped: 0,
          totalSize: 0,
        },
      };
    }
  }

  /**
   * Import ScanApiService on demand and build a new instance
   * @private
   */
  async #createScanApiService() {
    const { default: ScanApiService } = await import(
      '../services/ScanApiService.js'
    );
    return new ScanApiService();
  }

  /**
   * Replace every character outside [a-zA-Z0-9_-] with an underscore
   * @private
   */
  #toLabel(value) {
    return value.replace(/[^a-zA-Z0-9_-]/g, '_');
  }

  /**
   * Format `count / seconds` with a fixed number of decimals, reporting zero
   * while no measurable time has elapsed instead of Infinity/NaN
   * @private
   */
  #formatRate(count, seconds, digits) {
    const rate = seconds > 0 ? count / seconds : 0;
    return rate.toFixed(digits);
  }

  /**
   * Discover directories at specified level
   *
   * Level 0 yields one `{ path, label }` entry per configured source. For any
   * other level, the directories found at that depth below basePath are
   * combined with every source; combinations that are not existing
   * directories are skipped. A discovery failure is logged as a warning and
   * whatever was collected so far is returned.
   * @private
   */
  async #discoverDirectories(basePath, level) {
    // Get sources, defaults to ['.'] if not configured
    const sources = appConfig.getUploadSources();

    if (level === 0) {
      // Level 0: Create one entry per source
      return sources.map((source) =>
        source === '.'
          ? { path: basePath, label: '' }
          : { path: path.resolve(basePath, source), label: this.#toLabel(source) },
      );
    }

    // For level > 0: First discover directories at the base path, then combine with sources
    const directories = [];

    try {
      const fs = await import('fs/promises');

      // Step 1: Discover directories at the specified level from base path
      const levelDirs = await this.#getDirectoriesAtLevel(basePath, level, '');

      // Step 2: For each discovered directory, create entries for each source
      for (const levelDir of levelDirs) {
        for (const source of sources) {
          if (source === '.') {
            // Source is current directory, use discovered path as-is
            directories.push(levelDir);
            continue;
          }

          // Append source to both path and label
          const combinedPath = path.resolve(levelDir.path, source);
          const sourceLabel = this.#toLabel(source);
          const combinedLabel = levelDir.label
            ? `${levelDir.label}_${sourceLabel}`
            : sourceLabel;

          // Only add if the combined path actually exists
          try {
            const stats = await fs.stat(combinedPath);
            if (stats.isDirectory()) {
              directories.push({ path: combinedPath, label: combinedLabel });
            } else {
              logger.debug(`ā­ļø Skipping ${combinedPath} (not a directory)`);
            }
          } catch {
            logger.debug(`ā­ļø Skipping ${combinedPath} (does not exist)`);
          }
        }
      }
    } catch (error) {
      logger.warn(`āš ļø Could not discover directories: ${error.message}`);
    }

    return directories;
  }

  /**
   * Recursively get directories at specified level
   *
   * Walks down from basePath/currentPath one directory level per call and
   * returns `{ path, label }` for every directory sitting exactly at
   * targetLevel; the label is the relative path with forward slashes.
   * @private
   */
  async #getDirectoriesAtLevel(
    basePath,
    targetLevel,
    currentPath,
    currentLevel = 0,
  ) {
    const fs = await import('fs/promises');
    const fullPath = path.join(basePath, currentPath);

    if (currentLevel === targetLevel) {
      const label = currentPath.replace(/\\/g, '/').replace(/^\//g, '');
      return [{ path: fullPath, label: label || 'root' }];
    }

    const directories = [];
    const entries = await fs.readdir(fullPath, { withFileTypes: true });

    for (const entry of entries) {
      if (!entry.isDirectory()) {
        continue;
      }

      const subDirs = await this.#getDirectoriesAtLevel(
        basePath,
        targetLevel,
        path.join(currentPath, entry.name),
        currentLevel + 1,
      );
      directories.push(...subDirs);
    }

    return directories;
  }

  /**
   * Count the non-excluded files below every configured source
   * @private
   */
  async #countFiles(basePath, scanConfig) {
    const sources = appConfig.getUploadSources();
    let totalCount = 0;

    for (const source of sources) {
      // globbyStream hands back a stream to iterate, so there is nothing to await here
      const files = globbyStream('**/*', {
        cwd: path.resolve(basePath, source),
        onlyFiles: true,
        absolute: true,
      });

      for await (const file of files) {
        if (!this.#shouldExcludeFile(file, scanConfig.excludePatterns)) {
          totalCount++;
        }
      }
    }

    return totalCount;
  }

  /**
   * Stream files from a single directory and upload stats in batches
   *
   * An error raised while streaming is logged and ends the walk early; the
   * records gathered up to that point are still uploaded.
   *
   * @returns {Promise<Object>} `{ filesScanned, filesInserted, filesSkipped, totalSize }`
   * @private
   */
  async #streamScanDirectory(
    dirPath,
    scanConfig,
    tableName,
    totalFiles = null,
  ) {
    // For directory-level scanning, we scan the directory directly
    const batchSize = scanConfig.batchSize || 2000;
    const scanTimestamp = new Date().toISOString();

    let filesScanned = 0;
    let filesInserted = 0;
    let filesSkipped = 0;
    let totalSize = 0;
    let currentBatch = [];

    // Create progress bar
    const progressBar = this.#createProgressBar(totalFiles);

    try {
      try {
        // Create stream with stats option
        const fileStream = globbyStream('**/*', {
          cwd: dirPath,
          onlyFiles: true,
          absolute: true,
          stats: true, // Get file stats during discovery
        });

        // Process each file from stream
        for await (const entry of fileStream) {
          // globby with stats:true returns {path, stats} objects
          const filePath = typeof entry === 'string' ? entry : entry.path;
          const stats = typeof entry === 'object' ? entry.stats : null;

          // Check if file should be excluded
          if (this.#shouldExcludeFile(filePath, scanConfig.excludePatterns)) {
            filesSkipped++;
            continue;
          }

          // Get file stats (use from globby or fetch manually)
          const fileStats = stats || FileOperations.getFileStats(filePath);
          if (!fileStats) {
            logger.debug(`āš ļø Could not read stats: ${filePath}`);
            filesSkipped++;
            continue;
          }

          // Normalize file record
          const record = this.#normalizeFileRecord(
            filePath,
            fileStats,
            dirPath,
            scanTimestamp,
          );

          currentBatch.push(record);
          filesScanned++;
          totalSize += record.sizeBytes;

          // Update progress; both bar formats display {rate}, so always send it
          const elapsed = (Date.now() - progressBar.startTime) / 1000;
          progressBar.update(filesScanned, {
            rate: this.#formatRate(filesScanned, elapsed, 1),
          });

          // Upload batch when full
          if (currentBatch.length >= batchSize) {
            const inserted = await this.#uploadBatch(tableName, currentBatch);
            filesInserted += inserted;
            currentBatch = [];
          }
        }
      } catch (error) {
        logger.error(`āŒ Error scanning directory: ${error.message}`);
      }

      // Upload remaining files
      if (currentBatch.length > 0) {
        const inserted = await this.#uploadBatch(tableName, currentBatch);
        filesInserted += inserted;
      }
    } finally {
      // Always stop the bar, whatever happened above
      progressBar.stop();
    }

    return {
      filesScanned,
      filesInserted,
      filesSkipped,
      totalSize,
    };
  }

  /**
   * Upload a batch of file records
   *
   * @returns {Promise<number>} Inserted count reported by the API; 0 when the
   *   upload fails (the failure is logged) or the response carries no count
   * @private
   */
  async #uploadBatch(tableName, records) {
    try {
      const result = await this.scanApiService.batchInsertStats(
        tableName,
        records,
      );
      // A missing count must not turn the running totals into NaN
      return result?.inserted ?? 0;
    } catch (error) {
      logger.error(`āŒ Failed to upload batch: ${error.message}`);
      return 0;
    }
  }

  /**
   * Normalize file record for database insertion
   *
   * The extension is lower-cased without its leading dot; relativePath is
   * relative to basePath.
   * @private
   */
  #normalizeFileRecord(filePath, fileStats, basePath, scanTimestamp) {
    return {
      fileName: path.basename(filePath),
      fileExtension: path.extname(filePath).toLowerCase().replace('.', ''),
      directoryPath: path.dirname(filePath),
      relativePath: path.relative(basePath, filePath),
      absolutePath: filePath,
      sizeBytes: Number(fileStats.size),
      modifiedAt: fileStats.mtime.toISOString(),
      scanTimestamp,
    };
  }

  /**
   * Check if file should be excluded based on patterns
   *
   * Patterns are matched case-insensitively against the file name only (not
   * the directory part). A missing pattern list excludes nothing.
   * @private
   */
  #shouldExcludeFile(filePath, excludePatterns = []) {
    const fileName = path.basename(filePath);

    for (const pattern of excludePatterns ?? []) {
      if (this.#getExcludeRegex(pattern).test(fileName)) {
        return true;
      }
    }

    return false;
  }

  /**
   * Return the compiled regex for an exclude pattern, compiling it once
   *
   * A pattern that cannot be compiled is reported once and then never
   * matches, so a single bad pattern cannot abort a directory scan.
   * @private
   */
  #getExcludeRegex(pattern) {
    let regex = this.#excludeRegexCache.get(pattern);

    if (!regex) {
      try {
        regex = this.#compileExcludePattern(pattern);
      } catch (error) {
        logger.warn(
          `Ignoring invalid exclude pattern "${pattern}": ${error.message}`,
        );
        regex = /(?!)/; // Matches nothing
      }
      this.#excludeRegexCache.set(pattern, regex);
    }

    return regex;
  }

  /**
   * Convert glob pattern to regex
   *
   * `*` matches any run of characters, `?` a single character, and a
   * `[...]` class (optionally negated with a leading `!` or `^`) one
   * character from the set. Every other character is matched literally, so
   * names containing `$`, `+`, `(`, `)` and similar are safe in patterns.
   * @private
   */
  #compileExcludePattern(pattern) {
    const glob = String(pattern);
    let source = '';

    for (let i = 0; i < glob.length; i++) {
      const ch = glob[i];

      if (ch === '*') {
        source += '.*';
      } else if (ch === '?') {
        source += '.';
      } else if (ch === '[') {
        const end = glob.indexOf(']', i + 1);
        const negated = glob[i + 1] === '!' || glob[i + 1] === '^';
        const members =
          end === -1 ? '' : glob.slice(i + (negated ? 2 : 1), end);

        if (members.length === 0) {
          // Unterminated or empty class: treat the bracket as a plain character
          source += '\\[';
        } else {
          source += `[${negated ? '^' : ''}${members.replace(/[\\^]/g, '\\$&')}]`;
          i = end;
        }
      } else {
        source += ch.replace(/[.+^${}()|\\\]]/g, '\\$&');
      }
    }

    return new RegExp(`^${source}$`, 'i');
  }

  /**
   * Create progress bar
   *
   * With a truthy totalFiles the bar shows a percentage; otherwise it shows a
   * running file count. The returned bar carries a `startTime` timestamp that
   * the caller uses to compute the files/sec rate.
   * @private
   */
  #createProgressBar(totalFiles) {
    const bar = totalFiles
      ? // Percentage-based progress
        new cliProgress.SingleBar(
          {
            format:
              'šŸ“Š Scanning |{bar}| {percentage}% | {value}/{total} files | {rate} files/sec',
            barCompleteChar: '\u2588',
            barIncompleteChar: '\u2591',
            hideCursor: true,
          },
          cliProgress.Presets.shades_classic,
        )
      : // Throughput-based progress
        new cliProgress.SingleBar(
          {
            format: 'šŸ“Š Scanning | {value} files | {rate} files/sec',
            hideCursor: true,
            clearOnComplete: false,
            stopOnComplete: false,
          },
          cliProgress.Presets.legacy,
        );

    bar.start(totalFiles || 0, 0, { rate: '0.0' });
    bar.startTime = Date.now();
    return bar;
  }

  /**
   * Format bytes to human-readable size
   *
   * Zero, negative and non-finite inputs are reported as '0 Bytes'; the unit
   * is clamped to the known range so it is never printed as "undefined".
   * @private
   */
  #formatBytes(bytes) {
    if (!Number.isFinite(bytes) || bytes <= 0) return '0 Bytes';

    const k = 1024;
    const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB', 'PB'];
    const exponent = Math.floor(Math.log(bytes) / Math.log(k));
    const i = Math.min(sizes.length - 1, Math.max(0, exponent));

    return `${parseFloat((bytes / Math.pow(k, i)).toFixed(2))} ${sizes[i]}`;
  }
}
515
+
516
// Shared ScanCommand instance exposed as this module's default export.
const scanCommand = new ScanCommand();

export default scanCommand;