@arela/uploader 0.0.10 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/package.json +2 -3
  2. package/src/index.js +575 -217
  3. package/upload.log +1323 -158
package/src/index.js CHANGED
@@ -7,7 +7,6 @@ import fs from 'fs';
7
7
  import { globby } from 'globby';
8
8
  import mime from 'mime-types';
9
9
  import { createRequire } from 'module';
10
- import ora from 'ora';
11
10
  import path from 'path';
12
11
 
13
12
  const require = createRequire(import.meta.url);
@@ -27,60 +26,236 @@ const sources = process.env.UPLOAD_SOURCES?.split('|')
27
26
 
28
27
  const supabase = createClient(supabaseUrl, supabaseKey);
29
28
 
30
- // Enhanced sanitization function specifically for file names
29
+ // Pre-compiled regex patterns for better performance
30
+ const SANITIZATION_PATTERNS = [
31
+ // Character replacements (grouped for efficiency)
32
+ [/[áàâäãåāăą]/gi, 'a'],
33
+ [/[éèêëēĕėę]/gi, 'e'],
34
+ [/[íìîïīĭį]/gi, 'i'],
35
+ [/[óòôöõōŏő]/gi, 'o'],
36
+ [/[úùûüūŭů]/gi, 'u'],
37
+ [/[ñň]/gi, 'n'],
38
+ [/[ç]/gi, 'c'],
39
+ [/[ý]/gi, 'y'],
40
+ // Korean characters (compiled once)
41
+ [/[멕]/g, 'meok'],
42
+ [/[시]/g, 'si'],
43
+ [/[코]/g, 'ko'],
44
+ [/[용]/g, 'yong'],
45
+ [/[가-힣]/g, 'kr'],
46
+ // Unicode diacritics (after normalize)
47
+ [/[\u0300-\u036f]/g, ''],
48
+ // Problematic symbols
49
+ [/[\\?%*:|"<>[\]~`^]/g, '-'],
50
+ [/[{}]/g, '-'],
51
+ [/[&]/g, 'and'],
52
+ [/[()]/g, ''], // Remove parentheses
53
+ // Cleanup patterns
54
+ [/\s+/g, '-'], // Replace spaces with dashes
55
+ [/-+/g, '-'], // Replace multiple dashes with single dash
56
+ [/^-+|-+$/g, ''], // Remove leading/trailing dashes
57
+ [/^\.+/, ''], // Remove leading dots
58
+ [/[^\w.-]/g, ''], // Remove any remaining non-alphanumeric chars
59
+ ];
60
+
61
+ // Cache for sanitized filenames to avoid repeated processing
62
+ const sanitizationCache = new Map();
63
+
64
+ // Enhanced sanitization function with caching and pre-compiled regex
31
65
  const sanitizeFileName = (fileName) => {
66
+ // Check cache first
67
+ if (sanitizationCache.has(fileName)) {
68
+ return sanitizationCache.get(fileName);
69
+ }
70
+
32
71
  // Get file extension
33
72
  const ext = path.extname(fileName);
34
73
  const nameWithoutExt = path.basename(fileName, ext);
35
74
 
36
- // Replace problematic characters with safe alternatives
37
- let sanitized = nameWithoutExt
38
- // First, handle common character replacements
39
- .replace(/[áàâäãåāăą]/gi, 'a')
40
- .replace(/[éèêëēĕėę]/gi, 'e')
41
- .replace(/[íìîïīĭį]/gi, 'i')
42
- .replace(/[óòôöõōŏő]/gi, 'o')
43
- .replace(/[úùûüūŭů]/gi, 'u')
44
- .replace(/[ñň]/gi, 'n')
45
- .replace(/[ç]/gi, 'c')
46
- .replace(/[ý]/gi, 'y')
47
- // Handle Korean characters more comprehensively
48
- .replace(/[멕]/g, 'meok')
49
- .replace(/[시]/g, 'si')
50
- .replace(/[코]/g, 'ko')
51
- .replace(/[용]/g, 'yong')
52
- .replace(/[가-힣]/g, 'kr') // Replace any remaining Korean chars with 'kr'
53
- // Normalize unicode and remove diacritics
54
- .normalize('NFD')
55
- .replace(/[\u0300-\u036f]/g, '') // Remove diacritics
56
- // Replace problematic symbols
57
- .replace(/[\\?%*:|"<>[\]~`^]/g, '-')
58
- .replace(/[{}]/g, '-')
59
- .replace(/[&]/g, 'and')
60
- .replace(/[()]/g, '') // Remove parentheses
61
- // Clean up spaces and dashes
62
- .replace(/\s+/g, '-') // Replace spaces with dashes
63
- .replace(/-+/g, '-') // Replace multiple dashes with single dash
64
- .replace(/^-+|-+$/g, '') // Remove leading/trailing dashes
65
- .replace(/^\.+/, '') // Remove leading dots
66
- .replace(/[^\w.-]/g, ''); // Remove any remaining non-alphanumeric chars except dots and dashes
75
+ // Fast path for already clean filenames
76
+ if (/^[a-zA-Z0-9._-]+$/.test(nameWithoutExt)) {
77
+ const result = fileName;
78
+ sanitizationCache.set(fileName, result);
79
+ return result;
80
+ }
81
+
82
+ // Normalize unicode first (more efficient to do once)
83
+ let sanitized = nameWithoutExt.normalize('NFD');
84
+
85
+ // Apply all sanitization patterns
86
+ for (const [pattern, replacement] of SANITIZATION_PATTERNS) {
87
+ sanitized = sanitized.replace(pattern, replacement);
88
+ }
67
89
 
68
90
  // Ensure the filename is not empty
69
91
  if (!sanitized) {
70
92
  sanitized = 'unnamed_file';
71
93
  }
72
94
 
73
- return sanitized + ext;
95
+ const result = sanitized + ext;
96
+
97
+ // Cache the result for future use
98
+ sanitizationCache.set(fileName, result);
99
+
100
+ return result;
101
+ };
102
+
103
+ // Pre-compiled regex patterns for path sanitization
104
+ const PATH_SANITIZATION_PATTERNS = [
105
+ [/[\\?%*:|"<>[\]~]/g, '-'],
106
+ [/ +/g, ' '],
107
+ [/^\.+/, ''],
108
+ [/\/+/g, '/'],
109
+ ];
110
+
111
+ // Cache for sanitized paths
112
+ const pathSanitizationCache = new Map();
113
+
114
+ // Batch logging system for performance
115
+ class LogBatcher {
116
+ constructor(batchSize = 50, flushInterval = 5000) {
117
+ this.batch = [];
118
+ this.batchSize = batchSize;
119
+ this.flushInterval = flushInterval;
120
+ this.lastFlush = Date.now();
121
+ this.flushTimer = null;
122
+ }
123
+
124
+ add(logEntry) {
125
+ this.batch.push({
126
+ filename: path.basename(logEntry.file),
127
+ path: logEntry.uploadPath,
128
+ status: logEntry.status,
129
+ message: logEntry.message,
130
+ });
131
+
132
+ // Auto-flush if batch is full or enough time has passed
133
+ if (
134
+ this.batch.length >= this.batchSize ||
135
+ Date.now() - this.lastFlush > this.flushInterval
136
+ ) {
137
+ this.flush();
138
+ }
139
+ }
140
+
141
+ async flush() {
142
+ if (this.batch.length === 0) return;
143
+
144
+ const logsToSend = [...this.batch];
145
+ this.batch = [];
146
+ this.lastFlush = Date.now();
147
+
148
+ // Clear any pending timer
149
+ if (this.flushTimer) {
150
+ clearTimeout(this.flushTimer);
151
+ this.flushTimer = null;
152
+ }
153
+
154
+ try {
155
+ const { error } = await supabase.from('upload_logs').insert(logsToSend);
156
+ if (error) {
157
+ console.error(
158
+ `⚠️ Error saving batch of ${logsToSend.length} logs to Supabase: ${error.message}`,
159
+ );
160
+ // Re-add failed logs to batch for retry (optional)
161
+ this.batch.unshift(...logsToSend);
162
+ } else {
163
+ // Only show verbose output if requested
164
+ if (process.env.LOG_BATCH_VERBOSE === 'true') {
165
+ console.log(`📊 Flushed ${logsToSend.length} logs to Supabase`);
166
+ }
167
+ }
168
+ } catch (err) {
169
+ console.error(`⚠️ Error during batch flush: ${err.message}`);
170
+ // Re-add failed logs to batch for retry (optional)
171
+ this.batch.unshift(...logsToSend);
172
+ }
173
+ }
174
+
175
+ // Schedule auto-flush if not already scheduled
176
+ scheduleFlush() {
177
+ if (!this.flushTimer && this.batch.length > 0) {
178
+ this.flushTimer = setTimeout(() => {
179
+ this.flush();
180
+ }, this.flushInterval);
181
+ }
182
+ }
183
+
184
+ // Force flush all pending logs (called at end of process)
185
+ async forceFlush() {
186
+ if (this.flushTimer) {
187
+ clearTimeout(this.flushTimer);
188
+ this.flushTimer = null;
189
+ }
190
+ await this.flush();
191
+ }
192
+ }
193
+
194
+ // Global log batcher instance
195
+ const logBatcher = new LogBatcher();
196
+
197
+ // Function to manage cache size (prevent memory issues in long sessions)
198
+ const manageCaches = () => {
199
+ const MAX_CACHE_SIZE = 1000;
200
+
201
+ if (sanitizationCache.size > MAX_CACHE_SIZE) {
202
+ // Keep only the most recent 500 entries
203
+ const entries = Array.from(sanitizationCache.entries());
204
+ sanitizationCache.clear();
205
+ entries.slice(-500).forEach(([key, value]) => {
206
+ sanitizationCache.set(key, value);
207
+ });
208
+ }
209
+
210
+ if (pathSanitizationCache.size > MAX_CACHE_SIZE) {
211
+ const entries = Array.from(pathSanitizationCache.entries());
212
+ pathSanitizationCache.clear();
213
+ entries.slice(-500).forEach(([key, value]) => {
214
+ pathSanitizationCache.set(key, value);
215
+ });
216
+ }
74
217
  };
75
218
 
76
- const sanitizePath = (path) =>
77
- path
78
- .replace(/[\\?%*:|"<>[\]~]/g, '-')
79
- .replace(/ +/g, ' ')
80
- .replace(/^\.+/, '')
81
- .replace(/\/+/g, '/');
219
+ const sanitizePath = (inputPath) => {
220
+ // Check cache first
221
+ if (pathSanitizationCache.has(inputPath)) {
222
+ return pathSanitizationCache.get(inputPath);
223
+ }
224
+
225
+ // Fast path for already clean paths
226
+ if (!/[\\?%*:|"<>[\]~]|^ +|^\.+|\/\/+/.test(inputPath)) {
227
+ pathSanitizationCache.set(inputPath, inputPath);
228
+ return inputPath;
229
+ }
230
+
231
+ let sanitized = inputPath;
232
+
233
+ // Apply path sanitization patterns
234
+ for (const [pattern, replacement] of PATH_SANITIZATION_PATTERNS) {
235
+ sanitized = sanitized.replace(pattern, replacement);
236
+ }
237
+
238
+ // Cache the result
239
+ pathSanitizationCache.set(inputPath, sanitized);
240
+
241
+ return sanitized;
242
+ };
82
243
 
83
244
  const sendLogToSupabase = async ({ file, uploadPath, status, message }) => {
245
+ // Add to batch instead of sending immediately
246
+ logBatcher.add({ file, uploadPath, status, message });
247
+
248
+ // Schedule auto-flush if needed
249
+ logBatcher.scheduleFlush();
250
+ };
251
+
252
+ // Enhanced version for immediate sending (used for critical errors)
253
+ const sendLogToSupabaseImmediate = async ({
254
+ file,
255
+ uploadPath,
256
+ status,
257
+ message,
258
+ }) => {
84
259
  const { error } = await supabase.from('upload_logs').insert([
85
260
  {
86
261
  filename: path.basename(file),
@@ -91,7 +266,9 @@ const sendLogToSupabase = async ({ file, uploadPath, status, message }) => {
91
266
  ]);
92
267
 
93
268
  if (error) {
94
- console.error(`⚠️ Error saving the log to Supabase: ${error.message}`);
269
+ console.error(
270
+ `⚠️ Error saving immediate log to Supabase: ${error.message}`,
271
+ );
95
272
  }
96
273
  };
97
274
 
@@ -101,7 +278,7 @@ const checkCredentials = async () => {
101
278
  '⚠️ Missing Supabase credentials. Please set SUPABASE_URL, SUPABASE_KEY, and SUPABASE_BUCKET in your environment variables.',
102
279
  );
103
280
  writeLog('⚠️ Missing Supabase credentials.');
104
- await sendLogToSupabase({
281
+ await sendLogToSupabaseImmediate({
105
282
  file: 'Error',
106
283
  uploadPath: 'Error',
107
284
  status: 'error',
@@ -115,7 +292,7 @@ const checkCredentials = async () => {
115
292
  if (error) {
116
293
  console.error('⚠️ Error connecting to Supabase:', error.message);
117
294
  writeLog(`⚠️ Error connecting to Supabase: ${error.message}`);
118
- await sendLogToSupabase({
295
+ await sendLogToSupabaseImmediate({
119
296
  file: 'Error',
120
297
  uploadPath: 'Error',
121
298
  status: 'error',
@@ -126,7 +303,7 @@ const checkCredentials = async () => {
126
303
  } catch (err) {
127
304
  console.error('⚠️ Error:', err.message);
128
305
  writeLog(`⚠️ Error: ${err.message}`);
129
- await sendLogToSupabase({
306
+ await sendLogToSupabaseImmediate({
130
307
  file: 'Error',
131
308
  uploadPath: 'Error',
132
309
  status: 'error',
@@ -147,7 +324,7 @@ const fileExistsInBucket = async (pathInBucket) => {
147
324
  if (error) {
148
325
  console.error(`⚠️ Could not verify duplicate: ${error.message}`);
149
326
  writeLog(`⚠️ Could not verify duplicate: ${error.message}`);
150
- await sendLogToSupabase({
327
+ await sendLogToSupabaseImmediate({
151
328
  file: 'Error',
152
329
  uploadPath: 'Error',
153
330
  status: 'error',
@@ -164,7 +341,7 @@ const writeLog = (message) => {
164
341
  const timestamp = new Date().toISOString();
165
342
  fs.appendFileSync(logFilePath, `[${timestamp}] ${message}\n`);
166
343
  } catch (error) {
167
- console.error(`❌ Error writing to log file: ${error.message}`);
344
+ console.error(`❌ Error writing to log file: ${error.code} | ${error.message} | path: ${logFilePath}`);
168
345
  }
169
346
  };
170
347
 
@@ -277,6 +454,279 @@ const uploadWithRetry = async (uploadFn, maxRetries = 5, delayMs = 2000) => {
277
454
  };
278
455
  };
279
456
 
457
+ // Function to process a single file
458
+ const processFile = async (
459
+ file,
460
+ options,
461
+ basePath,
462
+ folder,
463
+ sourcePath,
464
+ processedPaths,
465
+ ) => {
466
+ let currentFile = file;
467
+ let result = {
468
+ success: false,
469
+ skipped: false,
470
+ error: null,
471
+ message: '',
472
+ };
473
+
474
+ try {
475
+ // Check if we need to rename the file
476
+ if (options.renameFiles) {
477
+ const originalName = path.basename(file);
478
+ const sanitizedName = sanitizeFileName(originalName);
479
+
480
+ if (originalName !== sanitizedName) {
481
+ const newFilePath = path.join(path.dirname(file), sanitizedName);
482
+
483
+ if (options.dryRun) {
484
+ result.message = `Would rename: ${originalName} → ${sanitizedName}`;
485
+ result.skipped = true;
486
+ return result;
487
+ } else {
488
+ try {
489
+ fs.renameSync(file, newFilePath);
490
+ currentFile = newFilePath;
491
+ writeLog(`RENAMED: ${originalName} → ${sanitizedName}`);
492
+ await sendLogToSupabase({
493
+ file: originalName,
494
+ uploadPath: sanitizedName,
495
+ status: 'renamed',
496
+ message: `Renamed from ${originalName}`,
497
+ });
498
+ } catch (renameError) {
499
+ result.error = `Failed to rename ${originalName}: ${renameError.message}`;
500
+ writeLog(`RENAME_ERROR: ${originalName} | ${renameError.message}`);
501
+ return result;
502
+ }
503
+ }
504
+ }
505
+ }
506
+
507
+ const content = fs.readFileSync(currentFile);
508
+ const relativePathRaw = path
509
+ .relative(basePath, currentFile)
510
+ .replace(/^[\\/]+/, '')
511
+ .replace(/\\/g, '/');
512
+
513
+ // Always sanitize the filename for upload path
514
+ const pathParts = relativePathRaw.split('/');
515
+ const originalFileName = pathParts[pathParts.length - 1];
516
+ const sanitizedFileName = sanitizeFileName(originalFileName);
517
+ pathParts[pathParts.length - 1] = sanitizedFileName;
518
+ const sanitizedRelativePath = pathParts.join('/');
519
+
520
+ const uploadPathRaw = options.prefix
521
+ ? path.posix.join(options.prefix, sanitizedRelativePath)
522
+ : sanitizedRelativePath;
523
+ const uploadPath = sanitizePath(uploadPathRaw);
524
+
525
+ if (
526
+ uploadPath !== uploadPathRaw ||
527
+ originalFileName !== sanitizedFileName
528
+ ) {
529
+ writeLog(`SANITIZED: ${relativePathRaw} → ${uploadPath}`);
530
+ await sendLogToSupabase({
531
+ file: currentFile,
532
+ uploadPath: relativePathRaw,
533
+ status: 'sanitized',
534
+ message: `Sanitized to ${uploadPath} (Arela Version: ${version})`,
535
+ });
536
+ }
537
+
538
+ if (processedPaths.has(uploadPath)) {
539
+ result.skipped = true;
540
+ result.message = `Already processed (log): ${currentFile}`;
541
+ return result;
542
+ }
543
+
544
+ const contentType = mime.lookup(currentFile) || 'application/octet-stream';
545
+
546
+ const exists = await fileExistsInBucket(uploadPath);
547
+
548
+ if (exists) {
549
+ result.skipped = true;
550
+ result.message = `Skipped (already exists): ${currentFile}`;
551
+ writeLog(`SKIPPED: ${currentFile} -> ${uploadPath}`);
552
+ await sendLogToSupabase({
553
+ file: currentFile,
554
+ uploadPath,
555
+ status: 'skipped',
556
+ message: 'Already exists in bucket',
557
+ });
558
+ return result;
559
+ }
560
+
561
+ const { error } = await uploadWithRetry(() =>
562
+ supabase.storage.from(bucket).upload(uploadPath, content, {
563
+ upsert: true,
564
+ contentType,
565
+ metadata: {
566
+ originalName: path.basename(currentFile),
567
+ sanitizedName: path.basename(uploadPath),
568
+ clientPath: path.posix.join(
569
+ basePath,
570
+ folder,
571
+ path.relative(sourcePath, currentFile).replace(/\\/g, '/'),
572
+ ),
573
+ arelaVersion: version,
574
+ },
575
+ }),
576
+ );
577
+
578
+ if (error) {
579
+ result.error = error.message || JSON.stringify(error);
580
+ writeLog(`ERROR: ${currentFile} -> ${uploadPath} | ${result.error}`);
581
+ await sendLogToSupabase({
582
+ file: currentFile,
583
+ uploadPath,
584
+ status: 'error',
585
+ message: result.error,
586
+ });
587
+ } else {
588
+ result.success = true;
589
+ result.message = `Uploaded ${currentFile} -> ${uploadPath}`;
590
+ writeLog(`SUCCESS: ${currentFile} -> ${uploadPath}`);
591
+ await sendLogToSupabase({
592
+ file: currentFile,
593
+ uploadPath,
594
+ status: 'success',
595
+ message: 'Uploaded successfully',
596
+ });
597
+ }
598
+ } catch (err) {
599
+ result.error = err.message || JSON.stringify(err);
600
+ writeLog(`ERROR: ${currentFile} | ${result.error}`);
601
+ await sendLogToSupabase({
602
+ file: currentFile,
603
+ uploadPath: currentFile,
604
+ status: 'error',
605
+ message: result.error,
606
+ });
607
+ }
608
+
609
+ return result;
610
+ };
611
+
612
+ // Function to process files in parallel batches
613
+ const processFilesInBatches = async (
614
+ files,
615
+ batchSize,
616
+ options,
617
+ basePath,
618
+ folder,
619
+ sourcePath,
620
+ processedPaths,
621
+ ) => {
622
+ let successCount = 0;
623
+ let failureCount = 0;
624
+ let skippedCount = 0;
625
+
626
+ // Buffer for messages to show after progress bar completes
627
+ const messageBuffer = [];
628
+
629
+ const progressBar = new cliProgress.SingleBar({
630
+ format:
631
+ '📂 Processing [{bar}] {percentage}% | {value}/{total} files | Success: {successCount} | Errors: {failureCount} | Skipped: {skippedCount}',
632
+ barCompleteChar: '█',
633
+ barIncompleteChar: '░',
634
+ hideCursor: true,
635
+ });
636
+
637
+ progressBar.start(files.length, 0, {
638
+ successCount: 0,
639
+ failureCount: 0,
640
+ skippedCount: 0,
641
+ });
642
+
643
+ for (let i = 0; i < files.length; i += batchSize) {
644
+ const batch = files.slice(i, i + batchSize);
645
+
646
+ // Process batch in parallel
647
+ const batchResults = await Promise.all(
648
+ batch.map((file) =>
649
+ processFile(
650
+ file,
651
+ options,
652
+ basePath,
653
+ folder,
654
+ sourcePath,
655
+ processedPaths,
656
+ ),
657
+ ),
658
+ );
659
+
660
+ // Update counters and buffer messages (don't print them yet)
661
+ for (const result of batchResults) {
662
+ if (result.success) {
663
+ successCount++;
664
+ // Only buffer verbose success messages if needed
665
+ if (
666
+ process.env.UPLOAD_VERBOSE === 'true' &&
667
+ result.message &&
668
+ !result.error
669
+ ) {
670
+ messageBuffer.push(`✅ ${result.message}`);
671
+ }
672
+ } else if (result.skipped) {
673
+ skippedCount++;
674
+ // Only buffer verbose skip messages if needed
675
+ if (process.env.UPLOAD_VERBOSE === 'true' && result.message) {
676
+ messageBuffer.push(`⏭️ ${result.message}`);
677
+ }
678
+ } else if (result.error) {
679
+ failureCount++;
680
+ // Always buffer error messages to show later
681
+ messageBuffer.push(`❌ ${result.error}`);
682
+ }
683
+ }
684
+
685
+ progressBar.update(i + batch.length, {
686
+ successCount,
687
+ failureCount,
688
+ skippedCount,
689
+ });
690
+
691
+ // Manage cache size periodically (every 100 files processed)
692
+ if ((i + batch.length) % 100 === 0) {
693
+ manageCaches();
694
+ // Also flush logs every 100 files to maintain responsiveness
695
+ await logBatcher.flush();
696
+ }
697
+
698
+ // Small delay between batches to prevent overwhelming the server
699
+ if (i + batchSize < files.length) {
700
+ await delay(100);
701
+ }
702
+ }
703
+
704
+ // Stop progress bar cleanly before showing any messages
705
+ progressBar.stop();
706
+
707
+ // Now show buffered messages if there are any important ones to show
708
+ const errorMessages = messageBuffer.filter((msg) => msg.startsWith('❌'));
709
+ if (errorMessages.length > 0) {
710
+ console.log('\n🚨 Errors encountered during processing:');
711
+ errorMessages.forEach((msg) => console.error(msg));
712
+ }
713
+
714
+ // Show verbose messages only if requested
715
+ if (process.env.UPLOAD_VERBOSE === 'true') {
716
+ const otherMessages = messageBuffer.filter((msg) => !msg.startsWith('❌'));
717
+ if (otherMessages.length > 0) {
718
+ console.log('\n📝 Detailed processing log:');
719
+ otherMessages.forEach((msg) => console.log(msg));
720
+ }
721
+ }
722
+
723
+ return {
724
+ successCount,
725
+ failureCount,
726
+ skippedCount,
727
+ };
728
+ };
729
+
280
730
  program
281
731
  .name('supabase-uploader')
282
732
  .description('CLI to upload folders from a base path to Supabase Storage')
@@ -290,6 +740,20 @@ program
290
740
  '--dry-run',
291
741
  'Show what files would be renamed without actually renaming them',
292
742
  )
743
+ .option(
744
+ '-c, --concurrency <number>',
745
+ 'Number of files to process concurrently (default: 3)',
746
+ '3',
747
+ )
748
+ .option(
749
+ '--show-cache-stats',
750
+ 'Show cache statistics for performance analysis',
751
+ )
752
+ .option(
753
+ '--batch-size <number>',
754
+ 'Number of logs to batch before sending to Supabase (default: 50)',
755
+ '50',
756
+ )
293
757
  .action(async (options) => {
294
758
  // Handle version option
295
759
  if (options.version) {
@@ -304,6 +768,15 @@ program
304
768
  process.exit(1);
305
769
  }
306
770
 
771
+ const concurrency = parseInt(options.concurrency) || 3;
772
+ const batchSize = parseInt(options.batchSize) || 50;
773
+
774
+ // Configure log batcher with custom batch size
775
+ logBatcher.batchSize = batchSize;
776
+
777
+ console.log(`🚀 Using concurrency level: ${concurrency}`);
778
+ console.log(`📦 Using log batch size: ${batchSize}`);
779
+
307
780
  const processedPaths = await getProcessedPaths();
308
781
  let globalSuccess = 0;
309
782
  let globalFailure = 0;
@@ -318,185 +791,61 @@ program
318
791
  ? await globby([`${sourcePath}/**/*`], { onlyFiles: true })
319
792
  : [sourcePath];
320
793
 
321
- const progressBar = new cliProgress.SingleBar({
322
- format: '📂 Reading [{bar}] {percentage}% | {value}/{total} files',
323
- barCompleteChar: '█',
324
- barIncompleteChar: '░',
325
- hideCursor: true,
326
- });
327
- progressBar.start(files.length, 0);
328
-
329
- let successCount = 0;
330
- let failureCount = 0;
331
-
332
- for (const file of files) {
333
- progressBar.increment();
334
-
335
- let currentFile = file;
336
-
337
- // Check if we need to rename the file
338
- if (options.renameFiles) {
339
- const originalName = path.basename(file);
340
- const sanitizedName = sanitizeFileName(originalName);
341
-
342
- if (originalName !== sanitizedName) {
343
- const newFilePath = path.join(path.dirname(file), sanitizedName);
344
-
345
- if (options.dryRun) {
346
- console.log(`Would rename: ${originalName} → ${sanitizedName}`);
347
- continue;
348
- } else {
349
- try {
350
- fs.renameSync(file, newFilePath);
351
- currentFile = newFilePath;
352
- console.log(`✅ Renamed: ${originalName} → ${sanitizedName}`);
353
- writeLog(`RENAMED: ${originalName} → ${sanitizedName}`);
354
- await sendLogToSupabase({
355
- file: originalName,
356
- uploadPath: sanitizedName,
357
- status: 'renamed',
358
- message: `Renamed from ${originalName}`,
359
- });
360
- } catch (renameError) {
361
- console.error(
362
- `❌ Failed to rename ${originalName}: ${renameError.message}`,
363
- );
364
- writeLog(
365
- `RENAME_ERROR: ${originalName} | ${renameError.message}`,
366
- );
367
- continue;
368
- }
369
- }
370
- }
371
- }
794
+ console.log(`📊 Found ${files.length} files to process`);
372
795
 
373
- const content = fs.readFileSync(currentFile);
374
- const relativePathRaw = path
375
- .relative(basePath, currentFile)
376
- .replace(/^[\\/]+/, '')
377
- .replace(/\\/g, '/');
378
-
379
- // Always sanitize the filename for upload path
380
- const pathParts = relativePathRaw.split('/');
381
- const originalFileName = pathParts[pathParts.length - 1];
382
- const sanitizedFileName = sanitizeFileName(originalFileName);
383
- pathParts[pathParts.length - 1] = sanitizedFileName;
384
- const sanitizedRelativePath = pathParts.join('/');
385
-
386
- const uploadPathRaw = options.prefix
387
- ? path.posix.join(options.prefix, sanitizedRelativePath)
388
- : sanitizedRelativePath;
389
- const uploadPath = sanitizePath(uploadPathRaw);
390
-
391
- if (
392
- uploadPath !== uploadPathRaw ||
393
- originalFileName !== sanitizedFileName
394
- ) {
395
- writeLog(`SANITIZED: ${relativePathRaw} → ${uploadPath}`);
396
- await sendLogToSupabase({
397
- file: currentFile,
398
- uploadPath: relativePathRaw,
399
- status: 'sanitized',
400
- message: `Sanitized to ${uploadPath} (Arela Version: ${version})`,
401
- });
402
- }
403
-
404
- if (processedPaths.has(uploadPath)) {
405
- ora().info(`⏭️ Already processed (log): ${currentFile}`);
406
- continue;
407
- }
796
+ // Process files in parallel batches
797
+ const { successCount, failureCount, skippedCount } =
798
+ await processFilesInBatches(
799
+ files,
800
+ concurrency,
801
+ options,
802
+ basePath,
803
+ folder,
804
+ sourcePath,
805
+ processedPaths,
806
+ );
408
807
 
409
- const contentType =
410
- mime.lookup(currentFile) || 'application/octet-stream';
808
+ globalSuccess += successCount;
809
+ globalFailure += failureCount;
411
810
 
412
- const spinner = ora(`Checking ${currentFile}...`).start();
413
- const exists = await fileExistsInBucket(uploadPath);
414
-
415
- if (exists) {
416
- spinner.info(`⏭️ Skipped (already exists): ${currentFile}`);
417
- writeLog(`SKIPPED: ${currentFile} -> ${uploadPath}`);
418
- await sendLogToSupabase({
419
- file: currentFile,
420
- uploadPath,
421
- status: 'skipped',
422
- message: 'Already exists in bucket',
423
- });
424
- continue;
425
- }
811
+ // Small delay to ensure progress bar is fully cleared
812
+ await delay(100);
426
813
 
427
- try {
428
- // await delay(5000); // TODO: Remove this delay before production
429
-
430
- spinner.text = `Uploading ${currentFile}...`;
431
-
432
- const { error } = await uploadWithRetry(() =>
433
- supabase.storage.from(bucket).upload(uploadPath, content, {
434
- upsert: true,
435
- contentType,
436
- metadata: {
437
- originalName: path.basename(currentFile),
438
- sanitizedName: path.basename(uploadPath),
439
- clientPath: path.posix.join(
440
- basePath,
441
- folder,
442
- path.relative(sourcePath, currentFile).replace(/\\/g, '/'),
443
- ),
444
- arelaVersion: version,
445
- },
446
- }),
447
- );
448
-
449
- if (error) {
450
- failureCount++;
451
- globalFailure++;
452
- const errorMsg = error.message || JSON.stringify(error);
453
- spinner.fail(`❌ Failed to upload ${currentFile}: ${errorMsg}`);
454
- writeLog(`ERROR: ${currentFile} -> ${uploadPath} | ${errorMsg}`);
455
- await sendLogToSupabase({
456
- file: currentFile,
457
- uploadPath,
458
- status: 'error',
459
- message: errorMsg,
460
- });
461
- } else {
462
- successCount++;
463
- globalSuccess++;
464
- spinner.succeed(`✅ Uploaded ${currentFile} -> ${uploadPath}`);
465
- writeLog(`SUCCESS: ${currentFile} -> ${uploadPath}`);
466
- await sendLogToSupabase({
467
- file: currentFile,
468
- uploadPath,
469
- status: 'success',
470
- message: 'Uploaded successfully',
471
- });
472
- }
473
- } catch (err) {
474
- failureCount++;
475
- globalFailure++;
476
- const errorMsg = err.message || JSON.stringify(err);
477
- spinner.fail(`❌ Error uploading ${currentFile}: ${errorMsg}`);
478
- writeLog(`ERROR: ${currentFile} -> ${uploadPath} | ${errorMsg}`);
479
- await sendLogToSupabase({
480
- file: currentFile,
481
- uploadPath,
482
- status: 'error',
483
- message: errorMsg,
484
- });
485
- }
486
- }
487
-
488
- progressBar.stop();
489
-
490
- console.log(`\n📦 Upload Summary:`);
814
+ console.log(`\n📦 Upload Summary for ${folder}:`);
491
815
  console.log(` ✅ Successfully uploaded files: ${successCount}`);
492
816
  console.log(` ❌ Files with errors: ${failureCount}`);
493
- console.log(
494
- ` ⏭️ Files skipped (already exist): ${files.length - successCount - failureCount}`,
495
- );
817
+ console.log(` ⏭️ Files skipped (already exist): ${skippedCount}`);
496
818
  console.log(` 📜 Log file: ${logFilePath} \n`);
497
819
 
820
+ // Show cache statistics if requested
821
+ if (options.showCacheStats) {
822
+ console.log(`📊 Cache Statistics:`);
823
+ console.log(
824
+ ` 🗂️ Filename sanitization cache: ${sanitizationCache.size} entries`,
825
+ );
826
+ console.log(
827
+ ` 📁 Path sanitization cache: ${pathSanitizationCache.size} entries`,
828
+ );
829
+ console.log(
830
+ ` 📋 Log batch pending: ${logBatcher.batch.length} entries`,
831
+ );
832
+
833
+ // Calculate cache hit rate (rough estimation)
834
+ const totalProcessed = successCount + failureCount + skippedCount;
835
+ const estimatedCacheHitRate =
836
+ totalProcessed > 0
837
+ ? Math.round(
838
+ ((totalProcessed - sanitizationCache.size) / totalProcessed) *
839
+ 100,
840
+ )
841
+ : 0;
842
+ console.log(
843
+ ` 🎯 Estimated cache hit rate: ${Math.max(0, estimatedCacheHitRate)}%\n`,
844
+ );
845
+ }
846
+
498
847
  writeLog(
499
- `📦 Upload Summary for folder ${folder}: Success: ${successCount}, Errors: ${failureCount}, Skipped: ${files.length - successCount - failureCount}`,
848
+ `📦 Upload Summary for folder ${folder}: Success: ${successCount}, Errors: ${failureCount}, Skipped: ${skippedCount}`,
500
849
  );
501
850
  } catch (err) {
502
851
  console.error(`⚠️ Error processing folder ${folder}:`, err.message);
@@ -507,13 +856,22 @@ program
507
856
  status: 'error',
508
857
  message: err.message,
509
858
  });
859
+ globalFailure++;
510
860
  }
511
861
  }
512
862
 
513
- console.log(`🎯 Upload completed.`);
863
+ // Force flush any remaining logs before finishing
864
+ console.log(`\n📤 Flushing remaining logs...`);
865
+ await logBatcher.forceFlush();
866
+
867
+ // Final summary with clear separation
868
+ console.log(`\n${'='.repeat(50)}`);
869
+ console.log(`🎯 UPLOAD COMPLETED`);
870
+ console.log(`${'='.repeat(50)}`);
514
871
  console.log(` ✅ Total uploaded: ${globalSuccess}`);
515
872
  console.log(` ❌ Total with errors: ${globalFailure}`);
516
873
  console.log(` 📜 Log file: ${logFilePath}`);
874
+ console.log(`${'='.repeat(50)}\n`);
517
875
  });
518
876
 
519
877
  program.parse();