@arela/uploader 0.2.12 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/.env.template +66 -0
  2. package/.vscode/settings.json +1 -0
  3. package/README.md +134 -58
  4. package/SUPABASE_UPLOAD_FIX.md +157 -0
  5. package/package.json +3 -2
  6. package/scripts/cleanup-ds-store.js +109 -0
  7. package/scripts/cleanup-system-files.js +69 -0
  8. package/scripts/tests/phase-7-features.test.js +415 -0
  9. package/scripts/tests/signal-handling.test.js +275 -0
  10. package/scripts/tests/smart-watch-integration.test.js +554 -0
  11. package/scripts/tests/watch-service-integration.test.js +584 -0
  12. package/src/commands/UploadCommand.js +36 -2
  13. package/src/commands/WatchCommand.js +1305 -0
  14. package/src/config/config.js +113 -0
  15. package/src/document-type-shared.js +2 -0
  16. package/src/document-types/support-document.js +201 -0
  17. package/src/file-detection.js +2 -1
  18. package/src/index.js +44 -0
  19. package/src/services/AdvancedFilterService.js +505 -0
  20. package/src/services/AutoProcessingService.js +639 -0
  21. package/src/services/BenchmarkingService.js +381 -0
  22. package/src/services/DatabaseService.js +723 -170
  23. package/src/services/ErrorMonitor.js +275 -0
  24. package/src/services/LoggingService.js +419 -1
  25. package/src/services/MonitoringService.js +401 -0
  26. package/src/services/PerformanceOptimizer.js +511 -0
  27. package/src/services/ReportingService.js +511 -0
  28. package/src/services/SignalHandler.js +255 -0
  29. package/src/services/SmartWatchDatabaseService.js +527 -0
  30. package/src/services/WatchService.js +783 -0
  31. package/src/services/upload/ApiUploadService.js +30 -4
  32. package/src/services/upload/SupabaseUploadService.js +28 -6
  33. package/src/utils/CleanupManager.js +262 -0
  34. package/src/utils/FileOperations.js +41 -0
  35. package/src/utils/WatchEventHandler.js +517 -0
  36. package/supabase/migrations/001_create_initial_schema.sql +366 -0
  37. package/supabase/migrations/002_align_with_arela_api_schema.sql +145 -0
  38. package/commands.md +0 -6
@@ -25,6 +25,15 @@ export class DatabaseService {
  return await supabaseService.getClient();
  }

+ /**
+ * Get Supabase client (public wrapper for dependency injection)
+ * Used by specialized services like SmartWatchDatabaseService
+ * @returns {Promise<Object>} Supabase client
+ */
+ async getSupabaseClient() {
+ return this.#getSupabaseClient();
+ }
+
  /**
  * Execute database query with retry logic and exponential backoff
  * @private
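Context for the hunk above: the new getSupabaseClient() method only re-exposes the private #getSupabaseClient accessor so specialized services can share one client instead of constructing their own. A minimal consumer sketch, assuming the module's singleton export is named databaseService (the export name is not visible in this hunk):

// sketch: reuse the shared Supabase client from another service
import { databaseService } from './DatabaseService.js'; // assumed singleton export name

const supabase = await databaseService.getSupabaseClient();
const { data, error } = await supabase.from('uploader').select('id').limit(1);
if (error) throw error;
console.log(`connectivity check returned ${data.length} row(s)`);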
@@ -51,6 +60,7 @@ export class DatabaseService {
  error.message?.includes('timeout') ||
  error.message?.includes('canceling statement') ||
  error.message?.includes('connection') ||
+ error.message?.includes('fetch failed') ||
  error.code === 'PGRST301'; // PostgREST timeout

  if (!isRetriableError || attempt === maxRetries) {
@@ -70,6 +80,18 @@ export class DatabaseService {
  throw lastError;
  }

+ /**
+ * Execute database query with retry logic (public wrapper for dependency injection)
+ * Used by specialized services like SmartWatchDatabaseService
+ * @param {Function} queryFn - Query function to execute
+ * @param {string} operation - Description of the operation for logging
+ * @param {number} maxRetries - Maximum number of retry attempts (default: 3)
+ * @returns {Promise<Object>} Query result
+ */
+ async queryWithRetry(queryFn, operation, maxRetries = 3) {
+ return this.#queryWithRetry(queryFn, operation, maxRetries);
+ }
+
  /**
  * Insert file stats with document detection into uploader table
  * @param {Array} files - Array of file objects
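The public queryWithRetry(queryFn, operation, maxRetries) wrapper delegates to the private retry loop, which per the previous hunk now also treats 'fetch failed' as retriable. A hedged usage sketch, again assuming the singleton export name:

// sketch: run a Supabase query through the new retry wrapper
import { databaseService } from './DatabaseService.js'; // assumed singleton export name

const supabase = await databaseService.getSupabaseClient();
const { data, error } = await databaseService.queryWithRetry(
  async () =>
    supabase.from('uploader').select('id, filename').neq('status', 'file-uploaded').limit(10),
  'fetch pending uploader rows', // operation label used in logs
  3, // maxRetries
);
if (error) throw new Error(`query failed after retries: ${error.message}`);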
@@ -111,6 +133,7 @@ export class DatabaseService {
  const filename = file.originalName || path.basename(file.path);

  const record = {
+ name: filename,
  document_type: null,
  size: stats.size,
  num_pedimento: null,
@@ -121,11 +144,14 @@ export class DatabaseService {
  rfc: null,
  message: null,
  file_extension: fileExtension,
- is_like_simplificado:
- fileExtension === 'pdf' && filename.toLowerCase().includes('simp'),
+ is_like_simplificado: filename.toLowerCase().includes('simp'),
  year: null,
  created_at: new Date().toISOString(),
+ updated_at: new Date().toISOString(),
  modified_at: stats.mtime.toISOString(),
+ // Queue/Processing columns (for arela-api)
+ processing_status: 'PENDING',
+ upload_attempts: 0,
  };

  // Try to detect document type for supported files
@@ -180,18 +206,153 @@ export class DatabaseService {
  `Inserting ${records.length} new records into uploader table...`,
  );

+ // Use upsert to handle duplicates gracefully
+ // This will insert new records or update existing ones (by original_path)
  const { data, error } = await supabase
  .from('uploader')
- .insert(records)
+ .upsert(records, { onConflict: 'original_path' })
  .select();

  if (error) {
  throw new Error(`Failed to insert stats records: ${error.message}`);
  }

+ // Propagate arela_path to related files in same folder
+ if (data && data.length > 0) {
+ await this.#propagateArelaPathToRelatedFiles(data, supabase);
+ }
+
  return data;
  }

+ /**
+ * Propagate arela_path from pedimento files to related files in the same directory
+ * When a pedimento_simplificado is detected, all files in its directory get the same arela_path
+ * Also checks database for existing files in the same directory that need the arela_path
+ * @private
+ * @param {Array} insertedRecords - Records that were just inserted
+ * @param {Object} supabase - Supabase client
+ * @returns {Promise<void>}
+ */
+ async #propagateArelaPathToRelatedFiles(insertedRecords, supabase) {
+ try {
+ // Group records by directory
+ const recordsByDir = {};
+
+ for (const record of insertedRecords) {
+ if (!record.original_path) continue;
+
+ const dirPath = path.dirname(record.original_path);
+ if (!recordsByDir[dirPath]) {
+ recordsByDir[dirPath] = {
+ pedimentos: [],
+ allFiles: [],
+ };
+ }
+
+ recordsByDir[dirPath].allFiles.push(record);
+
+ // Identify pedimento files
+ if (
+ record.document_type === 'pedimento_simplificado' &&
+ record.arela_path
+ ) {
+ recordsByDir[dirPath].pedimentos.push(record);
+ }
+ }
+
+ // For each directory with a pedimento, propagate its arela_path to related files
+ for (const [dirPath, dirData] of Object.entries(recordsByDir)) {
+ if (dirData.pedimentos.length === 0) continue;
+
+ // Use the first pedimento's arela_path (should be only one per directory typically)
+ const pedimentoRecord = dirData.pedimentos[0];
+ const arelaPath = pedimentoRecord.arela_path;
+
+ logger.info(
+ `📁 Propagating arela_path from pedimento to files in ${path.basename(dirPath)}/`,
+ );
+
+ // Step 1: Update newly inserted files in this directory
+ const fileIds = dirData.allFiles
+ .filter(
+ (f) =>
+ f.id &&
+ f.arela_path !== arelaPath &&
+ f.document_type !== 'pedimento_simplificado',
+ )
+ .map((f) => f.id);
+
+ if (fileIds.length > 0) {
+ const { error: updateError } = await supabase
+ .from('uploader')
+ .update({ arela_path: arelaPath })
+ .in('id', fileIds);
+
+ if (updateError) {
+ logger.warn(
+ `Could not propagate arela_path: ${updateError.message}`,
+ );
+ } else {
+ logger.info(
+ `✅ Updated ${fileIds.length} related files with arela_path: ${arelaPath}`,
+ );
+ }
+ }
+
+ // Step 2: Also find and update any existing files in the same directory that don't have arela_path
+ // This handles the case where files were detected earlier but the pedimento is detected now
+ try {
+ const dirPattern = dirPath.replace(/\\/g, '/'); // Normalize for SQL LIKE
+ const { data: existingFiles, error: fetchError } = await supabase
+ .from('uploader')
+ .select('id, original_path, document_type')
+ .like('original_path', `${dirPattern}/%`)
+ .is('arela_path', null)
+ .limit(1000); // Reasonable limit to avoid huge queries
+
+ if (fetchError) {
+ logger.warn(
+ `Could not fetch existing files in ${path.basename(dirPath)}: ${fetchError.message}`,
+ );
+ } else if (existingFiles && existingFiles.length > 0) {
+ const existingFileIds = existingFiles
+ .filter(
+ (f) => f.id && f.document_type !== 'pedimento_simplificado',
+ )
+ .map((f) => f.id);
+
+ if (existingFileIds.length > 0) {
+ const { error: existingError } = await supabase
+ .from('uploader')
+ .update({ arela_path: arelaPath })
+ .in('id', existingFileIds);
+
+ if (existingError) {
+ logger.warn(
+ `Could not update existing files with arela_path: ${existingError.message}`,
+ );
+ } else {
+ logger.info(
+ `✅ Updated ${existingFileIds.length} existing files in directory with arela_path: ${arelaPath}`,
+ );
+ }
+ }
+ }
+ } catch (existingFilesError) {
+ logger.warn(
+ `Error checking for existing files in ${path.basename(dirPath)}: ${existingFilesError.message}`,
+ );
+ }
+ }
+ } catch (error) {
+ logger.warn(
+ `Error propagating arela_path to related files: ${error.message}`,
+ );
+ // Don't throw - this is a non-critical operation
+ }
+ }
+
  /**
  * Insert file stats only (no detection) into uploader table
  * @param {Array} files - Array of file objects
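Note on the hunk above: switching from .insert(records) to .upsert(records, { onConflict: 'original_path' }) only resolves duplicates if the uploader table carries a unique constraint on original_path (presumably added by the new Supabase migrations listed in this release); PostgREST rejects an onConflict target without one. A small sketch of the intended behavior, with illustrative values not taken from this diff:

// sketch: re-running the same path updates the existing row instead of failing,
// provided uploader.original_path has a UNIQUE constraint/index
const records = [
  { original_path: '/data/cliente/ped_simp.pdf', name: 'ped_simp.pdf', processing_status: 'PENDING' },
];
const { data, error } = await supabase
  .from('uploader')
  .upsert(records, { onConflict: 'original_path' })
  .select();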
@@ -201,11 +362,29 @@ export class DatabaseService {
  async insertStatsOnlyToUploaderTable(files, options) {
  const supabase = await this.#getSupabaseClient();
  const batchSize = 1000;
+ const quietMode = options?.quietMode || false;
  const allRecords = [];

- logger.info('Collecting filesystem stats...');
+ if (!quietMode) {
+ logger.info('Collecting filesystem stats...');
+ }
+
+ // Filter out system files and hidden files (macOS, Windows, Python, editors)
+ const systemFilePattern =
+ /^\.|__pycache__|\.pyc|\.swp|\.swo|Thumbs\.db|desktop\.ini|DS_Store|\$RECYCLE\.BIN|System Volume Information|~\$|\.tmp/i;
+
  for (const file of files) {
  try {
+ const fileName = file.originalName || path.basename(file.path);
+
+ // Skip system and hidden files
+ if (systemFilePattern.test(fileName)) {
+ if (!quietMode) {
+ logger.debug(`Skipping system file: ${fileName}`);
+ }
+ continue;
+ }
+
  const stats = file.stats || fs.statSync(file.path);
  const originalPath = options.clientPath || file.path;
  const fileExtension = path
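The system-file pattern above is tested against the base file name only, so it skips dot-files, __pycache__ artifacts, editor swap files, and Windows/macOS metadata, but it also matches any name that merely contains '.tmp'. A quick check with sample names (the names are illustrative):

// sketch: which sample names the filter in this hunk catches
const systemFilePattern =
  /^\.|__pycache__|\.pyc|\.swp|\.swo|Thumbs\.db|desktop\.ini|DS_Store|\$RECYCLE\.BIN|System Volume Information|~\$|\.tmp/i;

for (const name of ['.DS_Store', 'Thumbs.db', '~$factura.xlsx', 'report.tmp.pdf', 'pedimento.pdf']) {
  console.log(name, systemFilePattern.test(name) ? 'skipped' : 'kept');
}
// Only 'pedimento.pdf' is kept; 'report.tmp.pdf' is skipped because '.tmp' matches anywhere in the name.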
@@ -214,6 +393,7 @@ export class DatabaseService {
  .replace('.', '');

  const record = {
+ name: file.originalName || path.basename(file.path),
  document_type: null,
  size: stats.size,
  num_pedimento: null,
@@ -225,6 +405,7 @@ export class DatabaseService {
  message: null,
  file_extension: fileExtension,
  created_at: new Date().toISOString(),
+ updated_at: new Date().toISOString(),
  modified_at: stats.mtime.toISOString(),
  is_like_simplificado:
  fileExtension === 'pdf' &&
@@ -232,6 +413,9 @@ export class DatabaseService {
  .toLowerCase()
  .includes('simp'),
  year: null,
+ // Queue/Processing columns (for arela-api)
+ processing_status: 'PENDING',
+ upload_attempts: 0,
  };

  allRecords.push(record);
@@ -241,13 +425,17 @@ export class DatabaseService {
  }

  if (allRecords.length === 0) {
- logger.info('No file stats to insert');
+ if (!quietMode) {
+ logger.info('No file stats to insert');
+ }
  return { totalInserted: 0, totalSkipped: 0, totalProcessed: 0 };
  }

- logger.info(
- `Processing ${allRecords.length} file stats in batches of ${batchSize}...`,
- );
+ if (!quietMode) {
+ logger.info(
+ `Processing ${allRecords.length} file stats in batches of ${batchSize}...`,
+ );
+ }

  let totalInserted = 0;
  let totalUpdated = 0;
@@ -280,10 +468,11 @@ export class DatabaseService {
  existingPaths.has(r.original_path),
  );

- // Only log every 10th batch to reduce noise
+ // Only log every 10th batch to reduce noise (skip in quiet mode)
  if (
- (Math.floor(i / batchSize) + 1) % 10 === 0 ||
- Math.floor(i / batchSize) + 1 === 1
+ !quietMode &&
+ ((Math.floor(i / batchSize) + 1) % 10 === 0 ||
+ Math.floor(i / batchSize) + 1 === 1)
  ) {
  logger.info(
  `Batch ${Math.floor(i / batchSize) + 1}: ${newRecords.length} new, ${updateRecords.length} updates`,
@@ -311,6 +500,7 @@ export class DatabaseService {
  const { error: updateError } = await supabase
  .from('uploader')
  .update({
+ name: record.filename,
  size: record.size,
  modified_at: record.modified_at,
  filename: record.filename,
@@ -324,8 +514,8 @@ export class DatabaseService {
  }
  }
  totalUpdated += batchUpdated;
- // Reduce logging noise - only log when there are updates
- if (batchUpdated > 0) {
+ // Reduce logging noise - only log when there are updates (skip in quiet mode)
+ if (!quietMode && batchUpdated > 0) {
  logger.info(`Updated ${batchUpdated} existing records`);
  }
  }
@@ -336,9 +526,11 @@ export class DatabaseService {
  }
  }

- logger.success(
- `Phase 1 Summary: ${totalInserted} new records inserted, ${totalUpdated} existing records updated`,
- );
+ if (!quietMode) {
+ logger.success(
+ `Phase 1 Summary: ${totalInserted} new records inserted, ${totalUpdated} existing records updated`,
+ );
+ }

  return {
  totalInserted,
@@ -650,16 +842,18 @@ export class DatabaseService {

  while (hasMoreRelatedFiles) {
  const { data: relatedFilesPage, error: relatedError } =
- await supabase
- .from('uploader')
- .select('id, filename, original_path')
- .like('original_path', `${basePath}%`)
- .is('arela_path', null)
- .neq('id', pedimento.id) // Exclude the pedimento itself
- .range(
- relatedFilesFrom,
- relatedFilesFrom + relatedFilesPageSize - 1,
- );
+ await this.#queryWithRetry(async () => {
+ return await supabase
+ .from('uploader')
+ .select('id, filename, original_path')
+ .like('original_path', `${basePath}%`)
+ .is('arela_path', null)
+ .neq('id', pedimento.id) // Exclude the pedimento itself
+ .range(
+ relatedFilesFrom,
+ relatedFilesFrom + relatedFilesPageSize - 1,
+ );
+ }, `query related files for ${pedimento.filename} (page ${relatedFilesPageNumber})`);

  if (relatedError) {
  logger.error(
@@ -782,11 +976,13 @@ export class DatabaseService {
  'ℹ️ No pedimento_simplificado records with arela_path found',
  );
  } else {
+ // pageNumber represents the current page (starts at 1)
+ const totalPages = pageNumber;
  console.log(
- `📋 Processed ${totalProcessed} pedimento records across ${pageNumber - 1} pages`,
+ `📋 Processed ${totalProcessed} pedimento records across ${totalPages} page${totalPages !== 1 ? 's' : ''}`,
  );
  logger.info(
- `Processed ${totalProcessed} pedimento records across ${pageNumber - 1} pages`,
+ `Processed ${totalProcessed} pedimento records across ${totalPages} page${totalPages !== 1 ? 's' : ''}`,
  );
  }

@@ -923,200 +1119,129 @@ export class DatabaseService {
  const uniqueArelaPaths = [
  ...new Set(allPedimentoRecords.map((r) => r.arela_path)),
  ];
+ // pageNumber represents the current page (starts at 1)
+ const totalPages = pageNumber;
  console.log(
- `📋 Found ${allPedimentoRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths for specified RFCs across ${pageNumber - 1} pages`,
+ `📋 Found ${allPedimentoRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths for specified RFCs across ${totalPages} page${totalPages !== 1 ? 's' : ''}`,
  );
  logger.info(
- `Found ${allPedimentoRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths across ${pageNumber - 1} pages`,
+ `Found ${allPedimentoRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths across ${totalPages} page${totalPages !== 1 ? 's' : ''}`,
  );

- // Step 2: Get all files with these arela_paths that haven't been uploaded yet
- let rfcRecords = [];
- const chunkSize = 50;
-
- for (let i = 0; i < uniqueArelaPaths.length; i += chunkSize) {
- const pathChunk = uniqueArelaPaths.slice(i, i + chunkSize);
-
- const { data: chunkFiles, error: chunkError } = await supabase
- .from('uploader')
- .select('arela_path')
- .in('arela_path', pathChunk)
- .neq('status', 'file-uploaded')
- .not('arela_path', 'is', null);
-
- if (chunkError) {
- const errorMsg = `Error fetching files for arela_paths chunk: ${chunkError.message}`;
- logger.error(errorMsg);
- throw new Error(errorMsg);
- }
-
- if (chunkFiles && chunkFiles.length > 0) {
- rfcRecords = rfcRecords.concat(chunkFiles);
- }
- }
-
- if (!rfcRecords || rfcRecords.length === 0) {
- if (totalRfcFiles && totalRfcFiles > 0) {
- console.log(
- `ℹ️ All ${totalRfcFiles} files for the specified RFCs are already uploaded (status: file-uploaded)`,
- );
- console.log(' No new files to upload.');
- logger.info(
- `All ${totalRfcFiles} files for specified RFCs already uploaded`,
- );
- } else {
- console.log(
- 'ℹ️ No files found for the specified RFCs with arela_path',
- );
- console.log(
- ` Make sure files for RFCs [${appConfig.upload.rfcs.join(', ')}] have been processed and have arela_path values`,
- );
- logger.info('No files found for specified RFCs with arela_path');
- }
- return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
- }
-
- // Show filtering effect
- const uploadableArelaPaths = [
- ...new Set(rfcRecords.map((r) => r.arela_path)),
- ];
- const skipped = (totalRfcFiles || 0) - rfcRecords.length;
- if (skipped > 0) {
- console.log(
- `📊 Found ${rfcRecords.length} files ready for upload (${skipped} already uploaded, skipped)`,
- );
- } else {
- console.log(`📊 Found ${rfcRecords.length} files ready for upload`);
- }
- logger.info(
- `Found ${rfcRecords.length} files ready for upload, ${skipped} skipped`,
- );
-
- // Step 3: Process files with streaming pagination to avoid memory overload
+ // Step 2: Process files with optimized single query per chunk
  let totalProcessed = 0;
  let totalUploaded = 0;
  let totalErrors = 0;
  let globalFileCount = 0;
  const arelaPathChunkSize = 50;
- const queryBatchSize = 500; // Reduced batch size for better memory management
  const batchSize = parseInt(options.batchSize) || 10;
+ const filePageSize = 1000; // Supabase limit per request

  // Import performance configuration
  const { performance: perfConfig } = appConfig;
  const maxConcurrency = perfConfig?.maxApiConnections || 3;

- console.log(
- '📥 Processing files with streaming pagination (processing arela_paths in chunks to avoid URI limits and memory overload)...',
- );
+ console.log('📥 Processing files in chunks to avoid URI limits...');

  // Process arela_paths in chunks and upload files as we fetch them
- for (let i = 0; i < uploadableArelaPaths.length; i += arelaPathChunkSize) {
- const arelaPathChunk = uploadableArelaPaths.slice(
- i,
- i + arelaPathChunkSize,
- );
+ for (let i = 0; i < uniqueArelaPaths.length; i += arelaPathChunkSize) {
+ const arelaPathChunk = uniqueArelaPaths.slice(i, i + arelaPathChunkSize);
  const chunkNumber = Math.floor(i / arelaPathChunkSize) + 1;
  const totalChunks = Math.ceil(
- uploadableArelaPaths.length / arelaPathChunkSize,
+ uniqueArelaPaths.length / arelaPathChunkSize,
  );

  console.log(
  ` Processing arela_path chunk ${chunkNumber}/${totalChunks} (${arelaPathChunk.length} paths)`,
  );

- // For each chunk of arela_paths, use pagination to get related files and process them immediately
- let hasMore = true;
- let offset = 0;
- let chunkFileCount = 0;
+ // Fetch all files for this chunk with pagination to handle >1000 records
+ let allChunkFiles = [];
+ let fileOffset = 0;
+ let hasMoreFiles = true;
+ let filePageNum = 1;

- while (hasMore) {
+ while (hasMoreFiles) {
  const { data: batch, error: queryError } = await supabase
  .from('uploader')
  .select('id, original_path, arela_path, filename, rfc, document_type')
  .in('arela_path', arelaPathChunk)
- .not('original_path', 'is', null)
  .neq('status', 'file-uploaded')
- .range(offset, offset + queryBatchSize - 1)
- .order('created_at');
+ .order('created_at')
+ .range(fileOffset, fileOffset + filePageSize - 1);

  if (queryError) {
- const errorMsg = `Error fetching related files for chunk ${chunkNumber}: ${queryError.message}`;
+ const errorMsg = `Error fetching files for chunk ${chunkNumber} page ${filePageNum}: ${queryError.message}`;
  logger.error(errorMsg);
  throw new Error(errorMsg);
  }

  if (!batch || batch.length === 0) {
- hasMore = false;
+ hasMoreFiles = false;
+ if (filePageNum === 1) {
+ // No files found at all for this chunk
+ console.log(
+ ` ℹ️ Chunk ${chunkNumber}/${totalChunks}: No files to upload`,
+ );
+ }
  break;
  }

- chunkFileCount += batch.length;
- globalFileCount += batch.length;
-
- console.log(
- ` 📦 Processing batch within chunk ${chunkNumber}: ${batch.length} files (total processed so far: ${globalFileCount})`,
+ logger.debug(
+ `Chunk ${chunkNumber} page ${filePageNum}: fetched ${batch.length} files`,
  );
+ allChunkFiles = allChunkFiles.concat(batch);

- // Track if any uploads occurred in this batch
- let uploadsOccurred = false;
+ // Check if we need more pages
+ if (batch.length < filePageSize) {
+ hasMoreFiles = false;
+ logger.debug(
+ `Chunk ${chunkNumber}: Completed pagination with ${allChunkFiles.length} total files`,
+ );
+ } else {
+ fileOffset += filePageSize;
+ filePageNum++;
+ }
+ }

- // Process this batch of files immediately using concurrent processing
- // Split batch into upload batches
- for (let j = 0; j < batch.length; j += batchSize) {
- const uploadBatch = batch.slice(j, j + batchSize);
- const batchNum = Math.floor(globalFileCount / batchSize) + 1;
+ if (allChunkFiles.length === 0) {
+ continue;
+ }

- console.log(
- `📦 Processing upload batch ${batchNum} (${uploadBatch.length} files)`,
- );
+ const chunkFileCount = allChunkFiles.length;
+ globalFileCount += chunkFileCount;

- // Process batch using concurrent processing similar to UploadCommand
- const batchResults = await this.#processRfcBatch(
- uploadBatch,
- uploadService,
- supabase,
- options,
- maxConcurrency,
- );
+ console.log(
+ ` 📦 Chunk ${chunkNumber}/${totalChunks}: Processing ${chunkFileCount} files`,
+ );

- totalProcessed += batchResults.processed;
- totalUploaded += batchResults.uploaded;
- totalErrors += batchResults.errors;
+ // Process this batch of files immediately using concurrent processing
+ // Split batch into upload batches
+ for (let j = 0; j < allChunkFiles.length; j += batchSize) {
+ const uploadBatch = allChunkFiles.slice(j, j + batchSize);
+ const batchNum = Math.floor(j / batchSize) + 1;
+ const totalBatches = Math.ceil(allChunkFiles.length / batchSize);

- // Track if uploads occurred (status changes from non-uploaded to uploaded)
- if (batchResults.uploaded > 0) {
- uploadsOccurred = true;
- }
+ console.log(
+ ` 📦 Processing upload batch ${batchNum}/${totalBatches} within chunk ${chunkNumber} (${uploadBatch.length} files)`,
+ );

- console.log(
- `📊 Upload batch complete - Total progress: ${totalUploaded} uploaded, ${totalErrors} errors`,
- );
- }
+ // Process batch using concurrent processing similar to UploadCommand
+ const batchResults = await this.#processRfcBatch(
+ uploadBatch,
+ uploadService,
+ supabase,
+ options,
+ maxConcurrency,
+ );

- // Check if we need more data from this chunk
- if (batch.length < queryBatchSize) {
- hasMore = false;
- } else {
- // If uploads occurred, reset pagination to start from beginning
- // since records that matched the query may no longer match after upload
- if (uploadsOccurred) {
- offset = 0;
- console.log(
- ` 📄 Batch complete with uploads: ${batch.length} files processed, restarting pagination from beginning due to query condition changes...`,
- );
- } else {
- offset += queryBatchSize;
- console.log(
- ` 📄 Batch complete: ${batch.length} files processed, continuing to next page (offset: ${offset})...`,
- );
- }
- }
+ totalProcessed += batchResults.processed;
+ totalUploaded += batchResults.uploaded;
+ totalErrors += batchResults.errors;

- if (hasMore) {
- console.log(
- ` 📄 Fetching more files for chunk ${chunkNumber}... (offset: ${offset})`,
- );
- }
+ console.log(
+ ` 📊 Batch complete - Progress: ${totalUploaded} uploaded, ${totalErrors} errors`,
+ );
  }

  console.log(
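The reworked loop above first drains every page (filePageSize = 1000, the per-request cap noted in the code) for a chunk of 50 arela_paths into allChunkFiles, and only then splits that array into upload batches, which is what lets the old restart-pagination-after-upload logic go away. The control flow reduces to this pattern, shown schematically with the Supabase call stubbed out (fetchPage and uploadBatchFn are stand-ins, not names from this diff):

// sketch: accumulate-then-upload pattern used per arela_path chunk
async function processChunk(arelaPathChunk, fetchPage, uploadBatchFn, filePageSize = 1000, batchSize = 10) {
  const allChunkFiles = [];
  for (let offset = 0; ; offset += filePageSize) {
    const page = await fetchPage(arelaPathChunk, offset, filePageSize);
    if (!page || page.length === 0) break; // nothing (more) to fetch
    allChunkFiles.push(...page);
    if (page.length < filePageSize) break; // short page means last page
  }
  for (let j = 0; j < allChunkFiles.length; j += batchSize) {
    await uploadBatchFn(allChunkFiles.slice(j, j + batchSize)); // fetched rows are never re-queried mid-upload
  }
  return allChunkFiles.length;
}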
@@ -1467,6 +1592,7 @@ export class DatabaseService {
  .update({
  status: 'file-uploaded',
  message: 'Successfully uploaded to Supabase',
+ processing_status: 'UPLOADED',
  })
  .eq('id', file.id);

@@ -1648,6 +1774,7 @@ export class DatabaseService {
  .update({
  status: 'file-uploaded',
  message: 'Successfully uploaded to Arela API (batch)',
+ processing_status: 'UPLOADED',
  })
  .in('id', successfulFileIds);

@@ -1787,6 +1914,432 @@ export class DatabaseService {

  return { processed, uploaded, errors };
  }
+
+ /**
+ * Insert upload session event into watch_uploads table
+ * @param {Object} uploadEvent - Upload event from LoggingService
+ * @param {string} sessionId - Session ID for tracking
+ * @returns {Promise<Object>} Inserted record
+ */
+ async insertUploadEvent(uploadEvent, sessionId) {
+ const supabase = await this.#getSupabaseClient();
+
+ const record = {
+ session_id: sessionId,
+ timestamp: uploadEvent.timestamp || new Date().toISOString(),
+ strategy: uploadEvent.strategy, // 'individual', 'batch', 'full-structure'
+ file_count: uploadEvent.fileCount || 0,
+ success_count: uploadEvent.successCount || 0,
+ failure_count: uploadEvent.failureCount || 0,
+ retry_count: uploadEvent.retryCount || 0,
+ duration_ms: uploadEvent.duration || 0,
+ status: uploadEvent.status || 'completed',
+ metadata: uploadEvent.metadata || null,
+ };
+
+ try {
+ const { data, error } = await this.#queryWithRetry(async () => {
+ return await supabase.from('watch_uploads').insert([record]).select();
+ }, `insert upload event for session ${sessionId}`);
+
+ if (error) {
+ logger.error(`Failed to insert upload event: ${error.message}`);
+ throw error;
+ }
+
+ return data[0];
+ } catch (error) {
+ logger.error(`Error inserting upload event: ${error.message}`);
+ throw error;
+ }
+ }
+
+ /**
+ * Insert retry event into watch_events table
+ * @param {string} uploadEventId - ID of the parent upload event
+ * @param {string} sessionId - Session ID for tracking
+ * @param {Object} retryEvent - Retry event from LoggingService
+ * @returns {Promise<Object>} Inserted record
+ */
+ async insertRetryEvent(uploadEventId, sessionId, retryEvent) {
+ const supabase = await this.#getSupabaseClient();
+
+ const record = {
+ upload_event_id: uploadEventId,
+ session_id: sessionId,
+ timestamp: retryEvent.timestamp || new Date().toISOString(),
+ attempt_number: retryEvent.attemptNumber || 0,
+ error_message: retryEvent.error || null,
+ backoff_ms: retryEvent.backoffMs || 0,
+ type: 'retry',
+ };
+
+ try {
+ const { data, error } = await this.#queryWithRetry(async () => {
+ return await supabase.from('watch_events').insert([record]).select();
+ }, `insert retry event for upload ${uploadEventId}`);
+
+ if (error) {
+ logger.error(`Failed to insert retry event: ${error.message}`);
+ throw error;
+ }
+
+ return data[0];
+ } catch (error) {
+ logger.error(`Error inserting retry event: ${error.message}`);
+ throw error;
+ }
+ }
+
+ /**
+ * Get upload history for a session
+ * @param {string} sessionId - Session ID to query
+ * @param {Object} options - Query options (limit, offset, strategy filter)
+ * @returns {Promise<Array>} Array of upload events
+ */
+ async getSessionUploadHistory(sessionId, options = {}) {
+ const supabase = await this.#getSupabaseClient();
+ const limit = options.limit || 100;
+ const offset = options.offset || 0;
+
+ try {
+ let query = supabase
+ .from('watch_uploads')
+ .select('*')
+ .eq('session_id', sessionId)
+ .order('timestamp', { ascending: false })
+ .range(offset, offset + limit - 1);
+
+ // Filter by strategy if provided
+ if (options.strategy) {
+ query = query.eq('strategy', options.strategy);
+ }
+
+ const { data, error } = await this.#queryWithRetry(async () => {
+ return await query;
+ }, `fetch upload history for session ${sessionId}`);
+
+ if (error) {
+ logger.error(`Failed to fetch upload history: ${error.message}`);
+ throw error;
+ }
+
+ return data || [];
+ } catch (error) {
+ logger.error(`Error fetching upload history: ${error.message}`);
+ return [];
+ }
+ }
+
+ /**
+ * Get retry history for an upload event
+ * @param {string} uploadEventId - Upload event ID to query
+ * @param {Object} options - Query options (limit, offset)
+ * @returns {Promise<Array>} Array of retry events
+ */
+ async getUploadRetryHistory(uploadEventId, options = {}) {
+ const supabase = await this.#getSupabaseClient();
+ const limit = options.limit || 100;
+ const offset = options.offset || 0;
+
+ try {
+ const { data, error } = await this.#queryWithRetry(async () => {
+ return await supabase
+ .from('watch_events')
+ .select('*')
+ .eq('upload_event_id', uploadEventId)
+ .eq('type', 'retry')
+ .order('timestamp', { ascending: true })
+ .range(offset, offset + limit - 1);
+ }, `fetch retry history for upload ${uploadEventId}`);
+
+ if (error) {
+ logger.error(`Failed to fetch retry history: ${error.message}`);
+ throw error;
+ }
+
+ return data || [];
+ } catch (error) {
+ logger.error(`Error fetching retry history: ${error.message}`);
+ return [];
+ }
+ }
+
+ /**
+ * Get session statistics
+ * @param {string} sessionId - Session ID to analyze
+ * @returns {Promise<Object>} Session statistics
+ */
+ async getSessionStatistics(sessionId) {
+ const supabase = await this.#getSupabaseClient();
+
+ try {
+ // Fetch all upload events for the session
+ const { data: uploads, error: uploadError } = await this.#queryWithRetry(
+ async () => {
+ return await supabase
+ .from('watch_uploads')
+ .select('*')
+ .eq('session_id', sessionId);
+ },
+ `fetch statistics for session ${sessionId}`,
+ );
+
+ if (uploadError) {
+ throw uploadError;
+ }
+
+ // Fetch all retry events for the session
+ const { data: retries, error: retryError } = await this.#queryWithRetry(
+ async () => {
+ return await supabase
+ .from('watch_events')
+ .select('*')
+ .eq('session_id', sessionId)
+ .eq('type', 'retry');
+ },
+ `fetch retry statistics for session ${sessionId}`,
+ );
+
+ if (retryError) {
+ throw retryError;
+ }
+
+ // Calculate statistics
+ const stats = {
+ sessionId,
+ totalUploadEvents: uploads?.length || 0,
+ totalRetryEvents: retries?.length || 0,
+ totalFileCount: 0,
+ totalSuccessCount: 0,
+ totalFailureCount: 0,
+ totalRetryCount: 0,
+ totalDuration: 0,
+ byStrategy: {
+ individual: {
+ uploadCount: 0,
+ totalFiles: 0,
+ totalSuccess: 0,
+ totalFailure: 0,
+ successRate: 0,
+ totalDuration: 0,
+ },
+ batch: {
+ uploadCount: 0,
+ totalFiles: 0,
+ totalSuccess: 0,
+ totalFailure: 0,
+ successRate: 0,
+ totalDuration: 0,
+ },
+ 'full-structure': {
+ uploadCount: 0,
+ totalFiles: 0,
+ totalSuccess: 0,
+ totalFailure: 0,
+ successRate: 0,
+ totalDuration: 0,
+ },
+ },
+ retryStats: {
+ totalRetries: retries?.length || 0,
+ uniqueUploadsWithRetries: new Set(
+ retries?.map((r) => r.upload_event_id) || [],
+ ).size,
+ totalRetryDuration:
+ retries?.reduce((sum, r) => sum + (r.backoff_ms || 0), 0) || 0,
+ },
+ };
+
+ // Process upload events
+ if (uploads && uploads.length > 0) {
+ uploads.forEach((upload) => {
+ stats.totalFileCount += upload.file_count || 0;
+ stats.totalSuccessCount += upload.success_count || 0;
+ stats.totalFailureCount += upload.failure_count || 0;
+ stats.totalRetryCount += upload.retry_count || 0;
+ stats.totalDuration += upload.duration_ms || 0;
+
+ const strategyKey = upload.strategy || 'individual';
+ if (stats.byStrategy[strategyKey]) {
+ stats.byStrategy[strategyKey].uploadCount += 1;
+ stats.byStrategy[strategyKey].totalFiles += upload.file_count || 0;
+ stats.byStrategy[strategyKey].totalSuccess +=
+ upload.success_count || 0;
+ stats.byStrategy[strategyKey].totalFailure +=
+ upload.failure_count || 0;
+ stats.byStrategy[strategyKey].totalDuration +=
+ upload.duration_ms || 0;
+
+ // Calculate success rate
+ const totalFiles =
+ stats.byStrategy[strategyKey].totalSuccess +
+ stats.byStrategy[strategyKey].totalFailure;
+ if (totalFiles > 0) {
+ stats.byStrategy[strategyKey].successRate = (
+ (stats.byStrategy[strategyKey].totalSuccess / totalFiles) *
+ 100
+ ).toFixed(2);
+ }
+ }
+ });
+ }
+
+ return stats;
+ } catch (error) {
+ logger.error(`Error calculating session statistics: ${error.message}`);
+ return null;
+ }
+ }
+
+ /**
+ * Delete old session data (cleanup)
+ * @param {number} daysOld - Delete sessions older than this many days
+ * @returns {Promise<Object>} Deletion results
+ */
+ async cleanupOldSessions(daysOld = 30) {
+ const supabase = await this.#getSupabaseClient();
+
+ try {
+ const cutoffDate = new Date();
+ cutoffDate.setDate(cutoffDate.getDate() - daysOld);
+
+ // Get sessions to delete
+ const { data: sessionsToDelete, error: fetchError } =
+ await this.#queryWithRetry(async () => {
+ return await supabase
+ .from('watch_uploads')
+ .select('session_id')
+ .lt('timestamp', cutoffDate.toISOString())
+ .distinct();
+ }, `fetch sessions older than ${daysOld} days`);
+
+ if (fetchError) {
+ throw fetchError;
+ }
+
+ let deletedUploads = 0;
+ let deletedEvents = 0;
+
+ if (sessionsToDelete && sessionsToDelete.length > 0) {
+ const sessionIds = sessionsToDelete.map((s) => s.session_id);
+
+ // Delete events
+ const { count: eventCount, error: eventError } =
+ await this.#queryWithRetry(async () => {
+ return await supabase
+ .from('watch_events')
+ .delete()
+ .in('session_id', sessionIds);
+ }, `delete events for old sessions`);
+
+ if (!eventError) {
+ deletedEvents = eventCount || 0;
+ }
+
+ // Delete uploads
+ const { count: uploadCount, error: uploadError } =
+ await this.#queryWithRetry(async () => {
+ return await supabase
+ .from('watch_uploads')
+ .delete()
+ .in('session_id', sessionIds);
+ }, `delete old session uploads`);
+
+ if (!uploadError) {
+ deletedUploads = uploadCount || 0;
+ }
+ }
+
+ return {
+ deletedUploads,
+ deletedEvents,
+ sessionsDeleted: sessionsToDelete?.length || 0,
+ };
+ } catch (error) {
+ logger.error(`Error cleaning up old sessions: ${error.message}`);
+ return { deletedUploads: 0, deletedEvents: 0, sessionsDeleted: 0 };
+ }
+ }
+
+ /**
+ * Cleanup database connections and resources
+ * Called during graceful shutdown
+ * @returns {Promise<Object>} Cleanup results
+ */
+ async cleanup() {
+ try {
+ logger.info('DatabaseService: Starting cleanup...');
+
+ // Commit any pending transactions
+ const transactionResult = await this.commitPendingTransactions();
+
+ // Close database connections
+ const closeResult = await this.closeConnections();
+
+ logger.info('DatabaseService: Cleanup complete');
+
+ return {
+ success: true,
+ transactionsCommitted: transactionResult.count,
+ connectionsClosedResult: closeResult,
+ };
+ } catch (error) {
+ logger.error(`DatabaseService: Error during cleanup: ${error.message}`);
+ return {
+ success: false,
+ error: error.message,
+ };
+ }
+ }
+
+ /**
+ * Commit any pending transactions before shutdown
+ * @private
+ * @returns {Promise<Object>} Results
+ */
+ async commitPendingTransactions() {
+ try {
+ logger.debug('DatabaseService: Committing pending transactions...');
+
+ // Note: This is a placeholder for actual transaction handling
+ // In a real implementation, you would track active transactions
+ // and ensure they are properly committed before shutdown
+
+ logger.debug('DatabaseService: Pending transactions committed');
+ return { count: 0, success: true };
+ } catch (error) {
+ logger.error(
+ `DatabaseService: Error committing transactions: ${error.message}`,
+ );
+ return { count: 0, success: false, error: error.message };
+ }
+ }
+
+ /**
+ * Close all database connections
+ * @private
+ * @returns {Promise<Object>} Results
+ */
+ async closeConnections() {
+ try {
+ logger.debug('DatabaseService: Closing database connections...');
+
+ // Close Supabase connection if available
+ if (this.supabase) {
+ // Supabase client will handle connection cleanup automatically
+ logger.debug('DatabaseService: Supabase connection cleanup initiated');
+ }
+
+ logger.info('DatabaseService: All database connections closed');
+ return { success: true };
+ } catch (error) {
+ logger.error(
+ `DatabaseService: Error closing connections: ${error.message}`,
+ );
+ return { success: false, error: error.message };
+ }
+ }
  }

  // Export singleton instance
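Taken together, the new session methods form a small write/read API over the watch_uploads and watch_events tables introduced by the migrations listed above. A hedged end-to-end sketch; the databaseService export name and the id column on the inserted row are assumptions, and the session id format is arbitrary:

// sketch: record one upload attempt plus a retry, then read back per-session statistics
import { randomUUID } from 'node:crypto';
import { databaseService } from './src/services/DatabaseService.js'; // assumed singleton export name

const sessionId = randomUUID();

const uploadEvent = await databaseService.insertUploadEvent(
  { strategy: 'batch', fileCount: 12, successCount: 11, failureCount: 1, retryCount: 1, duration: 4200 },
  sessionId,
);

await databaseService.insertRetryEvent(uploadEvent.id, sessionId, { // assumes the row exposes an id column
  attemptNumber: 1,
  error: 'fetch failed',
  backoffMs: 1000,
});

const stats = await databaseService.getSessionStatistics(sessionId);
console.log(stats?.byStrategy.batch); // e.g. uploadCount: 1, totalFiles: 12, successRate: '91.67'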