@arela/uploader 0.2.13 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/.env.template +66 -0
  2. package/README.md +263 -62
  3. package/docs/API_ENDPOINTS_FOR_DETECTION.md +647 -0
  4. package/docs/QUICK_REFERENCE_API_DETECTION.md +264 -0
  5. package/docs/REFACTORING_SUMMARY_DETECT_PEDIMENTOS.md +200 -0
  6. package/package.json +3 -2
  7. package/scripts/cleanup-ds-store.js +109 -0
  8. package/scripts/cleanup-system-files.js +69 -0
  9. package/scripts/tests/phase-7-features.test.js +415 -0
  10. package/scripts/tests/signal-handling.test.js +275 -0
  11. package/scripts/tests/smart-watch-integration.test.js +554 -0
  12. package/scripts/tests/watch-service-integration.test.js +584 -0
  13. package/src/commands/UploadCommand.js +31 -4
  14. package/src/commands/WatchCommand.js +1342 -0
  15. package/src/config/config.js +270 -2
  16. package/src/document-type-shared.js +2 -0
  17. package/src/document-types/support-document.js +200 -0
  18. package/src/file-detection.js +9 -1
  19. package/src/index.js +163 -4
  20. package/src/services/AdvancedFilterService.js +505 -0
  21. package/src/services/AutoProcessingService.js +749 -0
  22. package/src/services/BenchmarkingService.js +381 -0
  23. package/src/services/DatabaseService.js +1019 -539
  24. package/src/services/ErrorMonitor.js +275 -0
  25. package/src/services/LoggingService.js +419 -1
  26. package/src/services/MonitoringService.js +401 -0
  27. package/src/services/PerformanceOptimizer.js +511 -0
  28. package/src/services/ReportingService.js +511 -0
  29. package/src/services/SignalHandler.js +255 -0
  30. package/src/services/SmartWatchDatabaseService.js +527 -0
  31. package/src/services/WatchService.js +783 -0
  32. package/src/services/upload/ApiUploadService.js +447 -3
  33. package/src/services/upload/MultiApiUploadService.js +233 -0
  34. package/src/services/upload/SupabaseUploadService.js +12 -5
  35. package/src/services/upload/UploadServiceFactory.js +24 -0
  36. package/src/utils/CleanupManager.js +262 -0
  37. package/src/utils/FileOperations.js +44 -0
  38. package/src/utils/WatchEventHandler.js +522 -0
  39. package/supabase/migrations/001_create_initial_schema.sql +366 -0
  40. package/supabase/migrations/002_align_with_arela_api_schema.sql +145 -0
  41. package/.envbackup +0 -37
  42. package/SUPABASE_UPLOAD_FIX.md +0 -157
  43. package/commands.md +0 -14
@@ -0,0 +1,522 @@
1
+ import fs from 'fs/promises';
2
+ import path from 'path';
3
+
4
+ import autoProcessingService from '../services/AutoProcessingService.js';
5
+ import logger from '../services/LoggingService.js';
6
+
7
+ /**
8
+ * WatchEventHandler - Processes file system events and manages upload strategy
9
+ * Handles debouncing, file tracking, and determines what files should be uploaded
10
+ */
11
/**
 * WatchEventHandler - Processes file system events and manages upload strategy.
 * Handles debouncing, file tracking, and determines what files should be uploaded.
 */
export class WatchEventHandler {
  constructor() {
    this.recentFiles = new Map(); // Map<normalizedPath, {event, timestamp, originalPath}>
    this.eventBatches = new Map(); // Map<batchKey, Set<normalizedPath>> — 1-second windows
    this.fileStates = new Map(); // Map<filePath, {size, mtime}> — reserved; never populated here
    this.lastProcessedTime = 0;
    this.minBatchInterval = 100; // Minimum ms between batch processing
  }

  /**
   * Register a file event.
   * @param {string} eventType - Type of event (add, change, unlink)
   * @param {string} filePath - Path to the file
   * @param {number} timestamp - Event timestamp (defaults to Date.now())
   * @returns {void}
   */
  registerEvent(eventType, filePath, timestamp = Date.now()) {
    try {
      const normalizedPath = path.normalize(filePath);

      logger.debug(
        `[EventHandler] Registering ${eventType} event: ${normalizedPath}`,
      );

      // Last event wins: a later event for the same path overwrites the earlier one.
      this.recentFiles.set(normalizedPath, {
        event: eventType,
        timestamp,
        originalPath: filePath,
      });

      // Group the event into its 1-second time-window batch.
      const batchKey = this.#generateBatchKey(timestamp);
      if (!this.eventBatches.has(batchKey)) {
        this.eventBatches.set(batchKey, new Set());
      }
      this.eventBatches.get(batchKey).add(normalizedPath);

      logger.debug(
        `[EventHandler] Batch ${batchKey}: ${this.eventBatches.get(batchKey).size} files`,
      );
    } catch (error) {
      logger.error(`Error registering event: ${error.message}`);
    }
  }

  /**
   * Get files that should be uploaded based on strategy.
   * @param {string} strategy - Upload strategy (individual|batch|full-structure)
   * @param {Object} options - Additional options (batchSize, sourceDir, ...)
   * @returns {Promise<Array>} Array of file objects to upload ([] on error)
   */
  async getFilesForUpload(strategy, options = {}) {
    try {
      logger.debug(
        `[EventHandler] Getting files for upload with strategy: ${strategy}`,
      );

      switch (strategy) {
        case 'individual':
          return await this.#getIndividualFiles(options);
        case 'batch':
          return await this.#getBatchFiles(options);
        case 'full-structure':
          return await this.#getFullStructureFiles(options);
        default:
          throw new Error(`Unknown strategy: ${strategy}`);
      }
    } catch (error) {
      // Errors (including unknown strategies) are logged and degrade to "nothing to upload".
      logger.error(`Error getting files for upload: ${error.message}`);
      return [];
    }
  }

  /**
   * Process newly added files with the automatic 4-step pipeline.
   * Picks the single most recently 'add'-ed file and hands it to AutoProcessingService.
   * @param {Object} options - Processing options
   * @param {string} options.watchDir - Watch directory where file was detected
   * @param {string} options.folderStructure - Folder structure for organization
   * @param {number} options.batchSize - Batch size for processing (default 10)
   * @returns {Promise<Object>} Result from AutoProcessingService, or {success:false,...}
   */
  async processNewFilesWithPipeline(options = {}) {
    try {
      const { watchDir, folderStructure, batchSize = 10 } = options;

      // Find the most recently added file ('add' events only).
      let newestFile = null;
      let newestTime = 0;

      for (const [filePath, metadata] of this.recentFiles.entries()) {
        if (metadata.event === 'add' && metadata.timestamp > newestTime) {
          newestTime = metadata.timestamp;
          newestFile = filePath;
        }
      }

      if (!newestFile) {
        logger.warn(
          '[EventHandler] No newly added files found for pipeline processing',
        );
        return {
          success: false,
          reason: 'No new files detected',
        };
      }

      logger.info(
        `[EventHandler] Processing newly detected file with pipeline: ${newestFile}`,
      );

      // Execute the 4-step processing pipeline
      const result = await autoProcessingService.executeProcessingPipeline({
        filePath: newestFile,
        watchDir,
        folderStructure,
        batchSize,
      });

      return result;
    } catch (error) {
      logger.error(
        `[EventHandler] Error processing files with pipeline: ${error.message}`,
      );
      return {
        success: false,
        error: error.message,
      };
    }
  }

  /**
   * Get individual file (the most recently modified, excluding deletions).
   * @private
   * @param {Object} options - Currently unused; kept for strategy-signature symmetry
   * @returns {Promise<Array>} Array with single file object, or [] if none accessible
   */
  async #getIndividualFiles(options = {}) {
    try {
      // Find the most recently modified file
      let mostRecentFile = null;
      let mostRecentTime = 0;

      for (const [filePath, metadata] of this.recentFiles.entries()) {
        if (
          metadata.event !== 'unlink' &&
          metadata.timestamp > mostRecentTime
        ) {
          mostRecentTime = metadata.timestamp;
          mostRecentFile = filePath;
        }
      }

      if (!mostRecentFile) {
        logger.warn(
          '[EventHandler] No valid files found for individual upload',
        );
        return [];
      }

      // Check if file still exists and is readable
      try {
        const stats = await fs.stat(mostRecentFile);
        const fileObj = {
          path: mostRecentFile,
          name: path.basename(mostRecentFile),
          size: stats.size,
          mtime: stats.mtime,
          type: 'individual',
        };

        logger.info(
          `[EventHandler] Individual file for upload: ${mostRecentFile} (${stats.size} bytes)`,
        );
        return [fileObj];
      } catch (error) {
        logger.warn(
          `[EventHandler] File not accessible: ${mostRecentFile} - ${error.message}`,
        );
        return [];
      }
    } catch (error) {
      logger.error(`Error in #getIndividualFiles: ${error.message}`);
      return [];
    }
  }

  /**
   * Stat a file and build a standard batch upload entry.
   * @private
   * @param {string} filePath - Normalized file path
   * @param {Object} extra - Extra properties merged into the entry (event, type)
   * @returns {Promise<Object|null>} Entry object, or null if the file is inaccessible
   */
  async #buildBatchEntry(filePath, extra) {
    try {
      const stats = await fs.stat(filePath);
      return {
        path: filePath,
        name: path.basename(filePath),
        size: stats.size,
        mtime: stats.mtime,
        ...extra,
      };
    } catch (error) {
      logger.debug(
        `[EventHandler] File not accessible for batch: ${filePath}`,
      );
      // Skip files that don't exist or are not accessible
      return null;
    }
  }

  /**
   * Get batch of recent files, grouped by directory.
   * When processing batch, ensure all files in a directory are processed together.
   * This is critical for pedimento detection and document grouping.
   * @private
   * @param {Object} options - Options with batchSize, sourceDir
   * @returns {Promise<Array>} Array of file objects
   */
  async #getBatchFiles(options = {}) {
    try {
      const batchSize = options.batchSize || 10;
      const sourceDir = options.sourceDir;
      const files = [];

      // Get all recent files (valid, non-deleted ones)
      const validFiles = Array.from(this.recentFiles.entries()).filter(
        ([, metadata]) => metadata.event !== 'unlink',
      );

      if (validFiles.length === 0) {
        logger.debug('[EventHandler] No valid files in batch');
        return [];
      }

      let candidates;
      if (sourceDir) {
        // If sourceDir provided, get ALL files from that directory (and subdirectories).
        // This ensures when one file is detected, all related files are processed together.
        logger.debug(
          `[EventHandler] Batch: Processing all files in ${sourceDir}`,
        );

        // Normalize to match the normalized map keys, and require a path-separator
        // boundary so e.g. "/jobs/abc2/x" does NOT match sourceDir "/jobs/abc".
        const normalizedSource = path.normalize(sourceDir);
        const prefix = normalizedSource.endsWith(path.sep)
          ? normalizedSource
          : normalizedSource + path.sep;
        candidates = validFiles.filter(
          ([filePath]) =>
            filePath === normalizedSource || filePath.startsWith(prefix),
        );
      } else {
        // Fallback: Sort by timestamp (newest first) and take up to batchSize
        candidates = validFiles.sort(
          ([, a], [, b]) => b.timestamp - a.timestamp,
        );
      }

      for (const [filePath, metadata] of candidates) {
        const entry = await this.#buildBatchEntry(filePath, {
          event: metadata.event,
          type: 'batch',
        });
        if (entry === null) {
          continue;
        }
        files.push(entry);

        if (files.length >= batchSize) {
          logger.debug(
            `[EventHandler] Batch size limit (${batchSize}) reached`,
          );
          break;
        }
      }

      logger.info(
        `[EventHandler] Batch upload: ${files.length} files (max ${batchSize}) from ${sourceDir || 'all directories'}`,
      );
      return files;
    } catch (error) {
      logger.error(`Error in #getBatchFiles: ${error.message}`);
      return [];
    }
  }

  /**
   * Get entire directory structure containing affected files.
   * NOTE(review): sourceDir is validated as required but is not used to filter
   * the affected directories — collection walks every directory that produced
   * an event. Confirm with callers whether filtering to sourceDir is intended.
   * @private
   * @param {Object} options - Options with sourceDir
   * @returns {Promise<Array>} Array of file objects for entire structure
   */
  async #getFullStructureFiles(options = {}) {
    try {
      const sourceDir = options.sourceDir;
      if (!sourceDir) {
        logger.warn(
          '[EventHandler] sourceDir option required for full-structure strategy',
        );
        return [];
      }

      // Get the top-level directory containing the modified files
      const affectedDirs = new Set();
      for (const filePath of this.recentFiles.keys()) {
        const dir = path.dirname(filePath);
        affectedDirs.add(dir);
      }

      logger.debug(`[EventHandler] Affected directories: ${affectedDirs.size}`);

      // For each affected directory, collect all files in its structure
      const files = [];
      const processedFiles = new Set();

      for (const affectedDir of affectedDirs) {
        await this.#collectDirectoryStructure(
          affectedDir,
          files,
          processedFiles,
        );
      }

      logger.info(
        `[EventHandler] Full structure upload: ${files.length} files`,
      );
      return files;
    } catch (error) {
      logger.error(`Error in #getFullStructureFiles: ${error.message}`);
      return [];
    }
  }

  /**
   * Recursively collect all files in a directory structure.
   * Skips hidden entries, node_modules and __pycache__; never throws, so one
   * unreadable directory does not abort collection of the others.
   * @private
   * @param {string} dirPath - Directory path
   * @param {Array} files - Array to accumulate files (mutated in place)
   * @param {Set} processedFiles - Set of already processed files (mutated in place)
   * @param {number} maxDepth - Maximum recursion depth (default 5)
   * @param {number} currentDepth - Current recursion depth (internal)
   * @returns {Promise<void>}
   */
  async #collectDirectoryStructure(
    dirPath,
    files,
    processedFiles,
    maxDepth = 5,
    currentDepth = 0,
  ) {
    try {
      if (currentDepth > maxDepth) {
        logger.debug(`[EventHandler] Max directory depth reached: ${maxDepth}`);
        return;
      }

      const entries = await fs.readdir(dirPath, { withFileTypes: true });

      for (const entry of entries) {
        // Skip hidden files and common ignore patterns
        if (
          entry.name.startsWith('.') ||
          entry.name === 'node_modules' ||
          entry.name === '__pycache__'
        ) {
          continue;
        }

        const fullPath = path.join(dirPath, entry.name);

        if (entry.isDirectory()) {
          // Recursively process subdirectories
          await this.#collectDirectoryStructure(
            fullPath,
            files,
            processedFiles,
            maxDepth,
            currentDepth + 1,
          );
        } else if (entry.isFile()) {
          // Add file if not already processed
          if (!processedFiles.has(fullPath)) {
            try {
              const stats = await fs.stat(fullPath);
              files.push({
                path: fullPath,
                name: entry.name,
                size: stats.size,
                mtime: stats.mtime,
                type: 'full-structure',
                relativePath: path.relative(path.dirname(dirPath), fullPath),
              });
              processedFiles.add(fullPath);
            } catch (error) {
              logger.debug(`[EventHandler] Cannot stat file: ${fullPath}`);
            }
          }
        }
      }
    } catch (error) {
      logger.debug(
        `[EventHandler] Error collecting directory: ${dirPath} - ${error.message}`,
      );
      // Don't throw - continue with other directories
    }
  }

  /**
   * Get all recent events.
   * @returns {Array} Array of {path, event, timestamp, originalPath} objects
   */
  getRecentEvents() {
    return Array.from(this.recentFiles.entries()).map(
      ([filePath, metadata]) => ({
        path: filePath,
        ...metadata,
      }),
    );
  }

  /**
   * Clear recent files and batches (after upload).
   * @returns {void}
   */
  clearProcessed() {
    logger.debug(
      `[EventHandler] Clearing ${this.recentFiles.size} recent files`,
    );
    this.recentFiles.clear();
    this.eventBatches.clear();
  }

  /**
   * Get statistics about current events.
   * @returns {Object} {totalEvents, adds, changes, unlinks, activeBatches}
   */
  getStats() {
    // Single pass over the events instead of three separate scans.
    let adds = 0;
    let changes = 0;
    let unlinks = 0;
    for (const metadata of this.recentFiles.values()) {
      if (metadata.event === 'add') {
        adds += 1;
      } else if (metadata.event === 'change') {
        changes += 1;
      } else if (metadata.event === 'unlink') {
        unlinks += 1;
      }
    }

    return {
      totalEvents: this.recentFiles.size,
      adds,
      changes,
      unlinks,
      activeBatches: this.eventBatches.size,
    };
  }

  /**
   * Generate batch key based on time window.
   * @private
   * @param {number} timestamp - Event timestamp
   * @returns {string} Batch key
   */
  #generateBatchKey(timestamp) {
    // Group events into 1-second windows
    const windowSize = 1000;
    const windowId = Math.floor(timestamp / windowSize);
    return `batch_${windowId}`;
  }

  /**
   * Check if a file should be ignored based on patterns.
   * Each pattern is tested against the full path, the basename and the dirname.
   * An invalid regex pattern is logged and treated as non-matching.
   * @param {string} filePath - File path
   * @param {Array} ignorePatterns - Regex patterns to ignore
   * @returns {boolean} True if file should be ignored
   */
  shouldIgnoreFile(filePath, ignorePatterns = []) {
    if (!ignorePatterns || ignorePatterns.length === 0) {
      return false;
    }

    const fileName = path.basename(filePath);
    const dirName = path.dirname(filePath);

    return ignorePatterns.some((pattern) => {
      try {
        const regex = new RegExp(pattern);
        return (
          regex.test(filePath) || regex.test(fileName) || regex.test(dirName)
        );
      } catch (error) {
        logger.warn(
          `[EventHandler] Invalid ignore pattern: ${pattern} - ${error.message}`,
        );
        return false;
      }
    });
  }

  /**
   * Reset event handler state (all maps and the processed-time marker).
   */
  reset() {
    this.recentFiles.clear();
    this.eventBatches.clear();
    this.fileStates.clear();
    this.lastProcessedTime = 0;
    logger.debug('[EventHandler] Handler reset');
  }
}
520
+
521
// Export a shared singleton so all consumers observe the same event state.
export default new WatchEventHandler();