@arela/uploader 0.2.12 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/.env.template +66 -0
  2. package/.vscode/settings.json +1 -0
  3. package/README.md +134 -58
  4. package/SUPABASE_UPLOAD_FIX.md +157 -0
  5. package/package.json +3 -2
  6. package/scripts/cleanup-ds-store.js +109 -0
  7. package/scripts/cleanup-system-files.js +69 -0
  8. package/scripts/tests/phase-7-features.test.js +415 -0
  9. package/scripts/tests/signal-handling.test.js +275 -0
  10. package/scripts/tests/smart-watch-integration.test.js +554 -0
  11. package/scripts/tests/watch-service-integration.test.js +584 -0
  12. package/src/commands/UploadCommand.js +36 -2
  13. package/src/commands/WatchCommand.js +1305 -0
  14. package/src/config/config.js +113 -0
  15. package/src/document-type-shared.js +2 -0
  16. package/src/document-types/support-document.js +201 -0
  17. package/src/file-detection.js +2 -1
  18. package/src/index.js +44 -0
  19. package/src/services/AdvancedFilterService.js +505 -0
  20. package/src/services/AutoProcessingService.js +639 -0
  21. package/src/services/BenchmarkingService.js +381 -0
  22. package/src/services/DatabaseService.js +723 -170
  23. package/src/services/ErrorMonitor.js +275 -0
  24. package/src/services/LoggingService.js +419 -1
  25. package/src/services/MonitoringService.js +401 -0
  26. package/src/services/PerformanceOptimizer.js +511 -0
  27. package/src/services/ReportingService.js +511 -0
  28. package/src/services/SignalHandler.js +255 -0
  29. package/src/services/SmartWatchDatabaseService.js +527 -0
  30. package/src/services/WatchService.js +783 -0
  31. package/src/services/upload/ApiUploadService.js +30 -4
  32. package/src/services/upload/SupabaseUploadService.js +28 -6
  33. package/src/utils/CleanupManager.js +262 -0
  34. package/src/utils/FileOperations.js +41 -0
  35. package/src/utils/WatchEventHandler.js +517 -0
  36. package/supabase/migrations/001_create_initial_schema.sql +366 -0
  37. package/supabase/migrations/002_align_with_arela_api_schema.sql +145 -0
  38. package/commands.md +0 -6
@@ -0,0 +1,517 @@
1
+ import fs from 'fs/promises';
2
+ import path from 'path';
3
+
4
+ import autoProcessingService from '../services/AutoProcessingService.js';
5
+ import logger from '../services/LoggingService.js';
6
+
7
+ /**
8
+ * WatchEventHandler - Processes file system events and manages upload strategy
9
+ * Handles debouncing, file tracking, and determines what files should be uploaded
10
+ */
11
+ export class WatchEventHandler {
12
+ constructor() {
13
+ this.recentFiles = new Map(); // Map<filePath, {event, timestamp, size}>
14
+ this.eventBatches = new Map(); // Map<batchKey, Set<filePath>>
15
+ this.fileStates = new Map(); // Map<filePath, {size, mtime}>
16
+ this.lastProcessedTime = 0;
17
+ this.minBatchInterval = 100; // Minimum ms between batch processing
18
+ }
19
+
20
+ /**
21
+ * Register a file event
22
+ * @param {string} eventType - Type of event (add, change, unlink)
23
+ * @param {string} filePath - Path to the file
24
+ * @param {number} timestamp - Event timestamp
25
+ * @returns {void}
26
+ */
27
+ registerEvent(eventType, filePath, timestamp = Date.now()) {
28
+ try {
29
+ const normalizedPath = path.normalize(filePath);
30
+
31
+ logger.debug(
32
+ `[EventHandler] Registering ${eventType} event: ${normalizedPath}`,
33
+ );
34
+
35
+ // Update recent files map
36
+ this.recentFiles.set(normalizedPath, {
37
+ event: eventType,
38
+ timestamp,
39
+ originalPath: filePath,
40
+ });
41
+
42
+ // Add to current batch
43
+ const batchKey = this.#generateBatchKey(timestamp);
44
+ if (!this.eventBatches.has(batchKey)) {
45
+ this.eventBatches.set(batchKey, new Set());
46
+ }
47
+ this.eventBatches.get(batchKey).add(normalizedPath);
48
+
49
+ logger.debug(
50
+ `[EventHandler] Batch ${batchKey}: ${this.eventBatches.get(batchKey).size} files`,
51
+ );
52
+ } catch (error) {
53
+ logger.error(`Error registering event: ${error.message}`);
54
+ }
55
+ }
56
+
57
+ /**
58
+ * Get files that should be uploaded based on strategy
59
+ * @param {string} strategy - Upload strategy (individual|batch|full-structure)
60
+ * @param {Object} options - Additional options
61
+ * @returns {Promise<Array>} Array of file objects to upload
62
+ */
63
+ async getFilesForUpload(strategy, options = {}) {
64
+ try {
65
+ logger.debug(
66
+ `[EventHandler] Getting files for upload with strategy: ${strategy}`,
67
+ );
68
+
69
+ switch (strategy) {
70
+ case 'individual':
71
+ return await this.#getIndividualFiles(options);
72
+ case 'batch':
73
+ return await this.#getBatchFiles(options);
74
+ case 'full-structure':
75
+ return await this.#getFullStructureFiles(options);
76
+ default:
77
+ throw new Error(`Unknown strategy: ${strategy}`);
78
+ }
79
+ } catch (error) {
80
+ logger.error(`Error getting files for upload: ${error.message}`);
81
+ return [];
82
+ }
83
+ }
84
+
85
+ /**
86
+ * Process newly added files with the automatic 4-step pipeline
87
+ * @param {Object} options - Processing options
88
+ * @param {string} options.watchDir - Watch directory where file was detected
89
+ * @param {string} options.folderStructure - Folder structure for organization
90
+ * @param {number} options.batchSize - Batch size for processing
91
+ * @returns {Promise<Object>} Result from AutoProcessingService
92
+ */
93
+ async processNewFilesWithPipeline(options = {}) {
94
+ try {
95
+ const { watchDir, folderStructure, batchSize = 10 } = options;
96
+
97
+ // Get the most recently added file
98
+ let newestFile = null;
99
+ let newestTime = 0;
100
+
101
+ for (const [filePath, metadata] of this.recentFiles.entries()) {
102
+ if (metadata.event === 'add' && metadata.timestamp > newestTime) {
103
+ newestTime = metadata.timestamp;
104
+ newestFile = filePath;
105
+ }
106
+ }
107
+
108
+ if (!newestFile) {
109
+ logger.warn(
110
+ '[EventHandler] No newly added files found for pipeline processing',
111
+ );
112
+ return {
113
+ success: false,
114
+ reason: 'No new files detected',
115
+ };
116
+ }
117
+
118
+ logger.info(
119
+ `[EventHandler] Processing newly detected file with pipeline: ${newestFile}`,
120
+ );
121
+
122
+ // Execute the 4-step processing pipeline
123
+ const result = await autoProcessingService.executeProcessingPipeline({
124
+ filePath: newestFile,
125
+ watchDir,
126
+ folderStructure,
127
+ batchSize,
128
+ });
129
+
130
+ return result;
131
+ } catch (error) {
132
+ logger.error(
133
+ `[EventHandler] Error processing files with pipeline: ${error.message}`,
134
+ );
135
+ return {
136
+ success: false,
137
+ error: error.message,
138
+ };
139
+ }
140
+ }
141
+
142
+ /**
143
+ * Get individual file (the most recently modified)
144
+ * @private
145
+ * @param {Object} options - Options with sourceDir, etc
146
+ * @returns {Promise<Array>} Array with single file object
147
+ */
148
+ async #getIndividualFiles(options = {}) {
149
+ try {
150
+ // Find the most recently modified file
151
+ let mostRecentFile = null;
152
+ let mostRecentTime = 0;
153
+
154
+ for (const [filePath, metadata] of this.recentFiles.entries()) {
155
+ if (
156
+ metadata.event !== 'unlink' &&
157
+ metadata.timestamp > mostRecentTime
158
+ ) {
159
+ mostRecentTime = metadata.timestamp;
160
+ mostRecentFile = filePath;
161
+ }
162
+ }
163
+
164
+ if (!mostRecentFile) {
165
+ logger.warn(
166
+ '[EventHandler] No valid files found for individual upload',
167
+ );
168
+ return [];
169
+ }
170
+
171
+ // Check if file still exists and is readable
172
+ try {
173
+ const stats = await fs.stat(mostRecentFile);
174
+ const fileObj = {
175
+ path: mostRecentFile,
176
+ name: path.basename(mostRecentFile),
177
+ size: stats.size,
178
+ mtime: stats.mtime,
179
+ type: 'individual',
180
+ };
181
+
182
+ logger.info(
183
+ `[EventHandler] Individual file for upload: ${mostRecentFile} (${stats.size} bytes)`,
184
+ );
185
+ return [fileObj];
186
+ } catch (error) {
187
+ logger.warn(
188
+ `[EventHandler] File not accessible: ${mostRecentFile} - ${error.message}`,
189
+ );
190
+ return [];
191
+ }
192
+ } catch (error) {
193
+ logger.error(`Error in #getIndividualFiles: ${error.message}`);
194
+ return [];
195
+ }
196
+ }
197
+
198
+ /**
199
+ * Get batch of recent files, grouped by directory
200
+ * When processing batch, ensure all files in a directory are processed together
201
+ * This is critical for pedimento detection and document grouping
202
+ * @private
203
+ * @param {Object} options - Options with batchSize, sourceDir
204
+ * @returns {Promise<Array>} Array of file objects
205
+ */
206
+ async #getBatchFiles(options = {}) {
207
+ try {
208
+ const batchSize = options.batchSize || 10;
209
+ const sourceDir = options.sourceDir;
210
+ const files = [];
211
+
212
+ // Get all recent files (valid, non-deleted ones)
213
+ const validFiles = Array.from(this.recentFiles.entries())
214
+ .filter(([_, metadata]) => metadata.event !== 'unlink');
215
+
216
+ if (validFiles.length === 0) {
217
+ logger.debug('[EventHandler] No valid files in batch');
218
+ return [];
219
+ }
220
+
221
+ // If sourceDir provided, get ALL files from that directory
222
+ // This ensures when one file is detected, all related files are processed together
223
+ if (sourceDir) {
224
+ logger.debug(`[EventHandler] Batch: Processing all files in ${sourceDir}`);
225
+
226
+ for (const [filePath, metadata] of validFiles) {
227
+ const fileDir = path.dirname(filePath);
228
+
229
+ // Only include files from the source directory or its subdirectories
230
+ if (filePath.startsWith(sourceDir)) {
231
+ try {
232
+ const stats = await fs.stat(filePath);
233
+ files.push({
234
+ path: filePath,
235
+ name: path.basename(filePath),
236
+ size: stats.size,
237
+ mtime: stats.mtime,
238
+ event: metadata.event,
239
+ type: 'batch',
240
+ });
241
+
242
+ if (files.length >= batchSize) {
243
+ logger.debug(`[EventHandler] Batch size limit (${batchSize}) reached`);
244
+ break;
245
+ }
246
+ } catch (error) {
247
+ logger.debug(
248
+ `[EventHandler] File not accessible for batch: ${filePath}`,
249
+ );
250
+ // Skip files that don't exist or are not accessible
251
+ }
252
+ }
253
+ }
254
+ } else {
255
+ // Fallback: Sort by timestamp (newest first) and take up to batchSize
256
+ const sortedFiles = validFiles.sort(
257
+ ([_, a], [__, b]) => b.timestamp - a.timestamp
258
+ );
259
+
260
+ for (const [filePath, metadata] of sortedFiles) {
261
+ try {
262
+ const stats = await fs.stat(filePath);
263
+ files.push({
264
+ path: filePath,
265
+ name: path.basename(filePath),
266
+ size: stats.size,
267
+ mtime: stats.mtime,
268
+ event: metadata.event,
269
+ type: 'batch',
270
+ });
271
+
272
+ if (files.length >= batchSize) {
273
+ break;
274
+ }
275
+ } catch (error) {
276
+ logger.debug(
277
+ `[EventHandler] File not accessible for batch: ${filePath}`,
278
+ );
279
+ // Skip files that don't exist or are not accessible
280
+ }
281
+ }
282
+ }
283
+
284
+ logger.info(
285
+ `[EventHandler] Batch upload: ${files.length} files (max ${batchSize}) from ${sourceDir || 'all directories'}`,
286
+ );
287
+ return files;
288
+ } catch (error) {
289
+ logger.error(`Error in #getBatchFiles: ${error.message}`);
290
+ return [];
291
+ }
292
+ }
293
+
294
+ /**
295
+ * Get entire directory structure containing affected files
296
+ * @private
297
+ * @param {Object} options - Options with sourceDir
298
+ * @returns {Promise<Array>} Array of file objects for entire structure
299
+ */
300
+ async #getFullStructureFiles(options = {}) {
301
+ try {
302
+ const sourceDir = options.sourceDir;
303
+ if (!sourceDir) {
304
+ logger.warn(
305
+ '[EventHandler] sourceDir option required for full-structure strategy',
306
+ );
307
+ return [];
308
+ }
309
+
310
+ // Get the top-level directory containing the modified files
311
+ const affectedDirs = new Set();
312
+ for (const filePath of this.recentFiles.keys()) {
313
+ const dir = path.dirname(filePath);
314
+ affectedDirs.add(dir);
315
+ }
316
+
317
+ logger.debug(`[EventHandler] Affected directories: ${affectedDirs.size}`);
318
+
319
+ // For each affected directory, collect all files in its structure
320
+ const files = [];
321
+ const processedFiles = new Set();
322
+
323
+ for (const affectedDir of affectedDirs) {
324
+ await this.#collectDirectoryStructure(
325
+ affectedDir,
326
+ files,
327
+ processedFiles,
328
+ );
329
+ }
330
+
331
+ logger.info(
332
+ `[EventHandler] Full structure upload: ${files.length} files`,
333
+ );
334
+ return files;
335
+ } catch (error) {
336
+ logger.error(`Error in #getFullStructureFiles: ${error.message}`);
337
+ return [];
338
+ }
339
+ }
340
+
341
+ /**
342
+ * Recursively collect all files in a directory structure
343
+ * @private
344
+ * @param {string} dirPath - Directory path
345
+ * @param {Array} files - Array to accumulate files
346
+ * @param {Set} processedFiles - Set of already processed files
347
+ * @returns {Promise<void>}
348
+ */
349
+ async #collectDirectoryStructure(
350
+ dirPath,
351
+ files,
352
+ processedFiles,
353
+ maxDepth = 5,
354
+ currentDepth = 0,
355
+ ) {
356
+ try {
357
+ if (currentDepth > maxDepth) {
358
+ logger.debug(`[EventHandler] Max directory depth reached: ${maxDepth}`);
359
+ return;
360
+ }
361
+
362
+ const entries = await fs.readdir(dirPath, { withFileTypes: true });
363
+
364
+ for (const entry of entries) {
365
+ // Skip hidden files and common ignore patterns
366
+ if (
367
+ entry.name.startsWith('.') ||
368
+ entry.name === 'node_modules' ||
369
+ entry.name === '__pycache__'
370
+ ) {
371
+ continue;
372
+ }
373
+
374
+ const fullPath = path.join(dirPath, entry.name);
375
+
376
+ if (entry.isDirectory()) {
377
+ // Recursively process subdirectories
378
+ await this.#collectDirectoryStructure(
379
+ fullPath,
380
+ files,
381
+ processedFiles,
382
+ maxDepth,
383
+ currentDepth + 1,
384
+ );
385
+ } else if (entry.isFile()) {
386
+ // Add file if not already processed
387
+ if (!processedFiles.has(fullPath)) {
388
+ try {
389
+ const stats = await fs.stat(fullPath);
390
+ files.push({
391
+ path: fullPath,
392
+ name: entry.name,
393
+ size: stats.size,
394
+ mtime: stats.mtime,
395
+ type: 'full-structure',
396
+ relativePath: path.relative(path.dirname(dirPath), fullPath),
397
+ });
398
+ processedFiles.add(fullPath);
399
+ } catch (error) {
400
+ logger.debug(`[EventHandler] Cannot stat file: ${fullPath}`);
401
+ }
402
+ }
403
+ }
404
+ }
405
+ } catch (error) {
406
+ logger.debug(
407
+ `[EventHandler] Error collecting directory: ${dirPath} - ${error.message}`,
408
+ );
409
+ // Don't throw - continue with other directories
410
+ }
411
+ }
412
+
413
+ /**
414
+ * Get all recent events
415
+ * @returns {Array} Array of recent file events
416
+ */
417
+ getRecentEvents() {
418
+ return Array.from(this.recentFiles.entries()).map(
419
+ ([filePath, metadata]) => ({
420
+ path: filePath,
421
+ ...metadata,
422
+ }),
423
+ );
424
+ }
425
+
426
+ /**
427
+ * Clear recent files and batches (after upload)
428
+ * @returns {void}
429
+ */
430
+ clearProcessed() {
431
+ logger.debug(
432
+ `[EventHandler] Clearing ${this.recentFiles.size} recent files`,
433
+ );
434
+ this.recentFiles.clear();
435
+ this.eventBatches.clear();
436
+ }
437
+
438
+ /**
439
+ * Get statistics about current events
440
+ * @returns {Object} Statistics object
441
+ */
442
+ getStats() {
443
+ const adds = Array.from(this.recentFiles.values()).filter(
444
+ (m) => m.event === 'add',
445
+ ).length;
446
+ const changes = Array.from(this.recentFiles.values()).filter(
447
+ (m) => m.event === 'change',
448
+ ).length;
449
+ const unlinks = Array.from(this.recentFiles.values()).filter(
450
+ (m) => m.event === 'unlink',
451
+ ).length;
452
+
453
+ return {
454
+ totalEvents: this.recentFiles.size,
455
+ adds,
456
+ changes,
457
+ unlinks,
458
+ activeBatches: this.eventBatches.size,
459
+ };
460
+ }
461
+
462
+ /**
463
+ * Generate batch key based on time window
464
+ * @private
465
+ * @param {number} timestamp - Event timestamp
466
+ * @returns {string} Batch key
467
+ */
468
+ #generateBatchKey(timestamp) {
469
+ // Group events into 1-second windows
470
+ const windowSize = 1000;
471
+ const windowId = Math.floor(timestamp / windowSize);
472
+ return `batch_${windowId}`;
473
+ }
474
+
475
+ /**
476
+ * Check if a file should be ignored based on patterns
477
+ * @param {string} filePath - File path
478
+ * @param {Array} ignorePatterns - Regex patterns to ignore
479
+ * @returns {boolean} True if file should be ignored
480
+ */
481
+ shouldIgnoreFile(filePath, ignorePatterns = []) {
482
+ if (!ignorePatterns || ignorePatterns.length === 0) {
483
+ return false;
484
+ }
485
+
486
+ const fileName = path.basename(filePath);
487
+ const dirName = path.dirname(filePath);
488
+
489
+ return ignorePatterns.some((pattern) => {
490
+ try {
491
+ const regex = new RegExp(pattern);
492
+ return (
493
+ regex.test(filePath) || regex.test(fileName) || regex.test(dirName)
494
+ );
495
+ } catch (error) {
496
+ logger.warn(
497
+ `[EventHandler] Invalid ignore pattern: ${pattern} - ${error.message}`,
498
+ );
499
+ return false;
500
+ }
501
+ });
502
+ }
503
+
504
+ /**
505
+ * Reset event handler state
506
+ */
507
+ reset() {
508
+ this.recentFiles.clear();
509
+ this.eventBatches.clear();
510
+ this.fileStates.clear();
511
+ this.lastProcessedTime = 0;
512
+ logger.debug('[EventHandler] Handler reset');
513
+ }
514
+ }
515
+
516
+ // Export singleton instance
517
+ export default new WatchEventHandler();