@arela/uploader 0.2.12 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/.env.template +66 -0
  2. package/.vscode/settings.json +1 -0
  3. package/README.md +134 -58
  4. package/SUPABASE_UPLOAD_FIX.md +157 -0
  5. package/package.json +3 -2
  6. package/scripts/cleanup-ds-store.js +109 -0
  7. package/scripts/cleanup-system-files.js +69 -0
  8. package/scripts/tests/phase-7-features.test.js +415 -0
  9. package/scripts/tests/signal-handling.test.js +275 -0
  10. package/scripts/tests/smart-watch-integration.test.js +554 -0
  11. package/scripts/tests/watch-service-integration.test.js +584 -0
  12. package/src/commands/UploadCommand.js +36 -2
  13. package/src/commands/WatchCommand.js +1305 -0
  14. package/src/config/config.js +113 -0
  15. package/src/document-type-shared.js +2 -0
  16. package/src/document-types/support-document.js +201 -0
  17. package/src/file-detection.js +2 -1
  18. package/src/index.js +44 -0
  19. package/src/services/AdvancedFilterService.js +505 -0
  20. package/src/services/AutoProcessingService.js +639 -0
  21. package/src/services/BenchmarkingService.js +381 -0
  22. package/src/services/DatabaseService.js +723 -170
  23. package/src/services/ErrorMonitor.js +275 -0
  24. package/src/services/LoggingService.js +419 -1
  25. package/src/services/MonitoringService.js +401 -0
  26. package/src/services/PerformanceOptimizer.js +511 -0
  27. package/src/services/ReportingService.js +511 -0
  28. package/src/services/SignalHandler.js +255 -0
  29. package/src/services/SmartWatchDatabaseService.js +527 -0
  30. package/src/services/WatchService.js +783 -0
  31. package/src/services/upload/ApiUploadService.js +30 -4
  32. package/src/services/upload/SupabaseUploadService.js +28 -6
  33. package/src/utils/CleanupManager.js +262 -0
  34. package/src/utils/FileOperations.js +41 -0
  35. package/src/utils/WatchEventHandler.js +517 -0
  36. package/supabase/migrations/001_create_initial_schema.sql +366 -0
  37. package/supabase/migrations/002_align_with_arela_api_schema.sql +145 -0
  38. package/commands.md +0 -6
package/src/config/config.js CHANGED
@@ -16,6 +16,7 @@ class Config {
16
16
  this.upload = this.#loadUploadConfig();
17
17
  this.performance = this.#loadPerformanceConfig();
18
18
  this.logging = this.#loadLoggingConfig();
19
+ this.watch = this.#loadWatchConfig();
19
20
  }
20
21
 
21
22
  /**
@@ -114,6 +115,58 @@ class Config {
114
115
  };
115
116
  }
116
117
 
118
+ /**
119
+ * Load watch configuration
120
+ * @private
121
+ */
122
+ #loadWatchConfig() {
123
+ // Parse directory configs from JSON format
124
+ let directoryConfigs = {};
125
+ if (process.env.WATCH_DIRECTORY_CONFIGS) {
126
+ try {
127
+ directoryConfigs = JSON.parse(process.env.WATCH_DIRECTORY_CONFIGS);
128
+ } catch (error) {
129
+ console.warn(
130
+ '⚠️ Invalid JSON in WATCH_DIRECTORY_CONFIGS, using empty config',
131
+ );
132
+ directoryConfigs = {};
133
+ }
134
+ }
135
+
136
+ // Fallback to WATCH_DIRECTORIES for backward compatibility
137
+ let directories = [];
138
+ if (
139
+ Object.keys(directoryConfigs).length === 0 &&
140
+ process.env.WATCH_DIRECTORIES
141
+ ) {
142
+ directories = process.env.WATCH_DIRECTORIES.split(',')
143
+ .map((d) => d.trim())
144
+ .filter(Boolean);
145
+ }
146
+
147
+ // Parse ignore patterns
148
+ const ignorePatterns =
149
+ process.env.WATCH_IGNORE_PATTERNS?.split(',')
150
+ .map((p) => p.trim())
151
+ .filter(Boolean) || [];
152
+
153
+ return {
154
+ enabled: process.env.WATCH_ENABLED === 'true' || false,
155
+ directories,
156
+ directoryConfigs,
157
+ strategy: process.env.WATCH_STRATEGY || 'batch',
158
+ debounceMs: parseInt(process.env.WATCH_DEBOUNCE_MS) || 1000,
159
+ batchSize: parseInt(process.env.WATCH_BATCH_SIZE) || 10,
160
+ usePolling: process.env.WATCH_USE_POLLING === 'true' || false,
161
+ pollInterval: parseInt(process.env.WATCH_POLL_INTERVAL) || 100,
162
+ stabilityThreshold:
163
+ parseInt(process.env.WATCH_STABILITY_THRESHOLD) || 300,
164
+ ignorePatterns,
165
+ autoDetect: process.env.WATCH_AUTO_DETECT === 'true' || false,
166
+ autoOrganize: process.env.WATCH_AUTO_ORGANIZE === 'true' || false,
167
+ };
168
+ }
169
+
117
170
  /**
118
171
  * Check if API mode is available
119
172
  * @returns {boolean}
@@ -181,6 +234,66 @@ class Config {
181
234
  }
182
235
  return this.upload.basePath;
183
236
  }
237
+
238
+ /**
239
+ * Get watch directories with validation
240
+ * @param {string[]} cliDirs - Directories from CLI options
241
+ * @returns {string[]} Validated watch directories
242
+ * @throws {Error} If no directories are configured
243
+ */
244
+ getWatchDirectories(cliDirs = null) {
245
+ // CLI options take precedence over environment variables
246
+ const directories =
247
+ cliDirs && cliDirs.length > 0 ? cliDirs : this.watch.directories;
248
+
249
+ if (!directories || directories.length === 0) {
250
+ throw new Error(
251
+ '⚠️ No watch directories configured. Please use --directories option or set WATCH_DIRECTORIES environment variable.',
252
+ );
253
+ }
254
+
255
+ return directories;
256
+ }
257
+
258
+ /**
259
+ * Get watch configuration with validation
260
+ * @returns {Object} Watch configuration object
261
+ */
262
+ getWatchConfig() {
263
+ return {
264
+ ...this.watch,
265
+ };
266
+ }
267
+
268
+ /**
269
+ * Validate watch configuration
270
+ * @param {string[]} directories - Directories to validate
271
+ * @throws {Error} If configuration is invalid
272
+ */
273
+ validateWatchConfig(directories) {
274
+ if (!directories || directories.length === 0) {
275
+ throw new Error(
276
+ 'At least one directory must be specified for watch mode',
277
+ );
278
+ }
279
+
280
+ // Validate strategy
281
+ const validStrategies = ['individual', 'batch', 'full-structure'];
282
+ if (!validStrategies.includes(this.watch.strategy)) {
283
+ throw new Error(
284
+ `Invalid watch strategy: ${this.watch.strategy}. Must be one of: ${validStrategies.join(', ')}`,
285
+ );
286
+ }
287
+
288
+ // Validate debounce and batch size
289
+ if (this.watch.debounceMs < 0) {
290
+ throw new Error('Debounce time must be >= 0');
291
+ }
292
+
293
+ if (this.watch.batchSize < 1) {
294
+ throw new Error('Batch size must be >= 1');
295
+ }
296
+ }
184
297
  }
185
298
 
186
299
  // Export singleton instance
package/src/document-type-shared.js CHANGED
@@ -1,5 +1,6 @@
1
1
  // Import all document type definitions
2
2
  import { pedimentoSimplificadoDefinition } from './document-types/pedimento-simplificado.js';
3
+ import { supportDocumentDefinition } from './document-types/support-document.js';
3
4
 
4
5
  // Document type definitions and extraction utilities
5
6
  // Ported from TypeScript to JavaScript for Node.js
@@ -33,6 +34,7 @@ export class DocumentTypeDefinition {
33
34
  // Registry of all document types
34
35
  const documentTypes = [
35
36
  pedimentoSimplificadoDefinition,
37
+ supportDocumentDefinition,
36
38
  // Add more document types here as needed
37
39
  ];
38
40
 
package/src/document-types/support-document.js ADDED
@@ -0,0 +1,201 @@
1
+ import { FieldResult } from '../document-type-shared.js';
2
+
3
/**
 * Return the first capture group of the first pattern that matches `source`.
 *
 * @param {string} source - Text to search
 * @param {RegExp[]} patterns - Patterns tried in order; each has one capture group
 * @returns {string|null} Captured text, or null when no pattern matches
 */
const firstCapture = (source, patterns) => {
  for (const pattern of patterns) {
    const found = source.match(pattern);
    if (found) {
      return found[1];
    }
  }
  return null;
};

/**
 * Build an extractor entry that reports the first pattern capture as a
 * FieldResult(field, found, value).
 *
 * @param {string} field - Field name reported in the FieldResult
 * @param {RegExp[]} patterns - Patterns tried in order
 * @returns {{field: string, extract: function(string): FieldResult}}
 */
const patternExtractor = (field, patterns) => ({
  field,
  extract: (source) => {
    const value = firstCapture(source, patterns);
    return new FieldResult(field, value !== null, value);
  },
});

// 15-digit pedimento number, first as an XML element, then as free text.
const NUM_PEDIMENTO_PATTERNS = [
  /numPedimento[^>]*>(\d{15})<\/\w+:numPedimento/i,
  /pedimento[:\s]*(\d{15})/i,
];

/**
 * Support Document Definition
 * Detects XML and other supporting documents related to customs operations
 * These documents are typically metadata or supporting files that accompany pedimento simplificado
 */
export const supportDocumentDefinition = {
  type: 'support_document',
  extensions: ['xml', 'txt', 'json'],

  /**
   * Decide whether `source` looks like a support document.
   *
   * @param {string} source - Document text
   * @returns {boolean} True when at least two SOAP clues or both customs
   *   clues are present
   */
  match: (source) => {
    // Detect SOAP/XML structures common in customs systems
    const soapClues = [
      /soapenv:Envelope/i,
      /xmlns:soapenv=/i,
      /solicitarRecibirCoveServicio/i,
      /tipoOperacion/i,
      /patenteAduanal/i,
    ];

    // Detect customs-related metadata
    const customsClues = [/rfc|RFC/, /patente|aduana|customs|pedimento/i];

    const countHits = (clues) =>
      clues.filter((clue) => clue.test(source)).length;

    // Must have SOAP structure OR customs metadata
    return countHits(soapClues) >= 2 || countHits(customsClues) >= 2;
  },

  /**
   * Extract the pedimento identifier from XML/text content.
   *
   * @param {string} source - Document text
   * @param {Array} fields - Previously extracted fields (unused here)
   * @returns {string|null} First captured value, or null when nothing matches
   */
  extractNumPedimento: (source, fields) =>
    firstCapture(source, [
      // SOAP format with tipoOperacion
      // NOTE(review): this captures the tipoOperacion text, not a 15-digit
      // number — confirm that is the intended identifier for these documents.
      /tipoOperacion[^>]*>([A-Z]{4}[^<]*)<\/oxml:tipoOperacion/i,
      ...NUM_PEDIMENTO_PATTERNS,
    ]),

  /**
   * Derive the pedimento year.
   *
   * Uses the first two digits of the `numPedimento` field when it starts
   * with two digits (00-49 map to 20xx, 50-99 to 19xx). Otherwise falls back
   * to the year of the first YYYY-MM-DD date found in `source`.
   *
   * @param {string} source - Document text
   * @param {Array<{name: string, value: *}>} fields - Previously extracted fields
   * @returns {number|null} Four-digit year, or null when none can be derived
   */
  extractPedimentoYear: (source, fields) => {
    const numPedimento = fields?.find((f) => f.name === 'numPedimento')?.value;

    if (numPedimento) {
      // Only trust the prefix when it really is two digits; previously a
      // non-numeric value produced NaN instead of a usable result.
      const prefix = String(numPedimento).match(/^\d{2}/);
      if (prefix) {
        const twoDigitYear = Number.parseInt(prefix[0], 10);
        return twoDigitYear < 50 ? twoDigitYear + 2000 : twoDigitYear + 1900;
      }
    }

    // Try to extract year from date in XML
    const dateMatch = source.match(/(\d{4})-\d{2}-\d{2}/);
    return dateMatch ? Number.parseInt(dateMatch[1], 10) : null;
  },

  extractors: [
    // RFC (Registro Federal de Contribuyentes)
    patternExtractor('rfc', [
      /rfc[^>]*>([A-ZÑ&]{3,4}\d{6}[A-Z0-9]{3})<\/\w+:rfc/i,
      /rfcConsulta[^>]*>([A-ZÑ&]{3,4}\d{6}[A-Z0-9]{3})<\/\w+:rfcConsulta/i,
      /RFC[:\s]*([A-ZÑ&]{3,4}\d{6}[A-Z0-9]{3})/,
    ]),

    // Patente Aduanal
    patternExtractor('patente', [
      /patenteAduanal[^>]*>(\d{4})<\/\w+:patenteAduanal/i,
      /patente[:\s]*(\d{4})/i,
    ]),

    // Aduana
    patternExtractor('aduanaEntradaSalida', [
      /aduanaEntradaSalida[^>]*>(\d{1,2})<\/\w+:aduanaEntradaSalida/i,
      /aduana[:\s]*(\d{1,2})/i,
    ]),

    // Pedimento Number
    patternExtractor('numPedimento', NUM_PEDIMENTO_PATTERNS),

    // Email/Contact — reports the whole match rather than a capture group
    {
      field: 'email',
      extract: (source) => {
        const match = source.match(/[\w.-]+@[\w.-]+\.\w+/);
        return new FieldResult('email', !!match, match ? match[0] : null);
      },
    },

    // Type of Operation
    patternExtractor('tipoOperacion', [
      /tipoOperacion[^>]*>([A-Z]{4}[^<]*)<\/\w+:tipoOperacion/i,
      /tipoOperacion[:\s]*([A-Z]{4})/i,
    ]),

    // Document Date
    patternExtractor('documentDate', [
      /fechaExpedicion[^>]*>(\d{4}-\d{2}-\d{2})<\/\w+:fechaExpedicion/i,
      /fecha[:\s]*(\d{4}-\d{2}-\d{2})/i,
    ]),
  ],
};
package/src/file-detection.js CHANGED
@@ -191,7 +191,8 @@ export class FileDetectionService {
191
191
  */
192
192
  isSupportedFileType(filePath) {
193
193
  const fileExtension = path.extname(filePath).toLowerCase().replace('.', '');
194
- const supportedExtensions = ['pdf'];
194
+ // Support PDF (main documents), XML (metadata/supporting docs), and TXT
195
+ const supportedExtensions = ['pdf', 'xml', 'txt'];
195
196
  return supportedExtensions.includes(fileExtension);
196
197
  }
197
198
 
package/src/index.js CHANGED
@@ -2,6 +2,7 @@
2
2
  import { Command } from 'commander';
3
3
 
4
4
  import UploadCommand from './commands/UploadCommand.js';
5
+ import watchCommand from './commands/WatchCommand.js';
5
6
  import appConfig from './config/config.js';
6
7
  import ErrorHandler from './errors/ErrorHandler.js';
7
8
  import logger from './services/LoggingService.js';
@@ -15,6 +16,7 @@ class ArelaUploaderCLI {
15
16
  this.program = new Command();
16
17
  this.errorHandler = new ErrorHandler(logger);
17
18
  this.uploadCommand = new UploadCommand();
19
+ this.watchCommand = watchCommand;
18
20
 
19
21
  this.#setupProgram();
20
22
  this.#setupCommands();
@@ -229,6 +231,48 @@ class ArelaUploaderCLI {
229
231
  }
230
232
  });
231
233
 
234
+ // Watch command
235
+ this.program
236
+ .command('watch')
237
+ .description(
238
+ 'Monitor directories for file changes and upload automatically',
239
+ )
240
+ .option(
241
+ '-d, --directories <paths>',
242
+ 'Comma-separated directories to watch',
243
+ )
244
+ .option(
245
+ '-s, --strategy <strategy>',
246
+ 'Upload strategy: individual|batch|full-structure',
247
+ 'batch',
248
+ )
249
+ .option('--debounce <ms>', 'Debounce delay in milliseconds', '1000')
250
+ .option(
251
+ '-b, --batch-size <size>',
252
+ 'Number of files to process in each batch',
253
+ '10',
254
+ )
255
+ .option(
256
+ '--poll <ms>',
257
+ 'Use polling instead of native file system events (interval in ms)',
258
+ )
259
+ .option('--ignore <patterns>', 'Comma-separated patterns to ignore')
260
+ .option('--auto-detect', 'Enable automatic document type detection')
261
+ .option('--auto-organize', 'Enable automatic file organization')
262
+ .option(
263
+ '--auto-processing',
264
+ 'Enable automatic 4-step pipeline (stats, detect, propagate, upload)',
265
+ )
266
+ .option('--dry-run', 'Simulate changes without uploading')
267
+ .option('--verbose', 'Enable verbose logging')
268
+ .action(async (options) => {
269
+ try {
270
+ await this.watchCommand.execute(options);
271
+ } catch (error) {
272
+ this.errorHandler.handleFatalError(error, { command: 'watch' });
273
+ }
274
+ });
275
+
232
276
  // Version command (already handled by program.version())
233
277
 
234
278
  // Help command