@arela/uploader 0.2.12 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.template +66 -0
- package/.vscode/settings.json +1 -0
- package/README.md +134 -58
- package/SUPABASE_UPLOAD_FIX.md +157 -0
- package/package.json +3 -2
- package/scripts/cleanup-ds-store.js +109 -0
- package/scripts/cleanup-system-files.js +69 -0
- package/scripts/tests/phase-7-features.test.js +415 -0
- package/scripts/tests/signal-handling.test.js +275 -0
- package/scripts/tests/smart-watch-integration.test.js +554 -0
- package/scripts/tests/watch-service-integration.test.js +584 -0
- package/src/commands/UploadCommand.js +36 -2
- package/src/commands/WatchCommand.js +1305 -0
- package/src/config/config.js +113 -0
- package/src/document-type-shared.js +2 -0
- package/src/document-types/support-document.js +201 -0
- package/src/file-detection.js +2 -1
- package/src/index.js +44 -0
- package/src/services/AdvancedFilterService.js +505 -0
- package/src/services/AutoProcessingService.js +639 -0
- package/src/services/BenchmarkingService.js +381 -0
- package/src/services/DatabaseService.js +723 -170
- package/src/services/ErrorMonitor.js +275 -0
- package/src/services/LoggingService.js +419 -1
- package/src/services/MonitoringService.js +401 -0
- package/src/services/PerformanceOptimizer.js +511 -0
- package/src/services/ReportingService.js +511 -0
- package/src/services/SignalHandler.js +255 -0
- package/src/services/SmartWatchDatabaseService.js +527 -0
- package/src/services/WatchService.js +783 -0
- package/src/services/upload/ApiUploadService.js +30 -4
- package/src/services/upload/SupabaseUploadService.js +28 -6
- package/src/utils/CleanupManager.js +262 -0
- package/src/utils/FileOperations.js +41 -0
- package/src/utils/WatchEventHandler.js +517 -0
- package/supabase/migrations/001_create_initial_schema.sql +366 -0
- package/supabase/migrations/002_align_with_arela_api_schema.sql +145 -0
- package/commands.md +0 -6
package/src/config/config.js
CHANGED
|
@@ -16,6 +16,7 @@ class Config {
|
|
|
16
16
|
this.upload = this.#loadUploadConfig();
|
|
17
17
|
this.performance = this.#loadPerformanceConfig();
|
|
18
18
|
this.logging = this.#loadLoggingConfig();
|
|
19
|
+
this.watch = this.#loadWatchConfig();
|
|
19
20
|
}
|
|
20
21
|
|
|
21
22
|
/**
|
|
@@ -114,6 +115,58 @@ class Config {
|
|
|
114
115
|
};
|
|
115
116
|
}
|
|
116
117
|
|
|
118
|
+
/**
|
|
119
|
+
* Load watch configuration
|
|
120
|
+
* @private
|
|
121
|
+
*/
|
|
122
|
+
#loadWatchConfig() {
|
|
123
|
+
// Parse directory configs from JSON format
|
|
124
|
+
let directoryConfigs = {};
|
|
125
|
+
if (process.env.WATCH_DIRECTORY_CONFIGS) {
|
|
126
|
+
try {
|
|
127
|
+
directoryConfigs = JSON.parse(process.env.WATCH_DIRECTORY_CONFIGS);
|
|
128
|
+
} catch (error) {
|
|
129
|
+
console.warn(
|
|
130
|
+
'⚠️ Invalid JSON in WATCH_DIRECTORY_CONFIGS, using empty config',
|
|
131
|
+
);
|
|
132
|
+
directoryConfigs = {};
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
// Fallback to WATCH_DIRECTORIES for backward compatibility
|
|
137
|
+
let directories = [];
|
|
138
|
+
if (
|
|
139
|
+
Object.keys(directoryConfigs).length === 0 &&
|
|
140
|
+
process.env.WATCH_DIRECTORIES
|
|
141
|
+
) {
|
|
142
|
+
directories = process.env.WATCH_DIRECTORIES.split(',')
|
|
143
|
+
.map((d) => d.trim())
|
|
144
|
+
.filter(Boolean);
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
// Parse ignore patterns
|
|
148
|
+
const ignorePatterns =
|
|
149
|
+
process.env.WATCH_IGNORE_PATTERNS?.split(',')
|
|
150
|
+
.map((p) => p.trim())
|
|
151
|
+
.filter(Boolean) || [];
|
|
152
|
+
|
|
153
|
+
return {
|
|
154
|
+
enabled: process.env.WATCH_ENABLED === 'true' || false,
|
|
155
|
+
directories,
|
|
156
|
+
directoryConfigs,
|
|
157
|
+
strategy: process.env.WATCH_STRATEGY || 'batch',
|
|
158
|
+
debounceMs: parseInt(process.env.WATCH_DEBOUNCE_MS) || 1000,
|
|
159
|
+
batchSize: parseInt(process.env.WATCH_BATCH_SIZE) || 10,
|
|
160
|
+
usePolling: process.env.WATCH_USE_POLLING === 'true' || false,
|
|
161
|
+
pollInterval: parseInt(process.env.WATCH_POLL_INTERVAL) || 100,
|
|
162
|
+
stabilityThreshold:
|
|
163
|
+
parseInt(process.env.WATCH_STABILITY_THRESHOLD) || 300,
|
|
164
|
+
ignorePatterns,
|
|
165
|
+
autoDetect: process.env.WATCH_AUTO_DETECT === 'true' || false,
|
|
166
|
+
autoOrganize: process.env.WATCH_AUTO_ORGANIZE === 'true' || false,
|
|
167
|
+
};
|
|
168
|
+
}
|
|
169
|
+
|
|
117
170
|
/**
|
|
118
171
|
* Check if API mode is available
|
|
119
172
|
* @returns {boolean}
|
|
@@ -181,6 +234,66 @@ class Config {
|
|
|
181
234
|
}
|
|
182
235
|
return this.upload.basePath;
|
|
183
236
|
}
|
|
237
|
+
|
|
238
|
+
/**
|
|
239
|
+
* Get watch directories with validation
|
|
240
|
+
* @param {string[]} cliDirs - Directories from CLI options
|
|
241
|
+
* @returns {string[]} Validated watch directories
|
|
242
|
+
* @throws {Error} If no directories are configured
|
|
243
|
+
*/
|
|
244
|
+
getWatchDirectories(cliDirs = null) {
|
|
245
|
+
// CLI options take precedence over environment variables
|
|
246
|
+
const directories =
|
|
247
|
+
cliDirs && cliDirs.length > 0 ? cliDirs : this.watch.directories;
|
|
248
|
+
|
|
249
|
+
if (!directories || directories.length === 0) {
|
|
250
|
+
throw new Error(
|
|
251
|
+
'⚠️ No watch directories configured. Please use --directories option or set WATCH_DIRECTORIES environment variable.',
|
|
252
|
+
);
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
return directories;
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
/**
|
|
259
|
+
* Get watch configuration with validation
|
|
260
|
+
* @returns {Object} Watch configuration object
|
|
261
|
+
*/
|
|
262
|
+
getWatchConfig() {
|
|
263
|
+
return {
|
|
264
|
+
...this.watch,
|
|
265
|
+
};
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
/**
|
|
269
|
+
* Validate watch configuration
|
|
270
|
+
* @param {string[]} directories - Directories to validate
|
|
271
|
+
* @throws {Error} If configuration is invalid
|
|
272
|
+
*/
|
|
273
|
+
validateWatchConfig(directories) {
|
|
274
|
+
if (!directories || directories.length === 0) {
|
|
275
|
+
throw new Error(
|
|
276
|
+
'At least one directory must be specified for watch mode',
|
|
277
|
+
);
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
// Validate strategy
|
|
281
|
+
const validStrategies = ['individual', 'batch', 'full-structure'];
|
|
282
|
+
if (!validStrategies.includes(this.watch.strategy)) {
|
|
283
|
+
throw new Error(
|
|
284
|
+
`Invalid watch strategy: ${this.watch.strategy}. Must be one of: ${validStrategies.join(', ')}`,
|
|
285
|
+
);
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
// Validate debounce and batch size
|
|
289
|
+
if (this.watch.debounceMs < 0) {
|
|
290
|
+
throw new Error('Debounce time must be >= 0');
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
if (this.watch.batchSize < 1) {
|
|
294
|
+
throw new Error('Batch size must be >= 1');
|
|
295
|
+
}
|
|
296
|
+
}
|
|
184
297
|
}
|
|
185
298
|
|
|
186
299
|
// Export singleton instance
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
// Import all document type definitions
|
|
2
2
|
import { pedimentoSimplificadoDefinition } from './document-types/pedimento-simplificado.js';
|
|
3
|
+
import { supportDocumentDefinition } from './document-types/support-document.js';
|
|
3
4
|
|
|
4
5
|
// Document type definitions and extraction utilities
|
|
5
6
|
// Ported from TypeScript to JavaScript for Node.js
|
|
@@ -33,6 +34,7 @@ export class DocumentTypeDefinition {
|
|
|
33
34
|
// Registry of all document types
|
|
34
35
|
const documentTypes = [
|
|
35
36
|
pedimentoSimplificadoDefinition,
|
|
37
|
+
supportDocumentDefinition,
|
|
36
38
|
// Add more document types here as needed
|
|
37
39
|
];
|
|
38
40
|
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
import { FieldResult } from '../document-type-shared.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Return the first capture group of the first pattern that matches.
 * Patterns are tried in order, so list the most specific one first.
 * @param {string} source - Raw document text
 * @param {RegExp[]} patterns - Patterns with exactly one capture group
 * @returns {string|null} Captured text, or null when nothing matches
 */
const firstCapture = (source, patterns) => {
  for (const pattern of patterns) {
    const match = source.match(pattern);
    if (match) {
      return match[1];
    }
  }
  return null;
};

/**
 * Build an extractor entry that reports the first pattern capture as a
 * FieldResult (found = true with the value, or found = false with null).
 * @param {string} field - Field name reported in the FieldResult
 * @param {RegExp[]} patterns - Patterns tried in order
 * @returns {{field: string, extract: function(string): FieldResult}}
 */
const patternExtractor = (field, patterns) => ({
  field,
  extract: (source) => {
    const value = firstCapture(source, patterns);
    return new FieldResult(field, value !== null, value);
  },
});

// 15-digit pedimento number: namespaced XML element first, then free text.
const NUM_PEDIMENTO_PATTERNS = [
  /numPedimento[^>]*>(\d{15})<\/\w+:numPedimento/i,
  /pedimento[:\s]*(\d{15})/i,
];

/**
 * Support Document Definition
 * Detects XML and other supporting documents related to customs operations
 * These documents are typically metadata or supporting files that accompany pedimento simplificado
 */
export const supportDocumentDefinition = {
  type: 'support_document',
  extensions: ['xml', 'txt', 'json'],

  /**
   * Decide whether the text looks like a support document.
   * @param {string} source - Raw document text
   * @returns {boolean} True when at least two SOAP clues or both customs
   *   clues are present
   */
  match: (source) => {
    // Detect SOAP/XML structures common in customs systems
    const soapClues = [
      /soapenv:Envelope/i,
      /xmlns:soapenv=/i,
      /solicitarRecibirCoveServicio/i,
      /tipoOperacion/i,
      /patenteAduanal/i,
    ];

    // Detect customs-related metadata
    const customsClues = [
      /rfc|RFC/,
      /patente|aduana|customs|pedimento/i,
    ];

    const soapFound = soapClues.filter((clue) => clue.test(source)).length;
    const customsFound = customsClues.filter((clue) => clue.test(source)).length;

    // Must have SOAP structure OR customs metadata
    return soapFound >= 2 || customsFound >= 2;
  },

  /**
   * Extract the pedimento identifier from XML/text content.
   * @param {string} source - Raw document text
   * @param {Array} [fields] - Previously extracted fields (unused here)
   * @returns {string|null} The 15-digit pedimento number when present;
   *   otherwise the legacy tipoOperacion fallback value, or null
   */
  extractNumPedimento: (source, fields) => {
    const patterns = [
      // A real pedimento number always wins.
      ...NUM_PEDIMENTO_PATTERNS,
      // Legacy fallback kept last for backward compatibility.
      // NOTE(review): this captures the operation type, not a pedimento
      // number - confirm whether callers still rely on it.
      /tipoOperacion[^>]*>([A-Z]{4}[^<]*)<\/oxml:tipoOperacion/i,
    ];

    return firstCapture(source, patterns);
  },

  /**
   * Derive the pedimento year.
   * Uses the two leading digits of the already-extracted `numPedimento`
   * field (00-49 -> 20xx, 50-99 -> 19xx); when that is unavailable or does
   * not start with two digits, falls back to the first YYYY-MM-DD date in
   * the text.
   * @param {string} source - Raw document text
   * @param {Array<{name: string, value: *}>} [fields] - Extracted fields
   * @returns {number|null} Four-digit year, or null when none can be derived
   */
  extractPedimentoYear: (source, fields) => {
    const numPedimento = Array.isArray(fields)
      ? fields.find((f) => f?.name === 'numPedimento')?.value
      : undefined;

    // Guard against values that are not digit-prefixed; parseInt on those
    // would yield NaN instead of a usable year.
    const prefix = numPedimento ? String(numPedimento).match(/^\d{2}/) : null;
    if (prefix) {
      const year = Number.parseInt(prefix[0], 10);
      return year < 50 ? year + 2000 : year + 1900;
    }

    // Try to extract year from date in XML
    const dateMatch = source.match(/(\d{4})-\d{2}-\d{2}/);
    return dateMatch ? Number.parseInt(dateMatch[1], 10) : null;
  },

  extractors: [
    // RFC (Registro Federal de Contribuyentes)
    patternExtractor('rfc', [
      /rfc[^>]*>([A-ZÑ&]{3,4}\d{6}[A-Z0-9]{3})<\/\w+:rfc/i,
      /rfcConsulta[^>]*>([A-ZÑ&]{3,4}\d{6}[A-Z0-9]{3})<\/\w+:rfcConsulta/i,
      /RFC[:\s]*([A-ZÑ&]{3,4}\d{6}[A-Z0-9]{3})/,
    ]),

    // Patente Aduanal
    patternExtractor('patente', [
      /patenteAduanal[^>]*>(\d{4})<\/\w+:patenteAduanal/i,
      /patente[:\s]*(\d{4})/i,
    ]),

    // Aduana
    patternExtractor('aduanaEntradaSalida', [
      /aduanaEntradaSalida[^>]*>(\d{1,2})<\/\w+:aduanaEntradaSalida/i,
      /aduana[:\s]*(\d{1,2})/i,
    ]),

    // Pedimento Number
    patternExtractor('numPedimento', NUM_PEDIMENTO_PATTERNS),

    // Email/Contact
    {
      field: 'email',
      extract: (source) => {
        const match = source.match(/[\w.-]+@[\w.-]+\.\w+/);
        return new FieldResult('email', !!match, match ? match[0] : null);
      },
    },

    // Type of Operation
    patternExtractor('tipoOperacion', [
      /tipoOperacion[^>]*>([A-Z]{4}[^<]*)<\/\w+:tipoOperacion/i,
      /tipoOperacion[:\s]*([A-Z]{4})/i,
    ]),

    // Document Date
    patternExtractor('documentDate', [
      /fechaExpedicion[^>]*>(\d{4}-\d{2}-\d{2})<\/\w+:fechaExpedicion/i,
      /fecha[:\s]*(\d{4}-\d{2}-\d{2})/i,
    ]),
  ],
};
|
package/src/file-detection.js
CHANGED
|
@@ -191,7 +191,8 @@ export class FileDetectionService {
|
|
|
191
191
|
*/
|
|
192
192
|
isSupportedFileType(filePath) {
|
|
193
193
|
const fileExtension = path.extname(filePath).toLowerCase().replace('.', '');
|
|
194
|
-
|
|
194
|
+
// Support PDF (main documents), XML (metadata/supporting docs), and TXT
|
|
195
|
+
const supportedExtensions = ['pdf', 'xml', 'txt'];
|
|
195
196
|
return supportedExtensions.includes(fileExtension);
|
|
196
197
|
}
|
|
197
198
|
|
package/src/index.js
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
import { Command } from 'commander';
|
|
3
3
|
|
|
4
4
|
import UploadCommand from './commands/UploadCommand.js';
|
|
5
|
+
import watchCommand from './commands/WatchCommand.js';
|
|
5
6
|
import appConfig from './config/config.js';
|
|
6
7
|
import ErrorHandler from './errors/ErrorHandler.js';
|
|
7
8
|
import logger from './services/LoggingService.js';
|
|
@@ -15,6 +16,7 @@ class ArelaUploaderCLI {
|
|
|
15
16
|
this.program = new Command();
|
|
16
17
|
this.errorHandler = new ErrorHandler(logger);
|
|
17
18
|
this.uploadCommand = new UploadCommand();
|
|
19
|
+
this.watchCommand = watchCommand;
|
|
18
20
|
|
|
19
21
|
this.#setupProgram();
|
|
20
22
|
this.#setupCommands();
|
|
@@ -229,6 +231,48 @@ class ArelaUploaderCLI {
|
|
|
229
231
|
}
|
|
230
232
|
});
|
|
231
233
|
|
|
234
|
+
// Watch command
|
|
235
|
+
this.program
|
|
236
|
+
.command('watch')
|
|
237
|
+
.description(
|
|
238
|
+
'Monitor directories for file changes and upload automatically',
|
|
239
|
+
)
|
|
240
|
+
.option(
|
|
241
|
+
'-d, --directories <paths>',
|
|
242
|
+
'Comma-separated directories to watch',
|
|
243
|
+
)
|
|
244
|
+
.option(
|
|
245
|
+
'-s, --strategy <strategy>',
|
|
246
|
+
'Upload strategy: individual|batch|full-structure',
|
|
247
|
+
'batch',
|
|
248
|
+
)
|
|
249
|
+
.option('--debounce <ms>', 'Debounce delay in milliseconds', '1000')
|
|
250
|
+
.option(
|
|
251
|
+
'-b, --batch-size <size>',
|
|
252
|
+
'Number of files to process in each batch',
|
|
253
|
+
'10',
|
|
254
|
+
)
|
|
255
|
+
.option(
|
|
256
|
+
'--poll <ms>',
|
|
257
|
+
'Use polling instead of native file system events (interval in ms)',
|
|
258
|
+
)
|
|
259
|
+
.option('--ignore <patterns>', 'Comma-separated patterns to ignore')
|
|
260
|
+
.option('--auto-detect', 'Enable automatic document type detection')
|
|
261
|
+
.option('--auto-organize', 'Enable automatic file organization')
|
|
262
|
+
.option(
|
|
263
|
+
'--auto-processing',
|
|
264
|
+
'Enable automatic 4-step pipeline (stats, detect, propagate, upload)',
|
|
265
|
+
)
|
|
266
|
+
.option('--dry-run', 'Simulate changes without uploading')
|
|
267
|
+
.option('--verbose', 'Enable verbose logging')
|
|
268
|
+
.action(async (options) => {
|
|
269
|
+
try {
|
|
270
|
+
await this.watchCommand.execute(options);
|
|
271
|
+
} catch (error) {
|
|
272
|
+
this.errorHandler.handleFatalError(error, { command: 'watch' });
|
|
273
|
+
}
|
|
274
|
+
});
|
|
275
|
+
|
|
232
276
|
// Version command (already handled by program.version())
|
|
233
277
|
|
|
234
278
|
// Help command
|