@arela/uploader 0.2.1 → 0.2.2

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@arela/uploader",
3
- "version": "0.2.1",
3
+ "version": "0.2.2",
4
4
  "description": "CLI to upload files/directories to Arela",
5
5
  "bin": {
6
6
  "arela": "./src/index.js"
@@ -1,3 +1,6 @@
1
+ // Import all document type definitions
2
+ import { pedimentoSimplificadoDefinition } from './document-types/pedimento-simplificado.js';
3
+
1
4
  // Document type definitions and extraction utilities
2
5
  // Ported from TypeScript to JavaScript for Node.js
3
6
 
@@ -10,7 +13,14 @@ export class FieldResult {
10
13
  }
11
14
 
12
15
  export class DocumentTypeDefinition {
13
- constructor(type, extensions, match, extractors, extractNumPedimento, extractPedimentoYear) {
16
+ constructor(
17
+ type,
18
+ extensions,
19
+ match,
20
+ extractors,
21
+ extractNumPedimento,
22
+ extractPedimentoYear,
23
+ ) {
14
24
  this.type = type;
15
25
  this.extensions = extensions;
16
26
  this.match = match;
@@ -20,9 +30,6 @@ export class DocumentTypeDefinition {
20
30
  }
21
31
  }
22
32
 
23
- // Import all document type definitions
24
- import { pedimentoSimplificadoDefinition } from './document-types/pedimento-simplificado.js';
25
-
26
33
  // Registry of all document types
27
34
  const documentTypes = [
28
35
  pedimentoSimplificadoDefinition,
@@ -44,14 +51,17 @@ export function extractDocumentFields(source, fileExtension, filePath) {
44
51
  // Try to match against each document type
45
52
  for (const docType of documentTypes) {
46
53
  // Check if file extension matches
47
- if (fileExtension && !docType.extensions.includes(fileExtension.toLowerCase())) {
54
+ if (
55
+ fileExtension &&
56
+ !docType.extensions.includes(fileExtension.toLowerCase())
57
+ ) {
48
58
  continue;
49
59
  }
50
60
 
51
61
  // Test if content matches this document type
52
62
  if (docType.match(source)) {
53
63
  console.log(`✅ Matched document type: ${docType.type}`);
54
-
64
+
55
65
  // Extract all fields
56
66
  const fields = [];
57
67
  for (const extractor of docType.extractors) {
@@ -68,8 +78,12 @@ export function extractDocumentFields(source, fileExtension, filePath) {
68
78
  }
69
79
 
70
80
  // Extract pedimento number and year
71
- const pedimento = docType.extractNumPedimento ? docType.extractNumPedimento(source, fields) : null;
72
- const year = docType.extractPedimentoYear ? docType.extractPedimentoYear(source, fields) : null;
81
+ const pedimento = docType.extractNumPedimento
82
+ ? docType.extractNumPedimento(source, fields)
83
+ : null;
84
+ const year = docType.extractPedimentoYear
85
+ ? docType.extractPedimentoYear(source, fields)
86
+ : null;
73
87
 
74
88
  return [docType.type, fields, pedimento, year];
75
89
  }
@@ -33,7 +33,7 @@ export const pedimentoSimplificadoDefinition = {
33
33
  return new FieldResult(
34
34
  'numPedimento',
35
35
  !!match,
36
- match ? match[0].replace(/\s/g, '') : null
36
+ match ? match[0].replace(/\s/g, '') : null,
37
37
  );
38
38
  },
39
39
  },
@@ -50,7 +50,7 @@ export const pedimentoSimplificadoDefinition = {
50
50
  return new FieldResult(
51
51
  'tipoOperacion',
52
52
  !!match,
53
- match ? match[1] : null
53
+ match ? match[1] : null,
54
54
  );
55
55
  },
56
56
  },
@@ -67,7 +67,7 @@ export const pedimentoSimplificadoDefinition = {
67
67
  return new FieldResult(
68
68
  'clavePedimento',
69
69
  !!match,
70
- match ? match[1] : null
70
+ match ? match[1] : null,
71
71
  );
72
72
  },
73
73
  },
@@ -83,7 +83,7 @@ export const pedimentoSimplificadoDefinition = {
83
83
  return new FieldResult(
84
84
  'aduanaEntradaSalida',
85
85
  !!match,
86
- match ? match[1] : null
86
+ match ? match[1] : null,
87
87
  );
88
88
  },
89
89
  },
@@ -93,11 +93,7 @@ export const pedimentoSimplificadoDefinition = {
93
93
  field: 'rfc',
94
94
  extract: (source) => {
95
95
  const match = source.match(/\n\s*([A-Z0-9]{12,13})\s*\n/);
96
- return new FieldResult(
97
- 'rfc',
98
- !!match,
99
- match ? match[1] : null
100
- );
96
+ return new FieldResult('rfc', !!match, match ? match[1] : null);
101
97
  },
102
98
  },
103
99
 
@@ -112,9 +108,7 @@ export const pedimentoSimplificadoDefinition = {
112
108
  .filter((l) => l.length > 0);
113
109
 
114
110
  // 2) find the index of an RFC line (12–13 alnum chars)
115
- const rfcIndex = lines.findIndex((l) =>
116
- /^[A-Z0-9]{12,13}$/.test(l),
117
- );
111
+ const rfcIndex = lines.findIndex((l) => /^[A-Z0-9]{12,13}$/.test(l));
118
112
  let code = null;
119
113
 
120
114
  // 3) if next line exists and is exactly 8 alnum chars, that's the code
@@ -122,11 +116,7 @@ export const pedimentoSimplificadoDefinition = {
122
116
  code = lines[rfcIndex + 1];
123
117
  }
124
118
 
125
- return new FieldResult(
126
- 'codigoAceptacion',
127
- code !== null,
128
- code
129
- );
119
+ return new FieldResult('codigoAceptacion', code !== null, code);
130
120
  },
131
121
  },
132
122
 
@@ -175,11 +165,7 @@ export const pedimentoSimplificadoDefinition = {
175
165
  if (!match) {
176
166
  match = source.match(/PRESENTACION:\s*(\d{2}\/\d{2}\/\d{4})/);
177
167
  }
178
- return new FieldResult(
179
- 'paymentDate',
180
- !!match,
181
- match ? match[1] : null
182
- );
168
+ return new FieldResult('paymentDate', !!match, match ? match[1] : null);
183
169
  },
184
170
  },
185
171
 
@@ -224,11 +210,7 @@ export const pedimentoSimplificadoDefinition = {
224
210
  extract: (source) => {
225
211
  // Look for the peso bruto value with decimal format
226
212
  const match = source.match(/(\d+\.\d+)\d{3}/);
227
- return new FieldResult(
228
- 'pesoBruto',
229
- !!match,
230
- match ? match[1] : null
231
- );
213
+ return new FieldResult('pesoBruto', !!match, match ? match[1] : null);
232
214
  },
233
215
  },
234
216
 
@@ -268,7 +250,7 @@ export const pedimentoSimplificadoDefinition = {
268
250
  return new FieldResult(
269
251
  'numeroOperacionBancaria',
270
252
  !!match,
271
- match ? match[1] : null
253
+ match ? match[1] : null,
272
254
  );
273
255
  },
274
256
  },
@@ -281,7 +263,7 @@ export const pedimentoSimplificadoDefinition = {
281
263
  return new FieldResult(
282
264
  'numeroTransaccionSAT',
283
265
  !!match,
284
- match ? match[1] : null
266
+ match ? match[1] : null,
285
267
  );
286
268
  },
287
269
  },
@@ -1,6 +1,7 @@
1
1
  import fs from 'fs';
2
- import path from 'path';
3
2
  import { getTextExtractor } from 'office-text-extractor';
3
+ import path from 'path';
4
+
4
5
  import { extractDocumentFields } from './document-type-shared.js';
5
6
 
6
7
  const extractor = getTextExtractor();
@@ -10,15 +11,20 @@ const extractor = getTextExtractor();
10
11
  * Format: RFC/Year/Patente/Aduana/Pedimento/
11
12
  * Example: PED781129JT6/2023/3429/07/3019796/
12
13
  */
13
- function composeArelaPath(detectedType, fields, detectedPedimentoYear, filePath) {
14
+ function composeArelaPath(
15
+ detectedType,
16
+ fields,
17
+ detectedPedimentoYear,
18
+ filePath,
19
+ ) {
14
20
  if (detectedType !== 'pedimento_simplificado') {
15
21
  return null;
16
22
  }
17
23
 
18
- const rfc = fields?.find(f => f.name === 'rfc')?.value;
19
- const patente = fields?.find(f => f.name === 'patente')?.value;
20
- const aduana = fields?.find(f => f.name === 'aduanaEntradaSalida')?.value;
21
- const pedimento = fields?.find(f => f.name === 'numPedimento')?.value;
24
+ const rfc = fields?.find((f) => f.name === 'rfc')?.value;
25
+ const patente = fields?.find((f) => f.name === 'patente')?.value;
26
+ const aduana = fields?.find((f) => f.name === 'aduanaEntradaSalida')?.value;
27
+ const pedimento = fields?.find((f) => f.name === 'numPedimento')?.value;
22
28
  const year = detectedPedimentoYear;
23
29
 
24
30
  // All components are required for a valid arela_path
@@ -28,17 +34,17 @@ function composeArelaPath(detectedType, fields, detectedPedimentoYear, filePath)
28
34
  year: !!year,
29
35
  patente: !!patente,
30
36
  aduana: !!aduana,
31
- pedimento: !!pedimento
37
+ pedimento: !!pedimento,
32
38
  });
33
39
  return null;
34
40
  }
35
41
 
36
42
  // Ensure aduana is padded to 2 digits if needed (07 instead of 7)
37
43
  const aduanaFormatted = aduana.toString().padStart(2, '0');
38
-
44
+
39
45
  // arela_path should be the folder structure only, without filename
40
46
  const arelaPath = `${rfc}/${year}/${patente}/${aduanaFormatted}/${pedimento}/`;
41
-
47
+
42
48
  console.log(`✅ Composed arela_path: ${arelaPath}`);
43
49
  return arelaPath;
44
50
  }
@@ -48,7 +54,6 @@ function composeArelaPath(detectedType, fields, detectedPedimentoYear, filePath)
48
54
  * Detects document types and extracts metadata from files
49
55
  */
50
56
  export class FileDetectionService {
51
-
52
57
  /**
53
58
  * Detect document type from a file
54
59
  * @param {string} filePath - Path to the file to analyze
@@ -56,13 +61,16 @@ export class FileDetectionService {
56
61
  */
57
62
  async detectFile(filePath) {
58
63
  try {
59
- const fileExtension = path.extname(filePath).toLowerCase().replace('.', '');
64
+ const fileExtension = path
65
+ .extname(filePath)
66
+ .toLowerCase()
67
+ .replace('.', '');
60
68
  const fileName = path.basename(filePath);
61
-
69
+
62
70
  console.log(`🔍 Analyzing file: ${fileName} (${fileExtension})`);
63
71
 
64
72
  let text = '';
65
-
73
+
66
74
  // Extract text based on file type
67
75
  switch (fileExtension) {
68
76
  case 'pdf':
@@ -83,7 +91,7 @@ export class FileDetectionService {
83
91
  detectedPedimentoYear: null,
84
92
  arelaPath: null,
85
93
  text: '',
86
- error: `Unsupported file type: ${fileExtension}`
94
+ error: `Unsupported file type: ${fileExtension}`,
87
95
  };
88
96
  }
89
97
 
@@ -96,16 +104,21 @@ export class FileDetectionService {
96
104
  detectedPedimentoYear: null,
97
105
  arelaPath: null,
98
106
  text: '',
99
- error: 'No text could be extracted from file'
107
+ error: 'No text could be extracted from file',
100
108
  };
101
109
  }
102
110
 
103
111
  // Extract document fields and detect type
104
- const [detectedType, fields, detectedPedimento, detectedPedimentoYear] =
112
+ const [detectedType, fields, detectedPedimento, detectedPedimentoYear] =
105
113
  extractDocumentFields(text, fileExtension, filePath);
106
114
 
107
115
  // Compose arela_path for pedimento_simplificado documents
108
- const arelaPath = composeArelaPath(detectedType, fields, detectedPedimentoYear, filePath);
116
+ const arelaPath = composeArelaPath(
117
+ detectedType,
118
+ fields,
119
+ detectedPedimentoYear,
120
+ filePath,
121
+ );
109
122
 
110
123
  return {
111
124
  detectedType,
@@ -114,9 +127,8 @@ export class FileDetectionService {
114
127
  detectedPedimentoYear,
115
128
  arelaPath,
116
129
  text,
117
- error: null
130
+ error: null,
118
131
  };
119
-
120
132
  } catch (error) {
121
133
  console.error(`❌ Error detecting file ${filePath}:`, error.message);
122
134
  return {
@@ -126,7 +138,7 @@ export class FileDetectionService {
126
138
  detectedPedimentoYear: null,
127
139
  arelaPath: null,
128
140
  text: '',
129
- error: error.message
141
+ error: error.message,
130
142
  };
131
143
  }
132
144
  }
@@ -139,13 +151,16 @@ export class FileDetectionService {
139
151
  async extractTextFromPDF(filePath) {
140
152
  try {
141
153
  const buffer = fs.readFileSync(filePath);
142
- const text = await extractor.extractText({
143
- input: buffer,
144
- type: 'file'
154
+ const text = await extractor.extractText({
155
+ input: buffer,
156
+ type: 'file',
145
157
  });
146
158
  return text;
147
159
  } catch (error) {
148
- console.error(`Error extracting text from PDF ${filePath}:`, error.message);
160
+ console.error(
161
+ `Error extracting text from PDF ${filePath}:`,
162
+ error.message,
163
+ );
149
164
  throw new Error(`Failed to extract text from PDF: ${error.message}`);
150
165
  }
151
166
  }
@@ -157,15 +172,15 @@ export class FileDetectionService {
157
172
  */
158
173
  async detectFiles(filePaths) {
159
174
  const results = [];
160
-
175
+
161
176
  for (const filePath of filePaths) {
162
177
  const result = await this.detectFile(filePath);
163
178
  results.push({
164
179
  filePath,
165
- ...result
180
+ ...result,
166
181
  });
167
182
  }
168
-
183
+
169
184
  return results;
170
185
  }
171
186
 
@@ -186,7 +201,7 @@ export class FileDetectionService {
186
201
  * @returns {Array<string>} - Filtered array of supported file paths
187
202
  */
188
203
  filterSupportedFiles(filePaths) {
189
- return filePaths.filter(filePath => this.isSupportedFileType(filePath));
204
+ return filePaths.filter((filePath) => this.isSupportedFileType(filePath));
190
205
  }
191
206
  }
192
207