@arela/uploader 1.0.20 → 1.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@arela/uploader",
3
- "version": "1.0.20",
3
+ "version": "1.0.21",
4
4
  "description": "CLI to upload files/directories to Arela",
5
5
  "bin": {
6
6
  "arela": "./src/index.js"
@@ -39,6 +39,7 @@
39
39
  "form-data": "4.0.4",
40
40
  "formdata-node": "^6.0.3",
41
41
  "globby": "14.1.0",
42
+ "googleapis": "^171.4.0",
42
43
  "ioredis": "^5.10.0",
43
44
  "mime-types": "3.0.1",
44
45
  "node-fetch": "3.3.2",
@@ -0,0 +1,475 @@
1
+ import cliProgress from 'cli-progress';
2
+ import fs from 'fs';
3
+ import fsp from 'fs/promises';
4
+ import pLimit from 'p-limit';
5
+ import path from 'path';
6
+
7
+ import GoogleDriveService from '../services/GoogleDriveService.js';
8
+ import logger from '../services/LoggingService.js';
9
+
10
+ import appConfig from '../config/config.js';
11
+ import ErrorHandler from '../errors/ErrorHandler.js';
12
+ import { FileSanitizer } from '../utils/FileSanitizer.js';
13
+
14
const STATE_FILENAME = '.gdrive-sync-state.json';
const STATE_VERSION = 1;

/**
 * GDrive Sync Command
 *
 * Mirrors a Google Drive folder tree to a local directory so the existing
 * scan → identify → propagate → push pipeline can run unchanged.
 *
 * Idempotent & incremental: maintains a `.gdrive-sync-state.json` at the
 * mirror root with per-file md5/modifiedTime so re-runs only download
 * changed/new files.
 */
export class GDriveSyncCommand {
  /** Maps a `#processFile` outcome to the stats counter it increments. */
  static #STAT_BY_RESULT = Object.freeze({
    added: 'filesAdded',
    updated: 'filesUpdated',
    'skipped-unchanged': 'filesSkipped',
    'skipped-native': 'nativeDocsSkipped',
    'skipped-oversize': 'oversizedSkipped',
  });

  constructor() {
    this.errorHandler = new ErrorHandler(logger);
    // Created lazily in execute(), after the GDrive config has been validated.
    this.driveService = null;
    this.sanitizer = new FileSanitizer();
    this.onProgress = null;
  }

  /**
   * Execute the gdrive-sync command
   * @param {Object} [options]
   * @param {string} [options.rootFolder] - override GDRIVE_ROOT_FOLDER_ID
   * @param {string} [options.dest] - override local mirror path
   * @param {boolean} [options.full] - ignore state file and re-verify everything
   * @param {boolean} [options.dryRun] - list/plan only, no downloads or writes
   * @param {Function} [options.onProgress] - called as `(percent, message)`
   * @returns {Promise<{success: true, stats: Object}|{success: false, error: string}>}
   *   Per-file failures are counted in `stats.filesFailed` and do not flip
   *   `success` to false; only an error that aborts the whole run does.
   */
  async execute(options = {}) {
    const startTime = Date.now();
    this.onProgress = options.onProgress || null;

    try {
      // Allow CLI overrides before validation
      if (options.rootFolder) {
        process.env.GDRIVE_ROOT_FOLDER_ID = options.rootFolder;
      }
      if (options.dest) {
        process.env.GDRIVE_LOCAL_MIRROR_PATH = options.dest;
      }
      // Reload config after env mutation; the overrides are also applied to the
      // returned object directly.
      const cfg = appConfig.getGDriveConfig();
      if (options.rootFolder) cfg.rootFolderId = options.rootFolder;
      if (options.dest) cfg.localMirrorPath = options.dest;

      appConfig.validateGDriveConfig();

      const dryRun = !!options.dryRun;
      const full = !!options.full;

      this.driveService = new GoogleDriveService();

      this.#say('☁️ Starting arela gdrive-sync command');
      this.#say(`📁 Root folder ID: ${cfg.rootFolderId}`);
      this.#say(`💾 Local mirror: ${cfg.localMirrorPath}`);
      this.#say(`⚙️ Concurrency: ${cfg.concurrency}`);
      this.#say(`📄 Skip native docs: ${cfg.skipNativeDocs}`);
      if (dryRun) this.#say('🧪 DRY-RUN: no files will be written');
      if (full) this.#say('♻️ FULL: ignoring state, re-verifying all files');

      // Ensure mirror dir exists
      if (!dryRun) {
        await fsp.mkdir(cfg.localMirrorPath, { recursive: true });
      }

      // Load existing state (a full run starts from an empty one)
      const statePath = path.join(cfg.localMirrorPath, STATE_FILENAME);
      const state = full
        ? this.#emptyState()
        : await this.#loadState(statePath);

      // Verify root folder exists & is accessible
      this.#say('\n🔍 Verifying root folder access...');
      const rootMeta = await this.driveService.getFile(cfg.rootFolderId);
      if (!GoogleDriveService.isFolder(rootMeta)) {
        throw new Error(
          `GDRIVE_ROOT_FOLDER_ID does not point to a folder (mimeType=${rootMeta.mimeType})`,
        );
      }
      logger.success(` ✓ Root: "${rootMeta.name}"`);

      // Walk Drive tree → produce file plan
      this.#say('\n🌲 Walking Drive tree...');
      this.#reportProgress(0, 'Walking Drive tree');
      const plan = await this.#walkTree(cfg.rootFolderId, '', cfg);
      this.#say(
        `📋 Found ${plan.folders.length} folder(s) and ${plan.files.length} file(s)`,
      );

      const stats = {
        foldersCreated: 0,
        filesAdded: 0,
        filesUpdated: 0,
        filesSkipped: 0,
        filesFailed: 0,
        nativeDocsSkipped: 0,
        oversizedSkipped: 0,
        bytesDownloaded: 0,
      };

      // Create folders first (cheap, sequential)
      if (!dryRun) {
        stats.foldersCreated = await this.#createFolders(
          cfg.localMirrorPath,
          plan.folders,
        );
      }

      // Download files concurrently
      this.#say('\n⬇️ Downloading files...');
      await this.#syncFiles(plan.files, cfg, state, dryRun, stats);

      // Persist state
      if (!dryRun) {
        await this.#saveState(statePath, state);
      }

      const duration = ((Date.now() - startTime) / 1000).toFixed(2);
      this.#reportProgress(100, `Sync completed in ${duration}s`);
      this.#printSummary(stats, duration, cfg);

      return { success: true, stats };
    } catch (error) {
      this.errorHandler.handleError(error, 'gdrive-sync');
      return { success: false, error: error.message };
    }
  }

  /**
   * Create the local directory for every planned folder, sequentially.
   * A failing mkdir is logged and skipped so one bad folder does not abort the run.
   * @param {string} mirrorRoot - absolute path of the local mirror
   * @param {string[]} folderRelPaths - folder paths relative to the mirror root
   * @returns {Promise<number>} number of folders that did not exist before
   * @private
   */
  async #createFolders(mirrorRoot, folderRelPaths) {
    let created = 0;
    for (const folderRel of folderRelPaths) {
      const abs = path.join(mirrorRoot, folderRel);
      try {
        // With `recursive: true`, mkdir resolves with the first directory it
        // had to create, or undefined when the whole path already existed.
        const firstCreated = await fsp.mkdir(abs, { recursive: true });
        if (firstCreated !== undefined) created += 1;
      } catch (err) {
        logger.warn(`⚠️ mkdir failed for ${abs}: ${err.message}`);
      }
    }
    return created;
  }

  /**
   * Run `#processFile` for every planned file with bounded concurrency,
   * updating `stats` and the progress reporters as files complete.
   * Individual failures are counted and logged; this method does not reject
   * because of them.
   * @param {Object[]} files - file entries produced by `#walkTree`
   * @param {Object} cfg - GDrive config
   * @param {Object} state - sync state, mutated by `#processFile`
   * @param {boolean} dryRun
   * @param {Object} stats - counters, mutated in place
   * @private
   */
  async #syncFiles(files, cfg, state, dryRun, stats) {
    const total = files.length;
    const limit = pLimit(cfg.concurrency);
    const progressBar = this.#createProgressBar(total, dryRun);
    let processed = 0;

    const syncOne = async (file) => {
      try {
        const result = await this.#processFile(file, cfg, state, dryRun);
        const statKey = GDriveSyncCommand.#STAT_BY_RESULT[result];
        if (statKey) stats[statKey] += 1;
        if (
          (result === 'added' || result === 'updated') &&
          file.size != null
        ) {
          stats.bytesDownloaded += Number(file.size) || 0;
        }
      } catch (err) {
        stats.filesFailed += 1;
        logger.error(
          `❌ Failed: ${file.relPath} (${file.id}) — ${err.message}`,
        );
      } finally {
        processed += 1;
        progressBar.update(processed);
        if (total > 0) {
          this.#reportProgress(
            Math.round((processed / total) * 100),
            `Synced ${processed}/${total}`,
          );
        }
      }
    };

    try {
      await Promise.all(files.map((file) => limit(() => syncOne(file))));
    } finally {
      // Always stop the bar so it is not left running if something unexpected throws.
      progressBar.stop();
    }
  }

  /**
   * Print the end-of-run statistics.
   * @private
   */
  #printSummary(stats, duration, cfg) {
    logger.success('\n✅ gdrive-sync completed successfully!');
    this.#say('\n📊 Sync Statistics:');
    this.#say(` Folders created: ${stats.foldersCreated}`);
    this.#say(` Files added: ${stats.filesAdded}`);
    this.#say(` Files updated: ${stats.filesUpdated}`);
    this.#say(` Files unchanged: ${stats.filesSkipped}`);
    this.#say(` Native docs skipped: ${stats.nativeDocsSkipped}`);
    this.#say(` Oversized skipped: ${stats.oversizedSkipped}`);
    this.#say(` Files failed: ${stats.filesFailed}`);
    this.#say(
      ` Bytes downloaded: ${this.#formatBytes(stats.bytesDownloaded)}`,
    );
    this.#say(` Duration: ${duration}s`);
    this.#say(
      `\n💡 Next: run "arela scan" with UPLOAD_BASE_PATH=${cfg.localMirrorPath}`,
    );
  }

  /**
   * Walk Drive tree starting at folderId (breadth-first).
   * Returns flat lists of folders (relative paths) and files (with metadata + relPath).
   *
   * Shortcuts are resolved to their targets when `cfg.followShortcuts` is set.
   * Each queued folder carries the ids of the folders on its path from the
   * root, so a shortcut pointing back at one of them is skipped instead of
   * being walked forever.
   * @param {string} folderId - Drive id of the folder to start from
   * @param {string} relPath - relative path assigned to that folder ('' for the root)
   * @param {Object} cfg - GDrive config (reads `followShortcuts`)
   * @returns {Promise<{folders: string[], files: Object[]}>}
   * @private
   */
  async #walkTree(folderId, relPath, cfg) {
    const folders = [];
    const files = [];
    const queue = [{ id: folderId, relPath, ancestors: new Set([folderId]) }];

    // Index-based BFS: avoids the O(n) cost of Array#shift on large trees.
    for (let head = 0; head < queue.length; head += 1) {
      const { id, relPath: parentRel, ancestors } = queue[head];
      // Sanitized names already taken inside this folder, to dedupe collisions.
      const usedNames = new Set();

      const enqueueFolder = (targetId, childRel) => {
        if (ancestors.has(targetId)) {
          logger.warn(
            `⚠️ Skipping "${childRel}": it points back to one of its parent folders (cycle)`,
          );
          return;
        }
        folders.push(childRel);
        queue.push({
          id: targetId,
          relPath: childRel,
          ancestors: new Set(ancestors).add(targetId),
        });
      };

      for await (const child of this.driveService.listChildren(id)) {
        const safeName = this.#uniqueSanitizedName(
          child.name,
          child.id,
          usedNames,
        );
        const childRel = parentRel ? `${parentRel}/${safeName}` : safeName;

        if (GoogleDriveService.isFolder(child)) {
          enqueueFolder(child.id, childRel);
          continue;
        }

        // Resolve shortcuts to their targets if enabled
        let effective = child;
        if (GoogleDriveService.isShortcut(child)) {
          if (!cfg.followShortcuts || !child.shortcutDetails?.targetId) {
            logger.debug(`⏭️ Skipping shortcut: ${childRel}`);
            continue;
          }
          try {
            effective = await this.driveService.getFile(
              child.shortcutDetails.targetId,
            );
          } catch (err) {
            logger.warn(
              `⚠️ Could not resolve shortcut "${childRel}": ${err.message}`,
            );
            continue;
          }
          if (GoogleDriveService.isFolder(effective)) {
            enqueueFolder(effective.id, childRel);
            continue;
          }
        }

        files.push({
          id: effective.id,
          name: effective.name,
          mimeType: effective.mimeType,
          modifiedTime: effective.modifiedTime,
          size: effective.size != null ? Number(effective.size) : null,
          md5Checksum: effective.md5Checksum || null,
          relPath: childRel,
          isNativeDoc: GoogleDriveService.isNativeGoogleDoc(effective),
        });
      }
    }

    return { folders, files };
  }

  /**
   * Process a single file: skip / download / update.
   *
   * A file is considered unchanged when it exists locally, the state has an
   * entry for it at the same relative path, and either the md5 matches or,
   * when Drive reports no md5, the modifiedTime matches.
   * @param {Object} file - entry produced by `#walkTree`
   * @param {Object} cfg - GDrive config
   * @param {Object} state - sync state; `state.files[file.id]` is written on success
   * @param {boolean} dryRun - when true nothing is written to disk or state
   * @returns {Promise<'added'|'updated'|'skipped-unchanged'|'skipped-native'|'skipped-oversize'>}
   * @throws whatever the download, rename or mkdir rejects with
   * @private
   */
  async #processFile(file, cfg, state, dryRun) {
    // Native Google Docs (Docs/Sheets/Slides/...): skip by default
    if (file.isNativeDoc && cfg.skipNativeDocs) {
      logger.debug(`⏭️ Native Google Doc skipped: ${file.relPath}`);
      return 'skipped-native';
    }

    // Oversize guard
    if (
      cfg.maxFileSizeBytes &&
      file.size != null &&
      file.size > cfg.maxFileSizeBytes
    ) {
      logger.warn(
        `⚠️ File too large (${this.#formatBytes(file.size)}): ${file.relPath}`,
      );
      return 'skipped-oversize';
    }

    const absPath = path.join(cfg.localMirrorPath, file.relPath);
    const prev = state.files[file.id];

    // Decide if we can skip (unchanged)
    const localExists = fs.existsSync(absPath);
    const unchanged =
      localExists &&
      prev &&
      prev.relPath === file.relPath &&
      ((file.md5Checksum && prev.md5Checksum === file.md5Checksum) ||
        (!file.md5Checksum && prev.modifiedTime === file.modifiedTime));

    if (unchanged) {
      return 'skipped-unchanged';
    }

    const outcome = prev ? 'updated' : 'added';

    if (dryRun) {
      logger.info(
        ` [dry-run] would ${prev ? 'update' : 'add'}: ${file.relPath}`,
      );
      return outcome;
    }

    // Ensure parent dir
    await fsp.mkdir(path.dirname(absPath), { recursive: true });

    // Download to .part then atomic rename
    const partPath = `${absPath}.part`;
    try {
      await fsp.rm(partPath, { force: true });
    } catch {
      /* best effort: a stale .part is overwritten by the write stream anyway */
    }

    const out = fs.createWriteStream(partPath);
    try {
      await this.driveService.downloadFile(file.id, out);
      await fsp.rename(partPath, absPath);
    } catch (err) {
      // Do not leak the file descriptor or leave a partial download behind.
      // Wait for 'close' before deleting: the stream may still be opening the
      // file, and removing it earlier could race with that open.
      if (!out.closed) {
        await new Promise((resolve) => {
          out.once('close', resolve);
          out.destroy();
        });
      }
      await fsp.rm(partPath, { force: true }).catch(() => {});
      throw err;
    }

    // Set mtime to Drive's modifiedTime so `arela scan` captures changes correctly
    if (file.modifiedTime) {
      const mtime = new Date(file.modifiedTime);
      try {
        await fsp.utimes(absPath, mtime, mtime);
      } catch (err) {
        logger.debug(`⚠️ utimes failed for ${absPath}: ${err.message}`);
      }
    }

    // Update state
    state.files[file.id] = {
      relPath: file.relPath,
      md5Checksum: file.md5Checksum,
      modifiedTime: file.modifiedTime,
      size: file.size,
      syncedAt: new Date().toISOString(),
    };

    return outcome;
  }

  /**
   * Sanitize file/folder name; on duplicate, append " (gdrive-<id>)".
   *
   * Drive names are remote-controlled, so the sanitizer's result is also
   * checked here: path separators are replaced and `.` / `..` are rejected,
   * so a name always stays a single path segment inside its parent folder.
   * @param {string} originalName - name as reported by Drive
   * @param {string} fileId - Drive id, used for fallbacks and de-duplication
   * @param {Set<string>} usedNames - names already taken in the same folder; mutated
   * @returns {string} a name not yet present in `usedNames`
   * @private
   */
  #uniqueSanitizedName(originalName, fileId, usedNames) {
    let safe = this.sanitizer.sanitizeFileName(originalName);
    if (typeof safe === 'string') safe = safe.replace(/[\\/]/g, '_');
    if (!safe || safe === '.' || safe === '..') safe = `file-${fileId}`;

    if (usedNames.has(safe)) {
      const ext = path.extname(safe);
      const base = path.basename(safe, ext);
      safe = `${base} (gdrive-${fileId})${ext}`;
    }
    usedNames.add(safe);
    return safe;
  }

  /**
   * Read the state file. Falls back to an empty state when the file is
   * missing, unreadable, not valid JSON, from another version, or does not
   * contain a `files` object. Without that last check every file would fail
   * later with a TypeError.
   * @param {string} statePath
   * @returns {Promise<{version: number, savedAt: (string|null), files: Object}>}
   * @private
   */
  async #loadState(statePath) {
    const isRecord = (value) =>
      value !== null && typeof value === 'object' && !Array.isArray(value);

    try {
      const raw = await fsp.readFile(statePath, 'utf-8');
      const parsed = JSON.parse(raw);
      if (!isRecord(parsed)) {
        logger.warn('⚠️ State file is malformed; starting fresh');
        return this.#emptyState();
      }
      if (parsed.version !== STATE_VERSION) {
        logger.warn(
          `⚠️ State file version mismatch (${parsed.version} != ${STATE_VERSION}); starting fresh`,
        );
        return this.#emptyState();
      }
      if (!isRecord(parsed.files)) {
        logger.warn('⚠️ State file has no valid "files" map; starting fresh');
        return this.#emptyState();
      }
      return parsed;
    } catch (err) {
      // A missing state file is the normal first-run case; stay quiet about it.
      if (err.code !== 'ENOENT') {
        logger.warn(`⚠️ Could not read state file: ${err.message}`);
      }
      return this.#emptyState();
    }
  }

  /**
   * Write the state next to its final location, then rename it into place so
   * a crash mid-write cannot leave a truncated state file.
   * @param {string} statePath
   * @param {Object} state - `savedAt` is stamped before writing
   * @private
   */
  async #saveState(statePath, state) {
    state.savedAt = new Date().toISOString();
    const tmp = `${statePath}.tmp`;
    await fsp.writeFile(tmp, JSON.stringify(state, null, 2), 'utf-8');
    await fsp.rename(tmp, statePath);
  }

  /**
   * @returns {{version: number, savedAt: null, files: Object}} a fresh state object
   * @private
   */
  #emptyState() {
    return { version: STATE_VERSION, savedAt: null, files: {} };
  }

  /**
   * Create and start the CLI progress bar.
   * @param {number} total - number of files in the plan
   * @param {boolean} dryRun - selects the label
   * @private
   */
  #createProgressBar(total, dryRun) {
    const label = dryRun ? '🧪 Planning' : '⬇️ Downloading';
    const bar = new cliProgress.SingleBar(
      {
        format: `${label} |{bar}| {percentage}% | {value}/{total} files`,
        barCompleteChar: '\u2588',
        barIncompleteChar: '\u2591',
        hideCursor: true,
      },
      cliProgress.Presets.shades_classic,
    );
    // Start with a total of at least 1, even for an empty plan.
    bar.start(Math.max(total, 1), 0);
    return bar;
  }

  /**
   * Forward progress to the optional `onProgress` listener.
   * @private
   */
  #reportProgress(percent, message) {
    if (typeof this.onProgress === 'function') {
      try {
        this.onProgress(percent, message);
      } catch {
        /* ignore listener errors */
      }
    }
  }

  /**
   * Print to stdout AND log to file (mirrors ScanCommand's user-facing output style).
   * @private
   */
  #say(message) {
    console.log(message);
    logger.info(message);
  }

  /**
   * Format a byte count with binary (1024-based) units and two decimals.
   * @param {number} bytes
   * @returns {string} e.g. `"1.50 MB"`; `"0 B"` for 0 or a missing value
   * @private
   */
  #formatBytes(bytes) {
    if (!bytes) return '0 B';
    const units = ['B', 'KB', 'MB', 'GB', 'TB'];
    let i = 0;
    let n = bytes;
    while (n >= 1024 && i < units.length - 1) {
      n /= 1024;
      i += 1;
    }
    return `${n.toFixed(2)} ${units[i]}`;
  }
}
474
+
475
+ export default new GDriveSyncCommand();
@@ -9,6 +9,24 @@ import ErrorHandler from '../errors/ErrorHandler.js';
9
9
  import { ConfigurationError } from '../errors/ErrorTypes.js';
10
10
  import FileDetectionService from '../file-detection.js';
11
11
 
12
/**
 * `detected_type` values that count as a paid pedimento.
 * `pedimento_completo_xml` is listed even though the XML matcher is currently
 * disabled in the registry, so that re-enabling it requires no changes here.
 */
const DETECTED_PEDIMENTO_TYPES = new Set()
  .add('pedimento_simplificado')
  .add('pedimento_completo')
  .add('pedimento_completo_xml');

/** `detected_type` values that count as an unpaid pedimento (proforma variants). */
const PROFORMA_TYPES = new Set()
  .add('proforma')
  .add('proforma_completo')
  .add('proforma_completo_xml');
29
+
12
30
  /**
13
31
  * Identify Command Handler
14
32
  * Optimized replacement for "detect --detect-pdfs"
@@ -247,11 +265,13 @@ export class IdentifyCommand {
247
265
 
248
266
  // Update statistics
249
267
  processedCount += files.length;
250
- detectedCount += detectionResults.filter(
251
- (r) => r.detectedType === 'pedimento_simplificado',
268
+ // "Detected" counts paid pedimentos of any flavour (simplificado,
269
+ // completo, completo_xml when enabled).
270
+ detectedCount += detectionResults.filter((r) =>
271
+ DETECTED_PEDIMENTO_TYPES.has(r.detectedType),
252
272
  ).length;
253
- proformaCount += detectionResults.filter(
254
- (r) => r.detectedType === 'proforma',
273
+ proformaCount += detectionResults.filter((r) =>
274
+ PROFORMA_TYPES.has(r.detectedType),
255
275
  ).length;
256
276
  errorCount += detectionResults.filter((r) => r.detectionError).length;
257
277
 
@@ -324,8 +344,8 @@ export class IdentifyCommand {
324
344
  // Detect using existing FileDetectionService
325
345
  const result = await this.detectionService.detectFile(absolutePath);
326
346
 
327
- // If detection succeeded and found a pedimento_simplificado (paid)
328
- if (result.detectedType === 'pedimento_simplificado') {
347
+ // If detection succeeded and found a paid pedimento (any variant)
348
+ if (DETECTED_PEDIMENTO_TYPES.has(result.detectedType)) {
329
349
  return {
330
350
  id: file.id,
331
351
  detectedType: result.detectedType,
@@ -338,8 +358,8 @@ export class IdentifyCommand {
338
358
  };
339
359
  }
340
360
 
341
- // If detection succeeded and found a proforma (unpaid pedimento)
342
- if (result.detectedType === 'proforma') {
361
+ // If detection succeeded and found a proforma (any variant)
362
+ if (PROFORMA_TYPES.has(result.detectedType)) {
343
363
  return {
344
364
  id: file.id,
345
365
  detectedType: result.detectedType,
@@ -365,7 +385,7 @@ export class IdentifyCommand {
365
385
  detectionError = `DETECTION_ERROR: ${result.error}`;
366
386
  } else if (isDefinitelyNotPedimento) {
367
387
  detectionError =
368
- 'NOT_PEDIMENTO: File does not match pedimento-simplificado pattern. Missing key markers: "FORMA SIMPLIFICADA DE PEDIMENTO".';
388
+ 'NOT_PEDIMENTO: File does not match any pedimento pattern. Missing key markers (e.g. "FORMA SIMPLIFICADA DE PEDIMENTO" or "NUM. PEDIMENTO:" + copy markers).';
369
389
  } else {
370
390
  // Partial match - might be a pedimento with missing fields
371
391
  const missingFields = this.#getMissingFields(result);
@@ -434,18 +454,23 @@ export class IdentifyCommand {
434
454
  return false;
435
455
  }
436
456
 
437
- // If it was detected as a proforma, it's related to a pedimento structure
438
- if (result.detectedType === 'proforma') {
457
+ // If it was detected as a proforma (any variant), it's related to a
458
+ // pedimento structure not "definitely not".
459
+ if (PROFORMA_TYPES.has(result.detectedType)) {
439
460
  return false;
440
461
  }
441
462
 
442
- // Check if the text contains the required pedimento marker
443
- // This must match the criteria in pedimento-simplificado.js match function
463
+ // Check if the text contains any required pedimento marker. This must
464
+ // stay aligned with the `match()` predicates in pedimento-simplificado.js
465
+ // and pedimento-completo.js.
444
466
  const text = result.text || '';
445
- const hasRequiredMarker = /FORMA SIMPLIFICADA DE PEDIMENTO/i.test(text);
467
+ const hasSimplificadoMarker = /FORMA SIMPLIFICADA DE PEDIMENTO/i.test(text);
468
+ const hasCompletoMarkers =
469
+ /NUM\.?\s*PEDIMENTO:/i.test(text) &&
470
+ /CVE\.?\s*PEDIMENTO:/i.test(text) &&
471
+ /T\.?\s*OPER:/i.test(text);
446
472
 
447
- // If the required marker is not found, it's definitely not a pedimento
448
- return !hasRequiredMarker;
473
+ return !hasSimplificadoMarker && !hasCompletoMarkers;
449
474
  }
450
475
 
451
476
  /**
@@ -592,10 +592,13 @@ export class ScanCommand {
592
592
  const relativePath = PathNormalizer.getRelativePath(filePath, basePath);
593
593
  const absolutePath = PathNormalizer.normalizeSeparators(filePath);
594
594
 
595
- // Determine if this is potentially a simplificado document
596
- // Must be a PDF and filename must contain 'simp' (case-insensitive)
595
+ // Determine if this file is potentially a pedimento (simplificado, completo, or CoveFact).
596
+ // PDFs whose filename contains 'simp', 'pedim' or 'covefact' (case-insensitive)
597
+ // are flagged so the identify stage prioritizes them. The column name
598
+ // `likely_simplificado` is preserved for backwards compatibility; semantics
599
+ // are broader (any likely pedimento PDF).
597
600
  const likelySimplificado =
598
- fileExtension === 'pdf' && fileName.toLowerCase().includes('simp');
601
+ fileExtension === 'pdf' && /(simp|pedim|covefact)/i.test(fileName);
599
602
 
600
603
  return {
601
604
  fileName,