@arela/uploader 1.0.20 → 1.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -1
- package/src/commands/GDriveSyncCommand.js +475 -0
- package/src/commands/IdentifyCommand.js +41 -16
- package/src/commands/ScanCommand.js +6 -3
- package/src/config/config.js +88 -2
- package/src/document-type-shared.js +13 -3
- package/src/document-types/_pedimento-shared-extractors.js +226 -0
- package/src/document-types/pedimento-completo-xml.js +322 -0
- package/src/document-types/pedimento-completo.js +68 -0
- package/src/document-types/pedimento-simplificado.js +7 -286
- package/src/file-detection.js +36 -2
- package/src/index.js +27 -0
- package/src/services/DatabaseService.js +3 -1
- package/src/services/GoogleDriveService.js +217 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@arela/uploader",
|
|
3
|
-
"version": "1.0.20",
|
|
3
|
+
"version": "1.0.21",
|
|
4
4
|
"description": "CLI to upload files/directories to Arela",
|
|
5
5
|
"bin": {
|
|
6
6
|
"arela": "./src/index.js"
|
|
@@ -39,6 +39,7 @@
|
|
|
39
39
|
"form-data": "4.0.4",
|
|
40
40
|
"formdata-node": "^6.0.3",
|
|
41
41
|
"globby": "14.1.0",
|
|
42
|
+
"googleapis": "^171.4.0",
|
|
42
43
|
"ioredis": "^5.10.0",
|
|
43
44
|
"mime-types": "3.0.1",
|
|
44
45
|
"node-fetch": "3.3.2",
|
|
@@ -0,0 +1,475 @@
|
|
|
1
|
+
import cliProgress from 'cli-progress';
|
|
2
|
+
import fs from 'fs';
|
|
3
|
+
import fsp from 'fs/promises';
|
|
4
|
+
import pLimit from 'p-limit';
|
|
5
|
+
import path from 'path';
|
|
6
|
+
|
|
7
|
+
import GoogleDriveService from '../services/GoogleDriveService.js';
|
|
8
|
+
import logger from '../services/LoggingService.js';
|
|
9
|
+
|
|
10
|
+
import appConfig from '../config/config.js';
|
|
11
|
+
import ErrorHandler from '../errors/ErrorHandler.js';
|
|
12
|
+
import { FileSanitizer } from '../utils/FileSanitizer.js';
|
|
13
|
+
|
|
14
|
+
// Name of the JSON state file; it is joined onto the local mirror path when
// the sync state is loaded and saved.
const STATE_FILENAME = '.gdrive-sync-state.json';
// Schema version stamped into the state file. A loaded state whose `version`
// differs from this value is discarded and replaced by an empty state.
const STATE_VERSION = 1;
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* GDrive Sync Command
|
|
19
|
+
*
|
|
20
|
+
* Mirrors a Google Drive folder tree to a local directory so the existing
|
|
21
|
+
* scan → identify → propagate → push pipeline can run unchanged.
|
|
22
|
+
*
|
|
23
|
+
* Idempotent & incremental: maintains a `.gdrive-sync-state.json` at the
|
|
24
|
+
* mirror root with per-file md5/modifiedTime so re-runs only download
|
|
25
|
+
* changed/new files.
|
|
26
|
+
*/
|
|
27
|
+
export class GDriveSyncCommand {
|
|
28
|
+
constructor() {
|
|
29
|
+
this.errorHandler = new ErrorHandler(logger);
|
|
30
|
+
this.driveService = null;
|
|
31
|
+
this.sanitizer = new FileSanitizer();
|
|
32
|
+
this.onProgress = null;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
/**
|
|
36
|
+
* Execute the gdrive-sync command
|
|
37
|
+
* @param {Object} [options]
|
|
38
|
+
* @param {string} [options.rootFolder] - override GDRIVE_ROOT_FOLDER_ID
|
|
39
|
+
* @param {string} [options.dest] - override local mirror path
|
|
40
|
+
* @param {boolean} [options.full] - ignore state file and re-verify everything
|
|
41
|
+
* @param {boolean} [options.dryRun] - list/plan only, no downloads or writes
|
|
42
|
+
* @param {Function} [options.onProgress]
|
|
43
|
+
*/
|
|
44
|
+
async execute(options = {}) {
|
|
45
|
+
const startTime = Date.now();
|
|
46
|
+
this.onProgress = options.onProgress || null;
|
|
47
|
+
|
|
48
|
+
try {
|
|
49
|
+
// Allow CLI overrides before validation
|
|
50
|
+
if (options.rootFolder) {
|
|
51
|
+
process.env.GDRIVE_ROOT_FOLDER_ID = options.rootFolder;
|
|
52
|
+
}
|
|
53
|
+
if (options.dest) {
|
|
54
|
+
process.env.GDRIVE_LOCAL_MIRROR_PATH = options.dest;
|
|
55
|
+
}
|
|
56
|
+
// Reload config after env mutation
|
|
57
|
+
const cfg = appConfig.getGDriveConfig();
|
|
58
|
+
if (options.rootFolder) cfg.rootFolderId = options.rootFolder;
|
|
59
|
+
if (options.dest) cfg.localMirrorPath = options.dest;
|
|
60
|
+
|
|
61
|
+
appConfig.validateGDriveConfig();
|
|
62
|
+
|
|
63
|
+
const dryRun = !!options.dryRun;
|
|
64
|
+
const full = !!options.full;
|
|
65
|
+
|
|
66
|
+
this.driveService = new GoogleDriveService();
|
|
67
|
+
|
|
68
|
+
this.#say('☁️ Starting arela gdrive-sync command');
|
|
69
|
+
this.#say(`📁 Root folder ID: ${cfg.rootFolderId}`);
|
|
70
|
+
this.#say(`💾 Local mirror: ${cfg.localMirrorPath}`);
|
|
71
|
+
this.#say(`⚙️ Concurrency: ${cfg.concurrency}`);
|
|
72
|
+
this.#say(`📄 Skip native docs: ${cfg.skipNativeDocs}`);
|
|
73
|
+
if (dryRun) this.#say('🧪 DRY-RUN: no files will be written');
|
|
74
|
+
if (full) this.#say('♻️ FULL: ignoring state, re-verifying all files');
|
|
75
|
+
|
|
76
|
+
// Ensure mirror dir exists
|
|
77
|
+
if (!dryRun) {
|
|
78
|
+
await fsp.mkdir(cfg.localMirrorPath, { recursive: true });
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
// Load existing state
|
|
82
|
+
const statePath = path.join(cfg.localMirrorPath, STATE_FILENAME);
|
|
83
|
+
const state = full
|
|
84
|
+
? this.#emptyState()
|
|
85
|
+
: await this.#loadState(statePath);
|
|
86
|
+
|
|
87
|
+
// Verify root folder exists & is accessible
|
|
88
|
+
this.#say('\n🔍 Verifying root folder access...');
|
|
89
|
+
const rootMeta = await this.driveService.getFile(cfg.rootFolderId);
|
|
90
|
+
if (!GoogleDriveService.isFolder(rootMeta)) {
|
|
91
|
+
throw new Error(
|
|
92
|
+
`GDRIVE_ROOT_FOLDER_ID does not point to a folder (mimeType=${rootMeta.mimeType})`,
|
|
93
|
+
);
|
|
94
|
+
}
|
|
95
|
+
logger.success(` ✓ Root: "${rootMeta.name}"`);
|
|
96
|
+
|
|
97
|
+
// Walk Drive tree → produce file plan
|
|
98
|
+
this.#say('\n🌲 Walking Drive tree...');
|
|
99
|
+
this.#reportProgress(0, 'Walking Drive tree');
|
|
100
|
+
const plan = await this.#walkTree(cfg.rootFolderId, '', cfg);
|
|
101
|
+
this.#say(
|
|
102
|
+
`📋 Found ${plan.folders.length} folder(s) and ${plan.files.length} file(s)`,
|
|
103
|
+
);
|
|
104
|
+
|
|
105
|
+
// Stats
|
|
106
|
+
const stats = {
|
|
107
|
+
foldersCreated: 0,
|
|
108
|
+
filesAdded: 0,
|
|
109
|
+
filesUpdated: 0,
|
|
110
|
+
filesSkipped: 0,
|
|
111
|
+
filesFailed: 0,
|
|
112
|
+
nativeDocsSkipped: 0,
|
|
113
|
+
oversizedSkipped: 0,
|
|
114
|
+
bytesDownloaded: 0,
|
|
115
|
+
};
|
|
116
|
+
|
|
117
|
+
// Create folders first (cheap, sequential)
|
|
118
|
+
if (!dryRun) {
|
|
119
|
+
for (const folderRel of plan.folders) {
|
|
120
|
+
const abs = path.join(cfg.localMirrorPath, folderRel);
|
|
121
|
+
try {
|
|
122
|
+
await fsp.mkdir(abs, { recursive: true });
|
|
123
|
+
stats.foldersCreated += 1;
|
|
124
|
+
} catch (err) {
|
|
125
|
+
logger.warn(`⚠️ mkdir failed for ${abs}: ${err.message}`);
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
// Download files concurrently
|
|
131
|
+
this.#say('\n⬇️ Downloading files...');
|
|
132
|
+
const limit = pLimit(cfg.concurrency);
|
|
133
|
+
const progressBar = this.#createProgressBar(plan.files.length, dryRun);
|
|
134
|
+
let processed = 0;
|
|
135
|
+
|
|
136
|
+
const tasks = plan.files.map((file) =>
|
|
137
|
+
limit(async () => {
|
|
138
|
+
try {
|
|
139
|
+
const result = await this.#processFile(file, cfg, state, dryRun);
|
|
140
|
+
if (result === 'added') stats.filesAdded += 1;
|
|
141
|
+
else if (result === 'updated') stats.filesUpdated += 1;
|
|
142
|
+
else if (result === 'skipped-unchanged') stats.filesSkipped += 1;
|
|
143
|
+
else if (result === 'skipped-native') stats.nativeDocsSkipped += 1;
|
|
144
|
+
else if (result === 'skipped-oversize') stats.oversizedSkipped += 1;
|
|
145
|
+
if (
|
|
146
|
+
(result === 'added' || result === 'updated') &&
|
|
147
|
+
file.size != null
|
|
148
|
+
) {
|
|
149
|
+
stats.bytesDownloaded += Number(file.size) || 0;
|
|
150
|
+
}
|
|
151
|
+
} catch (err) {
|
|
152
|
+
stats.filesFailed += 1;
|
|
153
|
+
logger.error(
|
|
154
|
+
`❌ Failed: ${file.relPath} (${file.id}) — ${err.message}`,
|
|
155
|
+
);
|
|
156
|
+
} finally {
|
|
157
|
+
processed += 1;
|
|
158
|
+
progressBar.update(processed);
|
|
159
|
+
if (plan.files.length > 0) {
|
|
160
|
+
this.#reportProgress(
|
|
161
|
+
Math.round((processed / plan.files.length) * 100),
|
|
162
|
+
`Synced ${processed}/${plan.files.length}`,
|
|
163
|
+
);
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
}),
|
|
167
|
+
);
|
|
168
|
+
|
|
169
|
+
await Promise.all(tasks);
|
|
170
|
+
progressBar.stop();
|
|
171
|
+
|
|
172
|
+
// Persist state
|
|
173
|
+
if (!dryRun) {
|
|
174
|
+
await this.#saveState(statePath, state);
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
const duration = ((Date.now() - startTime) / 1000).toFixed(2);
|
|
178
|
+
this.#reportProgress(100, `Sync completed in ${duration}s`);
|
|
179
|
+
|
|
180
|
+
logger.success('\n✅ gdrive-sync completed successfully!');
|
|
181
|
+
this.#say('\n📊 Sync Statistics:');
|
|
182
|
+
this.#say(` Folders created: ${stats.foldersCreated}`);
|
|
183
|
+
this.#say(` Files added: ${stats.filesAdded}`);
|
|
184
|
+
this.#say(` Files updated: ${stats.filesUpdated}`);
|
|
185
|
+
this.#say(` Files unchanged: ${stats.filesSkipped}`);
|
|
186
|
+
this.#say(` Native docs skipped: ${stats.nativeDocsSkipped}`);
|
|
187
|
+
this.#say(` Oversized skipped: ${stats.oversizedSkipped}`);
|
|
188
|
+
this.#say(` Files failed: ${stats.filesFailed}`);
|
|
189
|
+
this.#say(
|
|
190
|
+
` Bytes downloaded: ${this.#formatBytes(stats.bytesDownloaded)}`,
|
|
191
|
+
);
|
|
192
|
+
this.#say(` Duration: ${duration}s`);
|
|
193
|
+
this.#say(
|
|
194
|
+
`\n💡 Next: run "arela scan" with UPLOAD_BASE_PATH=${cfg.localMirrorPath}`,
|
|
195
|
+
);
|
|
196
|
+
|
|
197
|
+
return { success: true, stats };
|
|
198
|
+
} catch (error) {
|
|
199
|
+
this.errorHandler.handleError(error, 'gdrive-sync');
|
|
200
|
+
return { success: false, error: error.message };
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
/**
|
|
205
|
+
* Walk Drive tree starting at folderId.
|
|
206
|
+
* Returns flat lists of folders (relative paths) and files (with metadata + relPath).
|
|
207
|
+
* @private
|
|
208
|
+
*/
|
|
209
|
+
async #walkTree(folderId, relPath, cfg) {
|
|
210
|
+
const folders = [];
|
|
211
|
+
const files = [];
|
|
212
|
+
|
|
213
|
+
// BFS using a queue
|
|
214
|
+
const queue = [{ id: folderId, relPath }];
|
|
215
|
+
// Track sanitized names per folder to dedupe collisions
|
|
216
|
+
const folderNameMaps = new Map();
|
|
217
|
+
|
|
218
|
+
while (queue.length > 0) {
|
|
219
|
+
const { id, relPath: parentRel } = queue.shift();
|
|
220
|
+
const usedNames = folderNameMaps.get(parentRel) || new Set();
|
|
221
|
+
|
|
222
|
+
for await (const child of this.driveService.listChildren(id)) {
|
|
223
|
+
const safeName = this.#uniqueSanitizedName(
|
|
224
|
+
child.name,
|
|
225
|
+
child.id,
|
|
226
|
+
usedNames,
|
|
227
|
+
);
|
|
228
|
+
const childRel = parentRel ? `${parentRel}/${safeName}` : safeName;
|
|
229
|
+
|
|
230
|
+
if (GoogleDriveService.isFolder(child)) {
|
|
231
|
+
folders.push(childRel);
|
|
232
|
+
folderNameMaps.set(childRel, new Set());
|
|
233
|
+
queue.push({ id: child.id, relPath: childRel });
|
|
234
|
+
continue;
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
// Resolve shortcuts to their targets if enabled
|
|
238
|
+
let effective = child;
|
|
239
|
+
if (GoogleDriveService.isShortcut(child)) {
|
|
240
|
+
if (!cfg.followShortcuts || !child.shortcutDetails?.targetId) {
|
|
241
|
+
logger.debug(`⏭️ Skipping shortcut: ${childRel}`);
|
|
242
|
+
continue;
|
|
243
|
+
}
|
|
244
|
+
try {
|
|
245
|
+
effective = await this.driveService.getFile(
|
|
246
|
+
child.shortcutDetails.targetId,
|
|
247
|
+
);
|
|
248
|
+
} catch (err) {
|
|
249
|
+
logger.warn(
|
|
250
|
+
`⚠️ Could not resolve shortcut "${childRel}": ${err.message}`,
|
|
251
|
+
);
|
|
252
|
+
continue;
|
|
253
|
+
}
|
|
254
|
+
if (GoogleDriveService.isFolder(effective)) {
|
|
255
|
+
folders.push(childRel);
|
|
256
|
+
folderNameMaps.set(childRel, new Set());
|
|
257
|
+
queue.push({ id: effective.id, relPath: childRel });
|
|
258
|
+
continue;
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
files.push({
|
|
263
|
+
id: effective.id,
|
|
264
|
+
name: effective.name,
|
|
265
|
+
mimeType: effective.mimeType,
|
|
266
|
+
modifiedTime: effective.modifiedTime,
|
|
267
|
+
size: effective.size != null ? Number(effective.size) : null,
|
|
268
|
+
md5Checksum: effective.md5Checksum || null,
|
|
269
|
+
relPath: childRel,
|
|
270
|
+
isNativeDoc: GoogleDriveService.isNativeGoogleDoc(effective),
|
|
271
|
+
});
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
folderNameMaps.set(parentRel, usedNames);
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
return { folders, files };
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
/**
|
|
281
|
+
* Process a single file: skip / download / update.
|
|
282
|
+
* @private
|
|
283
|
+
*/
|
|
284
|
+
async #processFile(file, cfg, state, dryRun) {
|
|
285
|
+
// Native Google Docs (Docs/Sheets/Slides/...): skip by default
|
|
286
|
+
if (file.isNativeDoc && cfg.skipNativeDocs) {
|
|
287
|
+
logger.debug(`⏭️ Native Google Doc skipped: ${file.relPath}`);
|
|
288
|
+
return 'skipped-native';
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
// Oversize guard
|
|
292
|
+
if (
|
|
293
|
+
cfg.maxFileSizeBytes &&
|
|
294
|
+
file.size != null &&
|
|
295
|
+
file.size > cfg.maxFileSizeBytes
|
|
296
|
+
) {
|
|
297
|
+
logger.warn(
|
|
298
|
+
`⚠️ File too large (${this.#formatBytes(file.size)}): ${file.relPath}`,
|
|
299
|
+
);
|
|
300
|
+
return 'skipped-oversize';
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
const absPath = path.join(cfg.localMirrorPath, file.relPath);
|
|
304
|
+
const prev = state.files[file.id];
|
|
305
|
+
|
|
306
|
+
// Decide if we can skip (unchanged)
|
|
307
|
+
const localExists = fs.existsSync(absPath);
|
|
308
|
+
const unchanged =
|
|
309
|
+
localExists &&
|
|
310
|
+
prev &&
|
|
311
|
+
prev.relPath === file.relPath &&
|
|
312
|
+
((file.md5Checksum && prev.md5Checksum === file.md5Checksum) ||
|
|
313
|
+
(!file.md5Checksum && prev.modifiedTime === file.modifiedTime));
|
|
314
|
+
|
|
315
|
+
if (unchanged) {
|
|
316
|
+
return 'skipped-unchanged';
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
if (dryRun) {
|
|
320
|
+
logger.info(
|
|
321
|
+
` [dry-run] would ${prev ? 'update' : 'add'}: ${file.relPath}`,
|
|
322
|
+
);
|
|
323
|
+
return prev ? 'updated' : 'added';
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
// Ensure parent dir
|
|
327
|
+
await fsp.mkdir(path.dirname(absPath), { recursive: true });
|
|
328
|
+
|
|
329
|
+
// Download to .part then atomic rename
|
|
330
|
+
const partPath = `${absPath}.part`;
|
|
331
|
+
try {
|
|
332
|
+
await fsp.rm(partPath, { force: true });
|
|
333
|
+
} catch {
|
|
334
|
+
/* ignore */
|
|
335
|
+
}
|
|
336
|
+
|
|
337
|
+
const out = fs.createWriteStream(partPath);
|
|
338
|
+
await this.driveService.downloadFile(file.id, out);
|
|
339
|
+
await fsp.rename(partPath, absPath);
|
|
340
|
+
|
|
341
|
+
// Set mtime to Drive's modifiedTime so `arela scan` captures changes correctly
|
|
342
|
+
if (file.modifiedTime) {
|
|
343
|
+
const mtime = new Date(file.modifiedTime);
|
|
344
|
+
try {
|
|
345
|
+
await fsp.utimes(absPath, mtime, mtime);
|
|
346
|
+
} catch (err) {
|
|
347
|
+
logger.debug(`⚠️ utimes failed for ${absPath}: ${err.message}`);
|
|
348
|
+
}
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
// Update state
|
|
352
|
+
state.files[file.id] = {
|
|
353
|
+
relPath: file.relPath,
|
|
354
|
+
md5Checksum: file.md5Checksum,
|
|
355
|
+
modifiedTime: file.modifiedTime,
|
|
356
|
+
size: file.size,
|
|
357
|
+
syncedAt: new Date().toISOString(),
|
|
358
|
+
};
|
|
359
|
+
|
|
360
|
+
return prev ? 'updated' : 'added';
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
/**
|
|
364
|
+
* Sanitize file/folder name; on duplicate, append " (gdrive-<id>)".
|
|
365
|
+
* @private
|
|
366
|
+
*/
|
|
367
|
+
#uniqueSanitizedName(originalName, fileId, usedNames) {
|
|
368
|
+
let safe = this.sanitizer.sanitizeFileName(originalName);
|
|
369
|
+
if (!safe) safe = `file-${fileId}`;
|
|
370
|
+
|
|
371
|
+
if (usedNames.has(safe)) {
|
|
372
|
+
const ext = path.extname(safe);
|
|
373
|
+
const base = path.basename(safe, ext);
|
|
374
|
+
safe = `${base} (gdrive-${fileId})${ext}`;
|
|
375
|
+
}
|
|
376
|
+
usedNames.add(safe);
|
|
377
|
+
return safe;
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
/**
|
|
381
|
+
* @private
|
|
382
|
+
*/
|
|
383
|
+
async #loadState(statePath) {
|
|
384
|
+
try {
|
|
385
|
+
const raw = await fsp.readFile(statePath, 'utf-8');
|
|
386
|
+
const parsed = JSON.parse(raw);
|
|
387
|
+
if (parsed.version !== STATE_VERSION) {
|
|
388
|
+
logger.warn(
|
|
389
|
+
`⚠️ State file version mismatch (${parsed.version} != ${STATE_VERSION}); starting fresh`,
|
|
390
|
+
);
|
|
391
|
+
return this.#emptyState();
|
|
392
|
+
}
|
|
393
|
+
return parsed;
|
|
394
|
+
} catch (err) {
|
|
395
|
+
if (err.code !== 'ENOENT') {
|
|
396
|
+
logger.warn(`⚠️ Could not read state file: ${err.message}`);
|
|
397
|
+
}
|
|
398
|
+
return this.#emptyState();
|
|
399
|
+
}
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
/**
|
|
403
|
+
* @private
|
|
404
|
+
*/
|
|
405
|
+
async #saveState(statePath, state) {
|
|
406
|
+
state.savedAt = new Date().toISOString();
|
|
407
|
+
const tmp = `${statePath}.tmp`;
|
|
408
|
+
await fsp.writeFile(tmp, JSON.stringify(state, null, 2), 'utf-8');
|
|
409
|
+
await fsp.rename(tmp, statePath);
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
/**
|
|
413
|
+
* @private
|
|
414
|
+
*/
|
|
415
|
+
#emptyState() {
|
|
416
|
+
return { version: STATE_VERSION, savedAt: null, files: {} };
|
|
417
|
+
}
|
|
418
|
+
|
|
419
|
+
/**
|
|
420
|
+
* @private
|
|
421
|
+
*/
|
|
422
|
+
#createProgressBar(total, dryRun) {
|
|
423
|
+
const label = dryRun ? '🧪 Planning' : '⬇️ Downloading';
|
|
424
|
+
const bar = new cliProgress.SingleBar(
|
|
425
|
+
{
|
|
426
|
+
format: `${label} |{bar}| {percentage}% | {value}/{total} files`,
|
|
427
|
+
barCompleteChar: '\u2588',
|
|
428
|
+
barIncompleteChar: '\u2591',
|
|
429
|
+
hideCursor: true,
|
|
430
|
+
},
|
|
431
|
+
cliProgress.Presets.shades_classic,
|
|
432
|
+
);
|
|
433
|
+
bar.start(Math.max(total, 1), 0);
|
|
434
|
+
return bar;
|
|
435
|
+
}
|
|
436
|
+
|
|
437
|
+
/**
|
|
438
|
+
* @private
|
|
439
|
+
*/
|
|
440
|
+
#reportProgress(percent, message) {
|
|
441
|
+
if (typeof this.onProgress === 'function') {
|
|
442
|
+
try {
|
|
443
|
+
this.onProgress(percent, message);
|
|
444
|
+
} catch {
|
|
445
|
+
/* ignore listener errors */
|
|
446
|
+
}
|
|
447
|
+
}
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
/**
|
|
451
|
+
* Print to stdout AND log to file (mirrors ScanCommand's user-facing output style).
|
|
452
|
+
* @private
|
|
453
|
+
*/
|
|
454
|
+
#say(message) {
|
|
455
|
+
console.log(message);
|
|
456
|
+
logger.info(message);
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
/**
|
|
460
|
+
* @private
|
|
461
|
+
*/
|
|
462
|
+
#formatBytes(bytes) {
|
|
463
|
+
if (!bytes) return '0 B';
|
|
464
|
+
const units = ['B', 'KB', 'MB', 'GB', 'TB'];
|
|
465
|
+
let i = 0;
|
|
466
|
+
let n = bytes;
|
|
467
|
+
while (n >= 1024 && i < units.length - 1) {
|
|
468
|
+
n /= 1024;
|
|
469
|
+
i += 1;
|
|
470
|
+
}
|
|
471
|
+
return `${n.toFixed(2)} ${units[i]}`;
|
|
472
|
+
}
|
|
473
|
+
}
|
|
474
|
+
|
|
475
|
+
export default new GDriveSyncCommand();
|
|
@@ -9,6 +9,24 @@ import ErrorHandler from '../errors/ErrorHandler.js';
|
|
|
9
9
|
import { ConfigurationError } from '../errors/ErrorTypes.js';
|
|
10
10
|
import FileDetectionService from '../file-detection.js';
|
|
11
11
|
|
|
12
|
+
/**
 * `detected_type` values that count as a paid pedimento.
 * `pedimento_completo_xml` is listed even though its XML matcher is
 * currently disabled in the registry, so turning the matcher back on needs
 * no change in this file.
 */
const DETECTED_PEDIMENTO_TYPES = new Set()
  .add('pedimento_simplificado')
  .add('pedimento_completo')
  .add('pedimento_completo_xml');

/** `detected_type` values for unpaid pedimentos (the proforma variants). */
const PROFORMA_TYPES = new Set()
  .add('proforma')
  .add('proforma_completo')
  .add('proforma_completo_xml');
|
|
29
|
+
|
|
12
30
|
/**
|
|
13
31
|
* Identify Command Handler
|
|
14
32
|
* Optimized replacement for "detect --detect-pdfs"
|
|
@@ -247,11 +265,13 @@ export class IdentifyCommand {
|
|
|
247
265
|
|
|
248
266
|
// Update statistics
|
|
249
267
|
processedCount += files.length;
|
|
250
|
-
|
|
251
|
-
|
|
268
|
+
// "Detected" counts paid pedimentos of any flavour (simplificado,
|
|
269
|
+
// completo, completo_xml when enabled).
|
|
270
|
+
detectedCount += detectionResults.filter((r) =>
|
|
271
|
+
DETECTED_PEDIMENTO_TYPES.has(r.detectedType),
|
|
252
272
|
).length;
|
|
253
|
-
proformaCount += detectionResults.filter(
|
|
254
|
-
(r
|
|
273
|
+
proformaCount += detectionResults.filter((r) =>
|
|
274
|
+
PROFORMA_TYPES.has(r.detectedType),
|
|
255
275
|
).length;
|
|
256
276
|
errorCount += detectionResults.filter((r) => r.detectionError).length;
|
|
257
277
|
|
|
@@ -324,8 +344,8 @@ export class IdentifyCommand {
|
|
|
324
344
|
// Detect using existing FileDetectionService
|
|
325
345
|
const result = await this.detectionService.detectFile(absolutePath);
|
|
326
346
|
|
|
327
|
-
// If detection succeeded and found a
|
|
328
|
-
if (result.detectedType
|
|
347
|
+
// If detection succeeded and found a paid pedimento (any variant)
|
|
348
|
+
if (DETECTED_PEDIMENTO_TYPES.has(result.detectedType)) {
|
|
329
349
|
return {
|
|
330
350
|
id: file.id,
|
|
331
351
|
detectedType: result.detectedType,
|
|
@@ -338,8 +358,8 @@ export class IdentifyCommand {
|
|
|
338
358
|
};
|
|
339
359
|
}
|
|
340
360
|
|
|
341
|
-
// If detection succeeded and found a proforma (
|
|
342
|
-
if (result.detectedType
|
|
361
|
+
// If detection succeeded and found a proforma (any variant)
|
|
362
|
+
if (PROFORMA_TYPES.has(result.detectedType)) {
|
|
343
363
|
return {
|
|
344
364
|
id: file.id,
|
|
345
365
|
detectedType: result.detectedType,
|
|
@@ -365,7 +385,7 @@ export class IdentifyCommand {
|
|
|
365
385
|
detectionError = `DETECTION_ERROR: ${result.error}`;
|
|
366
386
|
} else if (isDefinitelyNotPedimento) {
|
|
367
387
|
detectionError =
|
|
368
|
-
'NOT_PEDIMENTO: File does not match pedimento
|
|
388
|
+
'NOT_PEDIMENTO: File does not match any pedimento pattern. Missing key markers (e.g. "FORMA SIMPLIFICADA DE PEDIMENTO" or "NUM. PEDIMENTO:" + copy markers).';
|
|
369
389
|
} else {
|
|
370
390
|
// Partial match - might be a pedimento with missing fields
|
|
371
391
|
const missingFields = this.#getMissingFields(result);
|
|
@@ -434,18 +454,23 @@ export class IdentifyCommand {
|
|
|
434
454
|
return false;
|
|
435
455
|
}
|
|
436
456
|
|
|
437
|
-
// If it was detected as a proforma, it's related to a
|
|
438
|
-
|
|
457
|
+
// If it was detected as a proforma (any variant), it's related to a
|
|
458
|
+
// pedimento structure — not "definitely not".
|
|
459
|
+
if (PROFORMA_TYPES.has(result.detectedType)) {
|
|
439
460
|
return false;
|
|
440
461
|
}
|
|
441
462
|
|
|
442
|
-
// Check if the text contains
|
|
443
|
-
//
|
|
463
|
+
// Check if the text contains any required pedimento marker. This must
|
|
464
|
+
// stay aligned with the `match()` predicates in pedimento-simplificado.js
|
|
465
|
+
// and pedimento-completo.js.
|
|
444
466
|
const text = result.text || '';
|
|
445
|
-
const
|
|
467
|
+
const hasSimplificadoMarker = /FORMA SIMPLIFICADA DE PEDIMENTO/i.test(text);
|
|
468
|
+
const hasCompletoMarkers =
|
|
469
|
+
/NUM\.?\s*PEDIMENTO:/i.test(text) &&
|
|
470
|
+
/CVE\.?\s*PEDIMENTO:/i.test(text) &&
|
|
471
|
+
/T\.?\s*OPER:/i.test(text);
|
|
446
472
|
|
|
447
|
-
|
|
448
|
-
return !hasRequiredMarker;
|
|
473
|
+
return !hasSimplificadoMarker && !hasCompletoMarkers;
|
|
449
474
|
}
|
|
450
475
|
|
|
451
476
|
/**
|
|
@@ -592,10 +592,13 @@ export class ScanCommand {
|
|
|
592
592
|
const relativePath = PathNormalizer.getRelativePath(filePath, basePath);
|
|
593
593
|
const absolutePath = PathNormalizer.normalizeSeparators(filePath);
|
|
594
594
|
|
|
595
|
-
// Determine if this is potentially a simplificado
|
|
596
|
-
//
|
|
595
|
+
// Determine if this file is potentially a pedimento (simplificado, completo, or CoveFact).
|
|
596
|
+
// PDFs whose filename contains 'simp', 'pedim' or 'covefact' (case-insensitive)
|
|
597
|
+
// are flagged so the identify stage prioritizes them. The column name
|
|
598
|
+
// `likely_simplificado` is preserved for backwards compatibility; semantics
|
|
599
|
+
// are broader (any likely pedimento PDF).
|
|
597
600
|
const likelySimplificado =
|
|
598
|
-
fileExtension === 'pdf' &&
|
|
601
|
+
fileExtension === 'pdf' && /(simp|pedim|covefact)/i.test(fileName);
|
|
599
602
|
|
|
600
603
|
return {
|
|
601
604
|
fileName,
|