@arela/uploader 1.0.8 → 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@arela/uploader",
3
- "version": "1.0.8",
3
+ "version": "1.0.10",
4
4
  "description": "CLI to upload files/directories to Arela",
5
5
  "bin": {
6
6
  "arela": "./src/index.js"
@@ -1,5 +1,5 @@
1
1
  import cliProgress from 'cli-progress';
2
- import { globbyStream } from 'globby';
2
+ import { globby, globbyStream } from 'globby';
3
3
  import path from 'path';
4
4
  import { Transform } from 'stream';
5
5
  import { pipeline } from 'stream/promises';
@@ -27,6 +27,7 @@ export class ScanCommand {
27
27
  * @param {Object} options - Command options
28
28
  * @param {boolean} options.countFirst - Count files first for percentage-based progress
29
29
  * @param {string} options.api - API target: 'default', 'agencia', or 'cliente'
30
+ * @param {boolean} options.stream - Use streaming file discovery (default: true, use --no-stream to disable)
30
31
  */
31
32
  async execute(options = {}) {
32
33
  const startTime = Date.now();
@@ -54,6 +55,9 @@ export class ScanCommand {
54
55
  logger.info(`🖥️ Server: ${scanConfig.serverId}`);
55
56
  logger.info(`📂 Base Path: ${basePath}`);
56
57
  logger.info(`📊 Directory Level: ${scanConfig.directoryLevel}`);
58
+ logger.info(
59
+ `🔄 File Discovery: ${options.stream !== false ? 'Streaming (globbyStream)' : 'Synchronous (globby)'}`,
60
+ );
57
61
 
58
62
  // Step 1: Discover directories at specified level
59
63
  logger.info('\n🔍 Discovering directories...');
@@ -94,7 +98,11 @@ export class ScanCommand {
94
98
  let totalFiles = null;
95
99
  if (options.countFirst) {
96
100
  logger.info('\n🔢 Counting files...');
97
- totalFiles = await this.#countFiles(basePath, scanConfig);
101
+ totalFiles = await this.#countFiles(
102
+ basePath,
103
+ scanConfig,
104
+ options.stream !== false,
105
+ );
98
106
  logger.info(`📊 Found ${totalFiles.toLocaleString()} files to scan`);
99
107
  }
100
108
 
@@ -114,6 +122,7 @@ export class ScanCommand {
114
122
  scanConfig,
115
123
  reg.tableName,
116
124
  null, // Don't use percentage for individual directories
125
+ options.stream !== false, // Use streaming by default, --no-stream sets stream=false
117
126
  );
118
127
 
119
128
  // Step 4: Complete scan for this directory
@@ -281,22 +290,37 @@ export class ScanCommand {
281
290
  /**
282
291
  * Count files for percentage-based progress
283
292
  * @private
293
+ * @param {string} basePath - Base path to count from
294
+ * @param {Object} scanConfig - Scan configuration
295
+ * @param {boolean} useStream - Use streaming (globbyStream) or sync (globby) approach
284
296
  */
285
- async #countFiles(basePath, scanConfig) {
297
+ async #countFiles(basePath, scanConfig, useStream = true) {
286
298
  const sources = appConfig.getUploadSources();
287
299
  let totalCount = 0;
288
300
 
289
301
  for (const source of sources) {
290
302
  const sourcePath = path.resolve(basePath, source);
291
- const files = await globbyStream('**/*', {
292
- cwd: sourcePath,
293
- onlyFiles: true,
294
- absolute: true,
295
- });
296
303
 
297
- for await (const file of files) {
298
- if (!this.#shouldExcludeFile(file, scanConfig.excludePatterns)) {
299
- totalCount++;
304
+ if (useStream) {
305
+ // Streaming approach
306
+ const files = await globbyStream('**/*', {
307
+ cwd: sourcePath,
308
+ onlyFiles: true,
309
+ absolute: true,
310
+ });
311
+
312
+ for await (const file of files) {
313
+ if (!this.#shouldExcludeFile(file, scanConfig.excludePatterns)) {
314
+ totalCount++;
315
+ }
316
+ }
317
+ } else {
318
+ // Synchronous approach (original method)
319
+ const files = await globby([`${sourcePath}/**/*`], { onlyFiles: true });
320
+ for (const file of files) {
321
+ if (!this.#shouldExcludeFile(file, scanConfig.excludePatterns)) {
322
+ totalCount++;
323
+ }
300
324
  }
301
325
  }
302
326
  }
@@ -307,12 +331,18 @@ export class ScanCommand {
307
331
  /**
308
332
  * Stream files from a single directory and upload stats in batches
309
333
  * @private
334
+ * @param {string} dirPath - Directory path to scan
335
+ * @param {Object} scanConfig - Scan configuration
336
+ * @param {string} tableName - Target table name
337
+ * @param {number|null} totalFiles - Total files count for progress (optional)
338
+ * @param {boolean} useStream - Use streaming (globbyStream) or sync (globby) approach
310
339
  */
311
340
  async #streamScanDirectory(
312
341
  dirPath,
313
342
  scanConfig,
314
343
  tableName,
315
344
  totalFiles = null,
345
+ useStream = true,
316
346
  ) {
317
347
  // For directory-level scanning, we scan the directory directly
318
348
  const batchSize = scanConfig.batchSize || 2000;
@@ -328,61 +358,67 @@ export class ScanCommand {
328
358
  const progressBar = this.#createProgressBar(totalFiles);
329
359
 
330
360
  try {
331
- // Create stream with stats option
332
- const fileStream = globbyStream('**/*', {
333
- cwd: dirPath,
334
- onlyFiles: true,
335
- absolute: true,
336
- stats: true, // Get file stats during discovery
337
- });
338
-
339
- // Process each file from stream
340
- for await (const entry of fileStream) {
341
- // globby with stats:true returns {path, stats} objects
342
- const filePath = typeof entry === 'string' ? entry : entry.path;
343
- const stats = typeof entry === 'object' ? entry.stats : null;
344
-
345
- // Check if file should be excluded
346
- if (this.#shouldExcludeFile(filePath, scanConfig.excludePatterns)) {
347
- filesSkipped++;
348
- continue;
349
- }
350
-
351
- // Get file stats (use from globby or fetch manually)
352
- const fileStats = stats || FileOperations.getFileStats(filePath);
353
- if (!fileStats) {
354
- logger.debug(`⚠️ Could not read stats: ${filePath}`);
355
- filesSkipped++;
356
- continue;
357
- }
358
-
359
- // Normalize file record
360
- const record = this.#normalizeFileRecord(
361
- filePath,
362
- fileStats,
363
- dirPath,
364
- scanTimestamp,
365
- );
361
+ if (useStream) {
362
+ // Streaming approach: use globbyStream with stats option
363
+ const fileStream = globbyStream('**/*', {
364
+ cwd: dirPath,
365
+ onlyFiles: true,
366
+ absolute: true,
367
+ stats: true, // Get file stats during discovery
368
+ });
366
369
 
367
- currentBatch.push(record);
368
- filesScanned++;
369
- totalSize += record.sizeBytes;
370
+ // Process each file from stream
371
+ for await (const entry of fileStream) {
372
+ // globby with stats:true returns {path, stats} objects
373
+ const filePath = typeof entry === 'string' ? entry : entry.path;
374
+ const stats = typeof entry === 'object' ? entry.stats : null;
375
+
376
+ const result = await this.#processFileEntry(
377
+ filePath,
378
+ stats,
379
+ dirPath,
380
+ scanConfig,
381
+ scanTimestamp,
382
+ tableName,
383
+ currentBatch,
384
+ batchSize,
385
+ progressBar,
386
+ totalFiles,
387
+ { filesScanned, filesInserted, filesSkipped, totalSize },
388
+ );
370
389
 
371
- // Update progress
372
- if (totalFiles) {
373
- progressBar.update(filesScanned);
374
- } else {
375
- // Show throughput instead of percentage
376
- const elapsed = (Date.now() - progressBar.startTime) / 1000;
377
- const rate = (filesScanned / elapsed).toFixed(1);
378
- progressBar.update(filesScanned, { rate });
390
+ filesScanned = result.filesScanned;
391
+ filesInserted = result.filesInserted;
392
+ filesSkipped = result.filesSkipped;
393
+ totalSize = result.totalSize;
394
+ currentBatch = result.currentBatch;
379
395
  }
396
+ } else {
397
+ // Synchronous approach: use globby (original method from UploadCommand)
398
+ logger.debug('Using synchronous file discovery (globby)...');
399
+ const files = await globby([`${dirPath}/**/*`], { onlyFiles: true });
400
+ logger.debug(`Found ${files.length} files to process`);
401
+
402
+ for (const filePath of files) {
403
+ const result = await this.#processFileEntry(
404
+ filePath,
405
+ null, // Stats will be fetched manually
406
+ dirPath,
407
+ scanConfig,
408
+ scanTimestamp,
409
+ tableName,
410
+ currentBatch,
411
+ batchSize,
412
+ progressBar,
413
+ totalFiles,
414
+ { filesScanned, filesInserted, filesSkipped, totalSize },
415
+ );
380
416
 
381
- // Upload batch when full
382
- if (currentBatch.length >= batchSize) {
383
- const inserted = await this.#uploadBatch(tableName, currentBatch);
384
- filesInserted += inserted;
385
- currentBatch = [];
417
+ filesScanned = result.filesScanned;
418
+ filesInserted = result.filesInserted;
419
+ filesSkipped = result.filesSkipped;
420
+ totalSize = result.totalSize;
421
+ currentBatch = result.currentBatch;
386
422
  }
387
423
  }
388
424
  } catch (error) {
@@ -405,6 +441,89 @@ export class ScanCommand {
405
441
  };
406
442
  }
407
443
 
444
+ /**
445
+ * Process a single file entry (used by both streaming and sync approaches)
446
+ * @private
447
+ */
448
+ async #processFileEntry(
449
+ filePath,
450
+ stats,
451
+ dirPath,
452
+ scanConfig,
453
+ scanTimestamp,
454
+ tableName,
455
+ currentBatch,
456
+ batchSize,
457
+ progressBar,
458
+ totalFiles,
459
+ counters,
460
+ ) {
461
+ let { filesScanned, filesInserted, filesSkipped, totalSize } = counters;
462
+
463
+ // Check if file should be excluded
464
+ if (this.#shouldExcludeFile(filePath, scanConfig.excludePatterns)) {
465
+ filesSkipped++;
466
+ return {
467
+ filesScanned,
468
+ filesInserted,
469
+ filesSkipped,
470
+ totalSize,
471
+ currentBatch,
472
+ };
473
+ }
474
+
475
+ // Get file stats (use from globby or fetch manually)
476
+ const fileStats = stats || FileOperations.getFileStats(filePath);
477
+ if (!fileStats) {
478
+ logger.debug(`⚠️ Could not read stats: ${filePath}`);
479
+ filesSkipped++;
480
+ return {
481
+ filesScanned,
482
+ filesInserted,
483
+ filesSkipped,
484
+ totalSize,
485
+ currentBatch,
486
+ };
487
+ }
488
+
489
+ // Normalize file record
490
+ const record = this.#normalizeFileRecord(
491
+ filePath,
492
+ fileStats,
493
+ dirPath,
494
+ scanTimestamp,
495
+ );
496
+
497
+ currentBatch.push(record);
498
+ filesScanned++;
499
+ totalSize += record.sizeBytes;
500
+
501
+ // Update progress
502
+ if (totalFiles) {
503
+ progressBar.update(filesScanned);
504
+ } else {
505
+ // Show throughput instead of percentage
506
+ const elapsed = (Date.now() - progressBar.startTime) / 1000;
507
+ const rate = (filesScanned / elapsed).toFixed(1);
508
+ progressBar.update(filesScanned, { rate });
509
+ }
510
+
511
+ // Upload batch when full
512
+ if (currentBatch.length >= batchSize) {
513
+ const inserted = await this.#uploadBatch(tableName, currentBatch);
514
+ filesInserted += inserted;
515
+ currentBatch = [];
516
+ }
517
+
518
+ return {
519
+ filesScanned,
520
+ filesInserted,
521
+ filesSkipped,
522
+ totalSize,
523
+ currentBatch,
524
+ };
525
+ }
526
+
408
527
  /**
409
528
  * Upload a batch of file records
410
529
  * @private
@@ -425,6 +544,7 @@ export class ScanCommand {
425
544
  /**
426
545
  * Normalize file record for database insertion
427
546
  * Stores paths with forward slashes for consistency but keeps them absolute
547
+ * Sets likelySimplificado to true if file is a PDF and filename contains 'simp'
428
548
  * @private
429
549
  */
430
550
  #normalizeFileRecord(filePath, fileStats, basePath, scanTimestamp) {
@@ -438,6 +558,11 @@ export class ScanCommand {
438
558
  const relativePath = PathNormalizer.getRelativePath(filePath, basePath);
439
559
  const absolutePath = PathNormalizer.normalizeSeparators(filePath);
440
560
 
561
+ // Determine if this is potentially a simplificado document
562
+ // Must be a PDF and filename must contain 'simp' (case-insensitive)
563
+ const likelySimplificado =
564
+ fileExtension === 'pdf' && fileName.toLowerCase().includes('simp');
565
+
441
566
  return {
442
567
  fileName,
443
568
  fileExtension,
@@ -447,6 +572,7 @@ export class ScanCommand {
447
572
  sizeBytes: Number(fileStats.size),
448
573
  modifiedAt: fileStats.mtime.toISOString(),
449
574
  scanTimestamp,
575
+ likelySimplificado,
450
576
  };
451
577
  }
452
578
 
@@ -34,10 +34,10 @@ class Config {
34
34
  const __dirname = path.dirname(__filename);
35
35
  const packageJsonPath = path.resolve(__dirname, '../../package.json');
36
36
  const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
37
- return packageJson.version || '1.0.8';
37
+ return packageJson.version || '1.0.10';
38
38
  } catch (error) {
39
39
  console.warn('⚠️ Could not read package.json version, using fallback');
40
- return '1.0.8';
40
+ return '1.0.10';
41
41
  }
42
42
  }
43
43
 
package/src/index.js CHANGED
@@ -185,6 +185,10 @@ class ArelaUploaderCLI {
185
185
  '--count-first',
186
186
  'Count files first for percentage-based progress (slower start)',
187
187
  )
188
+ .option(
189
+ '--no-stream',
190
+ 'Use synchronous file discovery instead of streaming (original approach)',
191
+ )
188
192
  .action(async (options) => {
189
193
  try {
190
194
  // Set API target if specified