@arela/uploader 0.2.9 → 0.2.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.envbackup +37 -0
- package/package.json +1 -1
- package/src/config/config.js +7 -2
- package/src/services/DatabaseService.js +361 -199
- package/arela-upload.log +0 -0
package/.envbackup
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# SUPABASE_URL=https://supabase.prod.app.trader-docs.com
|
|
2
|
+
# SUPABASE_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJyb2xlIjoic2VydmljZV9yb2xlIiwiaXNzIjoic3VwYWJhc2UiLCJpYXQiOjE3NDU3MzM2MDAsImV4cCI6MTkwMzUwMDAwMH0.eAoXvaBZzZa31txItwEkoLnplWG10ee-U5GMc265xwk
|
|
3
|
+
# SUPABASE_BUCKET=sample
|
|
4
|
+
|
|
5
|
+
# PATH_VARS=o:/ExpedienteElectronicko/expediente/archivos/:rfcPatente/:patente-:aduana/:year/:pedimento/ALSJDKHF ASD,KFHJA SDFK,ASLDKJFH
|
|
6
|
+
|
|
7
|
+
SUPABASE_URL=http://127.0.0.1:54321
|
|
8
|
+
SUPABASE_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZS1kZW1vIiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImV4cCI6MTk4MzgxMjk5Nn0.EGIM96RAZx35lJzdJsyH-qQwv8Hdp7fsn3W0YpN81IU
|
|
9
|
+
SUPABASE_BUCKET=arela
|
|
10
|
+
|
|
11
|
+
# Arela API Configuration
|
|
12
|
+
ARELA_API_URL=http://localhost:3010
|
|
13
|
+
ARELA_API_TOKEN=f0608f83e5faa4c6be32c19975635c4e9012b31f7249a9f0a9270f54d74ec599
|
|
14
|
+
|
|
15
|
+
# Upload Configuration
|
|
16
|
+
UPLOAD_BASE_PATH=./sample
|
|
17
|
+
UPLOAD_SOURCES=2023|2024
|
|
18
|
+
|
|
19
|
+
# RFC Upload Configuration
|
|
20
|
+
# Pipe-separated list of RFCs to upload files for
|
|
21
|
+
# Example: MMJ0810145N1|ABC1234567XY|DEF9876543ZZ
|
|
22
|
+
UPLOAD_RFCS=AKS151005E46|IMS030409FZ0|RDG1107154L7|SHP031226BV2|CSM9301219B4|LIN960124HT8|LME971009SW4|AKM9707151B6|FEL000822AG2|FDM060802J54|MTM9807279B4|AUM9207011CA|MMJ0810145N1|ACC010328EQ6|PED781129JT6|CAD890407NK7|SME140411IK7|JME1903121C2|EIJ110429NF9|PTJ080414TM6|TME050503BM4
|
|
23
|
+
|
|
24
|
+
# AKS151005E46|IMS030409FZ0|RDG1107154L7|SHP031226BV2|CSM9301219B4|LIN960124HT8|LME971009SW4|AKM9707151B6|FEL000822AG2|PTJ080414TM6|TME050503BM4
|
|
25
|
+
# FDM060802J54|MTM9807279B4|AUM9207011CA|MMJ0810145N1|ACC010328EQ6|PED781129JT6|CAD890407NK7|SME140411IK7|JME1903121C2|EIJ110429NF9
|
|
26
|
+
|
|
27
|
+
# Logging and Verbosity
|
|
28
|
+
VERBOSE_LOGGING=true # Verbose path logging is enabled; set to false to disable it for better performance
|
|
29
|
+
BATCH_DELAY=50 # Reduce delay between batches (default: 100ms)
|
|
30
|
+
PROGRESS_UPDATE_INTERVAL=20 # Update progress every 20 items (default: 10)
|
|
31
|
+
|
|
32
|
+
# Log Buffering
|
|
33
|
+
LOG_BUFFER_SIZE=200 # Increase buffer size for fewer I/O ops (default: 100)
|
|
34
|
+
LOG_FLUSH_INTERVAL=3000 # Flush logs every 3 seconds (default: 5000ms)
|
|
35
|
+
|
|
36
|
+
# Example for maximum performance
|
|
37
|
+
# VERBOSE_LOGGING=false BATCH_DELAY=25 LOG_BUFFER_SIZE=500 arela --stats-only /path/to/files
|
package/package.json
CHANGED
package/src/config/config.js
CHANGED
|
@@ -28,10 +28,10 @@ class Config {
|
|
|
28
28
|
const __dirname = path.dirname(__filename);
|
|
29
29
|
const packageJsonPath = path.resolve(__dirname, '../../package.json');
|
|
30
30
|
const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
|
|
31
|
-
return packageJson.version || '0.2.
|
|
31
|
+
return packageJson.version || '0.2.11';
|
|
32
32
|
} catch (error) {
|
|
33
33
|
console.warn('⚠️ Could not read package.json version, using fallback');
|
|
34
|
-
return '0.2.
|
|
34
|
+
return '0.2.11';
|
|
35
35
|
}
|
|
36
36
|
}
|
|
37
37
|
|
|
@@ -72,10 +72,15 @@ class Config {
|
|
|
72
72
|
.map((s) => s.trim())
|
|
73
73
|
.filter(Boolean);
|
|
74
74
|
|
|
75
|
+
const uploadYears = process.env.UPLOAD_YEARS?.split('|')
|
|
76
|
+
.map((s) => s.trim())
|
|
77
|
+
.filter(Boolean);
|
|
78
|
+
|
|
75
79
|
return {
|
|
76
80
|
basePath,
|
|
77
81
|
sources,
|
|
78
82
|
rfcs: uploadRfcs,
|
|
83
|
+
years: uploadYears,
|
|
79
84
|
};
|
|
80
85
|
}
|
|
81
86
|
|
|
@@ -316,7 +316,6 @@ export class DatabaseService {
|
|
|
316
316
|
filename: record.filename,
|
|
317
317
|
file_extension: record.file_extension,
|
|
318
318
|
is_like_simplificado: record.is_like_simplificado,
|
|
319
|
-
year: record.year,
|
|
320
319
|
})
|
|
321
320
|
.eq('original_path', record.original_path);
|
|
322
321
|
|
|
@@ -557,169 +556,240 @@ export class DatabaseService {
|
|
|
557
556
|
const supabase = await this.#getSupabaseClient();
|
|
558
557
|
|
|
559
558
|
logger.info('Phase 3: Starting arela_path and year propagation process...');
|
|
560
|
-
console.log(
|
|
561
|
-
'🔍 Finding pedimento_simplificado records with arela_path and year...',
|
|
562
|
-
);
|
|
563
|
-
|
|
564
|
-
// Get all pedimento_simplificado records that have arela_path
|
|
565
|
-
const { data: pedimentoRecords, error: pedimentoError } = await supabase
|
|
566
|
-
.from('uploader')
|
|
567
|
-
.select('id, original_path, arela_path, filename, year')
|
|
568
|
-
.eq('document_type', 'pedimento_simplificado')
|
|
569
|
-
.not('arela_path', 'is', null);
|
|
570
|
-
|
|
571
|
-
if (pedimentoError) {
|
|
572
|
-
const errorMsg = `Error fetching pedimento records: ${pedimentoError.message}`;
|
|
573
|
-
logger.error(errorMsg);
|
|
574
|
-
throw new Error(errorMsg);
|
|
575
|
-
}
|
|
559
|
+
console.log('🔍 Processing pedimento_simplificado records page by page...');
|
|
576
560
|
|
|
577
|
-
|
|
578
|
-
|
|
561
|
+
// Log year filtering configuration
|
|
562
|
+
if (appConfig.upload.years && appConfig.upload.years.length > 0) {
|
|
563
|
+
logger.info(
|
|
564
|
+
`🗓️ Year filter enabled: ${appConfig.upload.years.join(', ')}`,
|
|
565
|
+
);
|
|
579
566
|
console.log(
|
|
580
|
-
|
|
567
|
+
`🗓️ Year filter enabled: ${appConfig.upload.years.join(', ')}`,
|
|
581
568
|
);
|
|
582
|
-
|
|
569
|
+
} else {
|
|
570
|
+
logger.info('🗓️ No year filter configured - processing all years');
|
|
571
|
+
console.log('🗓️ No year filter configured - processing all years');
|
|
583
572
|
}
|
|
584
573
|
|
|
585
|
-
console.log(
|
|
586
|
-
`📋 Found ${pedimentoRecords.length} pedimento records with arela_path`,
|
|
587
|
-
);
|
|
588
|
-
logger.info(
|
|
589
|
-
`Found ${pedimentoRecords.length} pedimento records with arela_path to process`,
|
|
590
|
-
);
|
|
591
|
-
|
|
592
574
|
let totalProcessed = 0;
|
|
593
575
|
let totalUpdated = 0;
|
|
594
576
|
let totalErrors = 0;
|
|
577
|
+
let offset = 0;
|
|
578
|
+
const pageSize = 50;
|
|
579
|
+
let hasMoreData = true;
|
|
580
|
+
let pageNumber = 1;
|
|
595
581
|
const BATCH_SIZE = 50; // Process files in batches
|
|
596
582
|
|
|
597
|
-
// Process
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
// Extract base path from original_path (remove filename)
|
|
603
|
-
const basePath = path.dirname(pedimento.original_path);
|
|
583
|
+
// Process pedimento records page by page for memory efficiency
|
|
584
|
+
while (hasMoreData) {
|
|
585
|
+
logger.info(
|
|
586
|
+
`Fetching and processing pedimento records page ${pageNumber} (records ${offset + 1} to ${offset + pageSize})...`,
|
|
587
|
+
);
|
|
604
588
|
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
)
|
|
589
|
+
let query = supabase
|
|
590
|
+
.from('uploader')
|
|
591
|
+
.select('id, original_path, arela_path, filename, year')
|
|
592
|
+
.eq('document_type', 'pedimento_simplificado')
|
|
593
|
+
.not('arela_path', 'is', null);
|
|
608
594
|
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
: existingPath.endsWith('/')
|
|
614
|
-
? existingPath
|
|
615
|
-
: existingPath + '/';
|
|
616
|
-
|
|
617
|
-
// Find all files with the same base path that don't have arela_path yet
|
|
618
|
-
// Use pagination to handle cases with more than 1000 related files
|
|
619
|
-
let allRelatedFiles = [];
|
|
620
|
-
let from = 0;
|
|
621
|
-
const pageSize = 1000;
|
|
622
|
-
let hasMoreData = true;
|
|
623
|
-
let paginationError = null;
|
|
624
|
-
|
|
625
|
-
logger.info(`Searching for related files in base path: ${basePath}`);
|
|
626
|
-
|
|
627
|
-
while (hasMoreData) {
|
|
628
|
-
const { data: relatedFilesPage, error: relatedError } = await supabase
|
|
629
|
-
.from('uploader')
|
|
630
|
-
.select('id, filename, original_path')
|
|
631
|
-
.like('original_path', `${basePath}%`)
|
|
632
|
-
.is('arela_path', null)
|
|
633
|
-
.neq('id', pedimento.id) // Exclude the pedimento itself
|
|
634
|
-
.range(from, from + pageSize - 1);
|
|
595
|
+
// Add year filter if UPLOAD_YEARS is configured
|
|
596
|
+
if (appConfig.upload.years && appConfig.upload.years.length > 0) {
|
|
597
|
+
query = query.in('year', appConfig.upload.years);
|
|
598
|
+
}
|
|
635
599
|
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
);
|
|
640
|
-
paginationError = relatedError;
|
|
641
|
-
totalErrors++;
|
|
642
|
-
break;
|
|
643
|
-
}
|
|
600
|
+
const { data: pedimentoPage, error: pedimentoError } = await query
|
|
601
|
+
.range(offset, offset + pageSize - 1)
|
|
602
|
+
.order('created_at');
|
|
644
603
|
|
|
645
|
-
|
|
646
|
-
|
|
604
|
+
if (pedimentoError) {
|
|
605
|
+
const errorMsg = `Error fetching pedimento records page ${pageNumber}: ${pedimentoError.message}`;
|
|
606
|
+
logger.error(errorMsg);
|
|
607
|
+
throw new Error(errorMsg);
|
|
608
|
+
}
|
|
647
609
|
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
logger.info(
|
|
654
|
-
`Fetched ${relatedFilesPage.length} files, continuing pagination (total so far: ${allRelatedFiles.length})`,
|
|
655
|
-
);
|
|
656
|
-
}
|
|
657
|
-
} else {
|
|
658
|
-
hasMoreData = false;
|
|
659
|
-
}
|
|
660
|
-
}
|
|
610
|
+
if (!pedimentoPage || pedimentoPage.length === 0) {
|
|
611
|
+
hasMoreData = false;
|
|
612
|
+
logger.info('No more pedimento records found');
|
|
613
|
+
break;
|
|
614
|
+
}
|
|
661
615
|
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
}
|
|
616
|
+
logger.info(
|
|
617
|
+
`Processing page ${pageNumber}: ${pedimentoPage.length} pedimento records`,
|
|
618
|
+
);
|
|
666
619
|
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
620
|
+
// Process each pedimento record in the current page
|
|
621
|
+
for (const pedimento of pedimentoPage) {
|
|
622
|
+
try {
|
|
623
|
+
totalProcessed++;
|
|
671
624
|
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
);
|
|
625
|
+
// Extract base path from original_path (remove filename)
|
|
626
|
+
const basePath = path.dirname(pedimento.original_path);
|
|
675
627
|
|
|
676
|
-
|
|
677
|
-
|
|
628
|
+
logger.info(
|
|
629
|
+
`Processing pedimento: ${pedimento.filename} | Base path: ${basePath} | Year: ${pedimento.year || 'N/A'}`,
|
|
630
|
+
);
|
|
678
631
|
|
|
679
|
-
|
|
680
|
-
const
|
|
681
|
-
const
|
|
682
|
-
|
|
632
|
+
// Extract folder part from existing arela_path
|
|
633
|
+
const existingPath = pedimento.arela_path;
|
|
634
|
+
const folderArelaPath = existingPath.includes('/')
|
|
635
|
+
? existingPath.substring(0, existingPath.lastIndexOf('/')) + '/'
|
|
636
|
+
: existingPath.endsWith('/')
|
|
637
|
+
? existingPath
|
|
638
|
+
: existingPath + '/';
|
|
639
|
+
|
|
640
|
+
// Process related files page by page for memory efficiency
|
|
641
|
+
let relatedFilesFrom = 0;
|
|
642
|
+
const relatedFilesPageSize = 50;
|
|
643
|
+
let hasMoreRelatedFiles = true;
|
|
644
|
+
let relatedFilesPageNumber = 1;
|
|
645
|
+
let totalRelatedFilesProcessed = 0;
|
|
683
646
|
|
|
684
647
|
logger.info(
|
|
685
|
-
`
|
|
648
|
+
`Searching and processing related files in base path: ${basePath}`,
|
|
686
649
|
);
|
|
687
650
|
|
|
688
|
-
|
|
689
|
-
const { error:
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
651
|
+
while (hasMoreRelatedFiles) {
|
|
652
|
+
const { data: relatedFilesPage, error: relatedError } =
|
|
653
|
+
await supabase
|
|
654
|
+
.from('uploader')
|
|
655
|
+
.select('id, filename, original_path')
|
|
656
|
+
.like('original_path', `${basePath}%`)
|
|
657
|
+
.is('arela_path', null)
|
|
658
|
+
.neq('id', pedimento.id) // Exclude the pedimento itself
|
|
659
|
+
.range(
|
|
660
|
+
relatedFilesFrom,
|
|
661
|
+
relatedFilesFrom + relatedFilesPageSize - 1,
|
|
662
|
+
);
|
|
696
663
|
|
|
697
|
-
if (
|
|
664
|
+
if (relatedError) {
|
|
698
665
|
logger.error(
|
|
699
|
-
`Error
|
|
666
|
+
`Error finding related files for ${pedimento.filename}: ${relatedError.message}`,
|
|
700
667
|
);
|
|
701
668
|
totalErrors++;
|
|
702
|
-
|
|
703
|
-
|
|
669
|
+
break;
|
|
670
|
+
}
|
|
671
|
+
// console.log(`query by basePath: ${basePath} count: [${relatedFilesPage.length}]`);
|
|
672
|
+
|
|
673
|
+
if (!relatedFilesPage || relatedFilesPage.length === 0) {
|
|
674
|
+
hasMoreRelatedFiles = false;
|
|
675
|
+
if (totalRelatedFilesProcessed === 0) {
|
|
676
|
+
logger.info(`No related files found for ${pedimento.filename}`);
|
|
677
|
+
}
|
|
678
|
+
break;
|
|
679
|
+
}
|
|
680
|
+
|
|
681
|
+
logger.info(
|
|
682
|
+
`Processing related files page ${relatedFilesPageNumber}: ${relatedFilesPage.length} files for ${pedimento.filename}`,
|
|
683
|
+
);
|
|
684
|
+
|
|
685
|
+
// Track if any updates occurred in this page
|
|
686
|
+
let updatesOccurred = false;
|
|
687
|
+
// Process this page of related files in batches
|
|
688
|
+
const pageFileIds = relatedFilesPage.map((f) => f.id);
|
|
689
|
+
|
|
690
|
+
for (let i = 0; i < pageFileIds.length; i += BATCH_SIZE) {
|
|
691
|
+
const batchIds = pageFileIds.slice(i, i + BATCH_SIZE);
|
|
692
|
+
const batchNumber =
|
|
693
|
+
Math.floor(relatedFilesFrom / BATCH_SIZE) +
|
|
694
|
+
Math.floor(i / BATCH_SIZE) +
|
|
695
|
+
1;
|
|
696
|
+
|
|
704
697
|
logger.info(
|
|
705
|
-
`
|
|
698
|
+
`Updating batch ${batchNumber}: ${batchIds.length} files with arela_path and year...`,
|
|
706
699
|
);
|
|
700
|
+
|
|
701
|
+
try {
|
|
702
|
+
const { error: updateError } = await supabase
|
|
703
|
+
.from('uploader')
|
|
704
|
+
.update({
|
|
705
|
+
arela_path: folderArelaPath,
|
|
706
|
+
year: pedimento.year,
|
|
707
|
+
})
|
|
708
|
+
.in('id', batchIds);
|
|
709
|
+
|
|
710
|
+
if (updateError) {
|
|
711
|
+
logger.error(
|
|
712
|
+
`Error in batch ${batchNumber}: ${updateError.message}`,
|
|
713
|
+
);
|
|
714
|
+
totalErrors++;
|
|
715
|
+
} else {
|
|
716
|
+
totalUpdated += batchIds.length;
|
|
717
|
+
totalRelatedFilesProcessed += batchIds.length;
|
|
718
|
+
updatesOccurred = true; // Mark that updates occurred
|
|
719
|
+
logger.info(
|
|
720
|
+
`Successfully updated batch ${batchNumber}: ${batchIds.length} files with arela_path and year`,
|
|
721
|
+
);
|
|
722
|
+
}
|
|
723
|
+
} catch (batchError) {
|
|
724
|
+
logger.error(
|
|
725
|
+
`Exception in batch ${batchNumber}: ${batchError.message}`,
|
|
726
|
+
);
|
|
727
|
+
totalErrors++;
|
|
728
|
+
}
|
|
729
|
+
}
|
|
730
|
+
|
|
731
|
+
// Check if we need to fetch the next page of related files
|
|
732
|
+
if (relatedFilesPage.length < relatedFilesPageSize) {
|
|
733
|
+
console.log('no more related files for ', basePath);
|
|
734
|
+
hasMoreRelatedFiles = false;
|
|
735
|
+
logger.info(
|
|
736
|
+
`Completed processing related files for ${pedimento.filename}. Total processed: ${totalRelatedFilesProcessed}`,
|
|
737
|
+
);
|
|
738
|
+
} else {
|
|
739
|
+
// If updates occurred, reset pagination to start from beginning
|
|
740
|
+
// since records that matched the query may no longer match after update
|
|
741
|
+
if (updatesOccurred) {
|
|
742
|
+
relatedFilesFrom = 0;
|
|
743
|
+
logger.info(
|
|
744
|
+
`Page ${relatedFilesPageNumber} complete with updates: ${relatedFilesPage.length} files processed, restarting pagination from beginning due to query condition changes...`,
|
|
745
|
+
);
|
|
746
|
+
} else {
|
|
747
|
+
relatedFilesFrom += relatedFilesPageSize - 1;
|
|
748
|
+
logger.info(
|
|
749
|
+
`Page ${relatedFilesPageNumber} complete: ${relatedFilesPage.length} files processed, continuing to next page...`,
|
|
750
|
+
);
|
|
751
|
+
}
|
|
752
|
+
relatedFilesPageNumber++;
|
|
707
753
|
}
|
|
708
|
-
} catch (batchError) {
|
|
709
|
-
logger.error(
|
|
710
|
-
`Exception in batch ${batchNumber}: ${batchError.message}`,
|
|
711
|
-
);
|
|
712
|
-
totalErrors++;
|
|
713
754
|
}
|
|
755
|
+
} catch (error) {
|
|
756
|
+
logger.error(
|
|
757
|
+
`Error processing pedimento ${pedimento.filename}: ${error.message}`,
|
|
758
|
+
);
|
|
759
|
+
totalErrors++;
|
|
714
760
|
}
|
|
715
|
-
}
|
|
716
|
-
|
|
717
|
-
|
|
761
|
+
}
|
|
762
|
+
|
|
763
|
+
// Check if we need to fetch the next page
|
|
764
|
+
if (pedimentoPage.length < pageSize) {
|
|
765
|
+
hasMoreData = false;
|
|
766
|
+
logger.info(
|
|
767
|
+
`Completed processing. Last page ${pageNumber} had ${pedimentoPage.length} records`,
|
|
768
|
+
);
|
|
769
|
+
} else {
|
|
770
|
+
offset += pageSize;
|
|
771
|
+
pageNumber++;
|
|
772
|
+
logger.info(
|
|
773
|
+
`Page ${pageNumber - 1} complete: ${pedimentoPage.length} records processed, moving to next page...`,
|
|
718
774
|
);
|
|
719
|
-
totalErrors++;
|
|
720
775
|
}
|
|
721
776
|
}
|
|
722
777
|
|
|
778
|
+
// Final summary
|
|
779
|
+
if (totalProcessed === 0) {
|
|
780
|
+
logger.info('No pedimento_simplificado records with arela_path found');
|
|
781
|
+
console.log(
|
|
782
|
+
'ℹ️ No pedimento_simplificado records with arela_path found',
|
|
783
|
+
);
|
|
784
|
+
} else {
|
|
785
|
+
console.log(
|
|
786
|
+
`📋 Processed ${totalProcessed} pedimento records across ${pageNumber - 1} pages`,
|
|
787
|
+
);
|
|
788
|
+
logger.info(
|
|
789
|
+
`Processed ${totalProcessed} pedimento records across ${pageNumber - 1} pages`,
|
|
790
|
+
);
|
|
791
|
+
}
|
|
792
|
+
|
|
723
793
|
const result = {
|
|
724
794
|
processedCount: totalProcessed,
|
|
725
795
|
updatedCount: totalUpdated,
|
|
@@ -773,25 +843,75 @@ export class DatabaseService {
|
|
|
773
843
|
logger.info(`Total files for specified RFCs: ${totalRfcFiles || 0}`);
|
|
774
844
|
}
|
|
775
845
|
|
|
776
|
-
// Step 1: Get all pedimento_simplificado records that match the specified RFCs and have arela_path
|
|
846
|
+
// Step 1: Get all pedimento_simplificado records that match the specified RFCs and have arela_path using pagination
|
|
777
847
|
console.log(
|
|
778
848
|
'🎯 Finding pedimento_simplificado records for specified RFCs...',
|
|
779
849
|
);
|
|
780
|
-
|
|
781
|
-
|
|
850
|
+
|
|
851
|
+
let allPedimentoRecords = [];
|
|
852
|
+
let offset = 0;
|
|
853
|
+
const pageSize = 500; // Process pedimento records in pages
|
|
854
|
+
let hasMorePedimentoData = true;
|
|
855
|
+
let pageNumber = 1;
|
|
856
|
+
|
|
857
|
+
// Process pedimento records page by page for memory efficiency
|
|
858
|
+
while (hasMorePedimentoData) {
|
|
859
|
+
logger.info(
|
|
860
|
+
`Fetching pedimento records page ${pageNumber} (records ${offset + 1} to ${offset + pageSize})...`,
|
|
861
|
+
);
|
|
862
|
+
|
|
863
|
+
let pedimentoQuery = supabase
|
|
782
864
|
.from('uploader')
|
|
783
865
|
.select('arela_path')
|
|
784
866
|
.eq('document_type', 'pedimento_simplificado')
|
|
785
867
|
.in('rfc', appConfig.upload.rfcs)
|
|
786
868
|
.not('arela_path', 'is', null);
|
|
787
869
|
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
870
|
+
// Add year filter if UPLOAD_YEARS is configured
|
|
871
|
+
if (appConfig.upload.years && appConfig.upload.years.length > 0) {
|
|
872
|
+
pedimentoQuery = pedimentoQuery.in('year', appConfig.upload.years);
|
|
873
|
+
}
|
|
874
|
+
|
|
875
|
+
const { data: pedimentoPage, error: pedimentoError } =
|
|
876
|
+
await pedimentoQuery
|
|
877
|
+
.range(offset, offset + pageSize - 1)
|
|
878
|
+
.order('created_at');
|
|
879
|
+
|
|
880
|
+
if (pedimentoError) {
|
|
881
|
+
const errorMsg = `Error fetching pedimento RFC records page ${pageNumber}: ${pedimentoError.message}`;
|
|
882
|
+
logger.error(errorMsg);
|
|
883
|
+
throw new Error(errorMsg);
|
|
884
|
+
}
|
|
885
|
+
|
|
886
|
+
if (!pedimentoPage || pedimentoPage.length === 0) {
|
|
887
|
+
hasMorePedimentoData = false;
|
|
888
|
+
logger.info('No more pedimento records found');
|
|
889
|
+
break;
|
|
890
|
+
}
|
|
891
|
+
|
|
892
|
+
logger.info(
|
|
893
|
+
`Processing pedimento page ${pageNumber}: ${pedimentoPage.length} pedimento records`,
|
|
894
|
+
);
|
|
895
|
+
|
|
896
|
+
// Add this page to our collection
|
|
897
|
+
allPedimentoRecords = allPedimentoRecords.concat(pedimentoPage);
|
|
898
|
+
|
|
899
|
+
// Check if we need to fetch the next page
|
|
900
|
+
if (pedimentoPage.length < pageSize) {
|
|
901
|
+
hasMorePedimentoData = false;
|
|
902
|
+
logger.info(
|
|
903
|
+
`Completed fetching pedimento records. Last page ${pageNumber} had ${pedimentoPage.length} records`,
|
|
904
|
+
);
|
|
905
|
+
} else {
|
|
906
|
+
offset += pageSize;
|
|
907
|
+
pageNumber++;
|
|
908
|
+
logger.info(
|
|
909
|
+
`Page ${pageNumber - 1} complete: ${pedimentoPage.length} records fetched, moving to next page...`,
|
|
910
|
+
);
|
|
911
|
+
}
|
|
792
912
|
}
|
|
793
913
|
|
|
794
|
-
if (!
|
|
914
|
+
if (!allPedimentoRecords || allPedimentoRecords.length === 0) {
|
|
795
915
|
console.log(
|
|
796
916
|
'ℹ️ No pedimento_simplificado records found for the specified RFCs with arela_path',
|
|
797
917
|
);
|
|
@@ -799,15 +919,15 @@ export class DatabaseService {
|
|
|
799
919
|
return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
|
|
800
920
|
}
|
|
801
921
|
|
|
802
|
-
// Get unique arela_paths from pedimento records
|
|
922
|
+
// Get unique arela_paths from all pedimento records
|
|
803
923
|
const uniqueArelaPaths = [
|
|
804
|
-
...new Set(
|
|
924
|
+
...new Set(allPedimentoRecords.map((r) => r.arela_path)),
|
|
805
925
|
];
|
|
806
926
|
console.log(
|
|
807
|
-
`📋 Found ${
|
|
927
|
+
`📋 Found ${allPedimentoRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths for specified RFCs across ${pageNumber - 1} pages`,
|
|
808
928
|
);
|
|
809
929
|
logger.info(
|
|
810
|
-
`Found ${
|
|
930
|
+
`Found ${allPedimentoRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths across ${pageNumber - 1} pages`,
|
|
811
931
|
);
|
|
812
932
|
|
|
813
933
|
// Step 2: Get all files with these arela_paths that haven't been uploaded yet
|
|
@@ -872,16 +992,24 @@ export class DatabaseService {
|
|
|
872
992
|
`Found ${rfcRecords.length} files ready for upload, ${skipped} skipped`,
|
|
873
993
|
);
|
|
874
994
|
|
|
875
|
-
// Step 3:
|
|
876
|
-
let
|
|
995
|
+
// Step 3: Process files with streaming pagination to avoid memory overload
|
|
996
|
+
let totalProcessed = 0;
|
|
997
|
+
let totalUploaded = 0;
|
|
998
|
+
let totalErrors = 0;
|
|
999
|
+
let globalFileCount = 0;
|
|
877
1000
|
const arelaPathChunkSize = 50;
|
|
878
|
-
const queryBatchSize =
|
|
1001
|
+
const queryBatchSize = 500; // Reduced batch size for better memory management
|
|
1002
|
+
const batchSize = parseInt(options.batchSize) || 10;
|
|
1003
|
+
|
|
1004
|
+
// Read performance configuration from appConfig
|
|
1005
|
+
const { performance: perfConfig } = appConfig;
|
|
1006
|
+
const maxConcurrency = perfConfig?.maxApiConnections || 3;
|
|
879
1007
|
|
|
880
1008
|
console.log(
|
|
881
|
-
'📥
|
|
1009
|
+
'📥 Processing files with streaming pagination (processing arela_paths in chunks to avoid URI limits and memory overload)...',
|
|
882
1010
|
);
|
|
883
1011
|
|
|
884
|
-
// Process arela_paths in chunks
|
|
1012
|
+
// Process arela_paths in chunks and upload files as we fetch them
|
|
885
1013
|
for (let i = 0; i < uploadableArelaPaths.length; i += arelaPathChunkSize) {
|
|
886
1014
|
const arelaPathChunk = uploadableArelaPaths.slice(
|
|
887
1015
|
i,
|
|
@@ -896,9 +1024,10 @@ export class DatabaseService {
|
|
|
896
1024
|
` Processing arela_path chunk ${chunkNumber}/${totalChunks} (${arelaPathChunk.length} paths)`,
|
|
897
1025
|
);
|
|
898
1026
|
|
|
899
|
-
// For each chunk of arela_paths, use pagination to get
|
|
1027
|
+
// For each chunk of arela_paths, use pagination to get related files and process them immediately
|
|
900
1028
|
let hasMore = true;
|
|
901
1029
|
let offset = 0;
|
|
1030
|
+
let chunkFileCount = 0;
|
|
902
1031
|
|
|
903
1032
|
while (hasMore) {
|
|
904
1033
|
const { data: batch, error: queryError } = await supabase
|
|
@@ -907,7 +1036,8 @@ export class DatabaseService {
|
|
|
907
1036
|
.in('arela_path', arelaPathChunk)
|
|
908
1037
|
.not('original_path', 'is', null)
|
|
909
1038
|
.neq('status', 'file-uploaded')
|
|
910
|
-
.range(offset, offset + queryBatchSize - 1)
|
|
1039
|
+
.range(offset, offset + queryBatchSize - 1)
|
|
1040
|
+
.order('created_at');
|
|
911
1041
|
|
|
912
1042
|
if (queryError) {
|
|
913
1043
|
const errorMsg = `Error fetching related files for chunk ${chunkNumber}: ${queryError.message}`;
|
|
@@ -915,74 +1045,94 @@ export class DatabaseService {
|
|
|
915
1045
|
throw new Error(errorMsg);
|
|
916
1046
|
}
|
|
917
1047
|
|
|
918
|
-
if (batch
|
|
919
|
-
|
|
1048
|
+
if (!batch || batch.length === 0) {
|
|
1049
|
+
hasMore = false;
|
|
1050
|
+
break;
|
|
920
1051
|
}
|
|
921
1052
|
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
}
|
|
925
|
-
}
|
|
1053
|
+
chunkFileCount += batch.length;
|
|
1054
|
+
globalFileCount += batch.length;
|
|
926
1055
|
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
|
|
931
|
-
}
|
|
1056
|
+
console.log(
|
|
1057
|
+
` 📦 Processing batch within chunk ${chunkNumber}: ${batch.length} files (total processed so far: ${globalFileCount})`,
|
|
1058
|
+
);
|
|
932
1059
|
|
|
933
|
-
|
|
934
|
-
|
|
1060
|
+
// Track if any uploads occurred in this batch
|
|
1061
|
+
let uploadsOccurred = false;
|
|
935
1062
|
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
1063
|
+
// Process this batch of files immediately using concurrent processing
|
|
1064
|
+
// Split batch into upload batches
|
|
1065
|
+
for (let j = 0; j < batch.length; j += batchSize) {
|
|
1066
|
+
const uploadBatch = batch.slice(j, j + batchSize);
|
|
1067
|
+
const batchNum = Math.floor(globalFileCount / batchSize) + 1;
|
|
941
1068
|
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
1069
|
+
console.log(
|
|
1070
|
+
`📦 Processing upload batch ${batchNum} (${uploadBatch.length} files)`,
|
|
1071
|
+
);
|
|
945
1072
|
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
1073
|
+
// Process batch using concurrent processing similar to UploadCommand
|
|
1074
|
+
const batchResults = await this.#processRfcBatch(
|
|
1075
|
+
uploadBatch,
|
|
1076
|
+
uploadService,
|
|
1077
|
+
supabase,
|
|
1078
|
+
options,
|
|
1079
|
+
maxConcurrency,
|
|
1080
|
+
);
|
|
952
1081
|
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
const batchNum = Math.floor(i / batchSize) + 1;
|
|
957
|
-
const totalBatches = Math.ceil(allRelatedFiles.length / batchSize);
|
|
1082
|
+
totalProcessed += batchResults.processed;
|
|
1083
|
+
totalUploaded += batchResults.uploaded;
|
|
1084
|
+
totalErrors += batchResults.errors;
|
|
958
1085
|
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
1086
|
+
// Track if uploads occurred (status changes from non-uploaded to uploaded)
|
|
1087
|
+
if (batchResults.uploaded > 0) {
|
|
1088
|
+
uploadsOccurred = true;
|
|
1089
|
+
}
|
|
962
1090
|
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
supabase,
|
|
968
|
-
options,
|
|
969
|
-
maxConcurrency,
|
|
970
|
-
);
|
|
1091
|
+
console.log(
|
|
1092
|
+
`📊 Upload batch complete - Total progress: ${totalUploaded} uploaded, ${totalErrors} errors`,
|
|
1093
|
+
);
|
|
1094
|
+
}
|
|
971
1095
|
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
1096
|
+
// Check if we need more data from this chunk
|
|
1097
|
+
if (batch.length < queryBatchSize) {
|
|
1098
|
+
hasMore = false;
|
|
1099
|
+
} else {
|
|
1100
|
+
// If uploads occurred, reset pagination to start from beginning
|
|
1101
|
+
// since records that matched the query may no longer match after upload
|
|
1102
|
+
if (uploadsOccurred) {
|
|
1103
|
+
offset = 0;
|
|
1104
|
+
console.log(
|
|
1105
|
+
` 📄 Batch complete with uploads: ${batch.length} files processed, restarting pagination from beginning due to query condition changes...`,
|
|
1106
|
+
);
|
|
1107
|
+
} else {
|
|
1108
|
+
offset += queryBatchSize;
|
|
1109
|
+
console.log(
|
|
1110
|
+
` 📄 Batch complete: ${batch.length} files processed, continuing to next page (offset: ${offset})...`,
|
|
1111
|
+
);
|
|
1112
|
+
}
|
|
1113
|
+
}
|
|
1114
|
+
|
|
1115
|
+
if (hasMore) {
|
|
1116
|
+
console.log(
|
|
1117
|
+
` 📄 Fetching more files for chunk ${chunkNumber}... (offset: ${offset})`,
|
|
1118
|
+
);
|
|
1119
|
+
}
|
|
1120
|
+
}
|
|
975
1121
|
|
|
976
|
-
// Progress update
|
|
977
|
-
const progress = (
|
|
978
|
-
((i + batch.length) / allRelatedFiles.length) *
|
|
979
|
-
100
|
|
980
|
-
).toFixed(1);
|
|
981
1122
|
console.log(
|
|
982
|
-
|
|
1123
|
+
` ✅ Chunk ${chunkNumber}/${totalChunks} complete: ${chunkFileCount} files processed`,
|
|
983
1124
|
);
|
|
984
1125
|
}
|
|
985
1126
|
|
|
1127
|
+
if (globalFileCount === 0) {
|
|
1128
|
+
console.log('ℹ️ No related files found to upload');
|
|
1129
|
+
logger.info('No related files found to upload');
|
|
1130
|
+
return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
|
|
1131
|
+
}
|
|
1132
|
+
|
|
1133
|
+
console.log(`📋 Total files processed: ${globalFileCount}`);
|
|
1134
|
+
logger.info(`Total files processed: ${globalFileCount}`);
|
|
1135
|
+
|
|
986
1136
|
const result = {
|
|
987
1137
|
processedCount: totalProcessed,
|
|
988
1138
|
uploadedCount: totalUploaded,
|
|
@@ -1037,13 +1187,25 @@ export class DatabaseService {
|
|
|
1037
1187
|
console.log(
|
|
1038
1188
|
'🎯 Finding pedimento_simplificado documents for specified RFCs with arela_path...',
|
|
1039
1189
|
);
|
|
1040
|
-
|
|
1190
|
+
|
|
1191
|
+
let pedimentoReadyQuery = supabase
|
|
1041
1192
|
.from('uploader')
|
|
1042
1193
|
.select('arela_path')
|
|
1043
1194
|
.eq('document_type', 'pedimento_simplificado')
|
|
1044
1195
|
.in('rfc', uploadRfcs)
|
|
1045
1196
|
.not('arela_path', 'is', null);
|
|
1046
1197
|
|
|
1198
|
+
// Add year filter if UPLOAD_YEARS is configured
|
|
1199
|
+
if (appConfig.upload.years && appConfig.upload.years.length > 0) {
|
|
1200
|
+
pedimentoReadyQuery = pedimentoReadyQuery.in(
|
|
1201
|
+
'year',
|
|
1202
|
+
appConfig.upload.years,
|
|
1203
|
+
);
|
|
1204
|
+
}
|
|
1205
|
+
|
|
1206
|
+
const { data: pedimentoRecords, error: pedimentoError } =
|
|
1207
|
+
await pedimentoReadyQuery;
|
|
1208
|
+
|
|
1047
1209
|
if (pedimentoError) {
|
|
1048
1210
|
throw new Error(
|
|
1049
1211
|
`Error querying pedimento_simplificado records: ${pedimentoError.message}`,
|
package/arela-upload.log
DELETED
|
File without changes
|