@danielarndt0/cnpj-db-loader 2.3.1 → 2.4.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -2
- package/dist/cli.js +1544 -157
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +134 -1
- package/dist/index.js +1174 -58
- package/dist/index.js.map +1 -1
- package/docs/architecture.md +9 -1
- package/docs/cli.md +1 -1
- package/docs/commands.md +23 -0
- package/docs/postgres-direct.md +138 -0
- package/docs/sanitize.md +52 -16
- package/docs/usage.md +14 -0
- package/package.json +3 -3
package/dist/index.d.ts
CHANGED
|
@@ -582,6 +582,8 @@ declare function showQuarantineRow(id: number, options?: {
|
|
|
582
582
|
dbUrl?: string;
|
|
583
583
|
}): Promise<QuarantineRecord>;
|
|
584
584
|
|
|
585
|
+
type SanitizeSourceEncoding = "WIN1252" | "LATIN1" | "UTF8";
|
|
586
|
+
|
|
585
587
|
type SanitizeDatasetType = Exclude<DatasetType, "zip-archive" | "unknown">;
|
|
586
588
|
type SanitizeFilePlan = {
|
|
587
589
|
dataset: SanitizeDatasetType;
|
|
@@ -607,7 +609,10 @@ type SanitizeSummary = {
|
|
|
607
609
|
totalBytes: number;
|
|
608
610
|
processedFiles: number;
|
|
609
611
|
processedRows: number;
|
|
612
|
+
sourceEncoding: SanitizeSourceEncoding;
|
|
610
613
|
nulBytesRemoved: number;
|
|
614
|
+
invalidBytesRemoved: number;
|
|
615
|
+
controlCharsRemoved: number;
|
|
611
616
|
changedFiles: number;
|
|
612
617
|
unchangedFiles: number;
|
|
613
618
|
datasets: SanitizeDatasetType[];
|
|
@@ -618,6 +623,8 @@ type SanitizeSummary = {
|
|
|
618
623
|
lineCount: number;
|
|
619
624
|
changed: boolean;
|
|
620
625
|
nulBytesRemoved: number;
|
|
626
|
+
invalidBytesRemoved: number;
|
|
627
|
+
controlCharsRemoved: number;
|
|
621
628
|
}>;
|
|
622
629
|
warnings: string[];
|
|
623
630
|
nextStep?: string | undefined;
|
|
@@ -629,6 +636,7 @@ type SanitizeProgressEvent = {
|
|
|
629
636
|
totalFiles: number;
|
|
630
637
|
totalBytes: number;
|
|
631
638
|
datasets: SanitizeDatasetType[];
|
|
639
|
+
sourceEncoding: SanitizeSourceEncoding;
|
|
632
640
|
} | {
|
|
633
641
|
kind: "progress";
|
|
634
642
|
currentFileDisplayPath: string;
|
|
@@ -640,12 +648,16 @@ type SanitizeProgressEvent = {
|
|
|
640
648
|
currentFileSize: number;
|
|
641
649
|
processedRows: number;
|
|
642
650
|
nulBytesRemoved: number;
|
|
651
|
+
invalidBytesRemoved: number;
|
|
652
|
+
controlCharsRemoved: number;
|
|
643
653
|
changedFiles: number;
|
|
644
654
|
} | {
|
|
645
655
|
kind: "finish";
|
|
646
656
|
totalFiles: number;
|
|
647
657
|
processedRows: number;
|
|
648
658
|
nulBytesRemoved: number;
|
|
659
|
+
invalidBytesRemoved: number;
|
|
660
|
+
controlCharsRemoved: number;
|
|
649
661
|
changedFiles: number;
|
|
650
662
|
totalBytes: number;
|
|
651
663
|
};
|
|
@@ -653,6 +665,7 @@ type SanitizeProgressListener = (event: SanitizeProgressEvent) => void;
|
|
|
653
665
|
type SanitizeOptions = {
|
|
654
666
|
outputPath?: string | undefined;
|
|
655
667
|
dataset?: SanitizeDatasetType | undefined;
|
|
668
|
+
sourceEncoding?: string | undefined;
|
|
656
669
|
onProgress?: SanitizeProgressListener | undefined;
|
|
657
670
|
};
|
|
658
671
|
|
|
@@ -967,4 +980,124 @@ declare function finalizeFederalRevenueManifest(outputPath: string, lastStatus:
|
|
|
967
980
|
|
|
968
981
|
declare function syncFederalRevenueDataset(options?: FederalRevenueSyncOptions): Promise<FederalRevenueSyncSummary>;
|
|
969
982
|
|
|
970
|
-
|
|
983
|
+
type PostgresCsvDatasetSummary = {
|
|
984
|
+
dataset: ImportDatasetType;
|
|
985
|
+
files: number;
|
|
986
|
+
rows: number;
|
|
987
|
+
outputFiles: string[];
|
|
988
|
+
};
|
|
989
|
+
type PostgresCsvExportProgressEvent = {
|
|
990
|
+
kind: "start";
|
|
991
|
+
inputPath: string;
|
|
992
|
+
validatedPath: string;
|
|
993
|
+
outputPath: string;
|
|
994
|
+
totalFiles: number;
|
|
995
|
+
datasets: ImportDatasetType[];
|
|
996
|
+
} | {
|
|
997
|
+
kind: "file_start";
|
|
998
|
+
dataset: ImportDatasetType;
|
|
999
|
+
fileIndex: number;
|
|
1000
|
+
totalFiles: number;
|
|
1001
|
+
inputFile: string;
|
|
1002
|
+
outputFile: string;
|
|
1003
|
+
} | {
|
|
1004
|
+
kind: "file_finish";
|
|
1005
|
+
dataset: ImportDatasetType;
|
|
1006
|
+
fileIndex: number;
|
|
1007
|
+
totalFiles: number;
|
|
1008
|
+
inputFile: string;
|
|
1009
|
+
outputFile: string;
|
|
1010
|
+
rows: number;
|
|
1011
|
+
} | {
|
|
1012
|
+
kind: "finish";
|
|
1013
|
+
outputPath: string;
|
|
1014
|
+
scriptPath: string;
|
|
1015
|
+
totalFiles: number;
|
|
1016
|
+
totalRows: number;
|
|
1017
|
+
};
|
|
1018
|
+
type PostgresCsvExportProgressListener = (event: PostgresCsvExportProgressEvent) => void;
|
|
1019
|
+
type PostgresCsvExportOptions = {
|
|
1020
|
+
outputPath?: string | undefined;
|
|
1021
|
+
dataset?: ImportDatasetType | undefined;
|
|
1022
|
+
scriptName?: string | undefined;
|
|
1023
|
+
onProgress?: PostgresCsvExportProgressListener | undefined;
|
|
1024
|
+
};
|
|
1025
|
+
type PostgresCsvExportSummary = {
|
|
1026
|
+
inputPath: string;
|
|
1027
|
+
validatedPath: string;
|
|
1028
|
+
outputPath: string;
|
|
1029
|
+
scriptPath: string;
|
|
1030
|
+
manifestPath: string;
|
|
1031
|
+
totalFiles: number;
|
|
1032
|
+
totalRows: number;
|
|
1033
|
+
datasets: PostgresCsvDatasetSummary[];
|
|
1034
|
+
warnings: string[];
|
|
1035
|
+
nextStep?: string | undefined;
|
|
1036
|
+
};
|
|
1037
|
+
type PostgresCsvFile = {
|
|
1038
|
+
dataset: ImportDatasetType;
|
|
1039
|
+
absolutePath: string;
|
|
1040
|
+
relativePath: string;
|
|
1041
|
+
rowCount: number;
|
|
1042
|
+
};
|
|
1043
|
+
type PostgresDirectSourceFile = {
|
|
1044
|
+
dataset: ImportDatasetType;
|
|
1045
|
+
absolutePath: string;
|
|
1046
|
+
relativePath: string;
|
|
1047
|
+
fileSize: number;
|
|
1048
|
+
};
|
|
1049
|
+
type PostgresDirectScriptDatasetSummary = {
|
|
1050
|
+
dataset: ImportDatasetType;
|
|
1051
|
+
files: number;
|
|
1052
|
+
totalBytes: number;
|
|
1053
|
+
sourceFiles: string[];
|
|
1054
|
+
};
|
|
1055
|
+
type PostgresDirectScriptProgressEvent = {
|
|
1056
|
+
kind: "start";
|
|
1057
|
+
inputPath: string;
|
|
1058
|
+
validatedPath: string;
|
|
1059
|
+
outputPath: string;
|
|
1060
|
+
totalFiles: number;
|
|
1061
|
+
datasets: ImportDatasetType[];
|
|
1062
|
+
sourceEncoding: string;
|
|
1063
|
+
} | {
|
|
1064
|
+
kind: "file_registered";
|
|
1065
|
+
dataset: ImportDatasetType;
|
|
1066
|
+
fileIndex: number;
|
|
1067
|
+
totalFiles: number;
|
|
1068
|
+
inputFile: string;
|
|
1069
|
+
fileSize: number;
|
|
1070
|
+
} | {
|
|
1071
|
+
kind: "finish";
|
|
1072
|
+
outputPath: string;
|
|
1073
|
+
scriptPath: string;
|
|
1074
|
+
totalFiles: number;
|
|
1075
|
+
totalBytes: number;
|
|
1076
|
+
};
|
|
1077
|
+
type PostgresDirectScriptProgressListener = (event: PostgresDirectScriptProgressEvent) => void;
|
|
1078
|
+
type PostgresDirectScriptOptions = {
|
|
1079
|
+
outputPath?: string | undefined;
|
|
1080
|
+
dataset?: ImportDatasetType | undefined;
|
|
1081
|
+
scriptName?: string | undefined;
|
|
1082
|
+
sourceEncoding?: string | undefined;
|
|
1083
|
+
onProgress?: PostgresDirectScriptProgressListener | undefined;
|
|
1084
|
+
};
|
|
1085
|
+
type PostgresDirectScriptSummary = {
|
|
1086
|
+
inputPath: string;
|
|
1087
|
+
validatedPath: string;
|
|
1088
|
+
outputPath: string;
|
|
1089
|
+
scriptPath: string;
|
|
1090
|
+
manifestPath: string;
|
|
1091
|
+
sourceEncoding: string;
|
|
1092
|
+
totalFiles: number;
|
|
1093
|
+
totalBytes: number;
|
|
1094
|
+
datasets: PostgresDirectScriptDatasetSummary[];
|
|
1095
|
+
warnings: string[];
|
|
1096
|
+
nextStep?: string | undefined;
|
|
1097
|
+
};
|
|
1098
|
+
|
|
1099
|
+
declare function exportPostgresCsvDataset(inputPath: string, options?: PostgresCsvExportOptions): Promise<PostgresCsvExportSummary>;
|
|
1100
|
+
|
|
1101
|
+
declare function generatePostgresDirectScript(inputPath: string, options?: PostgresDirectScriptOptions): Promise<PostgresDirectScriptSummary>;
|
|
1102
|
+
|
|
1103
|
+
export { type AppConfig, type AppEnvironment, AppError, type CheckpointCleanupPhase, DEFAULT_FEDERAL_REVENUE_DOWNLOAD_ROOT, DEFAULT_FEDERAL_REVENUE_SHARE_TOKEN, DEFAULT_FEDERAL_REVENUE_USER_AGENT, DEFAULT_FEDERAL_REVENUE_WEBDAV_URL, type DatabaseCleanupSummary, type DatabaseConfig, type DatasetBlock, type DatasetType, type ExtractionEntry, type ExtractionProgressEvent, type ExtractionProgressListener, type ExtractionSummary, FEDERAL_REVENUE_CONTROL_DIR, FEDERAL_REVENUE_CONTROL_SCOPE, FEDERAL_REVENUE_MANIFEST_VERSION, type Failure, type FederalRevenueCheckOptions, type FederalRevenueCheckSummary, type FederalRevenueCleanMode, type FederalRevenueCleanOptions, type FederalRevenueCleanSummary, type FederalRevenueClientOptions, type FederalRevenueDownloadEntry, type FederalRevenueDownloadOptions, type FederalRevenueDownloadProgressEvent, type FederalRevenueDownloadProgressListener, type FederalRevenueDownloadStatus, type FederalRevenueDownloadSummary, type FederalRevenueFile, type FederalRevenueLocalFileStatus, type FederalRevenueLocalStatusEntry, type FederalRevenueLockFile, type FederalRevenueManifest, type FederalRevenueManifestFile, type FederalRevenueManifestLastCommand, type FederalRevenueManifestLastStatus, type FederalRevenueReference, type FederalRevenueReferenceMode, type FederalRevenueReferenceSelection, type FederalRevenueRetryOptions, type FederalRevenueStatusOptions, type FederalRevenueStatusSummary, type FederalRevenueSyncLockOptions, type FederalRevenueSyncOptions, type FederalRevenueSyncSummary, type FileInspection, type ImportCheckpointRecord, type ImportCheckpointStatus, type ImportDatasetPlan, type ImportDatasetType, type ImportFilePlan, type ImportOptions, type ImportPerformanceSummary, type ImportPhaseStatus, type ImportPlanRecord, type ImportProgressEvent, type ImportProgressListener, type ImportSchemaCapabilities, type ImportSummary, type InputDetectionMode, type InputMode, type InspectSummary, type LogLevel, type LogStatus, type PostgresCsvDatasetSummary, type PostgresCsvExportOptions, type PostgresCsvExportProgressEvent, type PostgresCsvExportProgressListener, type PostgresCsvExportSummary, type PostgresCsvFile, type PostgresDirectScriptDatasetSummary, type PostgresDirectScriptOptions, type PostgresDirectScriptProgressEvent, type PostgresDirectScriptProgressListener, type PostgresDirectScriptSummary, type PostgresDirectSourceFile, type QuarantineListFilters, type QuarantineListSummary, type QuarantineRecord, type QuarantineStatsFilters, type QuarantineStatsSummary, type Result, type SanitizeDatasetType, type SanitizeOptions, type SanitizePlan, type SanitizeProgressEvent, type SanitizeProgressListener, type SanitizeSourceEncoding, type SanitizeSummary, type SchemaGenerationOptions, type SchemaProfile, ServiceError, type StructuredLogEntry, type SupportedOs, ValidationError, type ValidationSummary, appendJsonLinesLog, assertPostgresUrl, buildFederalRevenueDownloadHeaders, buildFederalRevenueReferenceOutputPath, checkFederalRevenueDataset, cleanFederalRevenueDataset, cleanupDatabaseCheckpointsData, cleanupDatabaseMaterializedData, cleanupDatabasePlansData, cleanupDatabaseStagingData, createFederalRevenueManifest, createJsonLinesLog, defaultExtractedOutputPath, detectOs, downloadFederalRevenueDataset, ensureDirectory, evaluateFederalRevenueManifestFile, evaluateFederalRevenueManifestFiles, exportPostgresCsvDataset, extractArchives, finalizeFederalRevenueManifest, generatePostgresDirectScript, generateSchemaSql, getAllLayouts, getCurrentFederalRevenueReference, getFederalRevenueControlDirectory, getFederalRevenueManifestPath, getFederalRevenueStatus, getFederalRevenueSyncLockPath, getLayoutSummary, getLogsDirectoryPath, getQuarantineStats, getUserAppDirectoryPath, importDataToDatabase, inspectFiles, listFederalRevenueFiles, listFederalRevenueReferences, listQuarantineRows, loadImportDataToStaging, materializeImportedData, prettyJson, readDatabaseConfig, readFederalRevenueManifest, resetDefaultDbUrl, resolveDatabaseUrl, resolveDbUrl, resolveFederalRevenueReference, resolveInputMode, resolveSchemaProfile, retryFederalRevenueDataset, runDoctor, safeReadText, safeWriteText, sanitizeInputDirectory, setDefaultDbUrl, showQuarantineRow, syncFederalRevenueDataset, testDatabaseConnection, toTitleCase, updateFederalRevenueManifestFile, validateFederalRevenueReference, validateInputDirectory, withFederalRevenueSyncLock, writeCommandFailureLog, writeCommandLog, writeDatabaseConfig, writeFederalRevenueManifest, writeSchemaFile };
|