@danielarndt0/cnpj-db-loader 2.4.0-beta.1 → 2.4.0-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -8
- package/dist/cli.js +856 -137
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +33 -1
- package/dist/index.js +782 -133
- package/dist/index.js.map +1 -1
- package/docs/architecture.md +1 -1
- package/docs/cli.md +1 -1
- package/docs/commands.md +6 -2
- package/docs/postgres-direct.md +239 -45
- package/docs/releases/v2.4.0-beta.3.md +42 -0
- package/docs/sanitize.md +52 -16
- package/package.json +3 -3
- package/docs/releases/v2.4.0.md +0 -40
package/dist/index.d.ts
CHANGED
|
@@ -582,6 +582,8 @@ declare function showQuarantineRow(id: number, options?: {
|
|
|
582
582
|
dbUrl?: string;
|
|
583
583
|
}): Promise<QuarantineRecord>;
|
|
584
584
|
|
|
585
|
+
type SanitizeSourceEncoding = "WIN1252" | "LATIN1" | "UTF8";
|
|
586
|
+
|
|
585
587
|
type SanitizeDatasetType = Exclude<DatasetType, "zip-archive" | "unknown">;
|
|
586
588
|
type SanitizeFilePlan = {
|
|
587
589
|
dataset: SanitizeDatasetType;
|
|
@@ -607,7 +609,10 @@ type SanitizeSummary = {
|
|
|
607
609
|
totalBytes: number;
|
|
608
610
|
processedFiles: number;
|
|
609
611
|
processedRows: number;
|
|
612
|
+
sourceEncoding: SanitizeSourceEncoding;
|
|
610
613
|
nulBytesRemoved: number;
|
|
614
|
+
invalidBytesRemoved: number;
|
|
615
|
+
controlCharsRemoved: number;
|
|
611
616
|
changedFiles: number;
|
|
612
617
|
unchangedFiles: number;
|
|
613
618
|
datasets: SanitizeDatasetType[];
|
|
@@ -618,6 +623,8 @@ type SanitizeSummary = {
|
|
|
618
623
|
lineCount: number;
|
|
619
624
|
changed: boolean;
|
|
620
625
|
nulBytesRemoved: number;
|
|
626
|
+
invalidBytesRemoved: number;
|
|
627
|
+
controlCharsRemoved: number;
|
|
621
628
|
}>;
|
|
622
629
|
warnings: string[];
|
|
623
630
|
nextStep?: string | undefined;
|
|
@@ -629,6 +636,7 @@ type SanitizeProgressEvent = {
|
|
|
629
636
|
totalFiles: number;
|
|
630
637
|
totalBytes: number;
|
|
631
638
|
datasets: SanitizeDatasetType[];
|
|
639
|
+
sourceEncoding: SanitizeSourceEncoding;
|
|
632
640
|
} | {
|
|
633
641
|
kind: "progress";
|
|
634
642
|
currentFileDisplayPath: string;
|
|
@@ -640,12 +648,16 @@ type SanitizeProgressEvent = {
|
|
|
640
648
|
currentFileSize: number;
|
|
641
649
|
processedRows: number;
|
|
642
650
|
nulBytesRemoved: number;
|
|
651
|
+
invalidBytesRemoved: number;
|
|
652
|
+
controlCharsRemoved: number;
|
|
643
653
|
changedFiles: number;
|
|
644
654
|
} | {
|
|
645
655
|
kind: "finish";
|
|
646
656
|
totalFiles: number;
|
|
647
657
|
processedRows: number;
|
|
648
658
|
nulBytesRemoved: number;
|
|
659
|
+
invalidBytesRemoved: number;
|
|
660
|
+
controlCharsRemoved: number;
|
|
649
661
|
changedFiles: number;
|
|
650
662
|
totalBytes: number;
|
|
651
663
|
};
|
|
@@ -653,6 +665,7 @@ type SanitizeProgressListener = (event: SanitizeProgressEvent) => void;
|
|
|
653
665
|
type SanitizeOptions = {
|
|
654
666
|
outputPath?: string | undefined;
|
|
655
667
|
dataset?: SanitizeDatasetType | undefined;
|
|
668
|
+
sourceEncoding?: string | undefined;
|
|
656
669
|
onProgress?: SanitizeProgressListener | undefined;
|
|
657
670
|
};
|
|
658
671
|
|
|
@@ -1039,6 +1052,14 @@ type PostgresDirectScriptDatasetSummary = {
|
|
|
1039
1052
|
totalBytes: number;
|
|
1040
1053
|
sourceFiles: string[];
|
|
1041
1054
|
};
|
|
1055
|
+
type PostgresDirectTransactionMode = "single" | "phase" | "none";
|
|
1056
|
+
type PostgresDirectIncludeTarget = "domains" | "companies" | "establishments" | "partners" | "simples" | "secondary-cnaes" | "indexes" | "analyze";
|
|
1057
|
+
type PostgresDirectScriptStep = {
|
|
1058
|
+
name: string;
|
|
1059
|
+
file: string;
|
|
1060
|
+
dependsOn: string[];
|
|
1061
|
+
included: boolean;
|
|
1062
|
+
};
|
|
1042
1063
|
type PostgresDirectScriptProgressEvent = {
|
|
1043
1064
|
kind: "start";
|
|
1044
1065
|
inputPath: string;
|
|
@@ -1047,6 +1068,10 @@ type PostgresDirectScriptProgressEvent = {
|
|
|
1047
1068
|
totalFiles: number;
|
|
1048
1069
|
datasets: ImportDatasetType[];
|
|
1049
1070
|
sourceEncoding: string;
|
|
1071
|
+
transactionMode: PostgresDirectTransactionMode;
|
|
1072
|
+
include: PostgresDirectIncludeTarget[];
|
|
1073
|
+
skipIndexes: boolean;
|
|
1074
|
+
skipAnalyze: boolean;
|
|
1050
1075
|
} | {
|
|
1051
1076
|
kind: "file_registered";
|
|
1052
1077
|
dataset: ImportDatasetType;
|
|
@@ -1067,6 +1092,10 @@ type PostgresDirectScriptOptions = {
|
|
|
1067
1092
|
dataset?: ImportDatasetType | undefined;
|
|
1068
1093
|
scriptName?: string | undefined;
|
|
1069
1094
|
sourceEncoding?: string | undefined;
|
|
1095
|
+
transactionMode?: PostgresDirectTransactionMode | undefined;
|
|
1096
|
+
include?: PostgresDirectIncludeTarget[] | undefined;
|
|
1097
|
+
skipIndexes?: boolean | undefined;
|
|
1098
|
+
skipAnalyze?: boolean | undefined;
|
|
1070
1099
|
onProgress?: PostgresDirectScriptProgressListener | undefined;
|
|
1071
1100
|
};
|
|
1072
1101
|
type PostgresDirectScriptSummary = {
|
|
@@ -1076,9 +1105,12 @@ type PostgresDirectScriptSummary = {
|
|
|
1076
1105
|
scriptPath: string;
|
|
1077
1106
|
manifestPath: string;
|
|
1078
1107
|
sourceEncoding: string;
|
|
1108
|
+
transactionMode: PostgresDirectTransactionMode;
|
|
1079
1109
|
totalFiles: number;
|
|
1080
1110
|
totalBytes: number;
|
|
1081
1111
|
datasets: PostgresDirectScriptDatasetSummary[];
|
|
1112
|
+
scriptFiles: string[];
|
|
1113
|
+
steps: PostgresDirectScriptStep[];
|
|
1082
1114
|
warnings: string[];
|
|
1083
1115
|
nextStep?: string | undefined;
|
|
1084
1116
|
};
|
|
@@ -1087,4 +1119,4 @@ declare function exportPostgresCsvDataset(inputPath: string, options?: PostgresC
|
|
|
1087
1119
|
|
|
1088
1120
|
declare function generatePostgresDirectScript(inputPath: string, options?: PostgresDirectScriptOptions): Promise<PostgresDirectScriptSummary>;
|
|
1089
1121
|
|
|
1090
|
-
export { type AppConfig, type AppEnvironment, AppError, type CheckpointCleanupPhase, DEFAULT_FEDERAL_REVENUE_DOWNLOAD_ROOT, DEFAULT_FEDERAL_REVENUE_SHARE_TOKEN, DEFAULT_FEDERAL_REVENUE_USER_AGENT, DEFAULT_FEDERAL_REVENUE_WEBDAV_URL, type DatabaseCleanupSummary, type DatabaseConfig, type DatasetBlock, type DatasetType, type ExtractionEntry, type ExtractionProgressEvent, type ExtractionProgressListener, type ExtractionSummary, FEDERAL_REVENUE_CONTROL_DIR, FEDERAL_REVENUE_CONTROL_SCOPE, FEDERAL_REVENUE_MANIFEST_VERSION, type Failure, type FederalRevenueCheckOptions, type FederalRevenueCheckSummary, type FederalRevenueCleanMode, type FederalRevenueCleanOptions, type FederalRevenueCleanSummary, type FederalRevenueClientOptions, type FederalRevenueDownloadEntry, type FederalRevenueDownloadOptions, type FederalRevenueDownloadProgressEvent, type FederalRevenueDownloadProgressListener, type FederalRevenueDownloadStatus, type FederalRevenueDownloadSummary, type FederalRevenueFile, type FederalRevenueLocalFileStatus, type FederalRevenueLocalStatusEntry, type FederalRevenueLockFile, type FederalRevenueManifest, type FederalRevenueManifestFile, type FederalRevenueManifestLastCommand, type FederalRevenueManifestLastStatus, type FederalRevenueReference, type FederalRevenueReferenceMode, type FederalRevenueReferenceSelection, type FederalRevenueRetryOptions, type FederalRevenueStatusOptions, type FederalRevenueStatusSummary, type FederalRevenueSyncLockOptions, type FederalRevenueSyncOptions, type FederalRevenueSyncSummary, type FileInspection, type ImportCheckpointRecord, type ImportCheckpointStatus, type ImportDatasetPlan, type ImportDatasetType, type ImportFilePlan, type ImportOptions, type ImportPerformanceSummary, type ImportPhaseStatus, type ImportPlanRecord, type ImportProgressEvent, type ImportProgressListener, type ImportSchemaCapabilities, type ImportSummary, type InputDetectionMode, type InputMode, type InspectSummary, type LogLevel, type LogStatus, type PostgresCsvDatasetSummary, type PostgresCsvExportOptions, type PostgresCsvExportProgressEvent, type PostgresCsvExportProgressListener, type PostgresCsvExportSummary, type PostgresCsvFile, type PostgresDirectScriptDatasetSummary, type PostgresDirectScriptOptions, type PostgresDirectScriptProgressEvent, type PostgresDirectScriptProgressListener, type PostgresDirectScriptSummary, type PostgresDirectSourceFile, type QuarantineListFilters, type QuarantineListSummary, type QuarantineRecord, type QuarantineStatsFilters, type QuarantineStatsSummary, type Result, type SanitizeDatasetType, type SanitizeOptions, type SanitizePlan, type SanitizeProgressEvent, type SanitizeProgressListener, type SanitizeSummary, type SchemaGenerationOptions, type SchemaProfile, ServiceError, type StructuredLogEntry, type SupportedOs, ValidationError, type ValidationSummary, appendJsonLinesLog, assertPostgresUrl, buildFederalRevenueDownloadHeaders, buildFederalRevenueReferenceOutputPath, checkFederalRevenueDataset, cleanFederalRevenueDataset, cleanupDatabaseCheckpointsData, cleanupDatabaseMaterializedData, cleanupDatabasePlansData, cleanupDatabaseStagingData, createFederalRevenueManifest, createJsonLinesLog, defaultExtractedOutputPath, detectOs, downloadFederalRevenueDataset, ensureDirectory, evaluateFederalRevenueManifestFile, evaluateFederalRevenueManifestFiles, exportPostgresCsvDataset, extractArchives, finalizeFederalRevenueManifest, generatePostgresDirectScript, generateSchemaSql, getAllLayouts, getCurrentFederalRevenueReference, getFederalRevenueControlDirectory, getFederalRevenueManifestPath, getFederalRevenueStatus, getFederalRevenueSyncLockPath, getLayoutSummary, getLogsDirectoryPath, getQuarantineStats, getUserAppDirectoryPath, importDataToDatabase, inspectFiles, listFederalRevenueFiles, listFederalRevenueReferences, listQuarantineRows, loadImportDataToStaging, materializeImportedData, prettyJson, readDatabaseConfig, readFederalRevenueManifest, resetDefaultDbUrl, resolveDatabaseUrl, resolveDbUrl, resolveFederalRevenueReference, resolveInputMode, resolveSchemaProfile, retryFederalRevenueDataset, runDoctor, safeReadText, safeWriteText, sanitizeInputDirectory, setDefaultDbUrl, showQuarantineRow, syncFederalRevenueDataset, testDatabaseConnection, toTitleCase, updateFederalRevenueManifestFile, validateFederalRevenueReference, validateInputDirectory, withFederalRevenueSyncLock, writeCommandFailureLog, writeCommandLog, writeDatabaseConfig, writeFederalRevenueManifest, writeSchemaFile };
|
|
1122
|
+
export { type AppConfig, type AppEnvironment, AppError, type CheckpointCleanupPhase, DEFAULT_FEDERAL_REVENUE_DOWNLOAD_ROOT, DEFAULT_FEDERAL_REVENUE_SHARE_TOKEN, DEFAULT_FEDERAL_REVENUE_USER_AGENT, DEFAULT_FEDERAL_REVENUE_WEBDAV_URL, type DatabaseCleanupSummary, type DatabaseConfig, type DatasetBlock, type DatasetType, type ExtractionEntry, type ExtractionProgressEvent, type ExtractionProgressListener, type ExtractionSummary, FEDERAL_REVENUE_CONTROL_DIR, FEDERAL_REVENUE_CONTROL_SCOPE, FEDERAL_REVENUE_MANIFEST_VERSION, type Failure, type FederalRevenueCheckOptions, type FederalRevenueCheckSummary, type FederalRevenueCleanMode, type FederalRevenueCleanOptions, type FederalRevenueCleanSummary, type FederalRevenueClientOptions, type FederalRevenueDownloadEntry, type FederalRevenueDownloadOptions, type FederalRevenueDownloadProgressEvent, type FederalRevenueDownloadProgressListener, type FederalRevenueDownloadStatus, type FederalRevenueDownloadSummary, type FederalRevenueFile, type FederalRevenueLocalFileStatus, type FederalRevenueLocalStatusEntry, type FederalRevenueLockFile, type FederalRevenueManifest, type FederalRevenueManifestFile, type FederalRevenueManifestLastCommand, type FederalRevenueManifestLastStatus, type FederalRevenueReference, type FederalRevenueReferenceMode, type FederalRevenueReferenceSelection, type FederalRevenueRetryOptions, type FederalRevenueStatusOptions, type FederalRevenueStatusSummary, type FederalRevenueSyncLockOptions, type FederalRevenueSyncOptions, type FederalRevenueSyncSummary, type FileInspection, type ImportCheckpointRecord, type ImportCheckpointStatus, type ImportDatasetPlan, type ImportDatasetType, type ImportFilePlan, type ImportOptions, type ImportPerformanceSummary, type ImportPhaseStatus, type ImportPlanRecord, type ImportProgressEvent, type ImportProgressListener, type ImportSchemaCapabilities, type ImportSummary, type InputDetectionMode, type InputMode, type InspectSummary, type LogLevel, type LogStatus, type PostgresCsvDatasetSummary, type PostgresCsvExportOptions, type PostgresCsvExportProgressEvent, type PostgresCsvExportProgressListener, type PostgresCsvExportSummary, type PostgresCsvFile, type PostgresDirectIncludeTarget, type PostgresDirectScriptDatasetSummary, type PostgresDirectScriptOptions, type PostgresDirectScriptProgressEvent, type PostgresDirectScriptProgressListener, type PostgresDirectScriptStep, type PostgresDirectScriptSummary, type PostgresDirectSourceFile, type PostgresDirectTransactionMode, type QuarantineListFilters, type QuarantineListSummary, type QuarantineRecord, type QuarantineStatsFilters, type QuarantineStatsSummary, type Result, type SanitizeDatasetType, type SanitizeOptions, type SanitizePlan, type SanitizeProgressEvent, type SanitizeProgressListener, type SanitizeSourceEncoding, type SanitizeSummary, type SchemaGenerationOptions, type SchemaProfile, ServiceError, type StructuredLogEntry, type SupportedOs, ValidationError, type ValidationSummary, appendJsonLinesLog, assertPostgresUrl, buildFederalRevenueDownloadHeaders, buildFederalRevenueReferenceOutputPath, checkFederalRevenueDataset, cleanFederalRevenueDataset, cleanupDatabaseCheckpointsData, cleanupDatabaseMaterializedData, cleanupDatabasePlansData, cleanupDatabaseStagingData, createFederalRevenueManifest, createJsonLinesLog, defaultExtractedOutputPath, detectOs, downloadFederalRevenueDataset, ensureDirectory, evaluateFederalRevenueManifestFile, evaluateFederalRevenueManifestFiles, exportPostgresCsvDataset, extractArchives, finalizeFederalRevenueManifest, generatePostgresDirectScript, generateSchemaSql, getAllLayouts, getCurrentFederalRevenueReference, getFederalRevenueControlDirectory, getFederalRevenueManifestPath, getFederalRevenueStatus, getFederalRevenueSyncLockPath, getLayoutSummary, getLogsDirectoryPath, getQuarantineStats, getUserAppDirectoryPath, importDataToDatabase, inspectFiles, listFederalRevenueFiles, listFederalRevenueReferences, listQuarantineRows, loadImportDataToStaging, materializeImportedData, prettyJson, readDatabaseConfig, readFederalRevenueManifest, resetDefaultDbUrl, resolveDatabaseUrl, resolveDbUrl, resolveFederalRevenueReference, resolveInputMode, resolveSchemaProfile, retryFederalRevenueDataset, runDoctor, safeReadText, safeWriteText, sanitizeInputDirectory, setDefaultDbUrl, showQuarantineRow, syncFederalRevenueDataset, testDatabaseConnection, toTitleCase, updateFederalRevenueManifestFile, validateFederalRevenueReference, validateInputDirectory, withFederalRevenueSyncLock, writeCommandFailureLog, writeCommandLog, writeDatabaseConfig, writeFederalRevenueManifest, writeSchemaFile };
|