@danielarndt0/cnpj-db-loader 2.4.0-beta.1 → 2.4.0-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -8
- package/dist/cli.js +856 -137
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +33 -1
- package/dist/index.js +782 -133
- package/dist/index.js.map +1 -1
- package/docs/architecture.md +1 -1
- package/docs/cli.md +1 -1
- package/docs/commands.md +6 -2
- package/docs/postgres-direct.md +239 -45
- package/docs/releases/v2.4.0-beta.3.md +42 -0
- package/docs/sanitize.md +52 -16
- package/package.json +3 -3
- package/docs/releases/v2.4.0.md +0 -40
package/dist/index.js
CHANGED
|
@@ -7853,81 +7853,264 @@ function isRecognizedSanitizeEntry(entry) {
|
|
|
7853
7853
|
return entry.entryKind === "file" && entry.inferredType !== "zip-archive" && entry.inferredType !== "unknown";
|
|
7854
7854
|
}
|
|
7855
7855
|
|
|
7856
|
+
// src/services/sanitize/encoding.ts
|
|
7857
|
+
import { StringDecoder } from "string_decoder";
|
|
7858
|
+
// Windows-1252 code points for bytes 0x80-0x9F, keyed by byte value.
// Bytes missing from this table (0x81, 0x8D, 0x8F, 0x90, 0x9D) have no
// entry, so a lookup for them yields undefined.
var WINDOWS_1252_C1_MAP = {
  0x80: "\u20AC", // euro sign
  0x82: "\u201A", // single low-9 quotation mark
  0x83: "\u0192", // latin small letter f with hook
  0x84: "\u201E", // double low-9 quotation mark
  0x85: "\u2026", // horizontal ellipsis
  0x86: "\u2020", // dagger
  0x87: "\u2021", // double dagger
  0x88: "\u02C6", // modifier letter circumflex accent
  0x89: "\u2030", // per mille sign
  0x8a: "\u0160", // latin capital letter S with caron
  0x8b: "\u2039", // single left-pointing angle quotation mark
  0x8c: "\u0152", // latin capital ligature OE
  0x8e: "\u017D", // latin capital letter Z with caron
  0x91: "\u2018", // left single quotation mark
  0x92: "\u2019", // right single quotation mark
  0x93: "\u201C", // left double quotation mark
  0x94: "\u201D", // right double quotation mark
  0x95: "\u2022", // bullet
  0x96: "\u2013", // en dash
  0x97: "\u2014", // em dash
  0x98: "\u02DC", // small tilde
  0x99: "\u2122", // trade mark sign
  0x9a: "\u0161", // latin small letter s with caron
  0x9b: "\u203A", // single right-pointing angle quotation mark
  0x9c: "\u0153", // latin small ligature oe
  0x9e: "\u017E", // latin small letter z with caron
  0x9f: "\u0178" // latin capital letter Y with diaeresis
};
|
|
7887
|
+
/**
 * Resolves a user-supplied encoding label to a canonical name.
 *
 * The label is trimmed, upper-cased and has underscores turned into hyphens
 * before it is matched, so "windows_1252" and " Latin-1 " are accepted.
 *
 * @param {string | null | undefined} value - Encoding label; null/undefined
 *   falls back to "WIN1252".
 * @returns {"WIN1252" | "LATIN1" | "UTF8"} Canonical encoding name.
 * @throws {ValidationError} When the label matches no supported alias.
 */
function normalizeSanitizeSourceEncoding(value) {
  const label = (value ?? "WIN1252").trim().toUpperCase().replace(/_/g, "-");
  const canonicalByAlias = new Map([
    ["WIN1252", "WIN1252"],
    ["WINDOWS-1252", "WIN1252"],
    ["CP1252", "WIN1252"],
    ["LATIN1", "LATIN1"],
    ["LATIN-1", "LATIN1"],
    ["ISO-8859-1", "LATIN1"],
    ["ISO8859-1", "LATIN1"],
    ["UTF8", "UTF8"],
    ["UTF-8", "UTF8"]
  ]);
  const canonical = canonicalByAlias.get(label);
  if (canonical === undefined) {
    throw new ValidationError(
      `Unsupported sanitize source encoding: ${value}. Supported values: WIN1252, LATIN1, UTF8.`
    );
  }
  return canonical;
}
|
|
7908
|
+
/**
 * Tells whether a control code point is kept in sanitized output.
 *
 * @param {number} codePoint - Code point (or byte value) to test.
 * @returns {boolean} True for tab (9), line feed (10) and carriage return (13).
 */
function isAllowedControlCodePoint(codePoint) {
  switch (codePoint) {
    case 0x09: // horizontal tab
    case 0x0a: // line feed
    case 0x0d: // carriage return
      return true;
    default:
      return false;
  }
}
|
|
7911
|
+
/**
 * Tells whether a code point must be stripped from sanitized text.
 *
 * @param {number} codePoint - Code point to classify.
 * @returns {boolean} True for C0 controls (except the allowed tab/LF/CR),
 *   DEL (127), the 128-159 range and U+FEFF.
 */
function isProblematicControlCodePoint(codePoint) {
  if (isAllowedControlCodePoint(codePoint)) {
    return false;
  }
  const isC0Control = codePoint >= 0x00 && codePoint <= 0x1f;
  const isDelete = codePoint === 0x7f;
  const isC1Control = codePoint >= 0x80 && codePoint <= 0x9f;
  const isByteOrderMark = codePoint === 0xfeff;
  return isC0Control || isDelete || isC1Control || isByteOrderMark;
}
|
|
7917
|
+
/**
 * Strips replacement characters and problematic control characters from
 * already-decoded text, iterating by code point.
 *
 * @param {string} text - Decoded text to clean.
 * @returns {{ text: string, invalidBytesRemoved: number, controlCharsRemoved: number }}
 *   The cleaned text, the number of U+FFFD characters dropped and the number
 *   of control characters dropped.
 */
function sanitizeDecodedText(text) {
  let kept = "";
  let invalidBytesRemoved = 0;
  let controlCharsRemoved = 0;
  for (const char of text) {
    const codePoint = char.codePointAt(0);
    if (codePoint === 0xfffd) {
      // U+FFFD is counted as an invalid byte and dropped.
      invalidBytesRemoved += 1;
    } else if (isProblematicControlCodePoint(codePoint)) {
      controlCharsRemoved += 1;
    } else {
      kept += char;
    }
  }
  return {
    text: kept,
    invalidBytesRemoved,
    controlCharsRemoved
  };
}
|
|
7939
|
+
/**
 * Converts raw file chunks in a given source encoding into sanitized text.
 *
 * UTF8 input is decoded incrementally with a StringDecoder so that multi-byte
 * sequences split across chunks are carried over to the next call; WIN1252
 * and LATIN1 input is mapped byte by byte and needs no carry-over state.
 *
 * Every result has the shape
 * `{ text, nulBytesRemoved, invalidBytesRemoved, controlCharsRemoved }`,
 * and each removed character is counted in exactly one of the three counters.
 */
var SanitizeEncodingNormalizer = class {
  /**
   * @param {"WIN1252" | "LATIN1" | "UTF8"} sourceEncoding - Canonical source
   *   encoding name.
   */
  constructor(sourceEncoding) {
    this.sourceEncoding = sourceEncoding;
    this.utf8Decoder = sourceEncoding === "UTF8" ? new StringDecoder("utf8") : void 0;
  }
  sourceEncoding;
  utf8Decoder;
  /**
   * Sanitizes one chunk of input.
   *
   * @param {Buffer} chunk - Raw bytes read from the source file.
   * @returns {{ text: string, nulBytesRemoved: number, invalidBytesRemoved: number, controlCharsRemoved: number }}
   */
  normalizeChunk(chunk) {
    if (this.sourceEncoding === "UTF8") {
      return this.normalizeUtf8Text(this.utf8Decoder.write(chunk));
    }
    return this.normalizeSingleByteChunk(chunk);
  }
  /**
   * Sanitizes whatever the UTF-8 decoder still holds once the input ends.
   * Single-byte encodings keep no state, so they return an empty result.
   *
   * @returns {{ text: string, nulBytesRemoved: number, invalidBytesRemoved: number, controlCharsRemoved: number }}
   */
  flush() {
    if (!this.utf8Decoder) {
      return {
        text: "",
        nulBytesRemoved: 0,
        invalidBytesRemoved: 0,
        controlCharsRemoved: 0
      };
    }
    return this.normalizeUtf8Text(this.utf8Decoder.end());
  }
  /**
   * Sanitizes text produced by the UTF-8 decoder and splits the NUL count out
   * of the control-character count.
   *
   * @param {string} decoded - Text returned by the StringDecoder.
   * @returns {{ text: string, nulBytesRemoved: number, invalidBytesRemoved: number, controlCharsRemoved: number }}
   */
  normalizeUtf8Text(decoded) {
    const sanitized = sanitizeDecodedText(decoded);
    let nulBytesRemoved = 0;
    for (let index = decoded.indexOf("\0"); index !== -1; index = decoded.indexOf("\0", index + 1)) {
      nulBytesRemoved += 1;
    }
    return {
      ...sanitized,
      // sanitizeDecodedText already counts each NUL as a removed control
      // character; subtract them so a NUL is reported once, as the
      // single-byte path does.
      controlCharsRemoved: sanitized.controlCharsRemoved - nulBytesRemoved,
      nulBytesRemoved
    };
  }
  /**
   * Maps a WIN1252 or LATIN1 chunk to text one byte at a time.
   *
   * @param {Iterable<number>} chunk - Raw bytes.
   * @returns {{ text: string, nulBytesRemoved: number, invalidBytesRemoved: number, controlCharsRemoved: number }}
   */
  normalizeSingleByteChunk(chunk) {
    const output = [];
    let nulBytesRemoved = 0;
    let invalidBytesRemoved = 0;
    let controlCharsRemoved = 0;
    for (const byte of chunk) {
      if (byte === 0) {
        nulBytesRemoved += 1;
        continue;
      }
      if (byte < 32 || byte === 127) {
        if (isAllowedControlCodePoint(byte)) {
          output.push(String.fromCharCode(byte));
        } else {
          controlCharsRemoved += 1;
        }
        continue;
      }
      if (byte >= 128 && byte <= 159) {
        if (this.sourceEncoding !== "WIN1252") {
          // For LATIN1 this whole range is treated as control characters.
          controlCharsRemoved += 1;
          continue;
        }
        const mapped = WINDOWS_1252_C1_MAP[byte];
        if (mapped === void 0) {
          // No Windows-1252 entry for this byte.
          invalidBytesRemoved += 1;
        } else {
          output.push(mapped);
        }
        continue;
      }
      // Remaining bytes (32-126 and 160-255) are emitted as the code point
      // with the same numeric value.
      output.push(String.fromCharCode(byte));
    }
    return {
      text: output.join(""),
      nulBytesRemoved,
      invalidBytesRemoved,
      controlCharsRemoved
    };
  }
};
|
|
8018
|
+
|
|
7856
8019
|
// src/services/sanitize/runner.ts
|
|
7857
8020
|
import { createReadStream as createReadStream2, createWriteStream as createWriteStream2 } from "fs";
|
|
7858
8021
|
import { mkdir as mkdir7 } from "fs/promises";
|
|
7859
8022
|
import path13 from "path";
|
|
7860
|
-
function
|
|
7861
|
-
|
|
7862
|
-
|
|
7863
|
-
if (chunk[index] === 0) {
|
|
7864
|
-
removed += 1;
|
|
7865
|
-
}
|
|
8023
|
+
/**
 * Writes a UTF-8 string to a writable stream, honouring backpressure.
 *
 * When `write()` returns false the returned promise settles on the next
 * "drain" (resolve) or "error" (reject) event. Whichever listener fires
 * removes the other one, so no stale listeners pile up on the stream.
 *
 * @param {import("stream").Writable} output - Destination stream.
 * @param {string} value - Text to write; empty strings are skipped.
 * @returns {Promise<void>}
 */
async function writeUtf8(output, value) {
  if (value.length === 0) {
    return;
  }
  if (output.write(value, "utf8")) {
    return;
  }
  await new Promise((resolve, reject) => {
    const onDrain = () => {
      output.removeListener("error", onError);
      resolve();
    };
    const onError = (error) => {
      output.removeListener("drain", onDrain);
      reject(error);
    };
    output.once("drain", onDrain);
    output.once("error", onError);
  });
}
|
|
8034
|
+
/**
 * Counts the line-feed characters in a string.
 *
 * @param {string} value - Text to scan.
 * @returns {number} Number of "\n" occurrences.
 */
function countNewlines(value) {
  let total = 0;
  let position = value.indexOf("\n");
  while (position !== -1) {
    total += 1;
    position = value.indexOf("\n", position + 1);
  }
  return total;
}
|
|
7881
|
-
async function sanitizeDatasetFile(plan, onChunk) {
|
|
8043
|
+
/**
 * Streams one dataset file through the encoding normalizer and writes the
 * sanitized text to `plan.outputPath` as UTF-8.
 *
 * The output directory is created first. Output-stream failures are captured
 * by a listener attached for the stream's whole lifetime, so they reject this
 * function instead of surfacing as an unhandled "error" event, and the final
 * wait for "finish" cannot stall on a stream that has already failed.
 *
 * @param {object} plan - File plan; reads `absolutePath`, `outputPath` and
 *   `fileSize`.
 * @param {(progress: object) => void} [onChunk] - Called after each input
 *   chunk with `bytesProcessed`, `fileBytesProcessed`, `currentFileSize`,
 *   `processedRows` and the three removal counters accumulated so far.
 * @param {{ sourceEncoding?: string }} [options] - `sourceEncoding` is passed
 *   to normalizeSanitizeSourceEncoding.
 * @returns {Promise<object>} Summary with `plan`, byte totals,
 *   `sourceEncoding`, removal counters, `lineCount` and `changed`.
 */
async function sanitizeDatasetFile(plan, onChunk, options = {}) {
  await mkdir7(path13.dirname(plan.outputPath), { recursive: true });
  const sourceEncoding = normalizeSanitizeSourceEncoding(
    options.sourceEncoding
  );
  const normalizer = new SanitizeEncodingNormalizer(sourceEncoding);
  const input = createReadStream2(plan.absolutePath);
  const output = createWriteStream2(plan.outputPath, { encoding: "utf8" });
  // Remember the first output failure; without a listener attached at all
  // times an "error" event between writes would be thrown as unhandled.
  let outputError;
  output.on("error", (error) => {
    if (outputError === undefined) {
      outputError = error;
    }
  });
  let totalBytesRead = 0;
  let totalBytesWritten = 0;
  let nulBytesRemoved = 0;
  let invalidBytesRemoved = 0;
  let controlCharsRemoved = 0;
  let lineCount = 0;
  let sawAnyCharacter = false;
  let lastCharacterWasNewline = false;
  const processText = async (text) => {
    if (text.length === 0) {
      return;
    }
    // Surface a failure recorded since the previous write before writing more.
    if (outputError !== undefined) {
      throw outputError;
    }
    sawAnyCharacter = true;
    lineCount += countNewlines(text);
    lastCharacterWasNewline = text.endsWith("\n");
    totalBytesWritten += Buffer.byteLength(text, "utf8");
    await writeUtf8(output, text);
  };
  try {
    for await (const chunk of input) {
      const chunkBuffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
      totalBytesRead += chunkBuffer.length;
      const normalized = normalizer.normalizeChunk(chunkBuffer);
      nulBytesRemoved += normalized.nulBytesRemoved;
      invalidBytesRemoved += normalized.invalidBytesRemoved;
      controlCharsRemoved += normalized.controlCharsRemoved;
      await processText(normalized.text);
      onChunk?.({
        bytesProcessed: chunkBuffer.length,
        fileBytesProcessed: totalBytesRead,
        currentFileSize: plan.fileSize,
        processedRows: lineCount,
        nulBytesRemoved,
        invalidBytesRemoved,
        controlCharsRemoved
      });
    }
    const flushed = normalizer.flush();
    nulBytesRemoved += flushed.nulBytesRemoved;
    invalidBytesRemoved += flushed.invalidBytesRemoved;
    controlCharsRemoved += flushed.controlCharsRemoved;
    await processText(flushed.text);
    // A final line without a trailing newline still counts as a line.
    if (sawAnyCharacter && !lastCharacterWasNewline) {
      lineCount += 1;
    }
  } finally {
    input.close();
    await new Promise((resolve, reject) => {
      if (outputError !== undefined) {
        // The stream already failed, so "finish" will never fire; waiting for
        // it would leave this promise pending and lose the error.
        output.destroy();
        reject(outputError);
        return;
      }
      output.once("finish", resolve);
      output.once("error", reject);
      output.end();
    });
  }
  return {
    plan,
    totalBytesRead,
    totalBytesWritten,
    sourceEncoding,
    nulBytesRemoved,
    invalidBytesRemoved,
    controlCharsRemoved,
    lineCount,
    changed: nulBytesRemoved > 0 || invalidBytesRemoved > 0 || controlCharsRemoved > 0 || totalBytesRead !== totalBytesWritten
  };
}
|
|
7933
8116
|
|
|
@@ -7990,40 +8173,54 @@ async function sanitizeInputDirectory(inputPath, options = {}) {
|
|
|
7990
8173
|
"No recognized validated dataset files were found for sanitization."
|
|
7991
8174
|
);
|
|
7992
8175
|
}
|
|
8176
|
+
const sourceEncoding = normalizeSanitizeSourceEncoding(
|
|
8177
|
+
options.sourceEncoding
|
|
8178
|
+
);
|
|
7993
8179
|
options.onProgress?.({
|
|
7994
8180
|
kind: "start",
|
|
7995
8181
|
validatedPath,
|
|
7996
8182
|
outputPath,
|
|
7997
8183
|
totalFiles: plan.totalFiles,
|
|
7998
8184
|
totalBytes: plan.totalBytes,
|
|
7999
|
-
datasets: plan.datasets
|
|
8185
|
+
datasets: plan.datasets,
|
|
8186
|
+
sourceEncoding
|
|
8000
8187
|
});
|
|
8001
8188
|
let processedFiles = 0;
|
|
8002
8189
|
let processedRows = 0;
|
|
8003
8190
|
let processedBytes = 0;
|
|
8004
8191
|
let nulBytesRemoved = 0;
|
|
8192
|
+
let invalidBytesRemoved = 0;
|
|
8193
|
+
let controlCharsRemoved = 0;
|
|
8005
8194
|
let changedFiles = 0;
|
|
8006
8195
|
const fileSummaries = [];
|
|
8007
8196
|
for (const [index, filePlan] of plan.files.entries()) {
|
|
8008
|
-
const fileResult = await sanitizeDatasetFile(
|
|
8009
|
-
|
|
8010
|
-
|
|
8011
|
-
|
|
8012
|
-
|
|
8013
|
-
|
|
8014
|
-
|
|
8015
|
-
|
|
8016
|
-
|
|
8017
|
-
|
|
8018
|
-
|
|
8019
|
-
|
|
8020
|
-
|
|
8021
|
-
|
|
8022
|
-
|
|
8197
|
+
const fileResult = await sanitizeDatasetFile(
|
|
8198
|
+
filePlan,
|
|
8199
|
+
(chunk) => {
|
|
8200
|
+
options.onProgress?.({
|
|
8201
|
+
kind: "progress",
|
|
8202
|
+
currentFileDisplayPath: filePlan.displayPath,
|
|
8203
|
+
fileIndex: index + 1,
|
|
8204
|
+
totalFiles: plan.totalFiles,
|
|
8205
|
+
bytesProcessed: processedBytes + chunk.fileBytesProcessed,
|
|
8206
|
+
totalBytes: plan.totalBytes,
|
|
8207
|
+
fileBytesProcessed: chunk.fileBytesProcessed,
|
|
8208
|
+
currentFileSize: chunk.currentFileSize,
|
|
8209
|
+
processedRows: processedRows + chunk.processedRows,
|
|
8210
|
+
nulBytesRemoved: nulBytesRemoved + chunk.nulBytesRemoved,
|
|
8211
|
+
invalidBytesRemoved: invalidBytesRemoved + chunk.invalidBytesRemoved,
|
|
8212
|
+
controlCharsRemoved: controlCharsRemoved + chunk.controlCharsRemoved,
|
|
8213
|
+
changedFiles
|
|
8214
|
+
});
|
|
8215
|
+
},
|
|
8216
|
+
{ sourceEncoding }
|
|
8217
|
+
);
|
|
8023
8218
|
processedFiles += 1;
|
|
8024
8219
|
processedRows += fileResult.lineCount;
|
|
8025
8220
|
processedBytes += fileResult.totalBytesRead;
|
|
8026
8221
|
nulBytesRemoved += fileResult.nulBytesRemoved;
|
|
8222
|
+
invalidBytesRemoved += fileResult.invalidBytesRemoved;
|
|
8223
|
+
controlCharsRemoved += fileResult.controlCharsRemoved;
|
|
8027
8224
|
changedFiles += fileResult.changed ? 1 : 0;
|
|
8028
8225
|
fileSummaries.push({
|
|
8029
8226
|
dataset: filePlan.dataset,
|
|
@@ -8031,7 +8228,9 @@ async function sanitizeInputDirectory(inputPath, options = {}) {
|
|
|
8031
8228
|
outputPath: filePlan.outputPath,
|
|
8032
8229
|
lineCount: fileResult.lineCount,
|
|
8033
8230
|
changed: fileResult.changed,
|
|
8034
|
-
nulBytesRemoved: fileResult.nulBytesRemoved
|
|
8231
|
+
nulBytesRemoved: fileResult.nulBytesRemoved,
|
|
8232
|
+
invalidBytesRemoved: fileResult.invalidBytesRemoved,
|
|
8233
|
+
controlCharsRemoved: fileResult.controlCharsRemoved
|
|
8035
8234
|
});
|
|
8036
8235
|
}
|
|
8037
8236
|
options.onProgress?.({
|
|
@@ -8039,6 +8238,8 @@ async function sanitizeInputDirectory(inputPath, options = {}) {
|
|
|
8039
8238
|
totalFiles: plan.totalFiles,
|
|
8040
8239
|
processedRows,
|
|
8041
8240
|
nulBytesRemoved,
|
|
8241
|
+
invalidBytesRemoved,
|
|
8242
|
+
controlCharsRemoved,
|
|
8042
8243
|
changedFiles,
|
|
8043
8244
|
totalBytes: plan.totalBytes
|
|
8044
8245
|
});
|
|
@@ -8050,13 +8251,17 @@ async function sanitizeInputDirectory(inputPath, options = {}) {
|
|
|
8050
8251
|
totalBytes: plan.totalBytes,
|
|
8051
8252
|
processedFiles,
|
|
8052
8253
|
processedRows,
|
|
8254
|
+
sourceEncoding,
|
|
8053
8255
|
nulBytesRemoved,
|
|
8256
|
+
invalidBytesRemoved,
|
|
8257
|
+
controlCharsRemoved,
|
|
8054
8258
|
changedFiles,
|
|
8055
8259
|
unchangedFiles: plan.totalFiles - changedFiles,
|
|
8056
8260
|
datasets: plan.datasets,
|
|
8057
8261
|
files: fileSummaries,
|
|
8058
8262
|
warnings: [
|
|
8059
|
-
"Sanitization
|
|
8263
|
+
"Sanitization now writes UTF-8 output and removes invalid bytes plus problematic control characters before PostgreSQL loading begins.",
|
|
8264
|
+
"The PostgreSQL direct import path can use --source-encoding UTF8 when reading files generated by this sanitization command.",
|
|
8060
8265
|
"The import command still keeps quarantine and row-level recovery for unexpected issues, but sanitizing first reduces the amount of slow fallback work during import."
|
|
8061
8266
|
],
|
|
8062
8267
|
nextStep: inferNextStep3(outputPath)
|
|
@@ -8206,6 +8411,18 @@ var STAGING_TABLE_BY_DATASET3 = {
|
|
|
8206
8411
|
partners: "staging_partners",
|
|
8207
8412
|
simples_options: "staging_simples_options"
|
|
8208
8413
|
};
|
|
8414
|
+
// Step identifiers in list order: setup, the five load steps, the two
// materialize steps, then indexes and analyze.
// NOTE(review): presumably the order in which step scripts are emitted or
// run; the consumer is not visible here, so confirm before relying on it.
var STEP_ORDER = [
  "setup",

  // load-*
  "load-domains",
  "load-companies",
  "load-establishments",
  "load-partners",
  "load-simples",

  // materialize*
  "materialize",
  "materialize-secondary-cnaes",

  "indexes",
  "analyze"
];
|
|
8209
8426
|
/**
 * Wraps a string in single quotes, doubling every embedded single quote.
 *
 * @param {string} value - Raw text.
 * @returns {string} The quoted literal.
 */
function quoteSqlLiteral(value) {
  const escaped = value.split("'").join("''");
  return `'${escaped}'`;
}
|
|
@@ -8223,6 +8440,9 @@ function receitaCopyCommand(tableName, columns, filePath) {
|
|
|
8223
8440
|
const normalizedFilePath = normalizePathForPsql(filePath);
|
|
8224
8441
|
return `\\copy ${tableName} (${columns.join(", ")}) from ${quoteSqlLiteral(normalizedFilePath)} with (format csv, header false, delimiter ';', quote '"', escape '"')`;
|
|
8225
8442
|
}
|
|
8443
|
+
/**
 * Builds a psql `\echo` meta-command that prints `message` verbatim.
 *
 * psql applies C-like backslash substitutions (\n, \t, \digits, ...) inside
 * single-quoted meta-command arguments, so quoting alone is not enough:
 * backslashes are doubled, and raw line breaks (which would end the
 * meta-command) are written as \n / \r escapes that psql turns back into the
 * original characters.
 *
 * @param {string} message - Text to print while the script runs.
 * @returns {string} A single-line `\echo '...'` command.
 */
function echo(message) {
  const escaped = message
    .replace(/\\/g, "\\\\")
    .replace(/\n/g, "\\n")
    .replace(/\r/g, "\\r")
    .replace(/'/g, "''");
  return `\\echo '${escaped}'`;
}
|
|
8226
8446
|
/**
 * Lists the column names declared by a dataset's layout, in field order.
 *
 * @param {string} dataset - Key into DATASET_LAYOUTS.
 * @returns {string[]} The `columnName` of every layout field.
 */
function datasetColumns(dataset) {
  const { fields } = DATASET_LAYOUTS[dataset];
  return fields.map(({ columnName }) => columnName);
}
|
|
@@ -8249,7 +8469,7 @@ function partnerDedupeExpression(alias) {
|
|
|
8249
8469
|
function materializeCompaniesSql() {
|
|
8250
8470
|
const columns = companiesLayout.fields.map((field) => field.columnName);
|
|
8251
8471
|
return [
|
|
8252
|
-
"
|
|
8472
|
+
echo("[materialize] Materializing companies..."),
|
|
8253
8473
|
"with source as (",
|
|
8254
8474
|
" select",
|
|
8255
8475
|
` ${columns.map((column) => `source.${column}`).join(",\n ")},`,
|
|
@@ -8263,7 +8483,8 @@ function materializeCompaniesSql() {
|
|
|
8263
8483
|
`select ${columns.join(", ")}`,
|
|
8264
8484
|
"from deduped",
|
|
8265
8485
|
"on conflict (cnpj_root) do update set",
|
|
8266
|
-
` ${updateAssignments(columns, ["cnpj_root"])}
|
|
8486
|
+
` ${updateAssignments(columns, ["cnpj_root"])};`,
|
|
8487
|
+
echo("[materialize] Companies materialization completed.")
|
|
8267
8488
|
].join("\n");
|
|
8268
8489
|
}
|
|
8269
8490
|
function materializeEstablishmentsSql() {
|
|
@@ -8272,7 +8493,7 @@ function materializeEstablishmentsSql() {
|
|
|
8272
8493
|
);
|
|
8273
8494
|
const insertColumns = [...baseColumns, "cnpj_full"];
|
|
8274
8495
|
return [
|
|
8275
|
-
"
|
|
8496
|
+
echo("[materialize] Materializing establishments..."),
|
|
8276
8497
|
"with source as (",
|
|
8277
8498
|
" select",
|
|
8278
8499
|
` ${baseColumns.map((column) => `source.${column}`).join(",\n ")},`,
|
|
@@ -8282,14 +8503,29 @@ function materializeEstablishmentsSql() {
|
|
|
8282
8503
|
"),",
|
|
8283
8504
|
"deduped as (",
|
|
8284
8505
|
" select * from source where dedupe_rank = 1",
|
|
8506
|
+
")",
|
|
8507
|
+
`insert into establishments (${insertColumns.join(", ")})`,
|
|
8508
|
+
`select ${insertColumns.join(", ")}`,
|
|
8509
|
+
"from deduped",
|
|
8510
|
+
"on conflict (cnpj_full) do update set",
|
|
8511
|
+
` ${updateAssignments(insertColumns, ["cnpj_root", "cnpj_order", "cnpj_check_digits", "cnpj_full"])};`,
|
|
8512
|
+
echo("[materialize] Establishments materialization completed.")
|
|
8513
|
+
].join("\n");
|
|
8514
|
+
}
|
|
8515
|
+
function materializeSecondaryCnaesSql() {
|
|
8516
|
+
return [
|
|
8517
|
+
echo(
|
|
8518
|
+
"[materialize-secondary-cnaes] Materializing establishment secondary CNAEs..."
|
|
8519
|
+
),
|
|
8520
|
+
"with source as (",
|
|
8521
|
+
" select",
|
|
8522
|
+
" staging.cnpj_root || staging.cnpj_order || staging.cnpj_check_digits as cnpj_full,",
|
|
8523
|
+
" staging.secondary_cnaes_raw,",
|
|
8524
|
+
" row_number() over (partition by staging.cnpj_root || staging.cnpj_order || staging.cnpj_check_digits order by staging.staging_id desc) as dedupe_rank",
|
|
8525
|
+
" from staging_establishments staging",
|
|
8285
8526
|
"),",
|
|
8286
|
-
"
|
|
8287
|
-
|
|
8288
|
-
` select ${insertColumns.join(", ")}`,
|
|
8289
|
-
" from deduped",
|
|
8290
|
-
" on conflict (cnpj_full) do update set",
|
|
8291
|
-
` ${updateAssignments(insertColumns, ["cnpj_root", "cnpj_order", "cnpj_check_digits", "cnpj_full"])}`,
|
|
8292
|
-
" returning cnpj_full",
|
|
8527
|
+
"deduped as (",
|
|
8528
|
+
" select * from source where dedupe_rank = 1",
|
|
8293
8529
|
"),",
|
|
8294
8530
|
"deleted_secondary_cnaes as (",
|
|
8295
8531
|
" delete from establishment_secondary_cnaes target",
|
|
@@ -8310,14 +8546,17 @@ function materializeEstablishmentsSql() {
|
|
|
8310
8546
|
"insert into establishment_secondary_cnaes (cnpj_full, cnae_code)",
|
|
8311
8547
|
"select cnpj_full, cnae_code",
|
|
8312
8548
|
"from secondary_cnaes_source",
|
|
8313
|
-
"on conflict (cnpj_full, cnae_code) do nothing;"
|
|
8549
|
+
"on conflict (cnpj_full, cnae_code) do nothing;",
|
|
8550
|
+
echo(
|
|
8551
|
+
"[materialize-secondary-cnaes] Secondary CNAEs materialization completed."
|
|
8552
|
+
)
|
|
8314
8553
|
].join("\n");
|
|
8315
8554
|
}
|
|
8316
8555
|
function materializePartnersSql() {
|
|
8317
8556
|
const baseColumns = partnersLayout.fields.map((field) => field.columnName);
|
|
8318
8557
|
const insertColumns = [...baseColumns, "partner_dedupe_key"];
|
|
8319
8558
|
return [
|
|
8320
|
-
"
|
|
8559
|
+
echo("[materialize] Materializing partners..."),
|
|
8321
8560
|
"with source as (",
|
|
8322
8561
|
" select",
|
|
8323
8562
|
` ${baseColumns.map((column) => `source.${column}`).join(",\n ")},`,
|
|
@@ -8337,13 +8576,14 @@ function materializePartnersSql() {
|
|
|
8337
8576
|
`select ${insertColumns.join(", ")}`,
|
|
8338
8577
|
"from deduped",
|
|
8339
8578
|
"on conflict (partner_dedupe_key) do update set",
|
|
8340
|
-
` ${updateAssignments(insertColumns, ["partner_dedupe_key"])}
|
|
8579
|
+
` ${updateAssignments(insertColumns, ["partner_dedupe_key"])};`,
|
|
8580
|
+
echo("[materialize] Partners materialization completed.")
|
|
8341
8581
|
].join("\n");
|
|
8342
8582
|
}
|
|
8343
8583
|
function materializeSimplesSql() {
|
|
8344
8584
|
const columns = simplesLayout.fields.map((field) => field.columnName);
|
|
8345
8585
|
return [
|
|
8346
|
-
"
|
|
8586
|
+
echo("[materialize] Materializing simples options..."),
|
|
8347
8587
|
"with source as (",
|
|
8348
8588
|
" select",
|
|
8349
8589
|
` ${columns.map((column) => `source.${column}`).join(",\n ")},`,
|
|
@@ -8357,7 +8597,8 @@ function materializeSimplesSql() {
|
|
|
8357
8597
|
`select ${columns.join(", ")}`,
|
|
8358
8598
|
"from deduped",
|
|
8359
8599
|
"on conflict (cnpj_root) do update set",
|
|
8360
|
-
` ${updateAssignments(columns, ["cnpj_root"])}
|
|
8600
|
+
` ${updateAssignments(columns, ["cnpj_root"])};`,
|
|
8601
|
+
echo("[materialize] Simples options materialization completed.")
|
|
8361
8602
|
].join("\n");
|
|
8362
8603
|
}
|
|
8363
8604
|
function copyDomainSql(dataset, files) {
|
|
@@ -8367,12 +8608,20 @@ function copyDomainSql(dataset, files) {
|
|
|
8367
8608
|
const columns = datasetColumns(dataset);
|
|
8368
8609
|
const tempTable = `tmp_hybrid_${dataset}`;
|
|
8369
8610
|
const lines = [
|
|
8370
|
-
|
|
8611
|
+
echo(`[load-domains] Loading ${dataset} lookup data...`),
|
|
8371
8612
|
`drop table if exists ${tempTable};`,
|
|
8372
8613
|
`create temporary table ${tempTable} (code text, description text);`
|
|
8373
8614
|
];
|
|
8374
|
-
for (const file of files) {
|
|
8375
|
-
lines.push(
|
|
8615
|
+
for (const [index, file] of files.entries()) {
|
|
8616
|
+
lines.push(
|
|
8617
|
+
echo(
|
|
8618
|
+
`[load-domains] Loading ${dataset} file ${index + 1} of ${files.length}: ${file.relativePath}`
|
|
8619
|
+
),
|
|
8620
|
+
csvCopyCommand(tempTable, columns, file.absolutePath),
|
|
8621
|
+
echo(
|
|
8622
|
+
`[load-domains] Loaded ${dataset} file ${index + 1} of ${files.length}.`
|
|
8623
|
+
)
|
|
8624
|
+
);
|
|
8376
8625
|
}
|
|
8377
8626
|
lines.push(
|
|
8378
8627
|
`insert into ${dataset} (${columns.join(", ")})`,
|
|
@@ -8393,12 +8642,17 @@ function copyStagingSql(dataset, files) {
|
|
|
8393
8642
|
return [];
|
|
8394
8643
|
}
|
|
8395
8644
|
const columns = datasetColumns(dataset);
|
|
8396
|
-
|
|
8397
|
-
|
|
8398
|
-
|
|
8399
|
-
(
|
|
8400
|
-
|
|
8401
|
-
|
|
8645
|
+
const lines = [echo(`[load-${dataset}] Loading ${dataset} staging data...`)];
|
|
8646
|
+
for (const [index, file] of files.entries()) {
|
|
8647
|
+
lines.push(
|
|
8648
|
+
echo(
|
|
8649
|
+
`[load-${dataset}] Loading file ${index + 1} of ${files.length}: ${file.relativePath}`
|
|
8650
|
+
),
|
|
8651
|
+
csvCopyCommand(tableName, columns, file.absolutePath),
|
|
8652
|
+
echo(`[load-${dataset}] Loaded file ${index + 1} of ${files.length}.`)
|
|
8653
|
+
);
|
|
8654
|
+
}
|
|
8655
|
+
return lines;
|
|
8402
8656
|
}
|
|
8403
8657
|
function csvFilesByDataset(files) {
|
|
8404
8658
|
const grouped = {};
|
|
@@ -8424,7 +8678,9 @@ function rawTableName(dataset) {
|
|
|
8424
8678
|
function createRawTempTableSql(dataset) {
|
|
8425
8679
|
const columns = DATASET_LAYOUTS[dataset].fields.map((field) => ` ${quoteIdentifier(field.columnName)} text`).join(",\n");
|
|
8426
8680
|
return [
|
|
8681
|
+
"set client_min_messages to warning;",
|
|
8427
8682
|
`drop table if exists ${rawTableName(dataset)};`,
|
|
8683
|
+
"reset client_min_messages;",
|
|
8428
8684
|
`create temporary table ${rawTableName(dataset)} (`,
|
|
8429
8685
|
columns,
|
|
8430
8686
|
");"
|
|
@@ -8506,11 +8762,21 @@ function rawDomainSql(dataset, files) {
|
|
|
8506
8762
|
const columns = layout.fields.map((field) => field.columnName);
|
|
8507
8763
|
const tableName = rawTableName(dataset);
|
|
8508
8764
|
const lines = [
|
|
8509
|
-
|
|
8765
|
+
echo(
|
|
8766
|
+
`[load-domains] Loading ${dataset} lookup data directly from sanitized Receita files...`
|
|
8767
|
+
),
|
|
8510
8768
|
createRawTempTableSql(dataset)
|
|
8511
8769
|
];
|
|
8512
|
-
for (const file of files) {
|
|
8513
|
-
lines.push(
|
|
8770
|
+
for (const [index, file] of files.entries()) {
|
|
8771
|
+
lines.push(
|
|
8772
|
+
echo(
|
|
8773
|
+
`[load-domains] Loading ${dataset} file ${index + 1} of ${files.length}: ${file.relativePath}`
|
|
8774
|
+
),
|
|
8775
|
+
receitaCopyCommand(tableName, columns, file.absolutePath),
|
|
8776
|
+
echo(
|
|
8777
|
+
`[load-domains] Loaded ${dataset} file ${index + 1} of ${files.length}.`
|
|
8778
|
+
)
|
|
8779
|
+
);
|
|
8514
8780
|
}
|
|
8515
8781
|
lines.push(
|
|
8516
8782
|
`insert into ${dataset} (${columns.join(", ")})`,
|
|
@@ -8520,7 +8786,8 @@ function rawDomainSql(dataset, files) {
|
|
|
8520
8786
|
`from ${tableName}`,
|
|
8521
8787
|
"where nullif(btrim(code), '') is not null",
|
|
8522
8788
|
"order by code",
|
|
8523
|
-
"on conflict (code) do update set description = excluded.description;"
|
|
8789
|
+
"on conflict (code) do update set description = excluded.description;",
|
|
8790
|
+
echo(`[load-domains] ${dataset} lookup data completed.`)
|
|
8524
8791
|
);
|
|
8525
8792
|
return lines;
|
|
8526
8793
|
}
|
|
@@ -8539,70 +8806,363 @@ function rawStagingSql(dataset, files) {
|
|
|
8539
8806
|
const expressions = layout.fields.map(
|
|
8540
8807
|
(field) => ` ${fieldExpression(dataset, field, alias)} as ${field.columnName}`
|
|
8541
8808
|
);
|
|
8809
|
+
const stepName = loadStepName(dataset);
|
|
8542
8810
|
const lines = [
|
|
8543
|
-
|
|
8811
|
+
echo(
|
|
8812
|
+
`[${stepName}] Loading ${dataset} staging data directly from sanitized Receita files...`
|
|
8813
|
+
),
|
|
8814
|
+
`truncate table ${targetTable} restart identity;`,
|
|
8544
8815
|
createRawTempTableSql(dataset)
|
|
8545
8816
|
];
|
|
8546
|
-
for (const file of files) {
|
|
8547
|
-
lines.push(
|
|
8817
|
+
for (const [index, file] of files.entries()) {
|
|
8818
|
+
lines.push(
|
|
8819
|
+
echo(
|
|
8820
|
+
`[${stepName}] Loading file ${index + 1} of ${files.length}: ${file.relativePath}`
|
|
8821
|
+
),
|
|
8822
|
+
receitaCopyCommand(tableName, columns, file.absolutePath),
|
|
8823
|
+
echo(`[${stepName}] Loaded file ${index + 1} of ${files.length}.`)
|
|
8824
|
+
);
|
|
8548
8825
|
}
|
|
8549
8826
|
lines.push(
|
|
8827
|
+
echo(
|
|
8828
|
+
`[${stepName}] Transforming ${dataset} raw rows into ${targetTable}...`
|
|
8829
|
+
),
|
|
8550
8830
|
`insert into ${targetTable} (${columns.join(", ")})`,
|
|
8551
8831
|
"select",
|
|
8552
8832
|
expressions.join(",\n"),
|
|
8553
|
-
`from ${tableName} ${alias}
|
|
8833
|
+
`from ${tableName} ${alias};`,
|
|
8834
|
+
echo(`[${stepName}] ${dataset} staging load completed.`)
|
|
8554
8835
|
);
|
|
8555
8836
|
return lines;
|
|
8556
8837
|
}
|
|
8557
|
-
function
|
|
8558
|
-
|
|
8559
|
-
|
|
8560
|
-
|
|
8561
|
-
|
|
8562
|
-
|
|
8563
|
-
|
|
8564
|
-
|
|
8838
|
+
/**
 * Maps a dataset name to the name of its load step.
 *
 * @param {string} dataset - Dataset identifier.
 * @returns {string} `load-<dataset>`, except "simples_options", which maps to
 *   "load-simples".
 */
function loadStepName(dataset) {
  // Only "simples_options" deviates from the `load-<dataset>` pattern.
  return dataset === "simples_options" ? "load-simples" : `load-${dataset}`;
}
|
|
8852
|
+
/**
 * Builds the opening lines shared by generated psql scripts.
 *
 * @param {string} title - Text for the leading `-- <title>` comment.
 * @param {string} [sourceEncoding] - When truthy, adds an echo line and a
 *   `set client_encoding` statement for this encoding.
 * @returns {string[]} Header lines ending with an empty string.
 */
function scriptHeader(title, sourceEncoding) {
  const header = [
    `-- ${title}`,
    "-- Generated by cnpj-db-loader postgres generate-script.",
    "\\set ON_ERROR_STOP on"
  ];
  if (sourceEncoding) {
    header.push(
      echo(
        `Using source file encoding ${sourceEncoding} for psql copy operations...`
      ),
      `set client_encoding to ${quoteSqlLiteral(sourceEncoding)};`
    );
  }
  header.push("");
  return header;
}
|
|
8866
|
+
/**
 * Optionally surrounds script lines with `begin;` / `commit;`.
 *
 * @param {string[]} lines - Script body lines.
 * @param {string} mode - Transaction mode; wrapping only happens for "phase".
 * @param {boolean} shouldWrap - Whether this script may be wrapped.
 * @returns {string[]} A new array: the wrapped lines, or a copy of `lines`.
 */
function wrapTransaction(lines, mode, shouldWrap) {
  const wrapsPhase = shouldWrap && mode === "phase";
  return wrapsPhase ? ["begin;", "", ...lines, "", "commit;"] : [...lines];
}
|
|
8872
|
+
/**
 * Assembles the full text of one generated step script.
 *
 * @param {string} title - Title passed to scriptHeader.
 * @param {string[]} body - Script body lines.
 * @param {object} input - Reads `sourceEncoding` and `transactionMode`.
 * @param {boolean} wrapInPhaseTransaction - Passed to wrapTransaction as
 *   `shouldWrap`.
 * @returns {string} Header, body and a trailing empty line joined with "\n".
 */
function buildStepScript(title, body, input, wrapInPhaseTransaction) {
  const header = scriptHeader(title, input.sourceEncoding);
  const statements = wrapTransaction(
    body,
    input.transactionMode,
    wrapInPhaseTransaction
  );
  const script = [...header, ...statements, ""];
  return script.join("\n");
}
|
|
8879
|
+
/**
 * Builds the set of selected items from `input.include`, dropping the ones
 * disabled by skip flags.
 *
 * @param {object} input - Reads `include`, `skipIndexes` and `skipAnalyze`.
 * @returns {Set<string>} `include` minus "indexes" when `skipIndexes` is
 *   truthy and minus "analyze" when `skipAnalyze` is truthy.
 */
function includeSet(input) {
  const selected = new Set(input.include);
  const skipFlags = [
    [input.skipIndexes, "indexes"],
    [input.skipAnalyze, "analyze"]
  ];
  for (const [shouldSkip, step] of skipFlags) {
    if (shouldSkip) {
      selected.delete(step);
    }
  }
  return selected;
}
|
|
8889
|
+
/**
 * Tells whether the selection contains at least one of "companies",
 * "establishments", "partners" or "simples".
 *
 * @param {Set<string>} selected - Selected items.
 * @returns {boolean}
 */
function hasAnyFinalMaterialization(selected) {
  const finalTables = ["companies", "establishments", "partners", "simples"];
  return finalTables.some((name) => selected.has(name));
}
|
|
8892
|
+
/**
 * Collects the materialization SQL for every selected final table, framed by
 * start and completion echo lines.
 *
 * @param {Set<string>} selected - Selected items; "companies",
 *   "establishments", "partners" and "simples" are checked in that order.
 * @returns {string[]} Script lines; each included block is followed by an
 *   empty string.
 */
function materializeSql(selected) {
  const builders = [
    ["companies", materializeCompaniesSql],
    ["establishments", materializeEstablishmentsSql],
    ["partners", materializePartnersSql],
    ["simples", materializeSimplesSql]
  ];
  const lines = [echo("[materialize] Starting final table materialization...")];
  for (const [key, buildSql] of builders) {
    if (selected.has(key)) {
      lines.push(buildSql(), "");
    }
  }
  lines.push(echo("[materialize] Final table materialization completed."));
  return lines;
}
|
|
8909
|
+
function indexesSql() {
|
|
8910
|
+
return [
|
|
8911
|
+
echo(
|
|
8912
|
+
"[indexes] No additional index operations are generated in this beta."
|
|
8913
|
+
),
|
|
8914
|
+
"-- Indexes are expected to be managed by the schema generated by cnpj-db-loader schema generate.",
|
|
8915
|
+
"-- A future fast-rebuild mode may generate DROP/CREATE INDEX operations here."
|
|
8916
|
+
];
|
|
8917
|
+
}
|
|
8918
|
+
function analyzeSql(selected) {
|
|
8919
|
+
const tables = /* @__PURE__ */ new Set();
|
|
8920
|
+
if (selected.has("companies")) {
|
|
8921
|
+
tables.add("companies");
|
|
8922
|
+
}
|
|
8923
|
+
if (selected.has("establishments")) {
|
|
8924
|
+
tables.add("establishments");
|
|
8925
|
+
}
|
|
8926
|
+
if (selected.has("secondary-cnaes")) {
|
|
8927
|
+
tables.add("establishment_secondary_cnaes");
|
|
8928
|
+
}
|
|
8929
|
+
if (selected.has("partners")) {
|
|
8930
|
+
tables.add("partners");
|
|
8931
|
+
}
|
|
8932
|
+
if (selected.has("simples")) {
|
|
8933
|
+
tables.add("simples_options");
|
|
8934
|
+
}
|
|
8935
|
+
if (selected.has("domains")) {
|
|
8936
|
+
for (const dataset of DOMAIN_DATASETS) {
|
|
8937
|
+
tables.add(dataset);
|
|
8938
|
+
}
|
|
8939
|
+
}
|
|
8940
|
+
return [
|
|
8941
|
+
echo("[analyze] Refreshing planner statistics..."),
|
|
8942
|
+
...[...tables].map((table) => `analyze ${table};`),
|
|
8943
|
+
echo("[analyze] Planner statistics refreshed.")
|
|
8944
|
+
];
|
|
8945
|
+
}
|
|
8946
|
+
function step(name, file, dependsOn, included) {
|
|
8947
|
+
return { name, file, dependsOn, included };
|
|
8586
8948
|
}
|
|
8587
|
-
function
|
|
8949
|
+
function generatePostgresDirectScriptFiles(input) {
|
|
8588
8950
|
const grouped = directFilesByDataset(input.files);
|
|
8589
|
-
const
|
|
8590
|
-
|
|
8951
|
+
const selected = includeSet(input);
|
|
8952
|
+
if (!DOMAIN_DATASETS.some((dataset) => (grouped[dataset] ?? []).length > 0)) {
|
|
8953
|
+
selected.delete("domains");
|
|
8954
|
+
}
|
|
8955
|
+
if ((grouped.companies ?? []).length === 0) {
|
|
8956
|
+
selected.delete("companies");
|
|
8957
|
+
}
|
|
8958
|
+
if ((grouped.establishments ?? []).length === 0) {
|
|
8959
|
+
selected.delete("establishments");
|
|
8960
|
+
selected.delete("secondary-cnaes");
|
|
8961
|
+
}
|
|
8962
|
+
if ((grouped.partners ?? []).length === 0) {
|
|
8963
|
+
selected.delete("partners");
|
|
8964
|
+
}
|
|
8965
|
+
if ((grouped.simples_options ?? []).length === 0) {
|
|
8966
|
+
selected.delete("simples");
|
|
8967
|
+
}
|
|
8968
|
+
const scripts = {};
|
|
8969
|
+
const steps = [];
|
|
8970
|
+
const setupIncluded = true;
|
|
8971
|
+
steps.push(step("setup", "setup.sql", [], setupIncluded));
|
|
8972
|
+
scripts["setup.sql"] = [
|
|
8973
|
+
...scriptHeader(
|
|
8974
|
+
"CNPJ DB Loader PostgreSQL direct import setup",
|
|
8975
|
+
input.sourceEncoding
|
|
8976
|
+
),
|
|
8977
|
+
echo("[setup] Preparing PostgreSQL direct import session..."),
|
|
8978
|
+
"-- The database schema must be applied before running these scripts.",
|
|
8979
|
+
"-- This setup script configures the psql session used by the generated orchestrator.",
|
|
8980
|
+
echo("[setup] Setup completed."),
|
|
8981
|
+
""
|
|
8982
|
+
].join("\n");
|
|
8983
|
+
const domainsIncluded = selected.has("domains") && DOMAIN_DATASETS.some((dataset) => (grouped[dataset] ?? []).length > 0);
|
|
8984
|
+
steps.push(
|
|
8985
|
+
step("load-domains", "load-domains.sql", ["setup"], domainsIncluded)
|
|
8986
|
+
);
|
|
8987
|
+
if (domainsIncluded) {
|
|
8988
|
+
const lines = [echo("[load-domains] Starting domain tables load...")];
|
|
8989
|
+
for (const dataset of DOMAIN_DATASETS) {
|
|
8990
|
+
lines.push(...rawDomainSql(dataset, grouped[dataset] ?? []), "");
|
|
8991
|
+
}
|
|
8992
|
+
lines.push(echo("[load-domains] Domain tables load completed."));
|
|
8993
|
+
scripts["load-domains.sql"] = buildStepScript(
|
|
8994
|
+
"CNPJ DB Loader PostgreSQL direct import domains step",
|
|
8995
|
+
lines,
|
|
8996
|
+
input,
|
|
8997
|
+
true
|
|
8998
|
+
);
|
|
8999
|
+
}
|
|
9000
|
+
const datasetSteps = [
|
|
9001
|
+
{
|
|
9002
|
+
dataset: "companies",
|
|
9003
|
+
name: "load-companies",
|
|
9004
|
+
file: "load-companies.sql",
|
|
9005
|
+
include: "companies"
|
|
9006
|
+
},
|
|
9007
|
+
{
|
|
9008
|
+
dataset: "establishments",
|
|
9009
|
+
name: "load-establishments",
|
|
9010
|
+
file: "load-establishments.sql",
|
|
9011
|
+
include: "establishments"
|
|
9012
|
+
},
|
|
9013
|
+
{
|
|
9014
|
+
dataset: "partners",
|
|
9015
|
+
name: "load-partners",
|
|
9016
|
+
file: "load-partners.sql",
|
|
9017
|
+
include: "partners"
|
|
9018
|
+
},
|
|
9019
|
+
{
|
|
9020
|
+
dataset: "simples_options",
|
|
9021
|
+
name: "load-simples",
|
|
9022
|
+
file: "load-simples.sql",
|
|
9023
|
+
include: "simples"
|
|
9024
|
+
}
|
|
9025
|
+
];
|
|
9026
|
+
for (const item of datasetSteps) {
|
|
9027
|
+
const files = grouped[item.dataset] ?? [];
|
|
9028
|
+
const included = selected.has(item.include) && files.length > 0;
|
|
9029
|
+
steps.push(step(item.name, item.file, ["setup"], included));
|
|
9030
|
+
if (included) {
|
|
9031
|
+
scripts[item.file] = buildStepScript(
|
|
9032
|
+
`CNPJ DB Loader PostgreSQL direct import ${item.name} step`,
|
|
9033
|
+
rawStagingSql(item.dataset, files),
|
|
9034
|
+
input,
|
|
9035
|
+
true
|
|
9036
|
+
);
|
|
9037
|
+
}
|
|
9038
|
+
}
|
|
9039
|
+
const materializeIncluded = hasAnyFinalMaterialization(selected);
|
|
9040
|
+
steps.push(
|
|
9041
|
+
step(
|
|
9042
|
+
"materialize",
|
|
9043
|
+
"materialize.sql",
|
|
9044
|
+
datasetSteps.filter((item) => selected.has(item.include)).map((item) => item.name),
|
|
9045
|
+
materializeIncluded
|
|
9046
|
+
)
|
|
9047
|
+
);
|
|
9048
|
+
if (materializeIncluded) {
|
|
9049
|
+
scripts["materialize.sql"] = buildStepScript(
|
|
9050
|
+
"CNPJ DB Loader PostgreSQL direct import materialization step",
|
|
9051
|
+
materializeSql(selected),
|
|
9052
|
+
input,
|
|
9053
|
+
true
|
|
9054
|
+
);
|
|
9055
|
+
}
|
|
9056
|
+
const secondaryIncluded = selected.has("secondary-cnaes") && selected.has("establishments");
|
|
9057
|
+
steps.push(
|
|
9058
|
+
step(
|
|
9059
|
+
"materialize-secondary-cnaes",
|
|
9060
|
+
"materialize-secondary-cnaes.sql",
|
|
9061
|
+
["load-establishments"],
|
|
9062
|
+
secondaryIncluded
|
|
9063
|
+
)
|
|
9064
|
+
);
|
|
9065
|
+
if (secondaryIncluded) {
|
|
9066
|
+
scripts["materialize-secondary-cnaes.sql"] = buildStepScript(
|
|
9067
|
+
"CNPJ DB Loader PostgreSQL direct import secondary CNAEs step",
|
|
9068
|
+
[materializeSecondaryCnaesSql()],
|
|
9069
|
+
input,
|
|
9070
|
+
true
|
|
9071
|
+
);
|
|
9072
|
+
}
|
|
9073
|
+
const indexesIncluded = selected.has("indexes");
|
|
9074
|
+
steps.push(
|
|
9075
|
+
step(
|
|
9076
|
+
"indexes",
|
|
9077
|
+
"indexes.sql",
|
|
9078
|
+
materializeIncluded ? ["materialize"] : ["setup"],
|
|
9079
|
+
indexesIncluded
|
|
9080
|
+
)
|
|
9081
|
+
);
|
|
9082
|
+
if (indexesIncluded) {
|
|
9083
|
+
scripts["indexes.sql"] = buildStepScript(
|
|
9084
|
+
"CNPJ DB Loader PostgreSQL direct import indexes step",
|
|
9085
|
+
indexesSql(),
|
|
9086
|
+
input,
|
|
9087
|
+
true
|
|
9088
|
+
);
|
|
9089
|
+
}
|
|
9090
|
+
const analyzeIncluded = selected.has("analyze");
|
|
9091
|
+
const analyzeDependencies = [
|
|
9092
|
+
...domainsIncluded ? ["load-domains"] : [],
|
|
9093
|
+
...materializeIncluded ? ["materialize"] : [],
|
|
9094
|
+
...secondaryIncluded ? ["materialize-secondary-cnaes"] : []
|
|
9095
|
+
];
|
|
9096
|
+
steps.push(
|
|
9097
|
+
step(
|
|
9098
|
+
"analyze",
|
|
9099
|
+
"analyze.sql",
|
|
9100
|
+
analyzeDependencies.length > 0 ? analyzeDependencies : ["setup"],
|
|
9101
|
+
analyzeIncluded
|
|
9102
|
+
)
|
|
9103
|
+
);
|
|
9104
|
+
if (analyzeIncluded) {
|
|
9105
|
+
scripts["analyze.sql"] = buildStepScript(
|
|
9106
|
+
"CNPJ DB Loader PostgreSQL direct import analyze step",
|
|
9107
|
+
analyzeSql(selected),
|
|
9108
|
+
input,
|
|
9109
|
+
true
|
|
9110
|
+
);
|
|
9111
|
+
}
|
|
9112
|
+
const orchestratorLines = [
|
|
9113
|
+
"-- CNPJ DB Loader direct PostgreSQL import orchestrator",
|
|
8591
9114
|
"-- Generated from sanitized Receita files by cnpj-db-loader postgres generate-script.",
|
|
8592
|
-
"-- This path avoids rewriting the dataset into a second CSV tree.",
|
|
8593
9115
|
"-- Execute with psql, for example:",
|
|
8594
|
-
'-- psql "postgres://postgres:postgres@localhost:5432/cnpj" -f import-postgres-direct.sql',
|
|
9116
|
+
'-- psql -d "postgres://postgres:postgres@localhost:5432/cnpj" -f import-postgres-direct.sql',
|
|
8595
9117
|
"",
|
|
8596
9118
|
"\\set ON_ERROR_STOP on",
|
|
8597
|
-
|
|
9119
|
+
echo(
|
|
9120
|
+
`Using source file encoding ${input.sourceEncoding} for psql copy operations...`
|
|
9121
|
+
),
|
|
8598
9122
|
`set client_encoding to ${quoteSqlLiteral(input.sourceEncoding)};`,
|
|
8599
|
-
|
|
9123
|
+
echo(
|
|
9124
|
+
`Starting CNPJ DB Loader direct PostgreSQL import using transaction mode ${input.transactionMode}...`
|
|
9125
|
+
),
|
|
9126
|
+
"",
|
|
9127
|
+
...input.transactionMode === "single" ? ["begin;", ""] : []
|
|
9128
|
+
];
|
|
9129
|
+
for (const name of STEP_ORDER) {
|
|
9130
|
+
const currentStep = steps.find((item) => item.name === name);
|
|
9131
|
+
if (!currentStep?.included) {
|
|
9132
|
+
continue;
|
|
9133
|
+
}
|
|
9134
|
+
orchestratorLines.push(
|
|
9135
|
+
echo(
|
|
9136
|
+
`[orchestrator] Running ${currentStep.name} (${currentStep.file})...`
|
|
9137
|
+
),
|
|
9138
|
+
`\\ir ${currentStep.file}`,
|
|
9139
|
+
echo(`[orchestrator] Completed ${currentStep.name}.`),
|
|
9140
|
+
""
|
|
9141
|
+
);
|
|
9142
|
+
}
|
|
9143
|
+
orchestratorLines.push(
|
|
9144
|
+
...input.transactionMode === "single" ? ["commit;", ""] : [],
|
|
9145
|
+
echo("CNPJ DB Loader hybrid PostgreSQL import completed."),
|
|
9146
|
+
""
|
|
9147
|
+
);
|
|
9148
|
+
scripts["import-postgres-direct.sql"] = orchestratorLines.join("\n");
|
|
9149
|
+
return { scripts, steps };
|
|
9150
|
+
}
|
|
9151
|
+
function generatePostgresDirectImportScript(input) {
|
|
9152
|
+
const grouped = csvFilesByDataset(input.files);
|
|
9153
|
+
const lines = [
|
|
9154
|
+
"-- CNPJ DB Loader hybrid PostgreSQL import script",
|
|
9155
|
+
"-- Generated from PostgreSQL-ready CSV files exported by cnpj-db-loader postgres export-csv.",
|
|
9156
|
+
"-- Execute with psql, for example:",
|
|
9157
|
+
'-- psql -d "postgres://postgres:postgres@localhost:5432/cnpj" -f import-postgres-direct.sql',
|
|
9158
|
+
"",
|
|
9159
|
+
"\\set ON_ERROR_STOP on",
|
|
9160
|
+
echo("Starting CNPJ DB Loader hybrid PostgreSQL import..."),
|
|
8600
9161
|
"",
|
|
8601
9162
|
"begin;",
|
|
8602
9163
|
"",
|
|
8603
9164
|
"-- Keep the final schema and seed data managed by sql/schema.sql.",
|
|
8604
|
-
"-- This script
|
|
8605
|
-
"-- transforms values inside PostgreSQL, resets staging tables and upserts final data.",
|
|
9165
|
+
"-- This script only resets staging tables and then upserts final data.",
|
|
8606
9166
|
"truncate table staging_companies restart identity;",
|
|
8607
9167
|
"truncate table staging_establishments restart identity;",
|
|
8608
9168
|
"truncate table staging_partners restart identity;",
|
|
@@ -8610,10 +9170,10 @@ function generatePostgresSanitizedDirectImportScript(input) {
|
|
|
8610
9170
|
""
|
|
8611
9171
|
];
|
|
8612
9172
|
for (const dataset of DOMAIN_DATASETS) {
|
|
8613
|
-
lines.push(...
|
|
9173
|
+
lines.push(...copyDomainSql(dataset, grouped[dataset] ?? []), "");
|
|
8614
9174
|
}
|
|
8615
9175
|
for (const dataset of STAGING_DATASETS) {
|
|
8616
|
-
lines.push(...
|
|
9176
|
+
lines.push(...copyStagingSql(dataset, grouped[dataset] ?? []), "");
|
|
8617
9177
|
}
|
|
8618
9178
|
lines.push(...materializationAndAnalyzeSql());
|
|
8619
9179
|
return lines.join("\n");
|
|
@@ -8624,11 +9184,13 @@ function materializationAndAnalyzeSql() {
|
|
|
8624
9184
|
"",
|
|
8625
9185
|
materializeEstablishmentsSql(),
|
|
8626
9186
|
"",
|
|
9187
|
+
materializeSecondaryCnaesSql(),
|
|
9188
|
+
"",
|
|
8627
9189
|
materializePartnersSql(),
|
|
8628
9190
|
"",
|
|
8629
9191
|
materializeSimplesSql(),
|
|
8630
9192
|
"",
|
|
8631
|
-
"
|
|
9193
|
+
echo("Refreshing planner statistics..."),
|
|
8632
9194
|
"analyze companies;",
|
|
8633
9195
|
"analyze establishments;",
|
|
8634
9196
|
"analyze establishment_secondary_cnaes;",
|
|
@@ -8643,7 +9205,7 @@ function materializationAndAnalyzeSql() {
|
|
|
8643
9205
|
"",
|
|
8644
9206
|
"commit;",
|
|
8645
9207
|
"",
|
|
8646
|
-
"
|
|
9208
|
+
echo("CNPJ DB Loader hybrid PostgreSQL import completed."),
|
|
8647
9209
|
""
|
|
8648
9210
|
];
|
|
8649
9211
|
}
|
|
@@ -8849,7 +9411,30 @@ async function exportPostgresCsvDataset(inputPath, options = {}) {
|
|
|
8849
9411
|
// src/services/postgres-direct/generator.ts
|
|
8850
9412
|
import { mkdir as mkdir9, stat as stat7, writeFile as writeFile6 } from "fs/promises";
|
|
8851
9413
|
import path17 from "path";
|
|
8852
|
-
var DEFAULT_SOURCE_ENCODING = "
|
|
9414
|
+
var DEFAULT_SOURCE_ENCODING = "UTF8";
|
|
9415
|
+
var DEFAULT_TRANSACTION_MODE = "single";
|
|
9416
|
+
var ALL_INCLUDE_TARGETS = [
|
|
9417
|
+
"domains",
|
|
9418
|
+
"companies",
|
|
9419
|
+
"establishments",
|
|
9420
|
+
"partners",
|
|
9421
|
+
"simples",
|
|
9422
|
+
"secondary-cnaes",
|
|
9423
|
+
"indexes",
|
|
9424
|
+
"analyze"
|
|
9425
|
+
];
|
|
9426
|
+
var INCLUDE_TARGETS_BY_DATASET = {
|
|
9427
|
+
companies: "companies",
|
|
9428
|
+
establishments: "establishments",
|
|
9429
|
+
partners: "partners",
|
|
9430
|
+
simples_options: "simples",
|
|
9431
|
+
countries: "domains",
|
|
9432
|
+
cities: "domains",
|
|
9433
|
+
partner_qualifications: "domains",
|
|
9434
|
+
legal_natures: "domains",
|
|
9435
|
+
reasons: "domains",
|
|
9436
|
+
cnaes: "domains"
|
|
9437
|
+
};
|
|
8853
9438
|
function defaultPostgresDirectOutputPath(inputPath) {
|
|
8854
9439
|
const baseName = path17.basename(inputPath);
|
|
8855
9440
|
if (baseName.toLowerCase() === "sanitized") {
|
|
@@ -8858,17 +9443,52 @@ function defaultPostgresDirectOutputPath(inputPath) {
|
|
|
8858
9443
|
return path17.join(path17.dirname(inputPath), `${baseName}-postgres-direct`);
|
|
8859
9444
|
}
|
|
8860
9445
|
function inferNextStep5(scriptPath) {
|
|
8861
|
-
return `psql "postgres://postgres:postgres@localhost:5432/cnpj" -f ${scriptPath.replace(/\\/g, "/")}`;
|
|
9446
|
+
return `psql -d "postgres://postgres:postgres@localhost:5432/cnpj" -f ${scriptPath.replace(/\\/g, "/")}`;
|
|
8862
9447
|
}
|
|
8863
9448
|
function normalizeSourceEncoding(value) {
|
|
8864
9449
|
const encoding = (value ?? DEFAULT_SOURCE_ENCODING).trim();
|
|
8865
9450
|
if (!/^[A-Za-z0-9_-]+$/.test(encoding)) {
|
|
8866
9451
|
throw new ValidationError(
|
|
8867
|
-
`Invalid source encoding: ${value}. Use a PostgreSQL client encoding name such as WIN1252 or
|
|
9452
|
+
`Invalid source encoding: ${value}. Use a PostgreSQL client encoding name such as UTF8, WIN1252 or LATIN1.`
|
|
8868
9453
|
);
|
|
8869
9454
|
}
|
|
8870
9455
|
return encoding.toUpperCase();
|
|
8871
9456
|
}
|
|
9457
|
+
function normalizeTransactionMode(value) {
|
|
9458
|
+
const mode = value ?? DEFAULT_TRANSACTION_MODE;
|
|
9459
|
+
if (!["single", "phase", "none"].includes(mode)) {
|
|
9460
|
+
throw new ValidationError(
|
|
9461
|
+
`Invalid transaction mode: ${String(value)}. Use single, phase or none.`
|
|
9462
|
+
);
|
|
9463
|
+
}
|
|
9464
|
+
return mode;
|
|
9465
|
+
}
|
|
9466
|
+
function isIncludeTarget(value) {
|
|
9467
|
+
return ALL_INCLUDE_TARGETS.includes(value);
|
|
9468
|
+
}
|
|
9469
|
+
function normalizeIncludeTargets(include, dataset) {
|
|
9470
|
+
if (include && include.length > 0) {
|
|
9471
|
+
const unique = [...new Set(include)];
|
|
9472
|
+
const invalid = unique.filter((item) => !isIncludeTarget(item));
|
|
9473
|
+
if (invalid.length > 0) {
|
|
9474
|
+
throw new ValidationError(
|
|
9475
|
+
`Invalid include target(s): ${invalid.join(", ")}. Use ${ALL_INCLUDE_TARGETS.join(", ")}.`
|
|
9476
|
+
);
|
|
9477
|
+
}
|
|
9478
|
+
return unique;
|
|
9479
|
+
}
|
|
9480
|
+
if (dataset) {
|
|
9481
|
+
const target = INCLUDE_TARGETS_BY_DATASET[dataset];
|
|
9482
|
+
if (!target) {
|
|
9483
|
+
return [];
|
|
9484
|
+
}
|
|
9485
|
+
if (target === "establishments") {
|
|
9486
|
+
return ["establishments", "secondary-cnaes", "analyze"];
|
|
9487
|
+
}
|
|
9488
|
+
return [target, "analyze"];
|
|
9489
|
+
}
|
|
9490
|
+
return [...ALL_INCLUDE_TARGETS];
|
|
9491
|
+
}
|
|
8872
9492
|
async function generatePostgresDirectScript(inputPath, options = {}) {
|
|
8873
9493
|
if (options.dataset && !isImportDatasetType(options.dataset)) {
|
|
8874
9494
|
throw new ValidationError(`Unsupported dataset type: ${options.dataset}.`);
|
|
@@ -8884,6 +9504,10 @@ async function generatePostgresDirectScript(inputPath, options = {}) {
|
|
|
8884
9504
|
options.outputPath ?? defaultPostgresDirectOutputPath(validatedPath)
|
|
8885
9505
|
);
|
|
8886
9506
|
const sourceEncoding = normalizeSourceEncoding(options.sourceEncoding);
|
|
9507
|
+
const transactionMode = normalizeTransactionMode(options.transactionMode);
|
|
9508
|
+
const include = normalizeIncludeTargets(options.include, options.dataset);
|
|
9509
|
+
const skipIndexes = options.skipIndexes ?? false;
|
|
9510
|
+
const skipAnalyze = options.skipAnalyze ?? false;
|
|
8887
9511
|
const inspected = await inspectFiles(validatedPath);
|
|
8888
9512
|
const recognizedFiles = inspected.entries.filter((entry) => entry.entryKind === "file").flatMap((entry) => {
|
|
8889
9513
|
if (!isImportDatasetType(entry.inferredType)) {
|
|
@@ -8911,7 +9535,11 @@ async function generatePostgresDirectScript(inputPath, options = {}) {
|
|
|
8911
9535
|
outputPath,
|
|
8912
9536
|
totalFiles: recognizedFiles.length,
|
|
8913
9537
|
datasets,
|
|
8914
|
-
sourceEncoding
|
|
9538
|
+
sourceEncoding,
|
|
9539
|
+
transactionMode,
|
|
9540
|
+
include,
|
|
9541
|
+
skipIndexes,
|
|
9542
|
+
skipAnalyze
|
|
8915
9543
|
});
|
|
8916
9544
|
await mkdir9(outputPath, { recursive: true });
|
|
8917
9545
|
const sourceFiles = [];
|
|
@@ -8947,11 +9575,21 @@ async function generatePostgresDirectScript(inputPath, options = {}) {
|
|
|
8947
9575
|
}
|
|
8948
9576
|
const scriptName = options.scriptName ?? "import-postgres-direct.sql";
|
|
8949
9577
|
const scriptPath = path17.join(outputPath, scriptName);
|
|
8950
|
-
const
|
|
9578
|
+
const generated = generatePostgresDirectScriptFiles({
|
|
8951
9579
|
files: sourceFiles,
|
|
8952
|
-
sourceEncoding
|
|
9580
|
+
sourceEncoding,
|
|
9581
|
+
transactionMode,
|
|
9582
|
+
include,
|
|
9583
|
+
skipIndexes,
|
|
9584
|
+
skipAnalyze
|
|
8953
9585
|
});
|
|
8954
|
-
|
|
9586
|
+
const scriptFiles = [];
|
|
9587
|
+
for (const [fileName, script] of Object.entries(generated.scripts)) {
|
|
9588
|
+
const outputFileName = fileName === "import-postgres-direct.sql" ? scriptName : fileName;
|
|
9589
|
+
const outputFilePath = path17.join(outputPath, outputFileName);
|
|
9590
|
+
await writeFile6(outputFilePath, script, "utf8");
|
|
9591
|
+
scriptFiles.push(outputFilePath);
|
|
9592
|
+
}
|
|
8955
9593
|
const manifestPath = path17.join(outputPath, "manifest.json");
|
|
8956
9594
|
const summaryDatasets = [...summariesByDataset.values()].sort(
|
|
8957
9595
|
(left, right) => IMPORT_ORDER.indexOf(left.dataset) - IMPORT_ORDER.indexOf(right.dataset)
|
|
@@ -8963,13 +9601,19 @@ async function generatePostgresDirectScript(inputPath, options = {}) {
|
|
|
8963
9601
|
const manifest = {
|
|
8964
9602
|
generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
8965
9603
|
mode: "direct-sanitized-script",
|
|
9604
|
+
transactionMode,
|
|
9605
|
+
include,
|
|
9606
|
+
skipIndexes,
|
|
9607
|
+
skipAnalyze,
|
|
8966
9608
|
inputPath: path17.resolve(inputPath),
|
|
8967
9609
|
validatedPath,
|
|
8968
9610
|
outputPath,
|
|
8969
9611
|
scriptPath,
|
|
9612
|
+
scriptFiles,
|
|
8970
9613
|
sourceEncoding,
|
|
8971
9614
|
totalFiles: sourceFiles.length,
|
|
8972
9615
|
totalBytes,
|
|
9616
|
+
steps: generated.steps,
|
|
8973
9617
|
datasets: summaryDatasets
|
|
8974
9618
|
};
|
|
8975
9619
|
await writeFile6(
|
|
@@ -8992,14 +9636,19 @@ async function generatePostgresDirectScript(inputPath, options = {}) {
|
|
|
8992
9636
|
scriptPath,
|
|
8993
9637
|
manifestPath,
|
|
8994
9638
|
sourceEncoding,
|
|
9639
|
+
transactionMode,
|
|
8995
9640
|
totalFiles: sourceFiles.length,
|
|
8996
9641
|
totalBytes,
|
|
8997
9642
|
datasets: summaryDatasets,
|
|
9643
|
+
scriptFiles,
|
|
9644
|
+
steps: generated.steps,
|
|
8998
9645
|
warnings: [
|
|
8999
9646
|
...validation.ok ? [] : validation.errors,
|
|
9000
9647
|
"This script imports sanitized Receita files directly with psql \\copy. It avoids rewriting the full dataset into a second CSV tree.",
|
|
9001
|
-
"The generated
|
|
9002
|
-
"
|
|
9648
|
+
"The generated scripts expect the database schema generated by cnpj-db-loader to be applied before execution.",
|
|
9649
|
+
"The direct PostgreSQL script now defaults to UTF8 because the sanitize command writes clean UTF-8 files.",
|
|
9650
|
+
"Use --source-encoding WIN1252 or LATIN1 only when generating scripts for legacy sanitized files produced by older loader versions.",
|
|
9651
|
+
"The generated import is now modular. Use import-postgres-direct.sql as the orchestrator or run individual phase scripts manually."
|
|
9003
9652
|
],
|
|
9004
9653
|
nextStep: inferNextStep5(scriptPath)
|
|
9005
9654
|
};
|