@danielarndt0/cnpj-db-loader 2.4.0-beta.2 → 2.4.0-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/dist/cli.js +556 -77
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +20 -1
- package/dist/index.js +519 -76
- package/dist/index.js.map +1 -1
- package/docs/commands.md +5 -1
- package/docs/postgres-direct.md +235 -41
- package/docs/releases/v2.4.0-beta.3.md +42 -0
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -8411,6 +8411,18 @@ var STAGING_TABLE_BY_DATASET3 = {
|
|
|
8411
8411
|
partners: "staging_partners",
|
|
8412
8412
|
simples_options: "staging_simples_options"
|
|
8413
8413
|
};
|
|
8414
|
+
var STEP_ORDER = [
|
|
8415
|
+
"setup",
|
|
8416
|
+
"load-domains",
|
|
8417
|
+
"load-companies",
|
|
8418
|
+
"load-establishments",
|
|
8419
|
+
"load-partners",
|
|
8420
|
+
"load-simples",
|
|
8421
|
+
"materialize",
|
|
8422
|
+
"materialize-secondary-cnaes",
|
|
8423
|
+
"indexes",
|
|
8424
|
+
"analyze"
|
|
8425
|
+
];
|
|
8414
8426
|
function quoteSqlLiteral(value) {
|
|
8415
8427
|
return `'${value.replace(/'/g, "''")}'`;
|
|
8416
8428
|
}
|
|
@@ -8428,6 +8440,9 @@ function receitaCopyCommand(tableName, columns, filePath) {
|
|
|
8428
8440
|
const normalizedFilePath = normalizePathForPsql(filePath);
|
|
8429
8441
|
return `\\copy ${tableName} (${columns.join(", ")}) from ${quoteSqlLiteral(normalizedFilePath)} with (format csv, header false, delimiter ';', quote '"', escape '"')`;
|
|
8430
8442
|
}
|
|
8443
|
+
function echo(message) {
|
|
8444
|
+
return `\\echo ${quoteSqlLiteral(message)}`;
|
|
8445
|
+
}
|
|
8431
8446
|
function datasetColumns(dataset) {
|
|
8432
8447
|
return DATASET_LAYOUTS[dataset].fields.map((field) => field.columnName);
|
|
8433
8448
|
}
|
|
@@ -8454,7 +8469,7 @@ function partnerDedupeExpression(alias) {
|
|
|
8454
8469
|
function materializeCompaniesSql() {
|
|
8455
8470
|
const columns = companiesLayout.fields.map((field) => field.columnName);
|
|
8456
8471
|
return [
|
|
8457
|
-
"
|
|
8472
|
+
echo("[materialize] Materializing companies..."),
|
|
8458
8473
|
"with source as (",
|
|
8459
8474
|
" select",
|
|
8460
8475
|
` ${columns.map((column) => `source.${column}`).join(",\n ")},`,
|
|
@@ -8468,7 +8483,8 @@ function materializeCompaniesSql() {
|
|
|
8468
8483
|
`select ${columns.join(", ")}`,
|
|
8469
8484
|
"from deduped",
|
|
8470
8485
|
"on conflict (cnpj_root) do update set",
|
|
8471
|
-
` ${updateAssignments(columns, ["cnpj_root"])}
|
|
8486
|
+
` ${updateAssignments(columns, ["cnpj_root"])};`,
|
|
8487
|
+
echo("[materialize] Companies materialization completed.")
|
|
8472
8488
|
].join("\n");
|
|
8473
8489
|
}
|
|
8474
8490
|
function materializeEstablishmentsSql() {
|
|
@@ -8477,7 +8493,7 @@ function materializeEstablishmentsSql() {
|
|
|
8477
8493
|
);
|
|
8478
8494
|
const insertColumns = [...baseColumns, "cnpj_full"];
|
|
8479
8495
|
return [
|
|
8480
|
-
"
|
|
8496
|
+
echo("[materialize] Materializing establishments..."),
|
|
8481
8497
|
"with source as (",
|
|
8482
8498
|
" select",
|
|
8483
8499
|
` ${baseColumns.map((column) => `source.${column}`).join(",\n ")},`,
|
|
@@ -8487,14 +8503,29 @@ function materializeEstablishmentsSql() {
|
|
|
8487
8503
|
"),",
|
|
8488
8504
|
"deduped as (",
|
|
8489
8505
|
" select * from source where dedupe_rank = 1",
|
|
8506
|
+
")",
|
|
8507
|
+
`insert into establishments (${insertColumns.join(", ")})`,
|
|
8508
|
+
`select ${insertColumns.join(", ")}`,
|
|
8509
|
+
"from deduped",
|
|
8510
|
+
"on conflict (cnpj_full) do update set",
|
|
8511
|
+
` ${updateAssignments(insertColumns, ["cnpj_root", "cnpj_order", "cnpj_check_digits", "cnpj_full"])};`,
|
|
8512
|
+
echo("[materialize] Establishments materialization completed.")
|
|
8513
|
+
].join("\n");
|
|
8514
|
+
}
|
|
8515
|
+
function materializeSecondaryCnaesSql() {
|
|
8516
|
+
return [
|
|
8517
|
+
echo(
|
|
8518
|
+
"[materialize-secondary-cnaes] Materializing establishment secondary CNAEs..."
|
|
8519
|
+
),
|
|
8520
|
+
"with source as (",
|
|
8521
|
+
" select",
|
|
8522
|
+
" staging.cnpj_root || staging.cnpj_order || staging.cnpj_check_digits as cnpj_full,",
|
|
8523
|
+
" staging.secondary_cnaes_raw,",
|
|
8524
|
+
" row_number() over (partition by staging.cnpj_root || staging.cnpj_order || staging.cnpj_check_digits order by staging.staging_id desc) as dedupe_rank",
|
|
8525
|
+
" from staging_establishments staging",
|
|
8490
8526
|
"),",
|
|
8491
|
-
"
|
|
8492
|
-
|
|
8493
|
-
` select ${insertColumns.join(", ")}`,
|
|
8494
|
-
" from deduped",
|
|
8495
|
-
" on conflict (cnpj_full) do update set",
|
|
8496
|
-
` ${updateAssignments(insertColumns, ["cnpj_root", "cnpj_order", "cnpj_check_digits", "cnpj_full"])}`,
|
|
8497
|
-
" returning cnpj_full",
|
|
8527
|
+
"deduped as (",
|
|
8528
|
+
" select * from source where dedupe_rank = 1",
|
|
8498
8529
|
"),",
|
|
8499
8530
|
"deleted_secondary_cnaes as (",
|
|
8500
8531
|
" delete from establishment_secondary_cnaes target",
|
|
@@ -8515,14 +8546,17 @@ function materializeEstablishmentsSql() {
|
|
|
8515
8546
|
"insert into establishment_secondary_cnaes (cnpj_full, cnae_code)",
|
|
8516
8547
|
"select cnpj_full, cnae_code",
|
|
8517
8548
|
"from secondary_cnaes_source",
|
|
8518
|
-
"on conflict (cnpj_full, cnae_code) do nothing;"
|
|
8549
|
+
"on conflict (cnpj_full, cnae_code) do nothing;",
|
|
8550
|
+
echo(
|
|
8551
|
+
"[materialize-secondary-cnaes] Secondary CNAEs materialization completed."
|
|
8552
|
+
)
|
|
8519
8553
|
].join("\n");
|
|
8520
8554
|
}
|
|
8521
8555
|
function materializePartnersSql() {
|
|
8522
8556
|
const baseColumns = partnersLayout.fields.map((field) => field.columnName);
|
|
8523
8557
|
const insertColumns = [...baseColumns, "partner_dedupe_key"];
|
|
8524
8558
|
return [
|
|
8525
|
-
"
|
|
8559
|
+
echo("[materialize] Materializing partners..."),
|
|
8526
8560
|
"with source as (",
|
|
8527
8561
|
" select",
|
|
8528
8562
|
` ${baseColumns.map((column) => `source.${column}`).join(",\n ")},`,
|
|
@@ -8542,13 +8576,14 @@ function materializePartnersSql() {
|
|
|
8542
8576
|
`select ${insertColumns.join(", ")}`,
|
|
8543
8577
|
"from deduped",
|
|
8544
8578
|
"on conflict (partner_dedupe_key) do update set",
|
|
8545
|
-
` ${updateAssignments(insertColumns, ["partner_dedupe_key"])}
|
|
8579
|
+
` ${updateAssignments(insertColumns, ["partner_dedupe_key"])};`,
|
|
8580
|
+
echo("[materialize] Partners materialization completed.")
|
|
8546
8581
|
].join("\n");
|
|
8547
8582
|
}
|
|
8548
8583
|
function materializeSimplesSql() {
|
|
8549
8584
|
const columns = simplesLayout.fields.map((field) => field.columnName);
|
|
8550
8585
|
return [
|
|
8551
|
-
"
|
|
8586
|
+
echo("[materialize] Materializing simples options..."),
|
|
8552
8587
|
"with source as (",
|
|
8553
8588
|
" select",
|
|
8554
8589
|
` ${columns.map((column) => `source.${column}`).join(",\n ")},`,
|
|
@@ -8562,7 +8597,8 @@ function materializeSimplesSql() {
|
|
|
8562
8597
|
`select ${columns.join(", ")}`,
|
|
8563
8598
|
"from deduped",
|
|
8564
8599
|
"on conflict (cnpj_root) do update set",
|
|
8565
|
-
` ${updateAssignments(columns, ["cnpj_root"])}
|
|
8600
|
+
` ${updateAssignments(columns, ["cnpj_root"])};`,
|
|
8601
|
+
echo("[materialize] Simples options materialization completed.")
|
|
8566
8602
|
].join("\n");
|
|
8567
8603
|
}
|
|
8568
8604
|
function copyDomainSql(dataset, files) {
|
|
@@ -8572,12 +8608,20 @@ function copyDomainSql(dataset, files) {
|
|
|
8572
8608
|
const columns = datasetColumns(dataset);
|
|
8573
8609
|
const tempTable = `tmp_hybrid_${dataset}`;
|
|
8574
8610
|
const lines = [
|
|
8575
|
-
|
|
8611
|
+
echo(`[load-domains] Loading ${dataset} lookup data...`),
|
|
8576
8612
|
`drop table if exists ${tempTable};`,
|
|
8577
8613
|
`create temporary table ${tempTable} (code text, description text);`
|
|
8578
8614
|
];
|
|
8579
|
-
for (const file of files) {
|
|
8580
|
-
lines.push(
|
|
8615
|
+
for (const [index, file] of files.entries()) {
|
|
8616
|
+
lines.push(
|
|
8617
|
+
echo(
|
|
8618
|
+
`[load-domains] Loading ${dataset} file ${index + 1} of ${files.length}: ${file.relativePath}`
|
|
8619
|
+
),
|
|
8620
|
+
csvCopyCommand(tempTable, columns, file.absolutePath),
|
|
8621
|
+
echo(
|
|
8622
|
+
`[load-domains] Loaded ${dataset} file ${index + 1} of ${files.length}.`
|
|
8623
|
+
)
|
|
8624
|
+
);
|
|
8581
8625
|
}
|
|
8582
8626
|
lines.push(
|
|
8583
8627
|
`insert into ${dataset} (${columns.join(", ")})`,
|
|
@@ -8598,12 +8642,17 @@ function copyStagingSql(dataset, files) {
|
|
|
8598
8642
|
return [];
|
|
8599
8643
|
}
|
|
8600
8644
|
const columns = datasetColumns(dataset);
|
|
8601
|
-
|
|
8602
|
-
|
|
8603
|
-
|
|
8604
|
-
(
|
|
8605
|
-
|
|
8606
|
-
|
|
8645
|
+
const lines = [echo(`[load-${dataset}] Loading ${dataset} staging data...`)];
|
|
8646
|
+
for (const [index, file] of files.entries()) {
|
|
8647
|
+
lines.push(
|
|
8648
|
+
echo(
|
|
8649
|
+
`[load-${dataset}] Loading file ${index + 1} of ${files.length}: ${file.relativePath}`
|
|
8650
|
+
),
|
|
8651
|
+
csvCopyCommand(tableName, columns, file.absolutePath),
|
|
8652
|
+
echo(`[load-${dataset}] Loaded file ${index + 1} of ${files.length}.`)
|
|
8653
|
+
);
|
|
8654
|
+
}
|
|
8655
|
+
return lines;
|
|
8607
8656
|
}
|
|
8608
8657
|
function csvFilesByDataset(files) {
|
|
8609
8658
|
const grouped = {};
|
|
@@ -8629,7 +8678,9 @@ function rawTableName(dataset) {
|
|
|
8629
8678
|
function createRawTempTableSql(dataset) {
|
|
8630
8679
|
const columns = DATASET_LAYOUTS[dataset].fields.map((field) => ` ${quoteIdentifier(field.columnName)} text`).join(",\n");
|
|
8631
8680
|
return [
|
|
8681
|
+
"set client_min_messages to warning;",
|
|
8632
8682
|
`drop table if exists ${rawTableName(dataset)};`,
|
|
8683
|
+
"reset client_min_messages;",
|
|
8633
8684
|
`create temporary table ${rawTableName(dataset)} (`,
|
|
8634
8685
|
columns,
|
|
8635
8686
|
");"
|
|
@@ -8711,11 +8762,21 @@ function rawDomainSql(dataset, files) {
|
|
|
8711
8762
|
const columns = layout.fields.map((field) => field.columnName);
|
|
8712
8763
|
const tableName = rawTableName(dataset);
|
|
8713
8764
|
const lines = [
|
|
8714
|
-
|
|
8765
|
+
echo(
|
|
8766
|
+
`[load-domains] Loading ${dataset} lookup data directly from sanitized Receita files...`
|
|
8767
|
+
),
|
|
8715
8768
|
createRawTempTableSql(dataset)
|
|
8716
8769
|
];
|
|
8717
|
-
for (const file of files) {
|
|
8718
|
-
lines.push(
|
|
8770
|
+
for (const [index, file] of files.entries()) {
|
|
8771
|
+
lines.push(
|
|
8772
|
+
echo(
|
|
8773
|
+
`[load-domains] Loading ${dataset} file ${index + 1} of ${files.length}: ${file.relativePath}`
|
|
8774
|
+
),
|
|
8775
|
+
receitaCopyCommand(tableName, columns, file.absolutePath),
|
|
8776
|
+
echo(
|
|
8777
|
+
`[load-domains] Loaded ${dataset} file ${index + 1} of ${files.length}.`
|
|
8778
|
+
)
|
|
8779
|
+
);
|
|
8719
8780
|
}
|
|
8720
8781
|
lines.push(
|
|
8721
8782
|
`insert into ${dataset} (${columns.join(", ")})`,
|
|
@@ -8725,7 +8786,8 @@ function rawDomainSql(dataset, files) {
|
|
|
8725
8786
|
`from ${tableName}`,
|
|
8726
8787
|
"where nullif(btrim(code), '') is not null",
|
|
8727
8788
|
"order by code",
|
|
8728
|
-
"on conflict (code) do update set description = excluded.description;"
|
|
8789
|
+
"on conflict (code) do update set description = excluded.description;",
|
|
8790
|
+
echo(`[load-domains] ${dataset} lookup data completed.`)
|
|
8729
8791
|
);
|
|
8730
8792
|
return lines;
|
|
8731
8793
|
}
|
|
@@ -8744,70 +8806,363 @@ function rawStagingSql(dataset, files) {
|
|
|
8744
8806
|
const expressions = layout.fields.map(
|
|
8745
8807
|
(field) => ` ${fieldExpression(dataset, field, alias)} as ${field.columnName}`
|
|
8746
8808
|
);
|
|
8809
|
+
const stepName = loadStepName(dataset);
|
|
8747
8810
|
const lines = [
|
|
8748
|
-
|
|
8811
|
+
echo(
|
|
8812
|
+
`[${stepName}] Loading ${dataset} staging data directly from sanitized Receita files...`
|
|
8813
|
+
),
|
|
8814
|
+
`truncate table ${targetTable} restart identity;`,
|
|
8749
8815
|
createRawTempTableSql(dataset)
|
|
8750
8816
|
];
|
|
8751
|
-
for (const file of files) {
|
|
8752
|
-
lines.push(
|
|
8817
|
+
for (const [index, file] of files.entries()) {
|
|
8818
|
+
lines.push(
|
|
8819
|
+
echo(
|
|
8820
|
+
`[${stepName}] Loading file ${index + 1} of ${files.length}: ${file.relativePath}`
|
|
8821
|
+
),
|
|
8822
|
+
receitaCopyCommand(tableName, columns, file.absolutePath),
|
|
8823
|
+
echo(`[${stepName}] Loaded file ${index + 1} of ${files.length}.`)
|
|
8824
|
+
);
|
|
8753
8825
|
}
|
|
8754
8826
|
lines.push(
|
|
8827
|
+
echo(
|
|
8828
|
+
`[${stepName}] Transforming ${dataset} raw rows into ${targetTable}...`
|
|
8829
|
+
),
|
|
8755
8830
|
`insert into ${targetTable} (${columns.join(", ")})`,
|
|
8756
8831
|
"select",
|
|
8757
8832
|
expressions.join(",\n"),
|
|
8758
|
-
`from ${tableName} ${alias}
|
|
8833
|
+
`from ${tableName} ${alias};`,
|
|
8834
|
+
echo(`[${stepName}] ${dataset} staging load completed.`)
|
|
8759
8835
|
);
|
|
8760
8836
|
return lines;
|
|
8761
8837
|
}
|
|
8762
|
-
function
|
|
8763
|
-
|
|
8764
|
-
|
|
8765
|
-
|
|
8766
|
-
|
|
8767
|
-
|
|
8768
|
-
|
|
8769
|
-
|
|
8838
|
+
function loadStepName(dataset) {
|
|
8839
|
+
switch (dataset) {
|
|
8840
|
+
case "companies":
|
|
8841
|
+
return "load-companies";
|
|
8842
|
+
case "establishments":
|
|
8843
|
+
return "load-establishments";
|
|
8844
|
+
case "partners":
|
|
8845
|
+
return "load-partners";
|
|
8846
|
+
case "simples_options":
|
|
8847
|
+
return "load-simples";
|
|
8848
|
+
default:
|
|
8849
|
+
return `load-${dataset}`;
|
|
8850
|
+
}
|
|
8851
|
+
}
|
|
8852
|
+
function scriptHeader(title, sourceEncoding) {
|
|
8853
|
+
return [
|
|
8854
|
+
`-- ${title}`,
|
|
8855
|
+
"-- Generated by cnpj-db-loader postgres generate-script.",
|
|
8770
8856
|
"\\set ON_ERROR_STOP on",
|
|
8771
|
-
|
|
8772
|
-
|
|
8773
|
-
|
|
8774
|
-
|
|
8775
|
-
|
|
8776
|
-
|
|
8777
|
-
"truncate table staging_companies restart identity;",
|
|
8778
|
-
"truncate table staging_establishments restart identity;",
|
|
8779
|
-
"truncate table staging_partners restart identity;",
|
|
8780
|
-
"truncate table staging_simples_options restart identity;",
|
|
8857
|
+
...sourceEncoding ? [
|
|
8858
|
+
echo(
|
|
8859
|
+
`Using source file encoding ${sourceEncoding} for psql copy operations...`
|
|
8860
|
+
),
|
|
8861
|
+
`set client_encoding to ${quoteSqlLiteral(sourceEncoding)};`
|
|
8862
|
+
] : [],
|
|
8781
8863
|
""
|
|
8782
8864
|
];
|
|
8783
|
-
|
|
8784
|
-
|
|
8865
|
+
}
|
|
8866
|
+
function wrapTransaction(lines, mode, shouldWrap) {
|
|
8867
|
+
if (!shouldWrap || mode !== "phase") {
|
|
8868
|
+
return [...lines];
|
|
8785
8869
|
}
|
|
8786
|
-
|
|
8787
|
-
|
|
8870
|
+
return ["begin;", "", ...lines, "", "commit;"];
|
|
8871
|
+
}
|
|
8872
|
+
function buildStepScript(title, body, input, wrapInPhaseTransaction) {
|
|
8873
|
+
return [
|
|
8874
|
+
...scriptHeader(title, input.sourceEncoding),
|
|
8875
|
+
...wrapTransaction(body, input.transactionMode, wrapInPhaseTransaction),
|
|
8876
|
+
""
|
|
8877
|
+
].join("\n");
|
|
8878
|
+
}
|
|
8879
|
+
function includeSet(input) {
|
|
8880
|
+
const selected = new Set(input.include);
|
|
8881
|
+
if (input.skipIndexes) {
|
|
8882
|
+
selected.delete("indexes");
|
|
8788
8883
|
}
|
|
8789
|
-
|
|
8790
|
-
|
|
8884
|
+
if (input.skipAnalyze) {
|
|
8885
|
+
selected.delete("analyze");
|
|
8886
|
+
}
|
|
8887
|
+
return selected;
|
|
8888
|
+
}
|
|
8889
|
+
function hasAnyFinalMaterialization(selected) {
|
|
8890
|
+
return selected.has("companies") || selected.has("establishments") || selected.has("partners") || selected.has("simples");
|
|
8891
|
+
}
|
|
8892
|
+
function materializeSql(selected) {
|
|
8893
|
+
const lines = [echo("[materialize] Starting final table materialization...")];
|
|
8894
|
+
if (selected.has("companies")) {
|
|
8895
|
+
lines.push(materializeCompaniesSql(), "");
|
|
8896
|
+
}
|
|
8897
|
+
if (selected.has("establishments")) {
|
|
8898
|
+
lines.push(materializeEstablishmentsSql(), "");
|
|
8899
|
+
}
|
|
8900
|
+
if (selected.has("partners")) {
|
|
8901
|
+
lines.push(materializePartnersSql(), "");
|
|
8902
|
+
}
|
|
8903
|
+
if (selected.has("simples")) {
|
|
8904
|
+
lines.push(materializeSimplesSql(), "");
|
|
8905
|
+
}
|
|
8906
|
+
lines.push(echo("[materialize] Final table materialization completed."));
|
|
8907
|
+
return lines;
|
|
8908
|
+
}
|
|
8909
|
+
function indexesSql() {
|
|
8910
|
+
return [
|
|
8911
|
+
echo(
|
|
8912
|
+
"[indexes] No additional index operations are generated in this beta."
|
|
8913
|
+
),
|
|
8914
|
+
"-- Indexes are expected to be managed by the schema generated by cnpj-db-loader schema generate.",
|
|
8915
|
+
"-- A future fast-rebuild mode may generate DROP/CREATE INDEX operations here."
|
|
8916
|
+
];
|
|
8917
|
+
}
|
|
8918
|
+
function analyzeSql(selected) {
|
|
8919
|
+
const tables = /* @__PURE__ */ new Set();
|
|
8920
|
+
if (selected.has("companies")) {
|
|
8921
|
+
tables.add("companies");
|
|
8922
|
+
}
|
|
8923
|
+
if (selected.has("establishments")) {
|
|
8924
|
+
tables.add("establishments");
|
|
8925
|
+
}
|
|
8926
|
+
if (selected.has("secondary-cnaes")) {
|
|
8927
|
+
tables.add("establishment_secondary_cnaes");
|
|
8928
|
+
}
|
|
8929
|
+
if (selected.has("partners")) {
|
|
8930
|
+
tables.add("partners");
|
|
8931
|
+
}
|
|
8932
|
+
if (selected.has("simples")) {
|
|
8933
|
+
tables.add("simples_options");
|
|
8934
|
+
}
|
|
8935
|
+
if (selected.has("domains")) {
|
|
8936
|
+
for (const dataset of DOMAIN_DATASETS) {
|
|
8937
|
+
tables.add(dataset);
|
|
8938
|
+
}
|
|
8939
|
+
}
|
|
8940
|
+
return [
|
|
8941
|
+
echo("[analyze] Refreshing planner statistics..."),
|
|
8942
|
+
...[...tables].map((table) => `analyze ${table};`),
|
|
8943
|
+
echo("[analyze] Planner statistics refreshed.")
|
|
8944
|
+
];
|
|
8791
8945
|
}
|
|
8792
|
-
function
|
|
8946
|
+
function step(name, file, dependsOn, included) {
|
|
8947
|
+
return { name, file, dependsOn, included };
|
|
8948
|
+
}
|
|
8949
|
+
function generatePostgresDirectScriptFiles(input) {
|
|
8793
8950
|
const grouped = directFilesByDataset(input.files);
|
|
8794
|
-
const
|
|
8795
|
-
|
|
8951
|
+
const selected = includeSet(input);
|
|
8952
|
+
if (!DOMAIN_DATASETS.some((dataset) => (grouped[dataset] ?? []).length > 0)) {
|
|
8953
|
+
selected.delete("domains");
|
|
8954
|
+
}
|
|
8955
|
+
if ((grouped.companies ?? []).length === 0) {
|
|
8956
|
+
selected.delete("companies");
|
|
8957
|
+
}
|
|
8958
|
+
if ((grouped.establishments ?? []).length === 0) {
|
|
8959
|
+
selected.delete("establishments");
|
|
8960
|
+
selected.delete("secondary-cnaes");
|
|
8961
|
+
}
|
|
8962
|
+
if ((grouped.partners ?? []).length === 0) {
|
|
8963
|
+
selected.delete("partners");
|
|
8964
|
+
}
|
|
8965
|
+
if ((grouped.simples_options ?? []).length === 0) {
|
|
8966
|
+
selected.delete("simples");
|
|
8967
|
+
}
|
|
8968
|
+
const scripts = {};
|
|
8969
|
+
const steps = [];
|
|
8970
|
+
const setupIncluded = true;
|
|
8971
|
+
steps.push(step("setup", "setup.sql", [], setupIncluded));
|
|
8972
|
+
scripts["setup.sql"] = [
|
|
8973
|
+
...scriptHeader(
|
|
8974
|
+
"CNPJ DB Loader PostgreSQL direct import setup",
|
|
8975
|
+
input.sourceEncoding
|
|
8976
|
+
),
|
|
8977
|
+
echo("[setup] Preparing PostgreSQL direct import session..."),
|
|
8978
|
+
"-- The database schema must be applied before running these scripts.",
|
|
8979
|
+
"-- This setup script configures the psql session used by the generated orchestrator.",
|
|
8980
|
+
echo("[setup] Setup completed."),
|
|
8981
|
+
""
|
|
8982
|
+
].join("\n");
|
|
8983
|
+
const domainsIncluded = selected.has("domains") && DOMAIN_DATASETS.some((dataset) => (grouped[dataset] ?? []).length > 0);
|
|
8984
|
+
steps.push(
|
|
8985
|
+
step("load-domains", "load-domains.sql", ["setup"], domainsIncluded)
|
|
8986
|
+
);
|
|
8987
|
+
if (domainsIncluded) {
|
|
8988
|
+
const lines = [echo("[load-domains] Starting domain tables load...")];
|
|
8989
|
+
for (const dataset of DOMAIN_DATASETS) {
|
|
8990
|
+
lines.push(...rawDomainSql(dataset, grouped[dataset] ?? []), "");
|
|
8991
|
+
}
|
|
8992
|
+
lines.push(echo("[load-domains] Domain tables load completed."));
|
|
8993
|
+
scripts["load-domains.sql"] = buildStepScript(
|
|
8994
|
+
"CNPJ DB Loader PostgreSQL direct import domains step",
|
|
8995
|
+
lines,
|
|
8996
|
+
input,
|
|
8997
|
+
true
|
|
8998
|
+
);
|
|
8999
|
+
}
|
|
9000
|
+
const datasetSteps = [
|
|
9001
|
+
{
|
|
9002
|
+
dataset: "companies",
|
|
9003
|
+
name: "load-companies",
|
|
9004
|
+
file: "load-companies.sql",
|
|
9005
|
+
include: "companies"
|
|
9006
|
+
},
|
|
9007
|
+
{
|
|
9008
|
+
dataset: "establishments",
|
|
9009
|
+
name: "load-establishments",
|
|
9010
|
+
file: "load-establishments.sql",
|
|
9011
|
+
include: "establishments"
|
|
9012
|
+
},
|
|
9013
|
+
{
|
|
9014
|
+
dataset: "partners",
|
|
9015
|
+
name: "load-partners",
|
|
9016
|
+
file: "load-partners.sql",
|
|
9017
|
+
include: "partners"
|
|
9018
|
+
},
|
|
9019
|
+
{
|
|
9020
|
+
dataset: "simples_options",
|
|
9021
|
+
name: "load-simples",
|
|
9022
|
+
file: "load-simples.sql",
|
|
9023
|
+
include: "simples"
|
|
9024
|
+
}
|
|
9025
|
+
];
|
|
9026
|
+
for (const item of datasetSteps) {
|
|
9027
|
+
const files = grouped[item.dataset] ?? [];
|
|
9028
|
+
const included = selected.has(item.include) && files.length > 0;
|
|
9029
|
+
steps.push(step(item.name, item.file, ["setup"], included));
|
|
9030
|
+
if (included) {
|
|
9031
|
+
scripts[item.file] = buildStepScript(
|
|
9032
|
+
`CNPJ DB Loader PostgreSQL direct import ${item.name} step`,
|
|
9033
|
+
rawStagingSql(item.dataset, files),
|
|
9034
|
+
input,
|
|
9035
|
+
true
|
|
9036
|
+
);
|
|
9037
|
+
}
|
|
9038
|
+
}
|
|
9039
|
+
const materializeIncluded = hasAnyFinalMaterialization(selected);
|
|
9040
|
+
steps.push(
|
|
9041
|
+
step(
|
|
9042
|
+
"materialize",
|
|
9043
|
+
"materialize.sql",
|
|
9044
|
+
datasetSteps.filter((item) => selected.has(item.include)).map((item) => item.name),
|
|
9045
|
+
materializeIncluded
|
|
9046
|
+
)
|
|
9047
|
+
);
|
|
9048
|
+
if (materializeIncluded) {
|
|
9049
|
+
scripts["materialize.sql"] = buildStepScript(
|
|
9050
|
+
"CNPJ DB Loader PostgreSQL direct import materialization step",
|
|
9051
|
+
materializeSql(selected),
|
|
9052
|
+
input,
|
|
9053
|
+
true
|
|
9054
|
+
);
|
|
9055
|
+
}
|
|
9056
|
+
const secondaryIncluded = selected.has("secondary-cnaes") && selected.has("establishments");
|
|
9057
|
+
steps.push(
|
|
9058
|
+
step(
|
|
9059
|
+
"materialize-secondary-cnaes",
|
|
9060
|
+
"materialize-secondary-cnaes.sql",
|
|
9061
|
+
["load-establishments"],
|
|
9062
|
+
secondaryIncluded
|
|
9063
|
+
)
|
|
9064
|
+
);
|
|
9065
|
+
if (secondaryIncluded) {
|
|
9066
|
+
scripts["materialize-secondary-cnaes.sql"] = buildStepScript(
|
|
9067
|
+
"CNPJ DB Loader PostgreSQL direct import secondary CNAEs step",
|
|
9068
|
+
[materializeSecondaryCnaesSql()],
|
|
9069
|
+
input,
|
|
9070
|
+
true
|
|
9071
|
+
);
|
|
9072
|
+
}
|
|
9073
|
+
const indexesIncluded = selected.has("indexes");
|
|
9074
|
+
steps.push(
|
|
9075
|
+
step(
|
|
9076
|
+
"indexes",
|
|
9077
|
+
"indexes.sql",
|
|
9078
|
+
materializeIncluded ? ["materialize"] : ["setup"],
|
|
9079
|
+
indexesIncluded
|
|
9080
|
+
)
|
|
9081
|
+
);
|
|
9082
|
+
if (indexesIncluded) {
|
|
9083
|
+
scripts["indexes.sql"] = buildStepScript(
|
|
9084
|
+
"CNPJ DB Loader PostgreSQL direct import indexes step",
|
|
9085
|
+
indexesSql(),
|
|
9086
|
+
input,
|
|
9087
|
+
true
|
|
9088
|
+
);
|
|
9089
|
+
}
|
|
9090
|
+
const analyzeIncluded = selected.has("analyze");
|
|
9091
|
+
const analyzeDependencies = [
|
|
9092
|
+
...domainsIncluded ? ["load-domains"] : [],
|
|
9093
|
+
...materializeIncluded ? ["materialize"] : [],
|
|
9094
|
+
...secondaryIncluded ? ["materialize-secondary-cnaes"] : []
|
|
9095
|
+
];
|
|
9096
|
+
steps.push(
|
|
9097
|
+
step(
|
|
9098
|
+
"analyze",
|
|
9099
|
+
"analyze.sql",
|
|
9100
|
+
analyzeDependencies.length > 0 ? analyzeDependencies : ["setup"],
|
|
9101
|
+
analyzeIncluded
|
|
9102
|
+
)
|
|
9103
|
+
);
|
|
9104
|
+
if (analyzeIncluded) {
|
|
9105
|
+
scripts["analyze.sql"] = buildStepScript(
|
|
9106
|
+
"CNPJ DB Loader PostgreSQL direct import analyze step",
|
|
9107
|
+
analyzeSql(selected),
|
|
9108
|
+
input,
|
|
9109
|
+
true
|
|
9110
|
+
);
|
|
9111
|
+
}
|
|
9112
|
+
const orchestratorLines = [
|
|
9113
|
+
"-- CNPJ DB Loader direct PostgreSQL import orchestrator",
|
|
8796
9114
|
"-- Generated from sanitized Receita files by cnpj-db-loader postgres generate-script.",
|
|
8797
|
-
"-- This path avoids rewriting the dataset into a second CSV tree.",
|
|
8798
9115
|
"-- Execute with psql, for example:",
|
|
8799
|
-
'-- psql "postgres://postgres:postgres@localhost:5432/cnpj" -f import-postgres-direct.sql',
|
|
9116
|
+
'-- psql -d "postgres://postgres:postgres@localhost:5432/cnpj" -f import-postgres-direct.sql',
|
|
8800
9117
|
"",
|
|
8801
9118
|
"\\set ON_ERROR_STOP on",
|
|
8802
|
-
|
|
9119
|
+
echo(
|
|
9120
|
+
`Using source file encoding ${input.sourceEncoding} for psql copy operations...`
|
|
9121
|
+
),
|
|
8803
9122
|
`set client_encoding to ${quoteSqlLiteral(input.sourceEncoding)};`,
|
|
8804
|
-
|
|
9123
|
+
echo(
|
|
9124
|
+
`Starting CNPJ DB Loader direct PostgreSQL import using transaction mode ${input.transactionMode}...`
|
|
9125
|
+
),
|
|
9126
|
+
"",
|
|
9127
|
+
...input.transactionMode === "single" ? ["begin;", ""] : []
|
|
9128
|
+
];
|
|
9129
|
+
for (const name of STEP_ORDER) {
|
|
9130
|
+
const currentStep = steps.find((item) => item.name === name);
|
|
9131
|
+
if (!currentStep?.included) {
|
|
9132
|
+
continue;
|
|
9133
|
+
}
|
|
9134
|
+
orchestratorLines.push(
|
|
9135
|
+
echo(
|
|
9136
|
+
`[orchestrator] Running ${currentStep.name} (${currentStep.file})...`
|
|
9137
|
+
),
|
|
9138
|
+
`\\ir ${currentStep.file}`,
|
|
9139
|
+
echo(`[orchestrator] Completed ${currentStep.name}.`),
|
|
9140
|
+
""
|
|
9141
|
+
);
|
|
9142
|
+
}
|
|
9143
|
+
orchestratorLines.push(
|
|
9144
|
+
...input.transactionMode === "single" ? ["commit;", ""] : [],
|
|
9145
|
+
echo("CNPJ DB Loader hybrid PostgreSQL import completed."),
|
|
9146
|
+
""
|
|
9147
|
+
);
|
|
9148
|
+
scripts["import-postgres-direct.sql"] = orchestratorLines.join("\n");
|
|
9149
|
+
return { scripts, steps };
|
|
9150
|
+
}
|
|
9151
|
+
function generatePostgresDirectImportScript(input) {
|
|
9152
|
+
const grouped = csvFilesByDataset(input.files);
|
|
9153
|
+
const lines = [
|
|
9154
|
+
"-- CNPJ DB Loader hybrid PostgreSQL import script",
|
|
9155
|
+
"-- Generated from PostgreSQL-ready CSV files exported by cnpj-db-loader postgres export-csv.",
|
|
9156
|
+
"-- Execute with psql, for example:",
|
|
9157
|
+
'-- psql -d "postgres://postgres:postgres@localhost:5432/cnpj" -f import-postgres-direct.sql',
|
|
9158
|
+
"",
|
|
9159
|
+
"\\set ON_ERROR_STOP on",
|
|
9160
|
+
echo("Starting CNPJ DB Loader hybrid PostgreSQL import..."),
|
|
8805
9161
|
"",
|
|
8806
9162
|
"begin;",
|
|
8807
9163
|
"",
|
|
8808
9164
|
"-- Keep the final schema and seed data managed by sql/schema.sql.",
|
|
8809
|
-
"-- This script
|
|
8810
|
-
"-- transforms values inside PostgreSQL, resets staging tables and upserts final data.",
|
|
9165
|
+
"-- This script only resets staging tables and then upserts final data.",
|
|
8811
9166
|
"truncate table staging_companies restart identity;",
|
|
8812
9167
|
"truncate table staging_establishments restart identity;",
|
|
8813
9168
|
"truncate table staging_partners restart identity;",
|
|
@@ -8815,10 +9170,10 @@ function generatePostgresSanitizedDirectImportScript(input) {
|
|
|
8815
9170
|
""
|
|
8816
9171
|
];
|
|
8817
9172
|
for (const dataset of DOMAIN_DATASETS) {
|
|
8818
|
-
lines.push(...
|
|
9173
|
+
lines.push(...copyDomainSql(dataset, grouped[dataset] ?? []), "");
|
|
8819
9174
|
}
|
|
8820
9175
|
for (const dataset of STAGING_DATASETS) {
|
|
8821
|
-
lines.push(...
|
|
9176
|
+
lines.push(...copyStagingSql(dataset, grouped[dataset] ?? []), "");
|
|
8822
9177
|
}
|
|
8823
9178
|
lines.push(...materializationAndAnalyzeSql());
|
|
8824
9179
|
return lines.join("\n");
|
|
@@ -8829,11 +9184,13 @@ function materializationAndAnalyzeSql() {
|
|
|
8829
9184
|
"",
|
|
8830
9185
|
materializeEstablishmentsSql(),
|
|
8831
9186
|
"",
|
|
9187
|
+
materializeSecondaryCnaesSql(),
|
|
9188
|
+
"",
|
|
8832
9189
|
materializePartnersSql(),
|
|
8833
9190
|
"",
|
|
8834
9191
|
materializeSimplesSql(),
|
|
8835
9192
|
"",
|
|
8836
|
-
"
|
|
9193
|
+
echo("Refreshing planner statistics..."),
|
|
8837
9194
|
"analyze companies;",
|
|
8838
9195
|
"analyze establishments;",
|
|
8839
9196
|
"analyze establishment_secondary_cnaes;",
|
|
@@ -8848,7 +9205,7 @@ function materializationAndAnalyzeSql() {
|
|
|
8848
9205
|
"",
|
|
8849
9206
|
"commit;",
|
|
8850
9207
|
"",
|
|
8851
|
-
"
|
|
9208
|
+
echo("CNPJ DB Loader hybrid PostgreSQL import completed."),
|
|
8852
9209
|
""
|
|
8853
9210
|
];
|
|
8854
9211
|
}
|
|
@@ -9055,6 +9412,29 @@ async function exportPostgresCsvDataset(inputPath, options = {}) {
|
|
|
9055
9412
|
import { mkdir as mkdir9, stat as stat7, writeFile as writeFile6 } from "fs/promises";
|
|
9056
9413
|
import path17 from "path";
|
|
9057
9414
|
var DEFAULT_SOURCE_ENCODING = "UTF8";
|
|
9415
|
+
var DEFAULT_TRANSACTION_MODE = "single";
|
|
9416
|
+
var ALL_INCLUDE_TARGETS = [
|
|
9417
|
+
"domains",
|
|
9418
|
+
"companies",
|
|
9419
|
+
"establishments",
|
|
9420
|
+
"partners",
|
|
9421
|
+
"simples",
|
|
9422
|
+
"secondary-cnaes",
|
|
9423
|
+
"indexes",
|
|
9424
|
+
"analyze"
|
|
9425
|
+
];
|
|
9426
|
+
var INCLUDE_TARGETS_BY_DATASET = {
|
|
9427
|
+
companies: "companies",
|
|
9428
|
+
establishments: "establishments",
|
|
9429
|
+
partners: "partners",
|
|
9430
|
+
simples_options: "simples",
|
|
9431
|
+
countries: "domains",
|
|
9432
|
+
cities: "domains",
|
|
9433
|
+
partner_qualifications: "domains",
|
|
9434
|
+
legal_natures: "domains",
|
|
9435
|
+
reasons: "domains",
|
|
9436
|
+
cnaes: "domains"
|
|
9437
|
+
};
|
|
9058
9438
|
function defaultPostgresDirectOutputPath(inputPath) {
|
|
9059
9439
|
const baseName = path17.basename(inputPath);
|
|
9060
9440
|
if (baseName.toLowerCase() === "sanitized") {
|
|
@@ -9063,7 +9443,7 @@ function defaultPostgresDirectOutputPath(inputPath) {
|
|
|
9063
9443
|
return path17.join(path17.dirname(inputPath), `${baseName}-postgres-direct`);
|
|
9064
9444
|
}
|
|
9065
9445
|
function inferNextStep5(scriptPath) {
|
|
9066
|
-
return `psql "postgres://postgres:postgres@localhost:5432/cnpj" -f ${scriptPath.replace(/\\/g, "/")}`;
|
|
9446
|
+
return `psql -d "postgres://postgres:postgres@localhost:5432/cnpj" -f ${scriptPath.replace(/\\/g, "/")}`;
|
|
9067
9447
|
}
|
|
9068
9448
|
function normalizeSourceEncoding(value) {
|
|
9069
9449
|
const encoding = (value ?? DEFAULT_SOURCE_ENCODING).trim();
|
|
@@ -9074,6 +9454,41 @@ function normalizeSourceEncoding(value) {
|
|
|
9074
9454
|
}
|
|
9075
9455
|
return encoding.toUpperCase();
|
|
9076
9456
|
}
|
|
9457
|
+
function normalizeTransactionMode(value) {
|
|
9458
|
+
const mode = value ?? DEFAULT_TRANSACTION_MODE;
|
|
9459
|
+
if (!["single", "phase", "none"].includes(mode)) {
|
|
9460
|
+
throw new ValidationError(
|
|
9461
|
+
`Invalid transaction mode: ${String(value)}. Use single, phase or none.`
|
|
9462
|
+
);
|
|
9463
|
+
}
|
|
9464
|
+
return mode;
|
|
9465
|
+
}
|
|
9466
|
+
function isIncludeTarget(value) {
|
|
9467
|
+
return ALL_INCLUDE_TARGETS.includes(value);
|
|
9468
|
+
}
|
|
9469
|
+
/**
 * Work out which include targets the generated script should cover.
 *
 * Resolution order:
 * 1. A non-empty `include` list wins: it is de-duplicated and validated.
 * 2. Otherwise, when `dataset` is given, the target mapped to it in
 *    `INCLUDE_TARGETS_BY_DATASET` is returned together with "analyze"
 *    (establishments additionally pull in "secondary-cnaes"); an unmapped
 *    dataset yields an empty list.
 * 3. Otherwise a copy of `ALL_INCLUDE_TARGETS` is returned.
 *
 * @param {string[] | null | undefined} include - Explicitly requested targets.
 * @param {string | null | undefined} dataset - Dataset the run is limited to.
 * @returns {string[]} The include targets to generate.
 * @throws {ValidationError} When `include` contains an unknown target.
 */
function normalizeIncludeTargets(include, dataset) {
  const hasExplicitTargets = Boolean(include) && include.length > 0;

  if (hasExplicitTargets) {
    const requestedTargets = Array.from(new Set(include));
    const rejectedTargets = [];
    for (const candidate of requestedTargets) {
      if (!isIncludeTarget(candidate)) {
        rejectedTargets.push(candidate);
      }
    }
    if (rejectedTargets.length > 0) {
      throw new ValidationError(
        `Invalid include target(s): ${rejectedTargets.join(", ")}. Use ${ALL_INCLUDE_TARGETS.join(", ")}.`
      );
    }
    return requestedTargets;
  }

  if (!dataset) {
    // No restriction at all: hand back a fresh copy of every known target.
    return [...ALL_INCLUDE_TARGETS];
  }

  const mappedTarget = INCLUDE_TARGETS_BY_DATASET[dataset];
  if (!mappedTarget) {
    return [];
  }

  return mappedTarget === "establishments"
    ? ["establishments", "secondary-cnaes", "analyze"]
    : [mappedTarget, "analyze"];
}
|
|
9077
9492
|
async function generatePostgresDirectScript(inputPath, options = {}) {
|
|
9078
9493
|
if (options.dataset && !isImportDatasetType(options.dataset)) {
|
|
9079
9494
|
throw new ValidationError(`Unsupported dataset type: ${options.dataset}.`);
|
|
@@ -9089,6 +9504,10 @@ async function generatePostgresDirectScript(inputPath, options = {}) {
|
|
|
9089
9504
|
options.outputPath ?? defaultPostgresDirectOutputPath(validatedPath)
|
|
9090
9505
|
);
|
|
9091
9506
|
const sourceEncoding = normalizeSourceEncoding(options.sourceEncoding);
|
|
9507
|
+
const transactionMode = normalizeTransactionMode(options.transactionMode);
|
|
9508
|
+
const include = normalizeIncludeTargets(options.include, options.dataset);
|
|
9509
|
+
const skipIndexes = options.skipIndexes ?? false;
|
|
9510
|
+
const skipAnalyze = options.skipAnalyze ?? false;
|
|
9092
9511
|
const inspected = await inspectFiles(validatedPath);
|
|
9093
9512
|
const recognizedFiles = inspected.entries.filter((entry) => entry.entryKind === "file").flatMap((entry) => {
|
|
9094
9513
|
if (!isImportDatasetType(entry.inferredType)) {
|
|
@@ -9116,7 +9535,11 @@ async function generatePostgresDirectScript(inputPath, options = {}) {
|
|
|
9116
9535
|
outputPath,
|
|
9117
9536
|
totalFiles: recognizedFiles.length,
|
|
9118
9537
|
datasets,
|
|
9119
|
-
sourceEncoding
|
|
9538
|
+
sourceEncoding,
|
|
9539
|
+
transactionMode,
|
|
9540
|
+
include,
|
|
9541
|
+
skipIndexes,
|
|
9542
|
+
skipAnalyze
|
|
9120
9543
|
});
|
|
9121
9544
|
await mkdir9(outputPath, { recursive: true });
|
|
9122
9545
|
const sourceFiles = [];
|
|
@@ -9152,11 +9575,21 @@ async function generatePostgresDirectScript(inputPath, options = {}) {
|
|
|
9152
9575
|
}
|
|
9153
9576
|
const scriptName = options.scriptName ?? "import-postgres-direct.sql";
|
|
9154
9577
|
const scriptPath = path17.join(outputPath, scriptName);
|
|
9155
|
-
const
|
|
9578
|
+
const generated = generatePostgresDirectScriptFiles({
|
|
9156
9579
|
files: sourceFiles,
|
|
9157
|
-
sourceEncoding
|
|
9580
|
+
sourceEncoding,
|
|
9581
|
+
transactionMode,
|
|
9582
|
+
include,
|
|
9583
|
+
skipIndexes,
|
|
9584
|
+
skipAnalyze
|
|
9158
9585
|
});
|
|
9159
|
-
|
|
9586
|
+
const scriptFiles = [];
|
|
9587
|
+
for (const [fileName, script] of Object.entries(generated.scripts)) {
|
|
9588
|
+
const outputFileName = fileName === "import-postgres-direct.sql" ? scriptName : fileName;
|
|
9589
|
+
const outputFilePath = path17.join(outputPath, outputFileName);
|
|
9590
|
+
await writeFile6(outputFilePath, script, "utf8");
|
|
9591
|
+
scriptFiles.push(outputFilePath);
|
|
9592
|
+
}
|
|
9160
9593
|
const manifestPath = path17.join(outputPath, "manifest.json");
|
|
9161
9594
|
const summaryDatasets = [...summariesByDataset.values()].sort(
|
|
9162
9595
|
(left, right) => IMPORT_ORDER.indexOf(left.dataset) - IMPORT_ORDER.indexOf(right.dataset)
|
|
@@ -9168,13 +9601,19 @@ async function generatePostgresDirectScript(inputPath, options = {}) {
|
|
|
9168
9601
|
const manifest = {
|
|
9169
9602
|
generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
9170
9603
|
mode: "direct-sanitized-script",
|
|
9604
|
+
transactionMode,
|
|
9605
|
+
include,
|
|
9606
|
+
skipIndexes,
|
|
9607
|
+
skipAnalyze,
|
|
9171
9608
|
inputPath: path17.resolve(inputPath),
|
|
9172
9609
|
validatedPath,
|
|
9173
9610
|
outputPath,
|
|
9174
9611
|
scriptPath,
|
|
9612
|
+
scriptFiles,
|
|
9175
9613
|
sourceEncoding,
|
|
9176
9614
|
totalFiles: sourceFiles.length,
|
|
9177
9615
|
totalBytes,
|
|
9616
|
+
steps: generated.steps,
|
|
9178
9617
|
datasets: summaryDatasets
|
|
9179
9618
|
};
|
|
9180
9619
|
await writeFile6(
|
|
@@ -9197,15 +9636,19 @@ async function generatePostgresDirectScript(inputPath, options = {}) {
|
|
|
9197
9636
|
scriptPath,
|
|
9198
9637
|
manifestPath,
|
|
9199
9638
|
sourceEncoding,
|
|
9639
|
+
transactionMode,
|
|
9200
9640
|
totalFiles: sourceFiles.length,
|
|
9201
9641
|
totalBytes,
|
|
9202
9642
|
datasets: summaryDatasets,
|
|
9643
|
+
scriptFiles,
|
|
9644
|
+
steps: generated.steps,
|
|
9203
9645
|
warnings: [
|
|
9204
9646
|
...validation.ok ? [] : validation.errors,
|
|
9205
9647
|
"This script imports sanitized Receita files directly with psql \\copy. It avoids rewriting the full dataset into a second CSV tree.",
|
|
9206
|
-
"The generated
|
|
9648
|
+
"The generated scripts expect the database schema generated by cnpj-db-loader to be applied before execution.",
|
|
9207
9649
|
"The direct PostgreSQL script now defaults to UTF8 because the sanitize command writes clean UTF-8 files.",
|
|
9208
|
-
"Use --source-encoding WIN1252 or LATIN1 only when generating scripts for legacy sanitized files produced by older loader versions."
|
|
9650
|
+
"Use --source-encoding WIN1252 or LATIN1 only when generating scripts for legacy sanitized files produced by older loader versions.",
|
|
9651
|
+
"The generated import is now modular. Use import-postgres-direct.sql as the orchestrator or run individual phase scripts manually."
|
|
9209
9652
|
],
|
|
9210
9653
|
nextStep: inferNextStep5(scriptPath)
|
|
9211
9654
|
};
|