@danielarndt0/cnpj-db-loader 2.3.1 → 2.4.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -3237,6 +3237,15 @@ function createFieldValueParser(dataType) {
3237
3237
  };
3238
3238
  }
3239
3239
  }
3240
/**
 * Converts one raw field into the value that will be written to the database.
 *
 * @param dataType Layout data type handed to `createFieldValueParser`.
 * @param rawValue Raw field text taken from the source line.
 * @returns Whatever the parser created for `dataType` returns for `rawValue`.
 */
function toDatabaseValue(dataType, rawValue) {
  const parseField = createFieldValueParser(dataType);
  return parseField(rawValue);
}
3243
/**
 * Returns the trimmed code, or `fallback` when no usable code is present.
 *
 * @param value Candidate code; anything that is not a string is rejected.
 * @param fallback Value returned for non-strings and blank strings.
 * @returns The trimmed string, or `fallback`.
 */
function normalizeCode(value, fallback) {
  if (typeof value !== "string") {
    return fallback;
  }
  const trimmed = value.trim();
  return trimmed === "" ? fallback : trimmed;
}
3240
3249
  function createPartnerDedupeKeyBuilder(indices) {
3241
3250
  const orderedIndices = [
3242
3251
  indices.cnpjRoot,
@@ -3264,6 +3273,60 @@ function createEstablishmentCnpjFullBuilder(indices) {
3264
3273
  return `${root}${order}${digits}`;
3265
3274
  };
3266
3275
  }
3276
/**
 * Builds the partner dedupe key from a record keyed by column name.
 *
 * The listed columns are read in a fixed order; `null`/`undefined` become an
 * empty string, every other value is stringified and trimmed, and the parts
 * are joined with `|`.
 *
 * @param recordByColumn Partner record keyed by column name.
 * @returns The `|`-separated key.
 */
function buildPartnerDedupeKey(recordByColumn) {
  const keyColumns = [
    "cnpj_root",
    "partner_type_code",
    "partner_name",
    "partner_document",
    "partner_qualification_code",
    "entry_date",
    "country_code",
    "legal_representative_document",
    "legal_representative_name",
    "legal_representative_qualification_code",
    "age_group_code"
  ];
  const parts = [];
  for (const column of keyColumns) {
    const value = recordByColumn[column];
    parts.push(value == null ? "" : String(value).trim());
  }
  return parts.join("|");
}
3291
/**
 * Turns the raw fields of one source line into the ordered values to write.
 *
 * Each field is parsed with `toDatabaseValue` according to the layout, a few
 * code columns receive defaults, and for the "final" write target derived
 * columns are appended when the schema capabilities ask for them.
 *
 * @param dataset Dataset name; "companies", "establishments" and "partners" get special handling.
 * @param layout Layout whose `fields` supply `columnName` and `dataType`.
 * @param rawFields Raw field values, positionally aligned with `layout.fields`.
 * @param schemaCapabilities Flags consulted only when `writeTarget` is "final".
 * @param writeTarget "final" appends derived columns; any other value does not.
 * @returns Values in layout order, optionally followed by one derived value.
 */
function transformRecord(dataset, layout, rawFields, schemaCapabilities, writeTarget) {
  const { fields } = layout;
  // A missing raw field is parsed as an empty string.
  const parsedEntries = fields.map((field, position) => [
    field.columnName,
    toDatabaseValue(field.dataType, rawFields[position] ?? "")
  ]);
  const recordByColumn = Object.fromEntries(parsedEntries);

  switch (dataset) {
    case "companies":
      recordByColumn.company_size_code = normalizeCode(
        recordByColumn.company_size_code,
        "00"
      );
      break;
    case "establishments":
      recordByColumn.branch_type_code = normalizeCode(
        recordByColumn.branch_type_code,
        "1"
      );
      recordByColumn.registration_status_code = normalizeCode(
        recordByColumn.registration_status_code,
        "01"
      );
      break;
    default:
      break;
  }

  const normalizedValues = fields.map(
    ({ columnName }) => recordByColumn[columnName]
  );
  if (writeTarget !== "final") {
    return normalizedValues;
  }
  if (dataset === "establishments" && schemaCapabilities.includeEstablishmentCnpjFullInInsert) {
    const cnpjFull = `${recordByColumn.cnpj_root ?? ""}${recordByColumn.cnpj_order ?? ""}${recordByColumn.cnpj_check_digits ?? ""}`;
    return [...normalizedValues, cnpjFull];
  }
  if (dataset === "partners" && schemaCapabilities.includePartnerDedupeKeyInInsert) {
    return [...normalizedValues, buildPartnerDedupeKey(recordByColumn)];
  }
  return normalizedValues;
}
3267
3330
  function buildParsedPayload(columns, values) {
3268
3331
  return Object.fromEntries(
3269
3332
  columns.map((column, index) => [column, values[index] ?? null])
@@ -3403,7 +3466,7 @@ function createImportRowNormalizer(input2) {
3403
3466
  "cnpj_check_digits"
3404
3467
  )
3405
3468
  }) : null;
3406
- const buildPartnerDedupeKey = appendPartnerDedupeKey ? createPartnerDedupeKeyBuilder({
3469
+ const buildPartnerDedupeKey2 = appendPartnerDedupeKey ? createPartnerDedupeKeyBuilder({
3407
3470
  cnpjRoot: resolveLayoutColumnIndex(input2.layout, "cnpj_root"),
3408
3471
  partnerTypeCode: resolveLayoutColumnIndex(
3409
3472
  input2.layout,
@@ -3463,8 +3526,8 @@ function createImportRowNormalizer(input2) {
3463
3526
  if (buildEstablishmentCnpjFull) {
3464
3527
  values.push(buildEstablishmentCnpjFull(values));
3465
3528
  }
3466
- if (buildPartnerDedupeKey) {
3467
- values.push(buildPartnerDedupeKey(values));
3529
+ if (buildPartnerDedupeKey2) {
3530
+ values.push(buildPartnerDedupeKey2(values));
3468
3531
  }
3469
3532
  return {
3470
3533
  values,
@@ -8065,6 +8128,851 @@ async function syncFederalRevenueDataset(options = {}) {
8065
8128
  );
8066
8129
  }
8067
8130
 
8131
+ // src/services/postgres-direct/exporter.ts
8132
+ import { createWriteStream as createWriteStream3 } from "fs";
8133
+ import { mkdir as mkdir8, writeFile as writeFile5 } from "fs/promises";
8134
+ import path16 from "path";
8135
+
8136
+ // src/services/postgres-direct/csv.ts
8137
/**
 * Formats a single value as one CSV field.
 *
 * `null`/`undefined` become an empty field, dates are written as ISO strings,
 * and text containing a double quote, comma, CR or LF is wrapped in double
 * quotes with embedded quotes doubled.
 *
 * @param value Value to format.
 * @returns The CSV field text.
 */
function formatCsvValue(value) {
  if (value == null) {
    return "";
  }
  const text = value instanceof Date ? value.toISOString() : String(value);
  const needsQuoting = /[",\r\n]/.test(text);
  return needsQuoting ? `"${text.replace(/"/g, '""')}"` : text;
}
8151
/**
 * Formats a list of values as one comma-separated CSV line (no line ending).
 *
 * @param values Values to format with `formatCsvValue`.
 * @returns The joined CSV line.
 */
function formatCsvRow(values) {
  const cells = values.map((value) => formatCsvValue(value));
  return cells.join(",");
}
8154
+
8155
+ // src/services/postgres-direct/script.ts
8156
+ import path15 from "path";
8157
// Datasets that the generated scripts load into staging tables, in load order.
var STAGING_DATASETS = ["companies", "establishments", "partners", "simples_options"];

// Lookup datasets that the generated scripts load before the staging datasets.
var DOMAIN_DATASETS = [
  "partner_qualifications",
  "legal_natures",
  "countries",
  "cities",
  "reasons",
  "cnaes"
];

// Staging table for each staging dataset: the dataset name prefixed with "staging_".
var STAGING_TABLE_BY_DATASET3 = Object.fromEntries(
  STAGING_DATASETS.map((dataset) => [dataset, `staging_${dataset}`])
);
8177
/**
 * Wraps a string in single quotes, doubling any embedded single quote.
 *
 * @param value String to quote.
 * @returns The single-quoted literal.
 */
function quoteSqlLiteral(value) {
  const escaped = value.replace(/'/g, "''");
  return "'" + escaped + "'";
}
8180
/**
 * Wraps a string in double quotes, doubling any embedded double quote.
 *
 * @param value Identifier text to quote.
 * @returns The double-quoted identifier.
 */
function quoteIdentifier(value) {
  const escaped = value.replace(/"/g, '""');
  return '"' + escaped + '"';
}
8183
/**
 * Resolves a file path to an absolute path suitable for a psql `\copy` command.
 *
 * On platforms whose path separator is a backslash the separators are
 * converted to forward slashes. On other platforms a backslash is an ordinary
 * filename character, so the resolved path is returned untouched; rewriting it
 * would make the generated script reference a different file.
 *
 * @param filePath Absolute or relative path to a data file.
 * @returns The absolute path, using forward slashes as separators.
 */
function normalizePathForPsql(filePath) {
  const absolutePath = path15.resolve(filePath);
  if (path15.sep !== "\\") {
    return absolutePath;
  }
  return absolutePath.replace(/\\/g, "/");
}
8186
/**
 * Builds the psql `\copy` command that loads an exported CSV file
 * (header row, comma delimiter, empty field read as null).
 *
 * @param tableName Target table, interpolated as-is.
 * @param columns Column names, interpolated as-is and comma-joined.
 * @param filePath Path of the CSV file; normalized and quoted as a literal.
 * @returns The `\copy` command line.
 */
function csvCopyCommand(tableName, columns, filePath) {
  const source = quoteSqlLiteral(normalizePathForPsql(filePath));
  const columnList = columns.join(", ");
  const copyOptions = `format csv, header true, delimiter ',', quote '"', escape '"', null ''`;
  return `\\copy ${tableName} (${columnList}) from ${source} with (${copyOptions})`;
}
8190
/**
 * Builds the psql `\copy` command that loads a source file in CSV format
 * with no header row and `;` as the delimiter.
 *
 * @param tableName Target table, interpolated as-is.
 * @param columns Column names, interpolated as-is and comma-joined.
 * @param filePath Path of the source file; normalized and quoted as a literal.
 * @returns The `\copy` command line.
 */
function receitaCopyCommand(tableName, columns, filePath) {
  const source = quoteSqlLiteral(normalizePathForPsql(filePath));
  const columnList = columns.join(", ");
  const copyOptions = `format csv, header false, delimiter ';', quote '"', escape '"'`;
  return `\\copy ${tableName} (${columnList}) from ${source} with (${copyOptions})`;
}
8194
/**
 * Lists the column names of a dataset, in layout order.
 *
 * @param dataset Key into `DATASET_LAYOUTS`.
 * @returns The `columnName` of every layout field.
 */
function datasetColumns(dataset) {
  const { fields } = DATASET_LAYOUTS[dataset];
  return fields.map(({ columnName }) => columnName);
}
8197
/**
 * Builds the assignment list for an `on conflict ... do update set` clause.
 *
 * Every column not listed in `excludedColumns` is assigned from `excluded`,
 * and `updated_at = now()` is always appended as the last assignment.
 *
 * @param columns Candidate column names.
 * @param excludedColumns Columns to leave out of the assignment list.
 * @returns The assignments joined by a comma and a line break.
 */
function updateAssignments(columns, excludedColumns) {
  const assignments = [];
  for (const column of columns) {
    if (excludedColumns.includes(column)) {
      continue;
    }
    assignments.push(`${column} = excluded.${column}`);
  }
  assignments.push("updated_at = now()");
  return assignments.join(",\n ");
}
8200
/**
 * Builds the SQL expression that computes the partner dedupe key.
 *
 * The expression is an `md5(...)` over the key columns, each coalesced to an
 * empty string and separated by `|`. `entry_date` contributes the text of its
 * difference from `date '2000-01-01'`.
 *
 * @param alias Table alias used to qualify every column.
 * @returns The multi-line SQL expression.
 */
function partnerDedupeExpression(alias) {
  const textPart = (column) => `coalesce(${alias}.${column}, '')`;
  const keyParts = [
    textPart("cnpj_root"),
    textPart("partner_type_code"),
    textPart("partner_name"),
    textPart("partner_document"),
    textPart("partner_qualification_code"),
    `coalesce((${alias}.entry_date - date '2000-01-01')::text, '')`,
    textPart("country_code"),
    textPart("legal_representative_document"),
    textPart("legal_representative_name"),
    textPart("legal_representative_qualification_code"),
    textPart("age_group_code")
  ];
  // Every part sits on its own line; all but the last end with the `|` separator.
  const body = keyParts.map((part) => ` ${part}`).join(" || '|' ||\n");
  return `md5(\n${body}\n)`;
}
8217
/**
 * Builds the SQL that upserts `companies` from `staging_companies`.
 *
 * Staging rows are ranked per `cnpj_root` by descending `staging_id` and only
 * the first row of each group is inserted; conflicts on `cnpj_root` update the
 * remaining columns.
 *
 * @returns The psql echo line followed by the upsert statement.
 */
function materializeCompaniesSql() {
  const columns = companiesLayout.fields.map(({ columnName }) => columnName);
  const columnList = columns.join(", ");
  const sourceColumns = columns.map((column) => `source.${column}`).join(",\n ");
  const conflictAssignments = updateAssignments(columns, ["cnpj_root"]);
  const statement = [
    "\\echo 'Materializing companies...'",
    "with source as (",
    " select",
    ` ${sourceColumns},`,
    " row_number() over (partition by source.cnpj_root order by source.staging_id desc) as dedupe_rank",
    " from staging_companies source",
    "),",
    "deduped as (",
    " select * from source where dedupe_rank = 1",
    ")",
    `insert into companies (${columnList})`,
    `select ${columnList}`,
    "from deduped",
    "on conflict (cnpj_root) do update set",
    ` ${conflictAssignments};`
  ];
  return statement.join("\n");
}
8237
/**
 * Builds the SQL that upserts `establishments` from `staging_establishments`
 * and rebuilds the secondary CNAE rows of every establishment it touched.
 *
 * The work is emitted as separate statements that run in order:
 *   1. materialize the deduplicated staging rows (latest `staging_id` per
 *      `cnpj_full`) into a temporary table;
 *   2. upsert `establishments`;
 *   3. delete the existing secondary CNAEs of those establishments;
 *   4. insert the secondary CNAEs parsed from `secondary_cnaes_raw`.
 *
 * Steps 3 and 4 must not share one statement: data-modifying CTEs and the
 * main query run against the same snapshot in an unpredictable order, so an
 * insert that hits `on conflict do nothing` on a pre-existing row could be
 * followed by the delete of that very row, losing it on re-imports.
 *
 * @returns The psql echo line followed by the SQL statements.
 */
function materializeEstablishmentsSql() {
  const baseColumns = establishmentsLayout.fields.map(
    (field) => field.columnName
  );
  const insertColumns = [...baseColumns, "cnpj_full"];
  const insertColumnList = insertColumns.join(", ");
  const dedupedTable = "tmp_hybrid_deduped_establishments";
  const cnpjFullExpression = "source.cnpj_root || source.cnpj_order || source.cnpj_check_digits";
  return [
    "\\echo 'Materializing establishments and secondary CNAEs...'",
    // Same drop-then-create convention as the other temporary tables in the script.
    `drop table if exists ${dedupedTable};`,
    `create temporary table ${dedupedTable} as`,
    "with source as (",
    " select",
    ` ${baseColumns.map((column) => `source.${column}`).join(",\n ")},`,
    ` ${cnpjFullExpression} as cnpj_full,`,
    ` row_number() over (partition by ${cnpjFullExpression} order by source.staging_id desc) as dedupe_rank`,
    " from staging_establishments source",
    ")",
    `select ${insertColumnList}`,
    "from source",
    "where dedupe_rank = 1;",
    "",
    `insert into establishments (${insertColumnList})`,
    `select ${insertColumnList}`,
    `from ${dedupedTable}`,
    "on conflict (cnpj_full) do update set",
    ` ${updateAssignments(insertColumns, ["cnpj_root", "cnpj_order", "cnpj_check_digits", "cnpj_full"])};`,
    "",
    "delete from establishment_secondary_cnaes target",
    `using ${dedupedTable} source_keys`,
    "where target.cnpj_full = source_keys.cnpj_full;",
    "",
    "insert into establishment_secondary_cnaes (cnpj_full, cnae_code)",
    "select distinct",
    " deduped.cnpj_full,",
    " btrim(cnae_code) as cnae_code",
    `from ${dedupedTable} deduped`,
    "cross join lateral unnest(string_to_array(deduped.secondary_cnaes_raw, ',')) as cnae_code",
    "where deduped.secondary_cnaes_raw is not null",
    " and deduped.secondary_cnaes_raw <> ''",
    " and btrim(cnae_code) <> ''",
    "on conflict (cnpj_full, cnae_code) do nothing;",
    "",
    `drop table if exists ${dedupedTable};`
  ].join("\n");
}
8284
/**
 * Builds the SQL that upserts `partners` from `staging_partners`.
 *
 * Each staging row gets a `partner_dedupe_key` from `partnerDedupeExpression`;
 * rows are ranked per key and only the first row of each group is inserted.
 * Conflicts on `partner_dedupe_key` update the remaining columns.
 *
 * @returns The psql echo line followed by the upsert statement.
 */
function materializePartnersSql() {
  const baseColumns = partnersLayout.fields.map(({ columnName }) => columnName);
  const insertColumns = [...baseColumns, "partner_dedupe_key"];
  const insertColumnList = insertColumns.join(", ");
  const sourceColumns = baseColumns.map((column) => `source.${column}`).join(",\n ");
  const dedupeKey = partnerDedupeExpression("source");
  const conflictAssignments = updateAssignments(insertColumns, ["partner_dedupe_key"]);
  const statement = [
    "\\echo 'Materializing partners...'",
    "with source as (",
    " select",
    ` ${sourceColumns},`,
    ` ${dedupeKey} as partner_dedupe_key`,
    " from staging_partners source",
    "),",
    "ranked as (",
    " select",
    " source.*,",
    " row_number() over (partition by source.partner_dedupe_key order by source.cnpj_root asc) as dedupe_rank",
    " from source",
    "),",
    "deduped as (",
    " select * from ranked where dedupe_rank = 1",
    ")",
    `insert into partners (${insertColumnList})`,
    `select ${insertColumnList}`,
    "from deduped",
    "on conflict (partner_dedupe_key) do update set",
    ` ${conflictAssignments};`
  ];
  return statement.join("\n");
}
8311
/**
 * Builds the SQL that upserts `simples_options` from `staging_simples_options`.
 *
 * Staging rows are ranked per `cnpj_root` by descending `staging_id` and only
 * the first row of each group is inserted; conflicts on `cnpj_root` update the
 * remaining columns.
 *
 * @returns The psql echo line followed by the upsert statement.
 */
function materializeSimplesSql() {
  const columns = simplesLayout.fields.map(({ columnName }) => columnName);
  const columnList = columns.join(", ");
  const sourceColumns = columns.map((column) => `source.${column}`).join(",\n ");
  const conflictAssignments = updateAssignments(columns, ["cnpj_root"]);
  const statement = [
    "\\echo 'Materializing simples options...'",
    "with source as (",
    " select",
    ` ${sourceColumns},`,
    " row_number() over (partition by source.cnpj_root order by source.staging_id desc) as dedupe_rank",
    " from staging_simples_options source",
    "),",
    "deduped as (",
    " select * from source where dedupe_rank = 1",
    ")",
    `insert into simples_options (${columnList})`,
    `select ${columnList}`,
    "from deduped",
    "on conflict (cnpj_root) do update set",
    ` ${conflictAssignments};`
  ];
  return statement.join("\n");
}
8331
/**
 * Builds the script lines that load one lookup dataset from exported CSV files.
 *
 * The files are copied into a temporary `(code, description)` table, then one
 * row per non-empty `code` is upserted into the table named after the dataset.
 *
 * @param dataset Lookup dataset name; also used as the target table name.
 * @param files Exported files; each supplies `absolutePath`.
 * @returns The script lines, or an empty array when there are no files.
 */
function copyDomainSql(dataset, files) {
  if (files.length === 0) {
    return [];
  }
  const columns = datasetColumns(dataset);
  const columnList = columns.join(", ");
  const tempTable = `tmp_hybrid_${dataset}`;
  const copyCommands = files.map(
    (file) => csvCopyCommand(tempTable, columns, file.absolutePath)
  );
  return [
    `\\echo 'Loading ${dataset} lookup data...'`,
    `drop table if exists ${tempTable};`,
    `create temporary table ${tempTable} (code text, description text);`,
    ...copyCommands,
    `insert into ${dataset} (${columnList})`,
    `select distinct on (code) ${columnList}`,
    `from ${tempTable}`,
    "where code is not null and code <> ''",
    "order by code",
    "on conflict (code) do update set description = excluded.description;"
  ];
}
8355
/**
 * Builds the script lines that copy exported CSV files into a staging table.
 *
 * @param dataset Dataset name, looked up in `STAGING_TABLE_BY_DATASET3`.
 * @param files Exported files; each supplies `absolutePath`.
 * @returns The echo line plus one `\copy` per file, or an empty array when
 *   there are no files or the dataset has no staging table.
 */
function copyStagingSql(dataset, files) {
  if (files.length === 0) {
    return [];
  }
  const tableName = STAGING_TABLE_BY_DATASET3[dataset];
  if (!tableName) {
    return [];
  }
  const columns = datasetColumns(dataset);
  const lines = [`\\echo 'Loading ${dataset} staging data...'`];
  files.forEach((file) => {
    lines.push(csvCopyCommand(tableName, columns, file.absolutePath));
  });
  return lines;
}
8371
/**
 * Groups files by their `dataset` property, preserving input order per group.
 *
 * @param files Iterable of file descriptors with a `dataset` property.
 * @returns An object mapping each dataset to the array of its files.
 */
function csvFilesByDataset(files) {
  const grouped = {};
  for (const file of files) {
    const bucket = grouped[file.dataset];
    if (bucket == null) {
      grouped[file.dataset] = [file];
    } else {
      bucket.push(file);
    }
  }
  return grouped;
}
8380
/**
 * Groups source files by their `dataset` property, preserving input order per group.
 *
 * @param files Iterable of file descriptors with a `dataset` property.
 * @returns An object mapping each dataset to the array of its files.
 */
function directFilesByDataset(files) {
  const grouped = {};
  for (const file of files) {
    const key = file.dataset;
    if (grouped[key] == null) {
      grouped[key] = [];
    }
    grouped[key].push(file);
  }
  return grouped;
}
8389
/**
 * Names the temporary table that receives a dataset's raw, untyped rows.
 *
 * @param dataset Dataset name.
 * @returns The dataset name prefixed with `tmp_hybrid_raw_`.
 */
function rawTableName(dataset) {
  const prefix = "tmp_hybrid_raw_";
  return `${prefix}${dataset}`;
}
8392
/**
 * Builds the SQL that (re)creates the temporary raw table of a dataset,
 * with one quoted `text` column per layout field.
 *
 * @param dataset Key into `DATASET_LAYOUTS`.
 * @returns The `drop table if exists` and `create temporary table` statements.
 */
function createRawTempTableSql(dataset) {
  const columnDefinitions = [];
  for (const field of DATASET_LAYOUTS[dataset].fields) {
    columnDefinitions.push(` ${quoteIdentifier(field.columnName)} text`);
  }
  const tableName = rawTableName(dataset);
  const statements = [
    `drop table if exists ${tableName};`,
    `create temporary table ${tableName} (`,
    columnDefinitions.join(",\n"),
    ");"
  ];
  return statements.join("\n");
}
8401
/**
 * Builds the SQL expression for a text column: the trimmed value, with an
 * empty result turned into null.
 *
 * @param alias Table alias qualifying the column.
 * @param column Column name; quoted as an identifier.
 * @returns The SQL expression.
 */
function textExpression(alias, column) {
  const qualifiedColumn = `${alias}.${quoteIdentifier(column)}`;
  return `nullif(btrim(${qualifiedColumn}), '')`;
}
8404
/**
 * Builds the SQL expression that converts a raw text column to a date.
 *
 * Empty values and `00000000` yield null, eight-digit values are parsed as
 * `YYYYMMDD`, and anything else yields null.
 *
 * @param alias Table alias qualifying the column.
 * @param column Column name; quoted as an identifier.
 * @returns A single-line `case` expression.
 */
function dateExpression(alias, column) {
  const trimmed = `btrim(${alias}.${quoteIdentifier(column)})`;
  const branches = [
    ` when ${trimmed} = '' or ${trimmed} = '00000000' then null`,
    ` when ${trimmed} ~ '^\\d{8}$' then to_date(${trimmed}, 'YYYYMMDD')`
  ];
  return ["case", ...branches, " else null", "end"].join(" ");
}
8414
/**
 * Builds the SQL expression that converts a raw text column to `numeric`.
 *
 * Empty values yield null. A value containing both `,` and `.` has its dots
 * removed and its comma turned into a dot; a value containing only `,` has the
 * comma turned into a dot; any other value is cast as-is.
 *
 * @param alias Table alias qualifying the column.
 * @param column Column name; quoted as an identifier.
 * @returns A single-line `case` expression.
 */
function numericExpression(alias, column) {
  const trimmed = `btrim(${alias}.${quoteIdentifier(column)})`;
  const branches = [
    ` when ${trimmed} = '' then null`,
    ` when ${trimmed} like '%,%' and ${trimmed} like '%.%' then replace(replace(${trimmed}, '.', ''), ',', '.')::numeric`,
    ` when ${trimmed} like '%,%' then replace(${trimmed}, ',', '.')::numeric`
  ];
  return ["case", ...branches, ` else ${trimmed}::numeric`, "end"].join(" ");
}
8425
/**
 * Builds the SQL expression that converts a raw text column to `integer`.
 *
 * Empty values yield null, optionally signed digit strings are cast, and
 * anything else yields null.
 *
 * @param alias Table alias qualifying the column.
 * @param column Column name; quoted as an identifier.
 * @returns A single-line `case` expression.
 */
function integerExpression(alias, column) {
  const trimmed = `btrim(${alias}.${quoteIdentifier(column)})`;
  const branches = [
    ` when ${trimmed} = '' then null`,
    ` when ${trimmed} ~ '^-?\\d+$' then ${trimmed}::integer`
  ];
  return ["case", ...branches, " else null", "end"].join(" ");
}
8435
/**
 * Builds the SQL expression that converts a raw text column to a boolean.
 *
 * The trimmed, lower-cased value maps to true for `1`, `true`, `t`, `y`,
 * `yes`, `s`; to false for `0`, `false`, `f`, `n`, `no`; otherwise to null.
 *
 * @param alias Table alias qualifying the column.
 * @param column Column name; quoted as an identifier.
 * @returns A single-line `case` expression.
 */
function booleanExpression(alias, column) {
  const normalized = `lower(btrim(${alias}.${quoteIdentifier(column)}))`;
  const branches = [
    ` when ${normalized} in ('1', 'true', 't', 'y', 'yes', 's') then true`,
    ` when ${normalized} in ('0', 'false', 'f', 'n', 'no') then false`
  ];
  return ["case", ...branches, " else null", "end"].join(" ");
}
8445
/**
 * Picks the SQL expression that converts one raw column to its typed value.
 *
 * Three code columns fall back to a default when empty
 * (`companies.company_size_code` to '00', `establishments.branch_type_code`
 * to '1', `establishments.registration_status_code` to '01'). Every other
 * column is converted according to `field.dataType`, with text as the default.
 *
 * @param dataset Dataset the field belongs to.
 * @param field Layout field supplying `columnName` and `dataType`.
 * @param alias Table alias qualifying the column.
 * @returns The SQL expression.
 */
function fieldExpression(dataset, field, alias) {
  const column = field.columnName;

  let fallbackCode = null;
  if (dataset === "companies") {
    if (column === "company_size_code") {
      fallbackCode = "00";
    }
  } else if (dataset === "establishments") {
    if (column === "branch_type_code") {
      fallbackCode = "1";
    } else if (column === "registration_status_code") {
      fallbackCode = "01";
    }
  }
  if (fallbackCode !== null) {
    return `coalesce(${textExpression(alias, column)}, '${fallbackCode}')`;
  }

  const typedBuilders = new Map([
    ["date", dateExpression],
    ["numeric", numericExpression],
    ["integer", integerExpression],
    ["boolean", booleanExpression]
  ]);
  const buildExpression = typedBuilders.get(field.dataType) ?? textExpression;
  return buildExpression(alias, column);
}
8469
/**
 * Builds the script lines that load one lookup dataset straight from source files.
 *
 * The files are copied into the dataset's temporary raw table, then one row
 * per non-empty trimmed `code` is upserted into the table named after the
 * dataset.
 *
 * @param dataset Lookup dataset name; also used as the target table name.
 * @param files Source files; each supplies `absolutePath`.
 * @returns The script lines, or an empty array when there are no files.
 */
function rawDomainSql(dataset, files) {
  if (files.length === 0) {
    return [];
  }
  const { fields } = DATASET_LAYOUTS[dataset];
  const columns = fields.map(({ columnName }) => columnName);
  const tableName = rawTableName(dataset);
  const copyCommands = files.map(
    (file) => receitaCopyCommand(tableName, columns, file.absolutePath)
  );
  return [
    `\\echo 'Loading ${dataset} lookup data directly from sanitized Receita files...'`,
    createRawTempTableSql(dataset),
    ...copyCommands,
    `insert into ${dataset} (${columns.join(", ")})`,
    "select distinct on (code)",
    " nullif(btrim(code), '') as code,",
    " nullif(btrim(description), '') as description",
    `from ${tableName}`,
    "where nullif(btrim(code), '') is not null",
    "order by code",
    "on conflict (code) do update set description = excluded.description;"
  ];
}
8495
/**
 * Builds the script lines that load one dataset straight from source files
 * into its staging table.
 *
 * The files are copied into the dataset's temporary raw table, and the staging
 * table is then filled by selecting one typed expression per layout field.
 *
 * @param dataset Dataset name, looked up in `STAGING_TABLE_BY_DATASET3`.
 * @param files Source files; each supplies `absolutePath`.
 * @returns The script lines, or an empty array when there are no files or the
 *   dataset has no staging table.
 */
function rawStagingSql(dataset, files) {
  if (files.length === 0) {
    return [];
  }
  const targetTable = STAGING_TABLE_BY_DATASET3[dataset];
  if (!targetTable) {
    return [];
  }
  const { fields } = DATASET_LAYOUTS[dataset];
  const columns = fields.map(({ columnName }) => columnName);
  const tableName = rawTableName(dataset);
  const alias = "source";
  const selectList = fields
    .map((field) => ` ${fieldExpression(dataset, field, alias)} as ${field.columnName}`)
    .join(",\n");
  const copyCommands = files.map(
    (file) => receitaCopyCommand(tableName, columns, file.absolutePath)
  );
  return [
    `\\echo 'Loading ${dataset} staging data directly from sanitized Receita files...'`,
    createRawTempTableSql(dataset),
    ...copyCommands,
    `insert into ${targetTable} (${columns.join(", ")})`,
    "select",
    selectList,
    `from ${tableName} ${alias};`
  ];
}
8525
/**
 * Generates the psql script that imports previously exported CSV files.
 *
 * The script opens a transaction, truncates the staging tables, loads the
 * lookup datasets, loads the staging datasets, and ends with the shared
 * materialization/analyze/commit section.
 *
 * @param input2 Object whose `files` lists the exported files (each with
 *   `dataset` and `absolutePath`).
 * @returns The complete script text.
 */
function generatePostgresDirectImportScript(input2) {
  const filesByDataset = csvFilesByDataset(input2.files);
  const filesFor = (dataset) => filesByDataset[dataset] ?? [];
  const preamble = [
    "-- CNPJ DB Loader hybrid PostgreSQL import script",
    "-- Generated from PostgreSQL-ready CSV files exported by cnpj-db-loader postgres export-csv.",
    "-- Execute with psql, for example:",
    '-- psql "postgres://postgres:postgres@localhost:5432/cnpj" -f import-postgres-direct.sql',
    "",
    "\\set ON_ERROR_STOP on",
    "\\echo 'Starting CNPJ DB Loader hybrid PostgreSQL import...'",
    "",
    "begin;",
    "",
    "-- Keep the final schema and seed data managed by sql/schema.sql.",
    "-- This script only resets staging tables and then upserts final data.",
    "truncate table staging_companies restart identity;",
    "truncate table staging_establishments restart identity;",
    "truncate table staging_partners restart identity;",
    "truncate table staging_simples_options restart identity;",
    ""
  ];
  // Every dataset section is followed by a blank line, even when it is empty.
  const lookupSection = DOMAIN_DATASETS.flatMap(
    (dataset) => [...copyDomainSql(dataset, filesFor(dataset)), ""]
  );
  const stagingSection = STAGING_DATASETS.flatMap(
    (dataset) => [...copyStagingSql(dataset, filesFor(dataset)), ""]
  );
  const closingSection = materializationAndAnalyzeSql();
  return [...preamble, ...lookupSection, ...stagingSection, ...closingSection].join("\n");
}
8555
/**
 * Generates the psql script that imports source files directly, without an
 * intermediate CSV export.
 *
 * The script sets the client encoding, opens a transaction, truncates the
 * staging tables, loads lookup and staging datasets through temporary raw
 * tables, and ends with the shared materialization/analyze/commit section.
 *
 * @param input2 Object with `files` (each with `dataset` and `absolutePath`)
 *   and `sourceEncoding` (written into `set client_encoding`).
 * @returns The complete script text.
 */
function generatePostgresSanitizedDirectImportScript(input2) {
  const filesByDataset = directFilesByDataset(input2.files);
  const filesFor = (dataset) => filesByDataset[dataset] ?? [];
  const preamble = [
    "-- CNPJ DB Loader direct PostgreSQL import script",
    "-- Generated from sanitized Receita files by cnpj-db-loader postgres generate-script.",
    "-- This path avoids rewriting the dataset into a second CSV tree.",
    "-- Execute with psql, for example:",
    '-- psql "postgres://postgres:postgres@localhost:5432/cnpj" -f import-postgres-direct.sql',
    "",
    "\\set ON_ERROR_STOP on",
    `\\echo 'Using source file encoding ${input2.sourceEncoding} for psql copy operations...'`,
    `set client_encoding to ${quoteSqlLiteral(input2.sourceEncoding)};`,
    "\\echo 'Starting CNPJ DB Loader direct PostgreSQL import from sanitized files...'",
    "",
    "begin;",
    "",
    "-- Keep the final schema and seed data managed by sql/schema.sql.",
    "-- This script copies sanitized Receita files into temporary raw tables,",
    "-- transforms values inside PostgreSQL, resets staging tables and upserts final data.",
    "truncate table staging_companies restart identity;",
    "truncate table staging_establishments restart identity;",
    "truncate table staging_partners restart identity;",
    "truncate table staging_simples_options restart identity;",
    ""
  ];
  // Every dataset section is followed by a blank line, even when it is empty.
  const lookupSection = DOMAIN_DATASETS.flatMap(
    (dataset) => [...rawDomainSql(dataset, filesFor(dataset)), ""]
  );
  const stagingSection = STAGING_DATASETS.flatMap(
    (dataset) => [...rawStagingSql(dataset, filesFor(dataset)), ""]
  );
  const closingSection = materializationAndAnalyzeSql();
  return [...preamble, ...lookupSection, ...stagingSection, ...closingSection].join("\n");
}
8589
/**
 * Builds the closing section shared by both generated scripts: the four
 * materialization statements, an `analyze` per final and lookup table,
 * the `commit`, and the completion message.
 *
 * @returns The script lines of the closing section.
 */
function materializationAndAnalyzeSql() {
  const analyzedTables = [
    "companies",
    "establishments",
    "establishment_secondary_cnaes",
    "partners",
    "simples_options",
    "cnaes",
    "cities",
    "countries",
    "legal_natures",
    "partner_qualifications",
    "reasons"
  ];
  const materializations = [
    materializeCompaniesSql(),
    materializeEstablishmentsSql(),
    materializePartnersSql(),
    materializeSimplesSql()
  ];
  return [
    // Each materialization statement is followed by a blank line.
    ...materializations.flatMap((statement) => [statement, ""]),
    "\\echo 'Refreshing planner statistics...'",
    ...analyzedTables.map((table) => `analyze ${table};`),
    "",
    "commit;",
    "",
    "\\echo 'CNPJ DB Loader hybrid PostgreSQL import completed.'",
    ""
  ];
}
8618
+
8619
+ // src/services/postgres-direct/exporter.ts
8620
// Schema capability flags passed to transformRecord by the CSV exporter.
var POSTGRES_DIRECT_SCHEMA_CAPABILITIES = {
  // Consulted by transformRecord only for the "final" write target.
  includeEstablishmentCnpjFullInInsert: true,
  includePartnerDedupeKeyInInsert: true,
  // Not read anywhere in this part of the file.
  includeEstablishmentSecondaryCnaesTable: true,
  requiresLookupReconciliation: false
};
8626
/**
 * Derives the default export directory: a sibling of the input directory
 * named after it with a `-postgres-csv` suffix.
 *
 * @param inputPath Path of the input directory.
 * @returns The default output directory path.
 */
function defaultPostgresCsvOutputPath(inputPath) {
  const parentDirectory = path16.dirname(inputPath);
  const folderName = `${path16.basename(inputPath)}-postgres-csv`;
  return path16.join(parentDirectory, folderName);
}
8630
/**
 * Replaces the extension of a relative path with `.csv`, keeping its directory.
 *
 * Falls back to the full base name, then to `dataset`, when the parsed name is
 * empty.
 *
 * @param relativePath Relative path of the source file.
 * @returns The relative path of the CSV file.
 */
function normalizeOutputFileName(relativePath) {
  const { dir, name, base } = path16.parse(relativePath);
  const stem = name || base || "dataset";
  return path16.join(dir, `${stem}.csv`);
}
8635
/**
 * Locates the CSV file of a source file inside the export directory:
 * `<outputPath>/<dataset>/<relative path with a .csv extension>`.
 *
 * @param outputPath Root of the export directory.
 * @param dataset Dataset name, used as a sub-directory.
 * @param relativePath Relative path of the source file.
 * @returns The output file path.
 */
function resolveDatasetOutputPath(outputPath, dataset, relativePath) {
  const csvRelativePath = normalizeOutputFileName(relativePath);
  return path16.join(outputPath, dataset, csvRelativePath);
}
8638
/**
 * Builds the suggested psql command for running the generated script.
 *
 * @param scriptPath Path of the generated script; backslashes are shown as `/`.
 * @returns The command line to display to the user.
 */
function inferNextStep4(scriptPath) {
  const displayPath = scriptPath.replace(/\\/g, "/");
  return 'psql "postgres://postgres:postgres@localhost:5432/cnpj" -f ' + displayPath;
}
8641
/**
 * Streams one source file into a CSV file with a header row and returns the
 * number of data rows written.
 *
 * Blank source lines are skipped. Every other line is parsed, normalized to
 * the layout's field count and transformed for the "staging" write target.
 *
 * Stream handling:
 * - an `error` listener is attached as soon as the stream is created, so a
 *   failure while writing (for example the file cannot be opened) rejects this
 *   function instead of raising an unhandled `error` event or leaving the
 *   final wait pending forever;
 * - when `write()` reports a full buffer the loop waits for `drain`, so large
 *   inputs do not accumulate in memory.
 *
 * @param input2 Object with `dataset` (key into `DATASET_LAYOUTS`),
 *   `inputFile` (source path) and `outputFile` (CSV path to create).
 * @returns The number of data rows written (the header is not counted).
 * @throws The parse/transform error or the stream error that interrupted the export.
 */
async function writeCsvFile(input2) {
  const layout = DATASET_LAYOUTS[input2.dataset];
  const columns = layout.fields.map((field) => field.columnName);
  await mkdir8(path16.dirname(input2.outputFile), { recursive: true });
  const output2 = createWriteStream3(input2.outputFile, { encoding: "utf8" });

  // Remember the first stream error; the listener stays attached for the stream's lifetime.
  let streamError = null;
  output2.on("error", (error) => {
    if (streamError === null) {
      streamError = error;
    }
  });

  // Resolves once the stream can take more data, or has failed or closed.
  const waitUntilWritable = () => new Promise((resolve2) => {
    const settle = () => {
      output2.off("drain", settle);
      output2.off("error", settle);
      output2.off("close", settle);
      resolve2();
    };
    output2.on("drain", settle);
    output2.on("error", settle);
    output2.on("close", settle);
  });

  const writeLine = async (line) => {
    if (streamError !== null) {
      throw streamError;
    }
    if (!output2.write(`${line}\n`)) {
      await waitUntilWritable();
    }
    if (streamError !== null) {
      throw streamError;
    }
  };

  let rows = 0;
  try {
    await writeLine(formatCsvRow(columns));
    for await (const sourceLine of readImportSourceLines(input2.inputFile)) {
      if (sourceLine.rawLine.trim() === "") {
        continue;
      }
      const parsed = parseImportSourceLine(sourceLine);
      const normalizedFields = normalizeFieldCount(
        parsed.fields,
        layout.fields.length,
        input2.inputFile,
        parsed.lineNumber
      );
      const values = transformRecord(
        input2.dataset,
        layout,
        normalizedFields,
        POSTGRES_DIRECT_SCHEMA_CAPABILITIES,
        "staging"
      );
      await writeLine(formatCsvRow(values));
      rows += 1;
    }
  } catch (error) {
    // Release the file handle; the caller needs the error that stopped the export.
    output2.destroy();
    throw error;
  }

  // Flush and close, surfacing any error raised while finishing.
  await new Promise((resolve2, reject) => {
    if (streamError !== null) {
      reject(streamError);
      return;
    }
    output2.once("finish", () => resolve2());
    output2.once("error", (error) => reject(error));
    output2.end();
  });
  return rows;
}
8681
/**
 * Exports the recognized dataset files of an input directory as CSV files and
 * writes the matching psql import script and a `manifest.json` next to them.
 *
 * Progress is reported through `options.onProgress` with the event kinds
 * `start`, `file_start`, `file_finish` and `finish`.
 *
 * @param inputPath Directory to export; it is validated first.
 * @param options Optional settings: `dataset` (restrict to one dataset),
 *   `outputPath`, `scriptName` (defaults to `import-postgres-direct.sql`) and
 *   `onProgress`.
 * @returns A summary with the resolved paths, totals, per-dataset summaries,
 *   warnings and the suggested next command.
 * @throws ValidationError when the dataset is unsupported, the input directory
 *   fails validation, or no recognized dataset file is found.
 */
async function exportPostgresCsvDataset(inputPath, options = {}) {
  if (options.dataset && !isImportDatasetType(options.dataset)) {
    throw new ValidationError(`Unsupported dataset type: ${options.dataset}.`);
  }
  const validation = await validateInputDirectory(inputPath);
  if (!validation.ok) {
    throw new ValidationError(
      `The input directory is not ready for PostgreSQL CSV export. ${validation.errors.join(" ")}`
    );
  }
  const { validatedPath } = validation;
  const outputPath = path16.resolve(
    options.outputPath ?? defaultPostgresCsvOutputPath(validatedPath)
  );
  const byImportOrder = (left, right) => IMPORT_ORDER.indexOf(left) - IMPORT_ORDER.indexOf(right);

  const inspected = await inspectFiles(validatedPath);
  // Keep files whose inferred type is an import dataset (and the requested one, if any).
  const isSelected = (entry) => {
    if (!isImportDatasetType(entry.inferredType)) {
      return false;
    }
    return !(options.dataset && entry.inferredType !== options.dataset);
  };
  const recognizedFiles = inspected.entries
    .filter((entry) => entry.entryKind === "file")
    .filter(isSelected)
    .map((entry) => ({ ...entry, inferredType: entry.inferredType }))
    .sort(sortEntries);
  if (recognizedFiles.length === 0) {
    throw new ValidationError(
      "No recognized dataset files were found for PostgreSQL CSV export."
    );
  }
  const totalFiles = recognizedFiles.length;
  const datasets = [
    ...new Set(recognizedFiles.map((entry) => entry.inferredType))
  ].sort(byImportOrder);
  options.onProgress?.({
    kind: "start",
    inputPath: path16.resolve(inputPath),
    validatedPath,
    outputPath,
    totalFiles,
    datasets
  });

  const exportedFiles = [];
  const summariesByDataset = /* @__PURE__ */ new Map();
  let fileIndex = 0;
  for (const entry of recognizedFiles) {
    fileIndex += 1;
    const dataset = entry.inferredType;
    const inputFile = path16.join(validatedPath, entry.relativePath);
    const outputFile = resolveDatasetOutputPath(
      outputPath,
      dataset,
      entry.relativePath
    );
    options.onProgress?.({
      kind: "file_start",
      dataset,
      fileIndex,
      totalFiles,
      inputFile: buildDisplayPath(inputFile),
      outputFile
    });
    const rowCount = await writeCsvFile({ dataset, inputFile, outputFile });
    exportedFiles.push({
      dataset,
      absolutePath: outputFile,
      relativePath: path16.relative(outputPath, outputFile),
      rowCount
    });
    let summary = summariesByDataset.get(dataset);
    if (summary == null) {
      summary = { dataset, files: 0, rows: 0, outputFiles: [] };
      summariesByDataset.set(dataset, summary);
    }
    summary.files += 1;
    summary.rows += rowCount;
    summary.outputFiles.push(outputFile);
    options.onProgress?.({
      kind: "file_finish",
      dataset,
      fileIndex,
      totalFiles,
      inputFile: buildDisplayPath(inputFile),
      outputFile,
      rows: rowCount
    });
  }

  const scriptPath = path16.join(
    outputPath,
    options.scriptName ?? "import-postgres-direct.sql"
  );
  await writeFile5(
    scriptPath,
    generatePostgresDirectImportScript({ files: exportedFiles }),
    "utf8"
  );

  const manifestPath = path16.join(outputPath, "manifest.json");
  const summaryDatasets = [...summariesByDataset.values()].sort(
    (left, right) => byImportOrder(left.dataset, right.dataset)
  );
  let totalRows = 0;
  for (const item of summaryDatasets) {
    totalRows += item.rows;
  }
  const manifest = {
    generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
    inputPath: path16.resolve(inputPath),
    validatedPath,
    outputPath,
    scriptPath,
    totalFiles: exportedFiles.length,
    totalRows,
    datasets: summaryDatasets
  };
  await writeFile5(manifestPath, `${JSON.stringify(manifest, null, 2)}\n`, "utf8");

  options.onProgress?.({
    kind: "finish",
    outputPath,
    scriptPath,
    totalFiles: exportedFiles.length,
    totalRows
  });
  return {
    inputPath: path16.resolve(inputPath),
    validatedPath,
    outputPath,
    scriptPath,
    manifestPath,
    totalFiles: exportedFiles.length,
    totalRows,
    datasets: summaryDatasets,
    warnings: [
      "PostgreSQL-ready CSV export is intended for hybrid bulk imports after extraction, validation and sanitization.",
      "The generated SQL script resets staging tables and then upserts final tables. Review it before running against production databases."
    ],
    nextStep: inferNextStep4(scriptPath)
  };
}
8816
+
8817
+ // src/services/postgres-direct/generator.ts
8818
+ import { mkdir as mkdir9, stat as stat7, writeFile as writeFile6 } from "fs/promises";
8819
+ import path17 from "path";
8820
// Encoding name used by normalizeSourceEncoding when the caller supplies none.
var DEFAULT_SOURCE_ENCODING = "WIN1252";
8821
/**
 * Derives the default output directory for direct script generation.
 *
 * An input directory named `sanitized` (in any letter case) maps to a sibling
 * `postgres-direct`; any other name maps to a sibling with a
 * `-postgres-direct` suffix.
 *
 * @param inputPath Path of the input directory.
 * @returns The default output directory path.
 */
function defaultPostgresDirectOutputPath(inputPath) {
  const parentDirectory = path17.dirname(inputPath);
  const baseName = path17.basename(inputPath);
  const folderName = baseName.toLowerCase() === "sanitized"
    ? "postgres-direct"
    : `${baseName}-postgres-direct`;
  return path17.join(parentDirectory, folderName);
}
8828
/**
 * Builds the suggested psql command for running the generated script.
 *
 * @param scriptPath Path of the generated script; backslashes are shown as `/`.
 * @returns The command line to display to the user.
 */
function inferNextStep5(scriptPath) {
  const displayPath = scriptPath.replace(/\\/g, "/");
  return 'psql "postgres://postgres:postgres@localhost:5432/cnpj" -f ' + displayPath;
}
8831
/**
 * Validates and upper-cases a source encoding name.
 *
 * A missing value falls back to `DEFAULT_SOURCE_ENCODING`. After trimming, the
 * name may contain only letters, digits, `_` and `-`.
 *
 * @param value Encoding name supplied by the caller, or null/undefined.
 * @returns The upper-cased encoding name.
 * @throws ValidationError when the trimmed name is empty or has other characters.
 */
function normalizeSourceEncoding(value) {
  const candidate = (value ?? DEFAULT_SOURCE_ENCODING).trim();
  const isValidName = /^[A-Za-z0-9_-]+$/.test(candidate);
  if (isValidName) {
    return candidate.toUpperCase();
  }
  throw new ValidationError(
    `Invalid source encoding: ${value}. Use a PostgreSQL client encoding name such as WIN1252 or UTF8.`
  );
}
8840
/**
 * Generates a psql import script and a manifest.json for the recognized
 * dataset files found under `inputPath`.
 *
 * Order of operations: validate the input directory, resolve the output
 * directory, normalize the source encoding, inspect and filter the files,
 * create the output directory, stat every selected file, write the script and
 * then the manifest. Progress is reported through `options.onProgress` using
 * the event kinds "start", "file_registered" and "finish".
 *
 * @param {string} inputPath Directory containing the dataset files.
 * @param {object} [options]
 * @param {string} [options.dataset] Restrict generation to one dataset type.
 *   When set, a failed directory validation does not abort; its errors are
 *   returned as warnings instead.
 * @param {string} [options.outputPath] Output directory; defaults to the
 *   result of defaultPostgresDirectOutputPath for the validated path.
 * @param {string} [options.scriptName] Script file name; defaults to
 *   "import-postgres-direct.sql".
 * @param {string} [options.sourceEncoding] Encoding name, checked by
 *   normalizeSourceEncoding.
 * @param {Function} [options.onProgress] Progress event callback.
 * @returns {Promise<object>} Summary with resolved paths, totals, per-dataset
 *   statistics, warnings and the suggested next command.
 * @throws {ValidationError} For an unsupported dataset type, an input
 *   directory that fails validation (when no dataset filter is given), or
 *   when no recognized dataset files are found.
 */
async function generatePostgresDirectScript(inputPath, options = {}) {
  if (options.dataset && !isImportDatasetType(options.dataset)) {
    throw new ValidationError(`Unsupported dataset type: ${options.dataset}.`);
  }

  const validation = await validateInputDirectory(inputPath);
  if (!validation.ok && !options.dataset) {
    throw new ValidationError(
      `The input directory is not ready for PostgreSQL direct script generation. ${validation.errors.join(" ")}`
    );
  }

  const resolvedInputPath = path17.resolve(inputPath);
  const validatedPath = validation.ok
    ? validation.validatedPath
    : resolvedInputPath;
  const outputPath = path17.resolve(
    options.outputPath ?? defaultPostgresDirectOutputPath(validatedPath)
  );
  const sourceEncoding = normalizeSourceEncoding(options.sourceEncoding);

  // Rank used to keep datasets in the canonical import order.
  const importRank = (datasetName) => IMPORT_ORDER.indexOf(datasetName);
  const isSelectedFile = (entry) =>
    entry.entryKind === "file" &&
    isImportDatasetType(entry.inferredType) &&
    (!options.dataset || entry.inferredType === options.dataset);

  const inspected = await inspectFiles(validatedPath);
  const recognizedFiles = inspected.entries
    .filter(isSelectedFile)
    .map((entry) => ({ ...entry, inferredType: entry.inferredType }))
    .sort(sortEntries);
  if (recognizedFiles.length === 0) {
    throw new ValidationError(
      "No recognized dataset files were found for PostgreSQL direct script generation."
    );
  }

  const datasets = Array.from(
    new Set(recognizedFiles.map((entry) => entry.inferredType))
  ).sort((left, right) => importRank(left) - importRank(right));

  options.onProgress?.({
    kind: "start",
    inputPath: resolvedInputPath,
    validatedPath,
    outputPath,
    totalFiles: recognizedFiles.length,
    datasets,
    sourceEncoding
  });

  await mkdir9(outputPath, { recursive: true });

  const sourceFiles = [];
  const summariesByDataset = new Map();
  for (let position = 0; position < recognizedFiles.length; position += 1) {
    const entry = recognizedFiles[position];
    const dataset = entry.inferredType;
    const absolutePath = path17.join(validatedPath, entry.relativePath);
    const { size: fileSize } = await stat7(absolutePath);

    sourceFiles.push({
      dataset,
      absolutePath,
      relativePath: entry.relativePath,
      fileSize
    });

    // Create the per-dataset accumulator on first sight, then update in place.
    let datasetSummary = summariesByDataset.get(dataset);
    if (datasetSummary == null) {
      datasetSummary = {
        dataset,
        files: 0,
        totalBytes: 0,
        sourceFiles: []
      };
      summariesByDataset.set(dataset, datasetSummary);
    }
    datasetSummary.files += 1;
    datasetSummary.totalBytes += fileSize;
    datasetSummary.sourceFiles.push(absolutePath);

    options.onProgress?.({
      kind: "file_registered",
      dataset,
      fileIndex: position + 1,
      totalFiles: recognizedFiles.length,
      inputFile: buildDisplayPath(absolutePath),
      fileSize
    });
  }

  const scriptName = options.scriptName ?? "import-postgres-direct.sql";
  const scriptPath = path17.join(outputPath, scriptName);
  const script = generatePostgresSanitizedDirectImportScript({
    files: sourceFiles,
    sourceEncoding
  });
  await writeFile6(scriptPath, script, "utf8");

  const manifestPath = path17.join(outputPath, "manifest.json");
  const summaryDatasets = Array.from(summariesByDataset.values()).sort(
    (left, right) => importRank(left.dataset) - importRank(right.dataset)
  );
  let totalBytes = 0;
  for (const item of summaryDatasets) {
    totalBytes += item.totalBytes;
  }

  const manifest = {
    generatedAt: new Date().toISOString(),
    mode: "direct-sanitized-script",
    inputPath: resolvedInputPath,
    validatedPath,
    outputPath,
    scriptPath,
    sourceEncoding,
    totalFiles: sourceFiles.length,
    totalBytes,
    datasets: summaryDatasets
  };
  const manifestText = `${JSON.stringify(manifest, null, 2)}\n`;
  await writeFile6(manifestPath, manifestText, "utf8");

  options.onProgress?.({
    kind: "finish",
    outputPath,
    scriptPath,
    totalFiles: sourceFiles.length,
    totalBytes
  });

  // Validation errors only reach this point when a dataset filter bypassed them.
  const validationWarnings = validation.ok ? [] : validation.errors;
  return {
    inputPath: resolvedInputPath,
    validatedPath,
    outputPath,
    scriptPath,
    manifestPath,
    sourceEncoding,
    totalFiles: sourceFiles.length,
    totalBytes,
    datasets: summaryDatasets,
    warnings: [
      ...validationWarnings,
      "This script imports sanitized Receita files directly with psql \\copy. It avoids rewriting the full dataset into a second CSV tree.",
      "The generated script expects the database schema generated by cnpj-db-loader to be applied before execution.",
      "Use --source-encoding UTF8 only if your sanitized files are already UTF-8. The default WIN1252 matches the usual Receita file encoding."
    ],
    nextStep: inferNextStep5(scriptPath)
  };
}
8975
+
8068
8976
  // src/cli/ui/theme.ts
8069
8977
  var colorsEnabled = process.stdout.isTTY && !process.argv.includes("--no-color") && process.env.NO_COLOR === void 0;
8070
8978
  function paint(code, value) {
@@ -8114,9 +9022,9 @@ function handleCliError(error) {
8114
9022
  }
8115
9023
 
8116
9024
  // src/cli/ui/output/shared.ts
8117
- import path15 from "path";
9025
+ import path18 from "path";
8118
9026
  function resolveLogFilePath(logFilePath) {
8119
- return path15.resolve(logFilePath);
9027
+ return path18.resolve(logFilePath);
8120
9028
  }
8121
9029
  function formatKeyValue(label, value) {
8122
9030
  return `${theme.muted(`- ${label}:`)} ${value}`;
@@ -8656,6 +9564,59 @@ function printFederalRevenueSyncSummary(summary, logFilePath) {
8656
9564
  `${theme.muted("Import progress log:")} ${resolveLogFilePath(summary.import.progressLogPath)}`
8657
9565
  );
8658
9566
  }
9567
/**
 * Prints the final report of a PostgreSQL-ready CSV export to the console.
 *
 * @param {object} summary Export summary; reads the path fields, totalFiles,
 *   totalRows, datasets, warnings and nextStep.
 * @param {string} logFilePath Path of the command log file shown on the last line.
 */
function printPostgresCsvExportSummary(summary, logFilePath) {
  const printField = (label, value) => {
    console.log(formatKeyValue(label, value));
  };

  console.log(
    theme.successLabel("POSTGRES"),
    "PostgreSQL-ready CSV export completed."
  );
  printField("Input path", summary.inputPath);
  printField("Validated path", summary.validatedPath);
  printField("Output path", summary.outputPath);
  printField("Generated script", summary.scriptPath);
  printField("Manifest", summary.manifestPath);
  printField("Exported files", summary.totalFiles);
  printField("Exported rows", formatCount(summary.totalRows));

  if (summary.datasets.length > 0) {
    console.log(theme.infoLabel("DATASETS"));
    summary.datasets.forEach((item) => {
      console.log(
        ` ${theme.blue("\u2022")} ${item.dataset}: ${item.files} file(s), ${formatCount(item.rows)} row(s)`
      );
    });
  }

  printWarnings(summary.warnings);
  if (summary.nextStep) {
    console.log(`${theme.infoLabel("NEXT")} ${summary.nextStep}`);
  }
  console.log(`${theme.muted("Log file:")} ${resolveLogFilePath(logFilePath)}`);
}
9593
/**
 * Prints the final report of the direct PostgreSQL script generation to the
 * console.
 *
 * @param {object} summary Generation summary; reads the path fields,
 *   sourceEncoding, totalFiles, totalBytes, datasets, warnings and nextStep.
 * @param {string} logFilePath Path of the command log file shown on the last line.
 */
function printPostgresDirectScriptSummary(summary, logFilePath) {
  const printField = (label, value) => {
    console.log(formatKeyValue(label, value));
  };

  console.log(
    theme.successLabel("POSTGRES"),
    "Direct PostgreSQL import script generated."
  );
  printField("Input path", summary.inputPath);
  printField("Validated path", summary.validatedPath);
  printField("Output path", summary.outputPath);
  printField("Generated script", summary.scriptPath);
  printField("Manifest", summary.manifestPath);
  printField("Source encoding", summary.sourceEncoding);
  printField("Source files", summary.totalFiles);
  printField("Source bytes", formatBytes(summary.totalBytes));

  if (summary.datasets.length > 0) {
    console.log(theme.infoLabel("DATASETS"));
    summary.datasets.forEach((item) => {
      console.log(
        ` ${theme.blue("\u2022")} ${item.dataset}: ${item.files} file(s), ${formatBytes(item.totalBytes)}`
      );
    });
  }

  printWarnings(summary.warnings);
  if (summary.nextStep) {
    console.log(`${theme.infoLabel("NEXT")} ${summary.nextStep}`);
  }
  console.log(`${theme.muted("Log file:")} ${resolveLogFilePath(logFilePath)}`);
}
8659
9620
 
8660
9621
  // src/cli/ui/output/progress.ts
8661
9622
  function createExtractionProgressReporter() {
@@ -9276,6 +10237,65 @@ function createFederalRevenueDownloadProgressReporter() {
9276
10237
  );
9277
10238
  };
9278
10239
  }
10240
/**
 * Creates the console progress reporter for the PostgreSQL CSV export.
 *
 * The returned callback handles the event kinds "start", "file_finish" and
 * "finish"; any other kind is ignored.
 *
 * @returns {(event: object) => void} Progress event handler.
 */
function createPostgresCsvExportProgressReporter() {
  return (event) => {
    switch (event.kind) {
      case "start":
        console.log(
          theme.infoLabel("POSTGRES"),
          "Starting PostgreSQL-ready CSV export..."
        );
        console.log(formatKeyValue("Input path", event.inputPath));
        console.log(formatKeyValue("Validated path", event.validatedPath));
        console.log(formatKeyValue("Output path", event.outputPath));
        console.log(formatKeyValue("Files queued", event.totalFiles));
        break;
      case "file_finish":
        console.log(
          `${theme.infoLabel("POSTGRES")} ${event.fileIndex}/${event.totalFiles} ${event.dataset} exported with ${formatCount(event.rows)} row(s).`
        );
        break;
      case "finish":
        console.log(
          theme.successLabel("POSTGRES"),
          `Exported ${event.totalFiles} file(s) with ${formatCount(event.totalRows)} row(s).`
        );
        console.log(formatKeyValue("Output path", event.outputPath));
        console.log(formatKeyValue("Script path", event.scriptPath));
        break;
      default:
        break;
    }
  };
}
10269
/**
 * Creates the console progress reporter for direct PostgreSQL script
 * generation.
 *
 * The returned callback handles the event kinds "start", "file_registered"
 * and "finish"; any other kind is ignored.
 *
 * @returns {(event: object) => void} Progress event handler.
 */
function createPostgresDirectScriptProgressReporter() {
  return (event) => {
    switch (event.kind) {
      case "start":
        console.log(
          theme.infoLabel("POSTGRES"),
          "Starting direct PostgreSQL script generation..."
        );
        console.log(formatKeyValue("Input path", event.inputPath));
        console.log(formatKeyValue("Validated path", event.validatedPath));
        console.log(formatKeyValue("Output path", event.outputPath));
        console.log(formatKeyValue("Source encoding", event.sourceEncoding));
        console.log(formatKeyValue("Files queued", event.totalFiles));
        break;
      case "file_registered":
        console.log(
          `${theme.infoLabel("POSTGRES")} ${event.fileIndex}/${event.totalFiles} ${event.dataset} registered (${formatBytes(event.fileSize)}).`
        );
        break;
      case "finish":
        console.log(
          theme.successLabel("POSTGRES"),
          `Generated direct import script for ${event.totalFiles} file(s) (${formatBytes(event.totalBytes)}).`
        );
        console.log(formatKeyValue("Output path", event.outputPath));
        console.log(formatKeyValue("Script path", event.scriptPath));
        break;
      default:
        break;
    }
  };
}
9279
10299
 
9280
10300
  // src/cli/ui/output/quarantine.ts
9281
10301
  function printAppliedFilters(summaryFilters) {
@@ -10164,8 +11184,116 @@ function registerQuarantineCommands(program) {
10164
11184
  });
10165
11185
  }
10166
11186
 
11187
+ // src/cli/commands/register-postgres.ts
11188
/**
 * Registers the `postgres` command group and its two subcommands,
 * `generate-script` and `export-csv`, on the given CLI program.
 *
 * Both subcommands ask for confirmation unless `--force` is passed, run the
 * matching service with a console progress reporter, write a command log and
 * print a summary.
 *
 * @param {object} program CLI program object exposing a chainable `command()` API.
 */
function registerPostgresCommands(program) {
  // Handler for `postgres generate-script <input>`.
  async function runGenerateScript(input2, options) {
    const declined =
      !options.force &&
      !(await confirm(
        `Generate a direct PostgreSQL psql import script from ${input2}? This command does not rewrite the source files; it creates only a SQL script and manifest.`
      ));
    if (declined) {
      console.log("PostgreSQL direct script generation cancelled.");
      return;
    }
    // Only forward the options the user actually supplied.
    const generateOptions = {
      onProgress: createPostgresDirectScriptProgressReporter(),
      ...(options.output ? { outputPath: options.output } : {}),
      ...(options.dataset ? { dataset: options.dataset } : {}),
      ...(options.scriptName ? { scriptName: options.scriptName } : {}),
      ...(options.sourceEncoding
        ? { sourceEncoding: options.sourceEncoding }
        : {})
    };
    const summary = await generatePostgresDirectScript(input2, generateOptions);
    const logFilePath = await writeCommandLog(
      "postgres-generate-script",
      summary
    );
    printPostgresDirectScriptSummary(summary, logFilePath);
  }

  // Handler for `postgres export-csv <input>`.
  async function runExportCsv(input2, options) {
    const declined =
      !options.force &&
      !(await confirm(
        `Export PostgreSQL-ready CSV files from ${input2}? This command creates normalized CSV files and a generated psql import script.`
      ));
    if (declined) {
      console.log("PostgreSQL CSV export cancelled.");
      return;
    }
    // Only forward the options the user actually supplied.
    const exportOptions = {
      onProgress: createPostgresCsvExportProgressReporter(),
      ...(options.output ? { outputPath: options.output } : {}),
      ...(options.dataset ? { dataset: options.dataset } : {}),
      ...(options.scriptName ? { scriptName: options.scriptName } : {})
    };
    const summary = await exportPostgresCsvDataset(input2, exportOptions);
    const logFilePath = await writeCommandLog("postgres-export-csv", summary);
    printPostgresCsvExportSummary(summary, logFilePath);
  }

  const postgres = program
    .command("postgres")
    .description(
      "PostgreSQL-oriented helpers for hybrid loading and database operations."
    );

  postgres
    .command("generate-script")
    .argument(
      "<input>",
      "Path to the sanitized dataset directory generated by cnpj-db-loader sanitize."
    )
    .option(
      "--output <path>",
      "Custom output directory for the generated psql script and manifest."
    )
    .option(
      "--dataset <dataset>",
      "Generate a script only for one dataset block, for example establishments or companies."
    )
    .option(
      "--script-name <name>",
      "Generated psql script file name. Defaults to import-postgres-direct.sql."
    )
    .option(
      "--source-encoding <encoding>",
      "PostgreSQL client encoding used while reading sanitized Receita files. Defaults to WIN1252."
    )
    .option("-f, --force", "Skip the confirmation prompt.")
    .description(
      "Generate a direct psql import script that loads sanitized Receita files without rewriting them into new CSV files."
    )
    .action(runGenerateScript);

  postgres
    .command("export-csv")
    .argument(
      "<input>",
      "Path to the sanitized or validated extracted dataset directory."
    )
    .option(
      "--output <path>",
      "Custom output directory for PostgreSQL-ready CSV files."
    )
    .option(
      "--dataset <dataset>",
      "Export only one dataset block, for example establishments or companies."
    )
    .option(
      "--script-name <name>",
      "Generated psql script file name. Defaults to import-postgres-direct.sql."
    )
    .option("-f, --force", "Skip the confirmation prompt.")
    .description(
      "Convert sanitized Receita files into real PostgreSQL-ready CSV files and generate a direct psql import script."
    )
    .action(runExportCsv);
}
11294
+
10167
11295
  // src/cli/commands/register-schema.ts
10168
- import path16 from "path";
11296
+ import path19 from "path";
10169
11297
  function ensureSqlExtension(fileName) {
10170
11298
  return fileName.toLowerCase().endsWith(".sql") ? fileName : `${fileName}.sql`;
10171
11299
  }
@@ -10183,7 +11311,7 @@ function resolveSchemaOutputPath(profile, name, output2) {
10183
11311
  const fileName = ensureSqlExtension(
10184
11312
  name?.trim() || getDefaultSchemaBaseName(profile)
10185
11313
  );
10186
- return path16.resolve(output2 ?? process.cwd(), fileName);
11314
+ return path19.resolve(output2 ?? process.cwd(), fileName);
10187
11315
  }
10188
11316
  function registerSchemaCommands(program) {
10189
11317
  const schema = program.command("schema").description(
@@ -10270,12 +11398,25 @@ function registerValidateCommands(program) {
10270
11398
  }
10271
11399
 
10272
11400
  // src/cli/shared/app-config.ts
10273
- import { readFileSync } from "fs";
11401
+ import { existsSync, readFileSync } from "fs";
10274
11402
  import { dirname as dirname2, resolve } from "path";
10275
11403
  import { fileURLToPath } from "url";
11404
/**
 * Looks for a package.json starting in the directory of the current module
 * and walking up through parent directories.
 *
 * At most six directories are checked: the module's own directory and five
 * ancestors.
 *
 * @returns {string | null} Path of the first package.json found, or null when
 *   none of the checked directories contains one.
 */
function findPackageJsonPath() {
  const maxLevels = 6;
  let searchDir = dirname2(fileURLToPath(import.meta.url));
  let level = 0;
  while (level < maxLevels) {
    const manifestPath = resolve(searchDir, "package.json");
    if (existsSync(manifestPath)) {
      return manifestPath;
    }
    searchDir = resolve(searchDir, "..");
    level += 1;
  }
  return null;
}
10276
11415
  function getPackageVersion() {
10277
- const currentDir = dirname2(fileURLToPath(import.meta.url));
10278
- const packageJsonPath = resolve(currentDir, "../package.json");
11416
+ const packageJsonPath = findPackageJsonPath();
11417
+ if (!packageJsonPath) {
11418
+ return "0.0.0";
11419
+ }
10279
11420
  const packageJson = JSON.parse(readFileSync(packageJsonPath, "utf-8"));
10280
11421
  return packageJson.version ?? "0.0.0";
10281
11422
  }
@@ -10302,9 +11443,14 @@ ${theme.section("Recommended flow")}
10302
11443
  ${theme.command("cnpj-db-loader database cleanup staging --force")}
10303
11444
  ${theme.command("cnpj-db-loader import ./downloads/<reference>/sanitized")}
10304
11445
 
11446
+ ${theme.section("Hybrid PostgreSQL path")}
11447
+ ${theme.command("cnpj-db-loader postgres generate-script ./downloads/<reference>/sanitized --output ./downloads/<reference>/postgres-direct --force")}
11448
+ ${theme.command('psql "postgres://user:password@localhost:5432/cnpj" -f ./downloads/<reference>/postgres-direct/import-postgres-direct.sql')}
11449
+
10305
11450
  ${theme.section("Notes")}
10306
11451
  ${theme.muted("Use federal-revenue when you want the CLI to check/download the remote monthly dataset first. Use inspect first when you already have local files.")}
10307
11452
  ${theme.muted("Generate the schema only when you need to create the database structure. Sanitization is the recommended preparation step before recurring imports.")}
11453
+ ${theme.muted("Use postgres generate-script when you want PostgreSQL to run the heavy bulk load directly from sanitized Receita files through a generated psql script.")}
10308
11454
  ${theme.muted("JSON execution logs are written inside the user home directory at ~/.cnpjdbloader/logs with structured level/event metadata.")}
10309
11455
  `;
10310
11456
  }
@@ -10321,6 +11467,7 @@ function buildProgram() {
10321
11467
  registerSchemaCommands(program);
10322
11468
  registerFederalRevenueCommands(program);
10323
11469
  registerDatabaseCommands(program);
11470
+ registerPostgresCommands(program);
10324
11471
  registerImportCommands(program);
10325
11472
  registerQuarantineCommands(program);
10326
11473
  registerDoctorCommands(program);