@danielarndt0/cnpj-db-loader 2.4.0-beta.2 → 2.4.0-beta.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -87,6 +87,222 @@ function getConfigFilePath() {
87
87
  return path2.join(os2.homedir(), ".config", "cnpj-db-loader", "config.json");
88
88
  }
89
89
 
90
+ // src/services/federal-revenue/client.ts
91
// WebDAV endpoint used when the caller does not supply a base URL.
var DEFAULT_FEDERAL_REVENUE_WEBDAV_URL = "https://arquivos.receitafederal.gov.br/public.php/webdav";
// User-Agent header value used when the caller does not override it.
var DEFAULT_FEDERAL_REVENUE_USER_AGENT = "cnpj-db-loader federal-revenue-client";
// Shape check for a monthly reference: four digits, a dash, two digits (YYYY-MM).
var REFERENCE_PATTERN = /^\d{4}-\d{2}$/;
94
/**
 * Removes every "/" character from the end of a string.
 *
 * @param {string} value - Text (typically a URL) to trim.
 * @returns {string} The input without any trailing slashes.
 */
function trimTrailingSlash(value) {
  let end = value.length;
  while (end > 0 && value[end - 1] === "/") {
    end -= 1;
  }
  return value.slice(0, end);
}
97
/**
 * Resolves the WebDAV base URL to use, without trailing slashes.
 *
 * @param {string | null | undefined} value - Caller-supplied base URL; the
 *   built-in default endpoint is used when it is null or undefined.
 * @returns {string} The chosen base URL with trailing slashes removed.
 */
function normalizeBaseUrl(value) {
  const candidate = value ?? DEFAULT_FEDERAL_REVENUE_WEBDAV_URL;
  return trimTrailingSlash(candidate);
}
100
/**
 * Returns the trimmed public share token, or fails when none is configured.
 *
 * @param {string | null | undefined} value - Raw token from config or CLI.
 * @returns {string} The token with surrounding whitespace removed.
 * @throws {ValidationError} When the token is missing or blank.
 */
function getShareToken(value) {
  const shareToken = value?.trim();
  if (shareToken) {
    return shareToken;
  }
  throw new ValidationError(
    "Federal Revenue public share token is not configured. Run `cnpj-db-loader federal-revenue config set share-token <token>` or pass --share-token."
  );
}
109
/**
 * Percent-encodes a path fragment while leaving "/" separators intact.
 *
 * @param {string} value - Path fragment to encode.
 * @returns {string} The encoded fragment with literal slashes preserved.
 */
function encodePathSegment(value) {
  // Encoding each slash-delimited piece separately keeps the separators as-is.
  return String(value)
    .split("/")
    .map((piece) => encodeURIComponent(piece))
    .join("/");
}
112
/**
 * Decodes XML entity and character references in a text fragment.
 *
 * Handles the five predefined XML entities (&amp; &lt; &gt; &quot; &apos;)
 * and numeric character references in decimal (&#39;) or hex (&#x27;) form.
 * Unknown references are left untouched.
 *
 * @param {string} value - XML text content to decode.
 * @returns {string} The decoded text.
 */
function decodeXml(value) {
  const namedEntities = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'" };
  // A single pass is required: decoding "&amp;" in a separate earlier step
  // would turn "&amp;lt;" into "&lt;" and then wrongly into "<".
  return value.replace(
    /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(amp|lt|gt|quot|apos));/g,
    (match, decimal, hex, name) => {
      if (name !== undefined) {
        return namedEntities[name];
      }
      const codePoint = decimal !== undefined
        ? Number.parseInt(decimal, 10)
        : Number.parseInt(hex, 16);
      // Out-of-range references are not valid code points; keep them verbatim.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
    }
  );
}
115
/**
 * Percent-decodes an href segment, falling back to the raw text when the
 * segment is not valid percent-encoding.
 *
 * @param {string} value - Possibly percent-encoded segment.
 * @returns {string} The decoded segment, or the input if decoding fails.
 */
function decodeHrefSegment(value) {
  let decoded = value;
  try {
    decoded = decodeURIComponent(value);
  } catch {
    // Malformed escape sequence: deliberately keep the raw segment.
  }
  return decoded;
}
122
/**
 * Builds the HTTP Basic Authorization header value for a share token.
 * The token is used as the user name with an empty password.
 *
 * @param {string} shareToken - Public share token.
 * @returns {string} Header value in the form "Basic <base64>".
 */
function getAuthHeader(shareToken) {
  const credentials = `${shareToken}:`;
  const encoded = Buffer.from(credentials).toString("base64");
  return `Basic ${encoded}`;
}
125
/**
 * Joins a base URL and path segments into a request URL.
 *
 * @param {string} baseUrl - Base URL without a trailing slash.
 * @param {string[]} [segments=[]] - Path segments; each is encoded before joining.
 * @returns {string} `${baseUrl}/` when there are no segments, otherwise the
 *   base URL followed by the encoded segments separated by "/".
 */
function buildUrl(baseUrl, segments = []) {
  const encodedPath = segments.length === 0
    ? ""
    : segments.map(encodePathSegment).join("/");
  return `${baseUrl}/${encodedPath}`;
}
131
/**
 * Extracts the decoded text content of the first element with the given
 * local tag name, with or without a namespace prefix, case-insensitively.
 *
 * @param {string} block - XML fragment to search.
 * @param {string} tagName - Local element name (inserted into a regex as-is).
 * @returns {string | undefined} Trimmed, entity-decoded content, or undefined
 *   when the element is absent or its content is empty.
 */
function extractFirst(block, tagName) {
  const optionalPrefix = "(?:[a-zA-Z0-9_-]+:)?";
  const pattern = new RegExp(
    `<${optionalPrefix}${tagName}\\b[^>]*>([\\s\\S]*?)<\\/${optionalPrefix}${tagName}>`,
    "i"
  );
  const content = block.match(pattern)?.[1];
  if (!content) {
    return void 0;
  }
  return decodeXml(content.trim());
}
139
/**
 * Reports whether a response block contains a `collection` element
 * (optionally namespace-prefixed), which marks the entry as a folder.
 *
 * @param {string} block - XML of a single response element.
 * @returns {boolean} True when a collection tag is present.
 */
function isCollectionResponse(block) {
  const collectionTag = /<(?:[a-zA-Z0-9_-]+:)?collection\b/i;
  return collectionTag.test(block);
}
142
/**
 * Derives an entry name from an href: drops the query string and trailing
 * slashes, takes the last path segment, and percent-decodes it.
 *
 * @param {string} href - Href taken from a PROPFIND response.
 * @returns {string} The decoded last path segment.
 */
function getNameFromHref(href) {
  const [pathPart = href] = href.split("?");
  const trimmedPath = pathPart.replace(/\/+$/g, "");
  // lastIndexOf yields -1 when there is no slash, so slice(0) keeps the whole text.
  const lastSegment = trimmedPath.slice(trimmedPath.lastIndexOf("/") + 1);
  return decodeHrefSegment(lastSegment);
}
148
/**
 * Parses a WebDAV multistatus document into a list of entries.
 *
 * Each `response` element with an href yields
 * `{ href, name, isCollection }`, plus `sizeInBytes`, `lastModified` and
 * `etag` when present. Responses without an href are dropped.
 *
 * @param {string} xml - Raw PROPFIND response body.
 * @returns {Array<object>} Parsed entries; empty when no response element is found.
 */
function parsePropfindXml(xml) {
  const responsePattern =
    /<(?:[a-zA-Z0-9_-]+:)?response\b[\s\S]*?<\/(?:[a-zA-Z0-9_-]+:)?response>/gi;
  const entries = [];
  for (const block of xml.match(responsePattern) ?? []) {
    const href = extractFirst(block, "href");
    if (!href) {
      continue;
    }
    const entry = {
      href,
      name: getNameFromHref(href),
      isCollection: isCollectionResponse(block)
    };
    const rawSize = extractFirst(block, "getcontentlength");
    const parsedSize = rawSize ? Number.parseInt(rawSize, 10) : void 0;
    // Only keep a size that parsed to a real number.
    if (Number.isFinite(parsedSize)) {
      entry.sizeInBytes = parsedSize;
    }
    const lastModified = extractFirst(block, "getlastmodified");
    if (lastModified) {
      entry.lastModified = lastModified;
    }
    const etag = extractFirst(block, "getetag");
    if (etag) {
      entry.etag = etag;
    }
    entries.push(entry);
  }
  return entries;
}
174
/**
 * Issues a Depth-1 PROPFIND request against the WebDAV share and parses the
 * listing.
 *
 * @param {string[]} pathSegments - Path below the base URL to list.
 * @param {{ baseUrl?: string, shareToken?: string, userAgent?: string }} [options={}]
 *   Client options; defaults apply to the base URL and user agent.
 * @returns {Promise<{ entries: Array<object>, baseUrl: string, shareToken: string }>}
 *   Parsed entries plus the base URL and share token that were used.
 * @throws {ValidationError} When the share token is missing, the request
 *   fails before a response arrives, or the response status is not OK.
 */
async function propfind(pathSegments, options = {}) {
  const baseUrl = normalizeBaseUrl(options.baseUrl);
  const shareToken = getShareToken(options.shareToken);
  let response;
  try {
    // URL and header construction stay inside the try so that any failure
    // there is reported the same way as a transport failure.
    const requestUrl = buildUrl(baseUrl, pathSegments);
    const headers = {
      Accept: "application/xml,text/xml,*/*",
      Authorization: getAuthHeader(shareToken),
      Depth: "1",
      "User-Agent": options.userAgent ?? DEFAULT_FEDERAL_REVENUE_USER_AGENT
    };
    response = await fetch(requestUrl, { method: "PROPFIND", headers });
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new ValidationError(
      `Federal Revenue WebDAV request failed before receiving a response: ${reason}.`,
      { baseUrl, pathSegments }
    );
  }
  if (!response.ok) {
    const { status, statusText } = response;
    throw new ValidationError(
      `Federal Revenue WebDAV request failed with status ${status} ${statusText}.`,
      { status, statusText }
    );
  }
  const entries = parsePropfindXml(await response.text());
  return { entries, baseUrl, shareToken };
}
207
/**
 * Ensures a monthly reference has the YYYY-MM shape.
 *
 * @param {string} reference - Reference to check.
 * @returns {void}
 * @throws {ValidationError} When the reference does not match the pattern.
 */
function validateFederalRevenueReference(reference) {
  if (REFERENCE_PATTERN.test(reference)) {
    return;
  }
  throw new ValidationError(
    `Federal Revenue reference is invalid: ${reference}. Expected YYYY-MM.`
  );
}
214
/**
 * Formats a date as a YYYY-MM monthly reference using the local time zone.
 *
 * @param {Date} [date] - Date to format; defaults to the current moment.
 * @returns {string} The year and zero-padded month joined by a dash.
 */
function getCurrentFederalRevenueReference(date = /* @__PURE__ */ new Date()) {
  // getMonth() is zero-based, hence the +1.
  const paddedMonth = `${date.getMonth() + 1}`.padStart(2, "0");
  return [date.getFullYear(), paddedMonth].join("-");
}
219
/**
 * Lists the monthly reference folders available at the share root.
 *
 * @param {object} [options={}] - Client options forwarded to the PROPFIND call.
 * @returns {Promise<{ references: Array<{ reference: string, href: string }>, remoteBaseUrl: string }>}
 *   Folders whose name matches YYYY-MM, sorted ascending by name, plus the
 *   base URL that was queried.
 */
async function listFederalRevenueReferences(options = {}) {
  const { entries, baseUrl } = await propfind([], options);
  const references = [];
  for (const entry of entries) {
    if (entry.isCollection && REFERENCE_PATTERN.test(entry.name)) {
      references.push({ reference: entry.name, href: entry.href });
    }
  }
  references.sort((left, right) => left.reference.localeCompare(right.reference));
  return { references, remoteBaseUrl: baseUrl };
}
230
/**
 * Chooses which monthly reference to use.
 *
 * Selection order: an explicit `input.reference`, then the current calendar
 * month when `input.current` is set, otherwise the latest reference available
 * in the share.
 *
 * @param {object} [input={}] - Client options plus the optional `reference`
 *   (YYYY-MM) and `current` selectors.
 * @returns {Promise<{ mode: "explicit" | "current" | "latest", selectedReference: string, availableReferences: string[] }>}
 * @throws {ValidationError} When the explicit reference is malformed, no
 *   references exist remotely, or the requested reference is not available.
 */
async function resolveFederalRevenueReference(input = {}) {
  // Fail fast: a malformed explicit reference is rejected before any remote
  // call, so the format error is not hidden behind token or network failures.
  if (input.reference) {
    validateFederalRevenueReference(input.reference);
  }
  const { references } = await listFederalRevenueReferences(input);
  const availableReferences = references.map((item) => item.reference);
  // The listing is sorted ascending, so the last item is the newest month.
  const latest = availableReferences.at(-1);
  if (!latest) {
    throw new ValidationError(
      "Federal Revenue reference discovery failed: no monthly references were found in the public share."
    );
  }
  if (input.reference) {
    if (!availableReferences.includes(input.reference)) {
      throw new ValidationError(
        `Federal Revenue reference not found: ${input.reference}. Latest available reference is ${latest}.`,
        {
          requestedReference: input.reference,
          latestAvailableReference: latest,
          availableReferences
        }
      );
    }
    return {
      mode: "explicit",
      selectedReference: input.reference,
      availableReferences
    };
  }
  if (input.current) {
    const currentReference = getCurrentFederalRevenueReference();
    if (!availableReferences.includes(currentReference)) {
      throw new ValidationError(
        `Federal Revenue current reference is not available yet: ${currentReference}. Latest available reference is ${latest}.`,
        {
          requestedReference: currentReference,
          latestAvailableReference: latest,
          availableReferences
        }
      );
    }
    return {
      mode: "current",
      selectedReference: currentReference,
      availableReferences
    };
  }
  return {
    mode: "latest",
    selectedReference: latest,
    availableReferences
  };
}
281
/**
 * Lists the .zip files published under a monthly reference folder.
 *
 * @param {string} reference - Monthly reference in YYYY-MM form.
 * @param {object} [options={}] - Client options forwarded to the PROPFIND call.
 * @returns {Promise<{ files: Array<object>, remoteBaseUrl: string }>} Files
 *   sorted by name, each with `name`, `href`, `downloadUrl` and, when known,
 *   `sizeInBytes`, `lastModified` and `etag`.
 * @throws {ValidationError} When the reference is malformed or the request fails.
 */
async function listFederalRevenueFiles(reference, options = {}) {
  validateFederalRevenueReference(reference);
  const { entries, baseUrl } = await propfind([reference], options);
  const files = [];
  for (const entry of entries) {
    // Skip folders and anything that is not a zip archive.
    if (entry.isCollection || !entry.name.toLowerCase().endsWith(".zip")) {
      continue;
    }
    const file = {
      name: entry.name,
      href: entry.href,
      downloadUrl: buildUrl(baseUrl, [reference, entry.name])
    };
    if (entry.sizeInBytes !== void 0) {
      file.sizeInBytes = entry.sizeInBytes;
    }
    if (entry.lastModified) {
      file.lastModified = entry.lastModified;
    }
    if (entry.etag) {
      file.etag = entry.etag;
    }
    files.push(file);
  }
  files.sort((left, right) => left.name.localeCompare(right.name));
  return { files, remoteBaseUrl: baseUrl };
}
299
/**
 * Builds the HTTP headers used when downloading files from the share.
 *
 * @param {{ shareToken?: string, userAgent?: string }} [options={}] - Client options.
 * @returns {{ Authorization: string, "User-Agent": string }} Request headers.
 * @throws {ValidationError} When no share token is configured.
 */
function buildFederalRevenueDownloadHeaders(options = {}) {
  const { shareToken, userAgent } = options;
  const authorization = getAuthHeader(getShareToken(shareToken));
  return {
    Authorization: authorization,
    "User-Agent": userAgent ?? DEFAULT_FEDERAL_REVENUE_USER_AGENT
  };
}
305
+
90
306
  // src/services/config.service.ts
91
307
  async function readDatabaseConfig() {
92
308
  const raw = await safeReadText(getConfigFilePath());
@@ -114,12 +330,149 @@ function assertPostgresUrl(url) {
114
330
  );
115
331
  }
116
332
  }
333
/**
 * Verifies that a string is a parseable URL using http or https.
 *
 * @param {string} url - URL text to check.
 * @param {string} label - Human-readable field name used in error messages.
 * @returns {void}
 * @throws {ValidationError} When the URL cannot be parsed or uses another protocol.
 */
function assertHttpUrl(url, label) {
  let protocol;
  try {
    ({ protocol } = new URL(url));
  } catch {
    throw new ValidationError(`${label} is not a valid URL.`, { url });
  }
  if (protocol !== "http:" && protocol !== "https:") {
    throw new ValidationError(`${label} must use the http or https protocol.`, {
      url
    });
  }
}
346
/**
 * Trims a string and rejects it when nothing remains.
 *
 * @param {string} value - Raw input.
 * @param {string} label - Human-readable field name used in the error message.
 * @returns {string} The trimmed value.
 * @throws {ValidationError} When the trimmed value is empty.
 */
function assertNonEmpty(value, label) {
  const trimmed = value.trim();
  if (trimmed) {
    return trimmed;
  }
  throw new ValidationError(`${label} cannot be empty.`);
}
353
/**
 * Maps a user-supplied config key (or one of its aliases) to its canonical
 * name. Matching ignores case and surrounding whitespace.
 *
 * @param {string} key - Key as typed by the user.
 * @returns {"share-token" | "webdav-url" | "user-agent"} Canonical key.
 * @throws {ValidationError} When the key is not recognised.
 */
function normalizeFederalRevenueConfigKey(key) {
  const canonicalByAlias = new Map([
    ["share-token", "share-token"],
    ["share_token", "share-token"],
    ["token", "share-token"],
    ["webdav-url", "webdav-url"],
    ["webdav_url", "webdav-url"],
    ["base-url", "webdav-url"],
    ["base_url", "webdav-url"],
    ["url", "webdav-url"],
    ["user-agent", "user-agent"],
    ["user_agent", "user-agent"]
  ]);
  const canonical = canonicalByAlias.get(key.trim().toLowerCase());
  if (canonical !== void 0) {
    return canonical;
  }
  throw new ValidationError(
    `Unknown Federal Revenue config key: ${key}. Expected share-token, webdav-url, or user-agent.`
  );
}
370
/**
 * Returns a copy of the Federal Revenue config with one value set.
 *
 * @param {object} config - Current Federal Revenue config section.
 * @param {string} key - Canonical key; anything other than "share-token" or
 *   "webdav-url" is treated as the user agent.
 * @param {string} value - New value; it is trimmed and must not be empty.
 * @returns {object} A new config object with the value applied.
 * @throws {ValidationError} When the value is empty or the URL is invalid.
 */
function assignFederalRevenueConfigValue(config, key, value) {
  switch (key) {
    case "share-token": {
      const shareToken = assertNonEmpty(value, "Federal Revenue share token");
      return { ...config, shareToken };
    }
    case "webdav-url": {
      const webdavUrl = assertNonEmpty(value, "Federal Revenue WebDAV URL");
      assertHttpUrl(webdavUrl, "Federal Revenue WebDAV URL");
      return { ...config, webdavUrl };
    }
    default: {
      const userAgent = assertNonEmpty(value, "Federal Revenue user agent");
      return { ...config, userAgent };
    }
  }
}
387
/**
 * Returns a copy of the Federal Revenue config with one value removed.
 *
 * @param {object} config - Current Federal Revenue config section.
 * @param {string} key - Canonical key; unknown keys leave the copy unchanged.
 * @returns {object} A new config object without the selected value.
 */
function deleteFederalRevenueConfigValue(config, key) {
  const propertyByKey = new Map([
    ["share-token", "shareToken"],
    ["webdav-url", "webdavUrl"],
    ["user-agent", "userAgent"]
  ]);
  const nextConfig = { ...config };
  const property = propertyByKey.get(key);
  if (property !== void 0) {
    delete nextConfig[property];
  }
  return nextConfig;
}
400
/**
 * Reports whether the Federal Revenue config section holds no values.
 *
 * @param {object} config - Federal Revenue config section.
 * @returns {boolean} True when share token, WebDAV URL and user agent are all unset.
 */
function isFederalRevenueConfigEmpty(config) {
  const hasAnyValue = Boolean(
    config.shareToken || config.webdavUrl || config.userAgent
  );
  return !hasAnyValue;
}
117
403
  async function setDefaultDbUrl(url) {
118
404
  assertPostgresUrl(url);
119
- await writeDatabaseConfig({ defaultDbUrl: url });
405
+ const currentConfig = await readDatabaseConfig();
406
+ await writeDatabaseConfig({ ...currentConfig, defaultDbUrl: url });
120
407
  }
121
408
  async function resetDefaultDbUrl() {
122
- await writeDatabaseConfig({});
409
+ const currentConfig = await readDatabaseConfig();
410
+ const nextConfig = { ...currentConfig };
411
+ delete nextConfig.defaultDbUrl;
412
+ await writeDatabaseConfig(nextConfig);
413
+ }
414
/**
 * Sets one Federal Revenue config value and persists it alongside the rest
 * of the stored configuration.
 *
 * @param {string} key - Config key or alias.
 * @param {string} value - New value for the key.
 * @returns {Promise<object>} The effective Federal Revenue config after the change.
 * @throws {ValidationError} When the key is unknown or the value is invalid.
 */
async function setFederalRevenueConfigValue(key, value) {
  const canonicalKey = normalizeFederalRevenueConfigKey(key);
  const storedConfig = await readDatabaseConfig();
  const previousSection = storedConfig.federalRevenue ?? {};
  const federalRevenue = assignFederalRevenueConfigValue(
    previousSection,
    canonicalKey,
    value
  );
  await writeDatabaseConfig({ ...storedConfig, federalRevenue });
  return getFederalRevenueEffectiveConfig(federalRevenue);
}
428
/**
 * Clears Federal Revenue configuration: the whole section when no key is
 * given, otherwise only the selected value. The section is dropped from the
 * stored config once it has no values left.
 *
 * @param {string} [key] - Config key or alias to clear; omit to clear everything.
 * @returns {Promise<object>} The effective Federal Revenue config after the change.
 * @throws {ValidationError} When the key is unknown.
 */
async function resetFederalRevenueConfig(key) {
  const storedConfig = await readDatabaseConfig();
  // With no key the remaining section is empty, which removes it entirely.
  let remainingSection = {};
  if (key) {
    const canonicalKey = normalizeFederalRevenueConfigKey(key);
    remainingSection = deleteFederalRevenueConfigValue(
      storedConfig.federalRevenue ?? {},
      canonicalKey
    );
  }
  const updatedConfig = { ...storedConfig };
  if (key && !isFederalRevenueConfigEmpty(remainingSection)) {
    updatedConfig.federalRevenue = remainingSection;
  } else {
    delete updatedConfig.federalRevenue;
  }
  await writeDatabaseConfig(updatedConfig);
  return getFederalRevenueEffectiveConfig(remainingSection);
}
450
/**
 * Combines stored Federal Revenue settings with the built-in defaults.
 *
 * @param {{ webdavUrl?: string, userAgent?: string, shareToken?: string }} [config={}]
 *   Stored Federal Revenue config section.
 * @returns {object} `webdavUrl` and `userAgent` (stored value or default),
 *   `shareToken` only when one is stored, and a `configured` map of booleans
 *   telling which values came from the stored config.
 */
function getFederalRevenueEffectiveConfig(config = {}) {
  const { webdavUrl, userAgent, shareToken } = config;
  const effective = {
    webdavUrl: webdavUrl ?? DEFAULT_FEDERAL_REVENUE_WEBDAV_URL,
    userAgent: userAgent ?? DEFAULT_FEDERAL_REVENUE_USER_AGENT
  };
  // The token has no default, so the key is omitted when nothing is stored.
  if (shareToken) {
    effective.shareToken = shareToken;
  }
  effective.configured = {
    webdavUrl: Boolean(webdavUrl),
    userAgent: Boolean(userAgent),
    shareToken: Boolean(shareToken)
  };
  return effective;
}
462
/**
 * Reads the stored configuration and returns the effective Federal Revenue
 * settings (stored values merged with defaults).
 *
 * @returns {Promise<object>} The effective Federal Revenue config.
 */
async function readFederalRevenueEffectiveConfig() {
  const { federalRevenue } = await readDatabaseConfig();
  return getFederalRevenueEffectiveConfig(federalRevenue ?? {});
}
466
/**
 * Builds the client options for Federal Revenue requests. Explicit overrides
 * win over stored configuration, which in turn wins over built-in defaults.
 *
 * @param {{ baseUrl?: string, shareToken?: string, userAgent?: string }} [overrides={}]
 *   Values supplied by the caller, e.g. from CLI flags.
 * @returns {Promise<{ baseUrl: string, shareToken: string | undefined, userAgent: string }>}
 */
async function resolveFederalRevenueClientOptions(overrides = {}) {
  const storedConfig = await readDatabaseConfig();
  const { webdavUrl, shareToken, userAgent } = getFederalRevenueEffectiveConfig(
    storedConfig.federalRevenue ?? {}
  );
  return {
    baseUrl: overrides.baseUrl ?? webdavUrl,
    shareToken: overrides.shareToken ?? shareToken,
    userAgent: overrides.userAgent ?? userAgent
  };
}
124
477
 
125
478
  // src/services/database.service.ts
@@ -6717,217 +7070,6 @@ async function showQuarantineRow(id, options) {
6717
7070
  return record;
6718
7071
  }
6719
7072
 
6720
- // src/services/federal-revenue/client.ts
6721
- var DEFAULT_FEDERAL_REVENUE_SHARE_TOKEN = "YggdBLfdninEJX9";
6722
- var DEFAULT_FEDERAL_REVENUE_WEBDAV_URL = "https://arquivos.receitafederal.gov.br/public.php/webdav";
6723
- var DEFAULT_FEDERAL_REVENUE_USER_AGENT = "cnpj-db-loader federal-revenue-client";
6724
- var REFERENCE_PATTERN = /^\d{4}-\d{2}$/;
6725
- function trimTrailingSlash(value) {
6726
- return value.replace(/\/+$/g, "");
6727
- }
6728
- function normalizeBaseUrl(value) {
6729
- return trimTrailingSlash(value ?? DEFAULT_FEDERAL_REVENUE_WEBDAV_URL);
6730
- }
6731
- function getShareToken(value) {
6732
- return value ?? DEFAULT_FEDERAL_REVENUE_SHARE_TOKEN;
6733
- }
6734
- function encodePathSegment(value) {
6735
- return encodeURIComponent(value).replace(/%2F/gi, "/");
6736
- }
6737
- function decodeXml(value) {
6738
- return value.replace(/&amp;/g, "&").replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/&quot;/g, '"').replace(/&#39;/g, "'");
6739
- }
6740
- function decodeHrefSegment(value) {
6741
- try {
6742
- return decodeURIComponent(value);
6743
- } catch {
6744
- return value;
6745
- }
6746
- }
6747
- function getAuthHeader(shareToken) {
6748
- return `Basic ${Buffer.from(`${shareToken}:`).toString("base64")}`;
6749
- }
6750
- function buildUrl(baseUrl, segments = []) {
6751
- if (segments.length === 0) {
6752
- return `${baseUrl}/`;
6753
- }
6754
- return `${baseUrl}/${segments.map(encodePathSegment).join("/")}`;
6755
- }
6756
- function extractFirst(block, tagName) {
6757
- const pattern = new RegExp(
6758
- `<(?:[a-zA-Z0-9_-]+:)?${tagName}\\b[^>]*>([\\s\\S]*?)<\\/(?:[a-zA-Z0-9_-]+:)?${tagName}>`,
6759
- "i"
6760
- );
6761
- const match = block.match(pattern);
6762
- return match?.[1] ? decodeXml(match[1].trim()) : void 0;
6763
- }
6764
- function isCollectionResponse(block) {
6765
- return /<(?:[a-zA-Z0-9_-]+:)?collection\b/i.test(block);
6766
- }
6767
- function getNameFromHref(href) {
6768
- const cleanHref = href.split("?")[0] ?? href;
6769
- const withoutTrailingSlash = cleanHref.replace(/\/+$/g, "");
6770
- const rawName = withoutTrailingSlash.split("/").pop() ?? withoutTrailingSlash;
6771
- return decodeHrefSegment(rawName);
6772
- }
6773
- function parsePropfindXml(xml) {
6774
- const responseBlocks = xml.match(
6775
- /<(?:[a-zA-Z0-9_-]+:)?response\b[\s\S]*?<\/(?:[a-zA-Z0-9_-]+:)?response>/gi
6776
- );
6777
- if (!responseBlocks) {
6778
- return [];
6779
- }
6780
- return responseBlocks.map((block) => {
6781
- const href = extractFirst(block, "href");
6782
- if (!href) {
6783
- return void 0;
6784
- }
6785
- const size = extractFirst(block, "getcontentlength");
6786
- const parsedSize = size ? Number.parseInt(size, 10) : void 0;
6787
- const lastModified = extractFirst(block, "getlastmodified");
6788
- const etag = extractFirst(block, "getetag");
6789
- return {
6790
- href,
6791
- name: getNameFromHref(href),
6792
- isCollection: isCollectionResponse(block),
6793
- ...Number.isFinite(parsedSize) ? { sizeInBytes: parsedSize } : {},
6794
- ...lastModified ? { lastModified } : {},
6795
- ...etag ? { etag } : {}
6796
- };
6797
- }).filter((entry) => entry !== void 0);
6798
- }
6799
- async function propfind(pathSegments, options = {}) {
6800
- const baseUrl = normalizeBaseUrl(options.baseUrl);
6801
- const shareToken = getShareToken(options.shareToken);
6802
- let response;
6803
- try {
6804
- response = await fetch(buildUrl(baseUrl, pathSegments), {
6805
- method: "PROPFIND",
6806
- headers: {
6807
- Accept: "application/xml,text/xml,*/*",
6808
- Authorization: getAuthHeader(shareToken),
6809
- Depth: "1",
6810
- "User-Agent": options.userAgent ?? DEFAULT_FEDERAL_REVENUE_USER_AGENT
6811
- }
6812
- });
6813
- } catch (error) {
6814
- throw new ValidationError(
6815
- `Federal Revenue WebDAV request failed before receiving a response: ${error instanceof Error ? error.message : String(error)}.`,
6816
- { baseUrl, pathSegments }
6817
- );
6818
- }
6819
- if (!response.ok) {
6820
- throw new ValidationError(
6821
- `Federal Revenue WebDAV request failed with status ${response.status} ${response.statusText}.`,
6822
- { status: response.status, statusText: response.statusText }
6823
- );
6824
- }
6825
- const xml = await response.text();
6826
- return {
6827
- entries: parsePropfindXml(xml),
6828
- baseUrl,
6829
- shareToken
6830
- };
6831
- }
6832
- function validateFederalRevenueReference(reference) {
6833
- if (!REFERENCE_PATTERN.test(reference)) {
6834
- throw new ValidationError(
6835
- `Federal Revenue reference is invalid: ${reference}. Expected YYYY-MM.`
6836
- );
6837
- }
6838
- }
6839
- function getCurrentFederalRevenueReference(date = /* @__PURE__ */ new Date()) {
6840
- const year = date.getFullYear();
6841
- const month = String(date.getMonth() + 1).padStart(2, "0");
6842
- return `${year}-${month}`;
6843
- }
6844
- async function listFederalRevenueReferences(options = {}) {
6845
- const result = await propfind([], options);
6846
- const references = result.entries.filter((entry) => entry.isCollection && REFERENCE_PATTERN.test(entry.name)).map((entry) => ({
6847
- reference: entry.name,
6848
- href: entry.href
6849
- })).sort((left, right) => left.reference.localeCompare(right.reference));
6850
- return {
6851
- references,
6852
- remoteBaseUrl: result.baseUrl
6853
- };
6854
- }
6855
- async function resolveFederalRevenueReference(input = {}) {
6856
- const { references } = await listFederalRevenueReferences(input);
6857
- const availableReferences = references.map((item) => item.reference);
6858
- const latest = availableReferences.at(-1);
6859
- if (!latest) {
6860
- throw new ValidationError(
6861
- "Federal Revenue reference discovery failed: no monthly references were found in the public share."
6862
- );
6863
- }
6864
- if (input.reference) {
6865
- validateFederalRevenueReference(input.reference);
6866
- if (!availableReferences.includes(input.reference)) {
6867
- throw new ValidationError(
6868
- `Federal Revenue reference not found: ${input.reference}. Latest available reference is ${latest}.`,
6869
- {
6870
- requestedReference: input.reference,
6871
- latestAvailableReference: latest,
6872
- availableReferences
6873
- }
6874
- );
6875
- }
6876
- return {
6877
- mode: "explicit",
6878
- selectedReference: input.reference,
6879
- availableReferences
6880
- };
6881
- }
6882
- if (input.current) {
6883
- const currentReference = getCurrentFederalRevenueReference();
6884
- if (!availableReferences.includes(currentReference)) {
6885
- throw new ValidationError(
6886
- `Federal Revenue current reference is not available yet: ${currentReference}. Latest available reference is ${latest}.`,
6887
- {
6888
- requestedReference: currentReference,
6889
- latestAvailableReference: latest,
6890
- availableReferences
6891
- }
6892
- );
6893
- }
6894
- return {
6895
- mode: "current",
6896
- selectedReference: currentReference,
6897
- availableReferences
6898
- };
6899
- }
6900
- return {
6901
- mode: "latest",
6902
- selectedReference: latest,
6903
- availableReferences
6904
- };
6905
- }
6906
- async function listFederalRevenueFiles(reference, options = {}) {
6907
- validateFederalRevenueReference(reference);
6908
- const result = await propfind([reference], options);
6909
- const files = result.entries.filter(
6910
- (entry) => !entry.isCollection && entry.name.toLowerCase().endsWith(".zip")
6911
- ).map((entry) => ({
6912
- name: entry.name,
6913
- href: entry.href,
6914
- downloadUrl: buildUrl(result.baseUrl, [reference, entry.name]),
6915
- ...entry.sizeInBytes !== void 0 ? { sizeInBytes: entry.sizeInBytes } : {},
6916
- ...entry.lastModified ? { lastModified: entry.lastModified } : {},
6917
- ...entry.etag ? { etag: entry.etag } : {}
6918
- })).sort((left, right) => left.name.localeCompare(right.name));
6919
- return {
6920
- files,
6921
- remoteBaseUrl: result.baseUrl
6922
- };
6923
- }
6924
- function buildFederalRevenueDownloadHeaders(options = {}) {
6925
- return {
6926
- Authorization: getAuthHeader(getShareToken(options.shareToken)),
6927
- "User-Agent": options.userAgent ?? DEFAULT_FEDERAL_REVENUE_USER_AGENT
6928
- };
6929
- }
6930
-
6931
7073
  // src/services/federal-revenue/download.ts
6932
7074
  import { createWriteStream } from "fs";
6933
7075
  import { mkdir as mkdir5, rename, stat as stat5, unlink } from "fs/promises";
@@ -8411,6 +8553,18 @@ var STAGING_TABLE_BY_DATASET3 = {
8411
8553
  partners: "staging_partners",
8412
8554
  simples_options: "staging_simples_options"
8413
8555
  };
8556
// Ordered list of step names; the bracketed prefixes in the progress
// messages emitted nearby use these same names.
var STEP_ORDER = [
  "setup",
  "load-domains",
  "load-companies",
  "load-establishments",
  "load-partners",
  "load-simples",
  "materialize",
  "materialize-secondary-cnaes",
  "indexes",
  "analyze"
];
8414
8568
  function quoteSqlLiteral(value) {
8415
8569
  return `'${value.replace(/'/g, "''")}'`;
8416
8570
  }
@@ -8428,6 +8582,9 @@ function receitaCopyCommand(tableName, columns, filePath) {
8428
8582
  const normalizedFilePath = normalizePathForPsql(filePath);
8429
8583
  return `\\copy ${tableName} (${columns.join(", ")}) from ${quoteSqlLiteral(normalizedFilePath)} with (format csv, header false, delimiter ';', quote '"', escape '"')`;
8430
8584
  }
8585
/**
 * Builds a psql `\echo` meta-command that prints a message.
 *
 * psql applies C-like backslash substitutions inside single-quoted
 * meta-command arguments, and a meta-command ends at the first newline.
 * Backslashes are therefore doubled and line breaks are written as the `\n`
 * escape, so messages containing Windows-style paths or newlines are printed
 * verbatim and stay on one script line.
 *
 * @param {string} message - Text to print.
 * @returns {string} A single-line `\echo '...'` command.
 */
function echo(message) {
  const escaped = message
    .replace(/\\/g, "\\\\")
    // Must run after backslash doubling so the inserted escape is not doubled.
    .replace(/\r\n|\r|\n/g, "\\n")
    .replace(/'/g, "''");
  return `\\echo '${escaped}'`;
}
8431
8588
  function datasetColumns(dataset) {
8432
8589
  return DATASET_LAYOUTS[dataset].fields.map((field) => field.columnName);
8433
8590
  }
@@ -8454,7 +8611,7 @@ function partnerDedupeExpression(alias) {
8454
8611
  function materializeCompaniesSql() {
8455
8612
  const columns = companiesLayout.fields.map((field) => field.columnName);
8456
8613
  return [
8457
- "\\echo 'Materializing companies...'",
8614
+ echo("[materialize] Materializing companies..."),
8458
8615
  "with source as (",
8459
8616
  " select",
8460
8617
  ` ${columns.map((column) => `source.${column}`).join(",\n ")},`,
@@ -8468,7 +8625,8 @@ function materializeCompaniesSql() {
8468
8625
  `select ${columns.join(", ")}`,
8469
8626
  "from deduped",
8470
8627
  "on conflict (cnpj_root) do update set",
8471
- ` ${updateAssignments(columns, ["cnpj_root"])};`
8628
+ ` ${updateAssignments(columns, ["cnpj_root"])};`,
8629
+ echo("[materialize] Companies materialization completed.")
8472
8630
  ].join("\n");
8473
8631
  }
8474
8632
  function materializeEstablishmentsSql() {
@@ -8477,7 +8635,7 @@ function materializeEstablishmentsSql() {
8477
8635
  );
8478
8636
  const insertColumns = [...baseColumns, "cnpj_full"];
8479
8637
  return [
8480
- "\\echo 'Materializing establishments and secondary CNAEs...'",
8638
+ echo("[materialize] Materializing establishments..."),
8481
8639
  "with source as (",
8482
8640
  " select",
8483
8641
  ` ${baseColumns.map((column) => `source.${column}`).join(",\n ")},`,
@@ -8487,14 +8645,29 @@ function materializeEstablishmentsSql() {
8487
8645
  "),",
8488
8646
  "deduped as (",
8489
8647
  " select * from source where dedupe_rank = 1",
8648
+ ")",
8649
+ `insert into establishments (${insertColumns.join(", ")})`,
8650
+ `select ${insertColumns.join(", ")}`,
8651
+ "from deduped",
8652
+ "on conflict (cnpj_full) do update set",
8653
+ ` ${updateAssignments(insertColumns, ["cnpj_root", "cnpj_order", "cnpj_check_digits", "cnpj_full"])};`,
8654
+ echo("[materialize] Establishments materialization completed.")
8655
+ ].join("\n");
8656
+ }
8657
+ function materializeSecondaryCnaesSql() {
8658
+ return [
8659
+ echo(
8660
+ "[materialize-secondary-cnaes] Materializing establishment secondary CNAEs..."
8661
+ ),
8662
+ "with source as (",
8663
+ " select",
8664
+ " staging.cnpj_root || staging.cnpj_order || staging.cnpj_check_digits as cnpj_full,",
8665
+ " staging.secondary_cnaes_raw,",
8666
+ " row_number() over (partition by staging.cnpj_root || staging.cnpj_order || staging.cnpj_check_digits order by staging.staging_id desc) as dedupe_rank",
8667
+ " from staging_establishments staging",
8490
8668
  "),",
8491
- "upserted as (",
8492
- ` insert into establishments (${insertColumns.join(", ")})`,
8493
- ` select ${insertColumns.join(", ")}`,
8494
- " from deduped",
8495
- " on conflict (cnpj_full) do update set",
8496
- ` ${updateAssignments(insertColumns, ["cnpj_root", "cnpj_order", "cnpj_check_digits", "cnpj_full"])}`,
8497
- " returning cnpj_full",
8669
+ "deduped as (",
8670
+ " select * from source where dedupe_rank = 1",
8498
8671
  "),",
8499
8672
  "deleted_secondary_cnaes as (",
8500
8673
  " delete from establishment_secondary_cnaes target",
@@ -8515,14 +8688,17 @@ function materializeEstablishmentsSql() {
8515
8688
  "insert into establishment_secondary_cnaes (cnpj_full, cnae_code)",
8516
8689
  "select cnpj_full, cnae_code",
8517
8690
  "from secondary_cnaes_source",
8518
- "on conflict (cnpj_full, cnae_code) do nothing;"
8691
+ "on conflict (cnpj_full, cnae_code) do nothing;",
8692
+ echo(
8693
+ "[materialize-secondary-cnaes] Secondary CNAEs materialization completed."
8694
+ )
8519
8695
  ].join("\n");
8520
8696
  }
8521
8697
  function materializePartnersSql() {
8522
8698
  const baseColumns = partnersLayout.fields.map((field) => field.columnName);
8523
8699
  const insertColumns = [...baseColumns, "partner_dedupe_key"];
8524
8700
  return [
8525
- "\\echo 'Materializing partners...'",
8701
+ echo("[materialize] Materializing partners..."),
8526
8702
  "with source as (",
8527
8703
  " select",
8528
8704
  ` ${baseColumns.map((column) => `source.${column}`).join(",\n ")},`,
@@ -8542,13 +8718,14 @@ function materializePartnersSql() {
8542
8718
  `select ${insertColumns.join(", ")}`,
8543
8719
  "from deduped",
8544
8720
  "on conflict (partner_dedupe_key) do update set",
8545
- ` ${updateAssignments(insertColumns, ["partner_dedupe_key"])};`
8721
+ ` ${updateAssignments(insertColumns, ["partner_dedupe_key"])};`,
8722
+ echo("[materialize] Partners materialization completed.")
8546
8723
  ].join("\n");
8547
8724
  }
8548
8725
  function materializeSimplesSql() {
8549
8726
  const columns = simplesLayout.fields.map((field) => field.columnName);
8550
8727
  return [
8551
- "\\echo 'Materializing simples options...'",
8728
+ echo("[materialize] Materializing simples options..."),
8552
8729
  "with source as (",
8553
8730
  " select",
8554
8731
  ` ${columns.map((column) => `source.${column}`).join(",\n ")},`,
@@ -8562,7 +8739,8 @@ function materializeSimplesSql() {
8562
8739
  `select ${columns.join(", ")}`,
8563
8740
  "from deduped",
8564
8741
  "on conflict (cnpj_root) do update set",
8565
- ` ${updateAssignments(columns, ["cnpj_root"])};`
8742
+ ` ${updateAssignments(columns, ["cnpj_root"])};`,
8743
+ echo("[materialize] Simples options materialization completed.")
8566
8744
  ].join("\n");
8567
8745
  }
8568
8746
  function copyDomainSql(dataset, files) {
@@ -8572,12 +8750,20 @@ function copyDomainSql(dataset, files) {
8572
8750
  const columns = datasetColumns(dataset);
8573
8751
  const tempTable = `tmp_hybrid_${dataset}`;
8574
8752
  const lines = [
8575
- `\\echo 'Loading ${dataset} lookup data...'`,
8753
+ echo(`[load-domains] Loading ${dataset} lookup data...`),
8576
8754
  `drop table if exists ${tempTable};`,
8577
8755
  `create temporary table ${tempTable} (code text, description text);`
8578
8756
  ];
8579
- for (const file of files) {
8580
- lines.push(csvCopyCommand(tempTable, columns, file.absolutePath));
8757
+ for (const [index, file] of files.entries()) {
8758
+ lines.push(
8759
+ echo(
8760
+ `[load-domains] Loading ${dataset} file ${index + 1} of ${files.length}: ${file.relativePath}`
8761
+ ),
8762
+ csvCopyCommand(tempTable, columns, file.absolutePath),
8763
+ echo(
8764
+ `[load-domains] Loaded ${dataset} file ${index + 1} of ${files.length}.`
8765
+ )
8766
+ );
8581
8767
  }
8582
8768
  lines.push(
8583
8769
  `insert into ${dataset} (${columns.join(", ")})`,
@@ -8598,12 +8784,17 @@ function copyStagingSql(dataset, files) {
8598
8784
  return [];
8599
8785
  }
8600
8786
  const columns = datasetColumns(dataset);
8601
- return [
8602
- `\\echo 'Loading ${dataset} staging data...'`,
8603
- ...files.map(
8604
- (file) => csvCopyCommand(tableName, columns, file.absolutePath)
8605
- )
8606
- ];
8787
+ const lines = [echo(`[load-${dataset}] Loading ${dataset} staging data...`)];
8788
+ for (const [index, file] of files.entries()) {
8789
+ lines.push(
8790
+ echo(
8791
+ `[load-${dataset}] Loading file ${index + 1} of ${files.length}: ${file.relativePath}`
8792
+ ),
8793
+ csvCopyCommand(tableName, columns, file.absolutePath),
8794
+ echo(`[load-${dataset}] Loaded file ${index + 1} of ${files.length}.`)
8795
+ );
8796
+ }
8797
+ return lines;
8607
8798
  }
8608
8799
  function csvFilesByDataset(files) {
8609
8800
  const grouped = {};
@@ -8629,7 +8820,9 @@ function rawTableName(dataset) {
8629
8820
  function createRawTempTableSql(dataset) {
8630
8821
  const columns = DATASET_LAYOUTS[dataset].fields.map((field) => ` ${quoteIdentifier(field.columnName)} text`).join(",\n");
8631
8822
  return [
8823
+ "set client_min_messages to warning;",
8632
8824
  `drop table if exists ${rawTableName(dataset)};`,
8825
+ "reset client_min_messages;",
8633
8826
  `create temporary table ${rawTableName(dataset)} (`,
8634
8827
  columns,
8635
8828
  ");"
@@ -8711,11 +8904,21 @@ function rawDomainSql(dataset, files) {
8711
8904
  const columns = layout.fields.map((field) => field.columnName);
8712
8905
  const tableName = rawTableName(dataset);
8713
8906
  const lines = [
8714
- `\\echo 'Loading ${dataset} lookup data directly from sanitized Receita files...'`,
8907
+ echo(
8908
+ `[load-domains] Loading ${dataset} lookup data directly from sanitized Receita files...`
8909
+ ),
8715
8910
  createRawTempTableSql(dataset)
8716
8911
  ];
8717
- for (const file of files) {
8718
- lines.push(receitaCopyCommand(tableName, columns, file.absolutePath));
8912
+ for (const [index, file] of files.entries()) {
8913
+ lines.push(
8914
+ echo(
8915
+ `[load-domains] Loading ${dataset} file ${index + 1} of ${files.length}: ${file.relativePath}`
8916
+ ),
8917
+ receitaCopyCommand(tableName, columns, file.absolutePath),
8918
+ echo(
8919
+ `[load-domains] Loaded ${dataset} file ${index + 1} of ${files.length}.`
8920
+ )
8921
+ );
8719
8922
  }
8720
8923
  lines.push(
8721
8924
  `insert into ${dataset} (${columns.join(", ")})`,
@@ -8725,7 +8928,8 @@ function rawDomainSql(dataset, files) {
8725
8928
  `from ${tableName}`,
8726
8929
  "where nullif(btrim(code), '') is not null",
8727
8930
  "order by code",
8728
- "on conflict (code) do update set description = excluded.description;"
8931
+ "on conflict (code) do update set description = excluded.description;",
8932
+ echo(`[load-domains] ${dataset} lookup data completed.`)
8729
8933
  );
8730
8934
  return lines;
8731
8935
  }
@@ -8744,70 +8948,363 @@ function rawStagingSql(dataset, files) {
8744
8948
  const expressions = layout.fields.map(
8745
8949
  (field) => ` ${fieldExpression(dataset, field, alias)} as ${field.columnName}`
8746
8950
  );
8951
+ const stepName = loadStepName(dataset);
8747
8952
  const lines = [
8748
- `\\echo 'Loading ${dataset} staging data directly from sanitized Receita files...'`,
8953
+ echo(
8954
+ `[${stepName}] Loading ${dataset} staging data directly from sanitized Receita files...`
8955
+ ),
8956
+ `truncate table ${targetTable} restart identity;`,
8749
8957
  createRawTempTableSql(dataset)
8750
8958
  ];
8751
- for (const file of files) {
8752
- lines.push(receitaCopyCommand(tableName, columns, file.absolutePath));
8959
+ for (const [index, file] of files.entries()) {
8960
+ lines.push(
8961
+ echo(
8962
+ `[${stepName}] Loading file ${index + 1} of ${files.length}: ${file.relativePath}`
8963
+ ),
8964
+ receitaCopyCommand(tableName, columns, file.absolutePath),
8965
+ echo(`[${stepName}] Loaded file ${index + 1} of ${files.length}.`)
8966
+ );
8753
8967
  }
8754
8968
  lines.push(
8969
+ echo(
8970
+ `[${stepName}] Transforming ${dataset} raw rows into ${targetTable}...`
8971
+ ),
8755
8972
  `insert into ${targetTable} (${columns.join(", ")})`,
8756
8973
  "select",
8757
8974
  expressions.join(",\n"),
8758
- `from ${tableName} ${alias};`
8975
+ `from ${tableName} ${alias};`,
8976
+ echo(`[${stepName}] ${dataset} staging load completed.`)
8759
8977
  );
8760
8978
  return lines;
8761
8979
  }
8762
- function generatePostgresDirectImportScript(input) {
8763
- const grouped = csvFilesByDataset(input.files);
8764
- const lines = [
8765
- "-- CNPJ DB Loader hybrid PostgreSQL import script",
8766
- "-- Generated from PostgreSQL-ready CSV files exported by cnpj-db-loader postgres export-csv.",
8767
- "-- Execute with psql, for example:",
8768
- '-- psql "postgres://postgres:postgres@localhost:5432/cnpj" -f import-postgres-direct.sql',
8769
- "",
8980
+ function loadStepName(dataset) {
8981
+ switch (dataset) {
8982
+ case "companies":
8983
+ return "load-companies";
8984
+ case "establishments":
8985
+ return "load-establishments";
8986
+ case "partners":
8987
+ return "load-partners";
8988
+ case "simples_options":
8989
+ return "load-simples";
8990
+ default:
8991
+ return `load-${dataset}`;
8992
+ }
8993
+ }
8994
+ function scriptHeader(title, sourceEncoding) {
8995
+ return [
8996
+ `-- ${title}`,
8997
+ "-- Generated by cnpj-db-loader postgres generate-script.",
8770
8998
  "\\set ON_ERROR_STOP on",
8771
- "\\echo 'Starting CNPJ DB Loader hybrid PostgreSQL import...'",
8772
- "",
8773
- "begin;",
8774
- "",
8775
- "-- Keep the final schema and seed data managed by sql/schema.sql.",
8776
- "-- This script only resets staging tables and then upserts final data.",
8777
- "truncate table staging_companies restart identity;",
8778
- "truncate table staging_establishments restart identity;",
8779
- "truncate table staging_partners restart identity;",
8780
- "truncate table staging_simples_options restart identity;",
8999
+ ...sourceEncoding ? [
9000
+ echo(
9001
+ `Using source file encoding ${sourceEncoding} for psql copy operations...`
9002
+ ),
9003
+ `set client_encoding to ${quoteSqlLiteral(sourceEncoding)};`
9004
+ ] : [],
8781
9005
  ""
8782
9006
  ];
8783
- for (const dataset of DOMAIN_DATASETS) {
8784
- lines.push(...copyDomainSql(dataset, grouped[dataset] ?? []), "");
9007
+ }
9008
+ function wrapTransaction(lines, mode, shouldWrap) {
9009
+ if (!shouldWrap || mode !== "phase") {
9010
+ return [...lines];
8785
9011
  }
8786
- for (const dataset of STAGING_DATASETS) {
8787
- lines.push(...copyStagingSql(dataset, grouped[dataset] ?? []), "");
9012
+ return ["begin;", "", ...lines, "", "commit;"];
9013
+ }
9014
+ function buildStepScript(title, body, input, wrapInPhaseTransaction) {
9015
+ return [
9016
+ ...scriptHeader(title, input.sourceEncoding),
9017
+ ...wrapTransaction(body, input.transactionMode, wrapInPhaseTransaction),
9018
+ ""
9019
+ ].join("\n");
9020
+ }
9021
+ function includeSet(input) {
9022
+ const selected = new Set(input.include);
9023
+ if (input.skipIndexes) {
9024
+ selected.delete("indexes");
8788
9025
  }
8789
- lines.push(...materializationAndAnalyzeSql());
8790
- return lines.join("\n");
9026
+ if (input.skipAnalyze) {
9027
+ selected.delete("analyze");
9028
+ }
9029
+ return selected;
9030
+ }
9031
+ function hasAnyFinalMaterialization(selected) {
9032
+ return selected.has("companies") || selected.has("establishments") || selected.has("partners") || selected.has("simples");
9033
+ }
9034
+ function materializeSql(selected) {
9035
+ const lines = [echo("[materialize] Starting final table materialization...")];
9036
+ if (selected.has("companies")) {
9037
+ lines.push(materializeCompaniesSql(), "");
9038
+ }
9039
+ if (selected.has("establishments")) {
9040
+ lines.push(materializeEstablishmentsSql(), "");
9041
+ }
9042
+ if (selected.has("partners")) {
9043
+ lines.push(materializePartnersSql(), "");
9044
+ }
9045
+ if (selected.has("simples")) {
9046
+ lines.push(materializeSimplesSql(), "");
9047
+ }
9048
+ lines.push(echo("[materialize] Final table materialization completed."));
9049
+ return lines;
9050
+ }
9051
+ function indexesSql() {
9052
+ return [
9053
+ echo(
9054
+ "[indexes] No additional index operations are generated in this beta."
9055
+ ),
9056
+ "-- Indexes are expected to be managed by the schema generated by cnpj-db-loader schema generate.",
9057
+ "-- A future fast-rebuild mode may generate DROP/CREATE INDEX operations here."
9058
+ ];
9059
+ }
9060
+ function analyzeSql(selected) {
9061
+ const tables = /* @__PURE__ */ new Set();
9062
+ if (selected.has("companies")) {
9063
+ tables.add("companies");
9064
+ }
9065
+ if (selected.has("establishments")) {
9066
+ tables.add("establishments");
9067
+ }
9068
+ if (selected.has("secondary-cnaes")) {
9069
+ tables.add("establishment_secondary_cnaes");
9070
+ }
9071
+ if (selected.has("partners")) {
9072
+ tables.add("partners");
9073
+ }
9074
+ if (selected.has("simples")) {
9075
+ tables.add("simples_options");
9076
+ }
9077
+ if (selected.has("domains")) {
9078
+ for (const dataset of DOMAIN_DATASETS) {
9079
+ tables.add(dataset);
9080
+ }
9081
+ }
9082
+ return [
9083
+ echo("[analyze] Refreshing planner statistics..."),
9084
+ ...[...tables].map((table) => `analyze ${table};`),
9085
+ echo("[analyze] Planner statistics refreshed.")
9086
+ ];
8791
9087
  }
8792
- function generatePostgresSanitizedDirectImportScript(input) {
9088
+ function step(name, file, dependsOn, included) {
9089
+ return { name, file, dependsOn, included };
9090
+ }
9091
+ function generatePostgresDirectScriptFiles(input) {
8793
9092
  const grouped = directFilesByDataset(input.files);
8794
- const lines = [
8795
- "-- CNPJ DB Loader direct PostgreSQL import script",
9093
+ const selected = includeSet(input);
9094
+ if (!DOMAIN_DATASETS.some((dataset) => (grouped[dataset] ?? []).length > 0)) {
9095
+ selected.delete("domains");
9096
+ }
9097
+ if ((grouped.companies ?? []).length === 0) {
9098
+ selected.delete("companies");
9099
+ }
9100
+ if ((grouped.establishments ?? []).length === 0) {
9101
+ selected.delete("establishments");
9102
+ selected.delete("secondary-cnaes");
9103
+ }
9104
+ if ((grouped.partners ?? []).length === 0) {
9105
+ selected.delete("partners");
9106
+ }
9107
+ if ((grouped.simples_options ?? []).length === 0) {
9108
+ selected.delete("simples");
9109
+ }
9110
+ const scripts = {};
9111
+ const steps = [];
9112
+ const setupIncluded = true;
9113
+ steps.push(step("setup", "setup.sql", [], setupIncluded));
9114
+ scripts["setup.sql"] = [
9115
+ ...scriptHeader(
9116
+ "CNPJ DB Loader PostgreSQL direct import setup",
9117
+ input.sourceEncoding
9118
+ ),
9119
+ echo("[setup] Preparing PostgreSQL direct import session..."),
9120
+ "-- The database schema must be applied before running these scripts.",
9121
+ "-- This setup script configures the psql session used by the generated orchestrator.",
9122
+ echo("[setup] Setup completed."),
9123
+ ""
9124
+ ].join("\n");
9125
+ const domainsIncluded = selected.has("domains") && DOMAIN_DATASETS.some((dataset) => (grouped[dataset] ?? []).length > 0);
9126
+ steps.push(
9127
+ step("load-domains", "load-domains.sql", ["setup"], domainsIncluded)
9128
+ );
9129
+ if (domainsIncluded) {
9130
+ const lines = [echo("[load-domains] Starting domain tables load...")];
9131
+ for (const dataset of DOMAIN_DATASETS) {
9132
+ lines.push(...rawDomainSql(dataset, grouped[dataset] ?? []), "");
9133
+ }
9134
+ lines.push(echo("[load-domains] Domain tables load completed."));
9135
+ scripts["load-domains.sql"] = buildStepScript(
9136
+ "CNPJ DB Loader PostgreSQL direct import domains step",
9137
+ lines,
9138
+ input,
9139
+ true
9140
+ );
9141
+ }
9142
+ const datasetSteps = [
9143
+ {
9144
+ dataset: "companies",
9145
+ name: "load-companies",
9146
+ file: "load-companies.sql",
9147
+ include: "companies"
9148
+ },
9149
+ {
9150
+ dataset: "establishments",
9151
+ name: "load-establishments",
9152
+ file: "load-establishments.sql",
9153
+ include: "establishments"
9154
+ },
9155
+ {
9156
+ dataset: "partners",
9157
+ name: "load-partners",
9158
+ file: "load-partners.sql",
9159
+ include: "partners"
9160
+ },
9161
+ {
9162
+ dataset: "simples_options",
9163
+ name: "load-simples",
9164
+ file: "load-simples.sql",
9165
+ include: "simples"
9166
+ }
9167
+ ];
9168
+ for (const item of datasetSteps) {
9169
+ const files = grouped[item.dataset] ?? [];
9170
+ const included = selected.has(item.include) && files.length > 0;
9171
+ steps.push(step(item.name, item.file, ["setup"], included));
9172
+ if (included) {
9173
+ scripts[item.file] = buildStepScript(
9174
+ `CNPJ DB Loader PostgreSQL direct import ${item.name} step`,
9175
+ rawStagingSql(item.dataset, files),
9176
+ input,
9177
+ true
9178
+ );
9179
+ }
9180
+ }
9181
+ const materializeIncluded = hasAnyFinalMaterialization(selected);
9182
+ steps.push(
9183
+ step(
9184
+ "materialize",
9185
+ "materialize.sql",
9186
+ datasetSteps.filter((item) => selected.has(item.include)).map((item) => item.name),
9187
+ materializeIncluded
9188
+ )
9189
+ );
9190
+ if (materializeIncluded) {
9191
+ scripts["materialize.sql"] = buildStepScript(
9192
+ "CNPJ DB Loader PostgreSQL direct import materialization step",
9193
+ materializeSql(selected),
9194
+ input,
9195
+ true
9196
+ );
9197
+ }
9198
+ const secondaryIncluded = selected.has("secondary-cnaes") && selected.has("establishments");
9199
+ steps.push(
9200
+ step(
9201
+ "materialize-secondary-cnaes",
9202
+ "materialize-secondary-cnaes.sql",
9203
+ ["load-establishments"],
9204
+ secondaryIncluded
9205
+ )
9206
+ );
9207
+ if (secondaryIncluded) {
9208
+ scripts["materialize-secondary-cnaes.sql"] = buildStepScript(
9209
+ "CNPJ DB Loader PostgreSQL direct import secondary CNAEs step",
9210
+ [materializeSecondaryCnaesSql()],
9211
+ input,
9212
+ true
9213
+ );
9214
+ }
9215
+ const indexesIncluded = selected.has("indexes");
9216
+ steps.push(
9217
+ step(
9218
+ "indexes",
9219
+ "indexes.sql",
9220
+ materializeIncluded ? ["materialize"] : ["setup"],
9221
+ indexesIncluded
9222
+ )
9223
+ );
9224
+ if (indexesIncluded) {
9225
+ scripts["indexes.sql"] = buildStepScript(
9226
+ "CNPJ DB Loader PostgreSQL direct import indexes step",
9227
+ indexesSql(),
9228
+ input,
9229
+ true
9230
+ );
9231
+ }
9232
+ const analyzeIncluded = selected.has("analyze");
9233
+ const analyzeDependencies = [
9234
+ ...domainsIncluded ? ["load-domains"] : [],
9235
+ ...materializeIncluded ? ["materialize"] : [],
9236
+ ...secondaryIncluded ? ["materialize-secondary-cnaes"] : []
9237
+ ];
9238
+ steps.push(
9239
+ step(
9240
+ "analyze",
9241
+ "analyze.sql",
9242
+ analyzeDependencies.length > 0 ? analyzeDependencies : ["setup"],
9243
+ analyzeIncluded
9244
+ )
9245
+ );
9246
+ if (analyzeIncluded) {
9247
+ scripts["analyze.sql"] = buildStepScript(
9248
+ "CNPJ DB Loader PostgreSQL direct import analyze step",
9249
+ analyzeSql(selected),
9250
+ input,
9251
+ true
9252
+ );
9253
+ }
9254
+ const orchestratorLines = [
9255
+ "-- CNPJ DB Loader direct PostgreSQL import orchestrator",
8796
9256
  "-- Generated from sanitized Receita files by cnpj-db-loader postgres generate-script.",
8797
- "-- This path avoids rewriting the dataset into a second CSV tree.",
8798
9257
  "-- Execute with psql, for example:",
8799
- '-- psql "postgres://postgres:postgres@localhost:5432/cnpj" -f import-postgres-direct.sql',
9258
+ '-- psql -d "postgres://postgres:postgres@localhost:5432/cnpj" -f import-postgres-direct.sql',
8800
9259
  "",
8801
9260
  "\\set ON_ERROR_STOP on",
8802
- `\\echo 'Using source file encoding ${input.sourceEncoding} for psql copy operations...'`,
9261
+ echo(
9262
+ `Using source file encoding ${input.sourceEncoding} for psql copy operations...`
9263
+ ),
8803
9264
  `set client_encoding to ${quoteSqlLiteral(input.sourceEncoding)};`,
8804
- "\\echo 'Starting CNPJ DB Loader direct PostgreSQL import from sanitized files...'",
9265
+ echo(
9266
+ `Starting CNPJ DB Loader direct PostgreSQL import using transaction mode ${input.transactionMode}...`
9267
+ ),
9268
+ "",
9269
+ ...input.transactionMode === "single" ? ["begin;", ""] : []
9270
+ ];
9271
+ for (const name of STEP_ORDER) {
9272
+ const currentStep = steps.find((item) => item.name === name);
9273
+ if (!currentStep?.included) {
9274
+ continue;
9275
+ }
9276
+ orchestratorLines.push(
9277
+ echo(
9278
+ `[orchestrator] Running ${currentStep.name} (${currentStep.file})...`
9279
+ ),
9280
+ `\\ir ${currentStep.file}`,
9281
+ echo(`[orchestrator] Completed ${currentStep.name}.`),
9282
+ ""
9283
+ );
9284
+ }
9285
+ orchestratorLines.push(
9286
+ ...input.transactionMode === "single" ? ["commit;", ""] : [],
9287
+ echo("CNPJ DB Loader hybrid PostgreSQL import completed."),
9288
+ ""
9289
+ );
9290
+ scripts["import-postgres-direct.sql"] = orchestratorLines.join("\n");
9291
+ return { scripts, steps };
9292
+ }
9293
+ function generatePostgresDirectImportScript(input) {
9294
+ const grouped = csvFilesByDataset(input.files);
9295
+ const lines = [
9296
+ "-- CNPJ DB Loader hybrid PostgreSQL import script",
9297
+ "-- Generated from PostgreSQL-ready CSV files exported by cnpj-db-loader postgres export-csv.",
9298
+ "-- Execute with psql, for example:",
9299
+ '-- psql -d "postgres://postgres:postgres@localhost:5432/cnpj" -f import-postgres-direct.sql',
9300
+ "",
9301
+ "\\set ON_ERROR_STOP on",
9302
+ echo("Starting CNPJ DB Loader hybrid PostgreSQL import..."),
8805
9303
  "",
8806
9304
  "begin;",
8807
9305
  "",
8808
9306
  "-- Keep the final schema and seed data managed by sql/schema.sql.",
8809
- "-- This script copies sanitized Receita files into temporary raw tables,",
8810
- "-- transforms values inside PostgreSQL, resets staging tables and upserts final data.",
9307
+ "-- This script only resets staging tables and then upserts final data.",
8811
9308
  "truncate table staging_companies restart identity;",
8812
9309
  "truncate table staging_establishments restart identity;",
8813
9310
  "truncate table staging_partners restart identity;",
@@ -8815,10 +9312,10 @@ function generatePostgresSanitizedDirectImportScript(input) {
8815
9312
  ""
8816
9313
  ];
8817
9314
  for (const dataset of DOMAIN_DATASETS) {
8818
- lines.push(...rawDomainSql(dataset, grouped[dataset] ?? []), "");
9315
+ lines.push(...copyDomainSql(dataset, grouped[dataset] ?? []), "");
8819
9316
  }
8820
9317
  for (const dataset of STAGING_DATASETS) {
8821
- lines.push(...rawStagingSql(dataset, grouped[dataset] ?? []), "");
9318
+ lines.push(...copyStagingSql(dataset, grouped[dataset] ?? []), "");
8822
9319
  }
8823
9320
  lines.push(...materializationAndAnalyzeSql());
8824
9321
  return lines.join("\n");
@@ -8829,11 +9326,13 @@ function materializationAndAnalyzeSql() {
8829
9326
  "",
8830
9327
  materializeEstablishmentsSql(),
8831
9328
  "",
9329
+ materializeSecondaryCnaesSql(),
9330
+ "",
8832
9331
  materializePartnersSql(),
8833
9332
  "",
8834
9333
  materializeSimplesSql(),
8835
9334
  "",
8836
- "\\echo 'Refreshing planner statistics...'",
9335
+ echo("Refreshing planner statistics..."),
8837
9336
  "analyze companies;",
8838
9337
  "analyze establishments;",
8839
9338
  "analyze establishment_secondary_cnaes;",
@@ -8848,7 +9347,7 @@ function materializationAndAnalyzeSql() {
8848
9347
  "",
8849
9348
  "commit;",
8850
9349
  "",
8851
- "\\echo 'CNPJ DB Loader hybrid PostgreSQL import completed.'",
9350
+ echo("CNPJ DB Loader hybrid PostgreSQL import completed."),
8852
9351
  ""
8853
9352
  ];
8854
9353
  }
@@ -9055,6 +9554,29 @@ async function exportPostgresCsvDataset(inputPath, options = {}) {
9055
9554
  import { mkdir as mkdir9, stat as stat7, writeFile as writeFile6 } from "fs/promises";
9056
9555
  import path17 from "path";
9057
9556
  var DEFAULT_SOURCE_ENCODING = "UTF8";
9557
+ var DEFAULT_TRANSACTION_MODE = "single";
9558
+ var ALL_INCLUDE_TARGETS = [
9559
+ "domains",
9560
+ "companies",
9561
+ "establishments",
9562
+ "partners",
9563
+ "simples",
9564
+ "secondary-cnaes",
9565
+ "indexes",
9566
+ "analyze"
9567
+ ];
9568
+ var INCLUDE_TARGETS_BY_DATASET = {
9569
+ companies: "companies",
9570
+ establishments: "establishments",
9571
+ partners: "partners",
9572
+ simples_options: "simples",
9573
+ countries: "domains",
9574
+ cities: "domains",
9575
+ partner_qualifications: "domains",
9576
+ legal_natures: "domains",
9577
+ reasons: "domains",
9578
+ cnaes: "domains"
9579
+ };
9058
9580
  function defaultPostgresDirectOutputPath(inputPath) {
9059
9581
  const baseName = path17.basename(inputPath);
9060
9582
  if (baseName.toLowerCase() === "sanitized") {
@@ -9063,7 +9585,7 @@ function defaultPostgresDirectOutputPath(inputPath) {
9063
9585
  return path17.join(path17.dirname(inputPath), `${baseName}-postgres-direct`);
9064
9586
  }
9065
9587
  function inferNextStep5(scriptPath) {
9066
- return `psql "postgres://postgres:postgres@localhost:5432/cnpj" -f ${scriptPath.replace(/\\/g, "/")}`;
9588
+ return `psql -d "postgres://postgres:postgres@localhost:5432/cnpj" -f ${scriptPath.replace(/\\/g, "/")}`;
9067
9589
  }
9068
9590
  function normalizeSourceEncoding(value) {
9069
9591
  const encoding = (value ?? DEFAULT_SOURCE_ENCODING).trim();
@@ -9074,6 +9596,41 @@ function normalizeSourceEncoding(value) {
9074
9596
  }
9075
9597
  return encoding.toUpperCase();
9076
9598
  }
9599
+ function normalizeTransactionMode(value) {
9600
+ const mode = value ?? DEFAULT_TRANSACTION_MODE;
9601
+ if (!["single", "phase", "none"].includes(mode)) {
9602
+ throw new ValidationError(
9603
+ `Invalid transaction mode: ${String(value)}. Use single, phase or none.`
9604
+ );
9605
+ }
9606
+ return mode;
9607
+ }
9608
+ function isIncludeTarget(value) {
9609
+ return ALL_INCLUDE_TARGETS.includes(value);
9610
+ }
9611
+ function normalizeIncludeTargets(include, dataset) {
9612
+ if (include && include.length > 0) {
9613
+ const unique = [...new Set(include)];
9614
+ const invalid = unique.filter((item) => !isIncludeTarget(item));
9615
+ if (invalid.length > 0) {
9616
+ throw new ValidationError(
9617
+ `Invalid include target(s): ${invalid.join(", ")}. Use ${ALL_INCLUDE_TARGETS.join(", ")}.`
9618
+ );
9619
+ }
9620
+ return unique;
9621
+ }
9622
+ if (dataset) {
9623
+ const target = INCLUDE_TARGETS_BY_DATASET[dataset];
9624
+ if (!target) {
9625
+ return [];
9626
+ }
9627
+ if (target === "establishments") {
9628
+ return ["establishments", "secondary-cnaes", "analyze"];
9629
+ }
9630
+ return [target, "analyze"];
9631
+ }
9632
+ return [...ALL_INCLUDE_TARGETS];
9633
+ }
9077
9634
  async function generatePostgresDirectScript(inputPath, options = {}) {
9078
9635
  if (options.dataset && !isImportDatasetType(options.dataset)) {
9079
9636
  throw new ValidationError(`Unsupported dataset type: ${options.dataset}.`);
@@ -9089,6 +9646,10 @@ async function generatePostgresDirectScript(inputPath, options = {}) {
9089
9646
  options.outputPath ?? defaultPostgresDirectOutputPath(validatedPath)
9090
9647
  );
9091
9648
  const sourceEncoding = normalizeSourceEncoding(options.sourceEncoding);
9649
+ const transactionMode = normalizeTransactionMode(options.transactionMode);
9650
+ const include = normalizeIncludeTargets(options.include, options.dataset);
9651
+ const skipIndexes = options.skipIndexes ?? false;
9652
+ const skipAnalyze = options.skipAnalyze ?? false;
9092
9653
  const inspected = await inspectFiles(validatedPath);
9093
9654
  const recognizedFiles = inspected.entries.filter((entry) => entry.entryKind === "file").flatMap((entry) => {
9094
9655
  if (!isImportDatasetType(entry.inferredType)) {
@@ -9116,7 +9677,11 @@ async function generatePostgresDirectScript(inputPath, options = {}) {
9116
9677
  outputPath,
9117
9678
  totalFiles: recognizedFiles.length,
9118
9679
  datasets,
9119
- sourceEncoding
9680
+ sourceEncoding,
9681
+ transactionMode,
9682
+ include,
9683
+ skipIndexes,
9684
+ skipAnalyze
9120
9685
  });
9121
9686
  await mkdir9(outputPath, { recursive: true });
9122
9687
  const sourceFiles = [];
@@ -9152,11 +9717,21 @@ async function generatePostgresDirectScript(inputPath, options = {}) {
9152
9717
  }
9153
9718
  const scriptName = options.scriptName ?? "import-postgres-direct.sql";
9154
9719
  const scriptPath = path17.join(outputPath, scriptName);
9155
- const script = generatePostgresSanitizedDirectImportScript({
9720
+ const generated = generatePostgresDirectScriptFiles({
9156
9721
  files: sourceFiles,
9157
- sourceEncoding
9722
+ sourceEncoding,
9723
+ transactionMode,
9724
+ include,
9725
+ skipIndexes,
9726
+ skipAnalyze
9158
9727
  });
9159
- await writeFile6(scriptPath, script, "utf8");
9728
+ const scriptFiles = [];
9729
+ for (const [fileName, script] of Object.entries(generated.scripts)) {
9730
+ const outputFileName = fileName === "import-postgres-direct.sql" ? scriptName : fileName;
9731
+ const outputFilePath = path17.join(outputPath, outputFileName);
9732
+ await writeFile6(outputFilePath, script, "utf8");
9733
+ scriptFiles.push(outputFilePath);
9734
+ }
9160
9735
  const manifestPath = path17.join(outputPath, "manifest.json");
9161
9736
  const summaryDatasets = [...summariesByDataset.values()].sort(
9162
9737
  (left, right) => IMPORT_ORDER.indexOf(left.dataset) - IMPORT_ORDER.indexOf(right.dataset)
@@ -9168,13 +9743,19 @@ async function generatePostgresDirectScript(inputPath, options = {}) {
9168
9743
  const manifest = {
9169
9744
  generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
9170
9745
  mode: "direct-sanitized-script",
9746
+ transactionMode,
9747
+ include,
9748
+ skipIndexes,
9749
+ skipAnalyze,
9171
9750
  inputPath: path17.resolve(inputPath),
9172
9751
  validatedPath,
9173
9752
  outputPath,
9174
9753
  scriptPath,
9754
+ scriptFiles,
9175
9755
  sourceEncoding,
9176
9756
  totalFiles: sourceFiles.length,
9177
9757
  totalBytes,
9758
+ steps: generated.steps,
9178
9759
  datasets: summaryDatasets
9179
9760
  };
9180
9761
  await writeFile6(
@@ -9197,15 +9778,19 @@ async function generatePostgresDirectScript(inputPath, options = {}) {
9197
9778
  scriptPath,
9198
9779
  manifestPath,
9199
9780
  sourceEncoding,
9781
+ transactionMode,
9200
9782
  totalFiles: sourceFiles.length,
9201
9783
  totalBytes,
9202
9784
  datasets: summaryDatasets,
9785
+ scriptFiles,
9786
+ steps: generated.steps,
9203
9787
  warnings: [
9204
9788
  ...validation.ok ? [] : validation.errors,
9205
9789
  "This script imports sanitized Receita files directly with psql \\copy. It avoids rewriting the full dataset into a second CSV tree.",
9206
- "The generated script expects the database schema generated by cnpj-db-loader to be applied before execution.",
9790
+ "The generated scripts expect the database schema generated by cnpj-db-loader to be applied before execution.",
9207
9791
  "The direct PostgreSQL script now defaults to UTF8 because the sanitize command writes clean UTF-8 files.",
9208
- "Use --source-encoding WIN1252 or LATIN1 only when generating scripts for legacy sanitized files produced by older loader versions."
9792
+ "Use --source-encoding WIN1252 or LATIN1 only when generating scripts for legacy sanitized files produced by older loader versions.",
9793
+ "The generated import is now modular. Use import-postgres-direct.sql as the orchestrator or run individual phase scripts manually."
9209
9794
  ],
9210
9795
  nextStep: inferNextStep5(scriptPath)
9211
9796
  };
@@ -9213,7 +9798,6 @@ async function generatePostgresDirectScript(inputPath, options = {}) {
9213
9798
  export {
9214
9799
  AppError,
9215
9800
  DEFAULT_FEDERAL_REVENUE_DOWNLOAD_ROOT,
9216
- DEFAULT_FEDERAL_REVENUE_SHARE_TOKEN,
9217
9801
  DEFAULT_FEDERAL_REVENUE_USER_AGENT,
9218
9802
  DEFAULT_FEDERAL_REVENUE_WEBDAV_URL,
9219
9803
  FEDERAL_REVENUE_CONTROL_DIR,
@@ -9247,6 +9831,7 @@ export {
9247
9831
  getAllLayouts,
9248
9832
  getCurrentFederalRevenueReference,
9249
9833
  getFederalRevenueControlDirectory,
9834
+ getFederalRevenueEffectiveConfig,
9250
9835
  getFederalRevenueManifestPath,
9251
9836
  getFederalRevenueStatus,
9252
9837
  getFederalRevenueSyncLockPath,
@@ -9263,10 +9848,13 @@ export {
9263
9848
  materializeImportedData,
9264
9849
  prettyJson,
9265
9850
  readDatabaseConfig,
9851
+ readFederalRevenueEffectiveConfig,
9266
9852
  readFederalRevenueManifest,
9267
9853
  resetDefaultDbUrl,
9854
+ resetFederalRevenueConfig,
9268
9855
  resolveDatabaseUrl,
9269
9856
  resolveDbUrl,
9857
+ resolveFederalRevenueClientOptions,
9270
9858
  resolveFederalRevenueReference,
9271
9859
  resolveInputMode,
9272
9860
  resolveSchemaProfile,
@@ -9276,6 +9864,7 @@ export {
9276
9864
  safeWriteText,
9277
9865
  sanitizeInputDirectory,
9278
9866
  setDefaultDbUrl,
9867
+ setFederalRevenueConfigValue,
9279
9868
  showQuarantineRow,
9280
9869
  syncFederalRevenueDataset,
9281
9870
  testDatabaseConnection,