@danielarndt0/cnpj-db-loader 2.4.0-beta.2 → 2.4.0-beta.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -6
- package/dist/cli.js +1037 -296
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +337 -297
- package/dist/index.js +879 -290
- package/dist/index.js.map +1 -1
- package/docs/commands.md +11 -1
- package/docs/federal-revenue.md +36 -2
- package/docs/postgres-direct.md +235 -41
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -100,6 +100,222 @@ function getConfigFilePath() {
|
|
|
100
100
|
return path2.join(os2.homedir(), ".config", "cnpj-db-loader", "config.json");
|
|
101
101
|
}
|
|
102
102
|
|
|
103
|
+
// src/services/federal-revenue/client.ts
|
|
104
|
+
/** WebDAV endpoint used when no base URL is supplied by the caller or the config. */
var DEFAULT_FEDERAL_REVENUE_WEBDAV_URL = "https://arquivos.receitafederal.gov.br/public.php/webdav";

/** `User-Agent` header value used when none is supplied. */
var DEFAULT_FEDERAL_REVENUE_USER_AGENT = "cnpj-db-loader federal-revenue-client";

/** Matches a reference written as four digits, a dash, and two digits (YYYY-MM). */
var REFERENCE_PATTERN = /^\d{4}-\d{2}$/;
|
|
107
|
+
/**
 * Removes every "/" found at the end of a string.
 *
 * @param {string} value - Text to trim.
 * @returns {string} The text without trailing slashes.
 */
function trimTrailingSlash(value) {
  let end = value.length;
  while (end > 0 && value[end - 1] === "/") {
    end -= 1;
  }
  return value.slice(0, end);
}
|
|
110
|
+
/**
 * Picks the base URL to use and strips its trailing slashes.
 *
 * @param {string | null | undefined} value - Caller-supplied base URL; the
 *   default WebDAV URL is used when this is null or undefined.
 * @returns {string} Base URL without trailing slashes.
 */
function normalizeBaseUrl(value) {
  const candidate = value ?? DEFAULT_FEDERAL_REVENUE_WEBDAV_URL;
  return trimTrailingSlash(candidate);
}
|
|
113
|
+
/**
 * Returns the trimmed share token, rejecting a missing or blank one.
 *
 * @param {string | null | undefined} value - Share token candidate.
 * @returns {string} The token with surrounding whitespace removed.
 * @throws {ValidationError} When the token is absent or empty after trimming.
 */
function getShareToken(value) {
  const token = value?.trim();
  if (token) {
    return token;
  }
  throw new ValidationError(
    "Federal Revenue public share token is not configured. Run `cnpj-db-loader federal-revenue config set share-token <token>` or pass --share-token."
  );
}
|
|
122
|
+
/**
 * Percent-encodes a path fragment while leaving "/" separators untouched.
 *
 * @param {string} value - Path fragment, possibly containing "/".
 * @returns {string} The encoded fragment.
 */
function encodePathSegment(value) {
  // Encoding each "/"-separated piece on its own keeps the slashes literal.
  const pieces = `${value}`.split("/");
  return pieces.map((piece) => encodeURIComponent(piece)).join("/");
}
|
|
125
|
+
/**
 * Decodes the five predefined XML entities (lt, gt, quot, apos, amp) in a
 * text fragment.
 *
 * The previous body replaced each character with itself, so entities were
 * never decoded. Decoding happens in a single pass so an escaped ampersand
 * followed by "lt;" yields the literal entity text rather than "<".
 *
 * @param {string} value - Raw text taken from an XML document.
 * @returns {string} The text with predefined entities replaced by characters.
 */
function decodeXml(value) {
  const characterByEntity = {
    lt: "<",
    gt: ">",
    quot: '"',
    apos: "'",
    amp: "&"
  };
  return value.replace(
    /&(lt|gt|quot|apos|amp);/g,
    (_match, entityName) => characterByEntity[entityName]
  );
}
|
|
128
|
+
/**
 * Percent-decodes an href segment, returning it unchanged when it cannot be
 * decoded.
 *
 * @param {string} value - Encoded segment.
 * @returns {string} Decoded segment, or the input when decoding throws.
 */
function decodeHrefSegment(value) {
  let decoded = value;
  try {
    decoded = decodeURIComponent(value);
  } catch {
    // Malformed escape sequence: deliberately keep the raw segment.
  }
  return decoded;
}
|
|
135
|
+
/**
 * Builds a Basic `Authorization` header value with the share token as the
 * user name and an empty password.
 *
 * @param {string} shareToken - Share token.
 * @returns {string} `Basic ` followed by the base64 of `<token>:`.
 */
function getAuthHeader(shareToken) {
  const credentials = `${shareToken}:`;
  const encoded = Buffer.from(credentials).toString("base64");
  return `Basic ${encoded}`;
}
|
|
138
|
+
/**
 * Joins a base URL with encoded path segments.
 *
 * @param {string} baseUrl - Base URL without a trailing slash.
 * @param {string[]} [segments=[]] - Path segments to append.
 * @returns {string} `<baseUrl>/` when there are no segments, otherwise
 *   `<baseUrl>/<seg1>/<seg2>...` with each segment encoded.
 */
function buildUrl(baseUrl, segments = []) {
  const encodedPath = segments.length === 0 ? "" : segments.map(encodePathSegment).join("/");
  return `${baseUrl}/${encodedPath}`;
}
|
|
144
|
+
/**
 * Returns the decoded, trimmed text of the first element named `tagName`
 * (with or without a namespace prefix) found in an XML fragment.
 *
 * @param {string} block - XML fragment to search.
 * @param {string} tagName - Local element name; inserted into the pattern as-is.
 * @returns {string | undefined} The element text, or undefined when the
 *   element is missing or its content is empty.
 */
function extractFirst(block, tagName) {
  const pattern = new RegExp(
    `<(?:[a-zA-Z0-9_-]+:)?${tagName}\\b[^>]*>([\\s\\S]*?)<\\/(?:[a-zA-Z0-9_-]+:)?${tagName}>`,
    "i"
  );
  const content = block.match(pattern)?.[1];
  if (!content) {
    return void 0;
  }
  return decodeXml(content.trim());
}
|
|
152
|
+
/**
 * Tells whether an XML fragment contains a `collection` element, with or
 * without a namespace prefix.
 *
 * @param {string} block - XML fragment to inspect.
 * @returns {boolean} True when a `collection` tag is present.
 */
function isCollectionResponse(block) {
  const collectionTag = /<(?:[a-zA-Z0-9_-]+:)?collection\b/i;
  return collectionTag.test(block);
}
|
|
155
|
+
/**
 * Derives an entry name from an href: drops the query string and trailing
 * slashes, then decodes the last path segment.
 *
 * @param {string} href - Href value of an entry.
 * @returns {string} Decoded last path segment.
 */
function getNameFromHref(href) {
  const [pathPart = href] = href.split("?");
  const trimmedPath = pathPart.replace(/\/+$/g, "");
  const parts = trimmedPath.split("/");
  const lastPart = parts[parts.length - 1] ?? trimmedPath;
  return decodeHrefSegment(lastPart);
}
|
|
161
|
+
/**
 * Turns a PROPFIND XML body into a list of entries, one per `response`
 * element that carries an href.
 *
 * @param {string} xml - XML text of the response body.
 * @returns {Array<object>} Entries with `href`, `name`, `isCollection` and,
 *   when present, `sizeInBytes`, `lastModified` and `etag`. Empty when no
 *   `response` element is found.
 */
function parsePropfindXml(xml) {
  const blocks = xml.match(
    /<(?:[a-zA-Z0-9_-]+:)?response\b[\s\S]*?<\/(?:[a-zA-Z0-9_-]+:)?response>/gi
  );
  const entries = [];
  for (const block of blocks ?? []) {
    const href = extractFirst(block, "href");
    if (!href) {
      // A response without an href cannot be named, so it is dropped.
      continue;
    }
    const rawSize = extractFirst(block, "getcontentlength");
    const sizeInBytes = rawSize ? Number.parseInt(rawSize, 10) : void 0;
    const lastModified = extractFirst(block, "getlastmodified");
    const etag = extractFirst(block, "getetag");
    const entry = {
      href,
      name: getNameFromHref(href),
      isCollection: isCollectionResponse(block)
    };
    // Optional properties are only added when a usable value was found.
    if (Number.isFinite(sizeInBytes)) {
      entry.sizeInBytes = sizeInBytes;
    }
    if (lastModified) {
      entry.lastModified = lastModified;
    }
    if (etag) {
      entry.etag = etag;
    }
    entries.push(entry);
  }
  return entries;
}
|
|
187
|
+
/**
 * Sends a `PROPFIND` request (Depth: 1) to the WebDAV share and parses the
 * XML answer.
 *
 * @param {string[]} pathSegments - Path segments appended to the base URL.
 * @param {{ baseUrl?: string, shareToken?: string, userAgent?: string }} [options={}]
 *   Connection settings; the base URL and user agent fall back to defaults.
 * @returns {Promise<{ entries: Array<object>, baseUrl: string, shareToken: string }>}
 *   Parsed entries plus the base URL and token that were used.
 * @throws {ValidationError} When the token is missing, the request fails
 *   before a response arrives, or the response status is not OK.
 */
async function propfind(pathSegments, options = {}) {
  const baseUrl = normalizeBaseUrl(options.baseUrl);
  const shareToken = getShareToken(options.shareToken);
  const headers = {
    Accept: "application/xml,text/xml,*/*",
    Authorization: getAuthHeader(shareToken),
    Depth: "1",
    "User-Agent": options.userAgent ?? DEFAULT_FEDERAL_REVENUE_USER_AGENT
  };
  let response;
  try {
    response = await fetch(buildUrl(baseUrl, pathSegments), {
      method: "PROPFIND",
      headers
    });
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    // Node's fetch rejects with a generic "fetch failed" and keeps the real
    // reason (DNS, TLS, connection refused...) on `cause`; surface it too.
    const underlying = error instanceof Error && error.cause instanceof Error ? ` (${error.cause.message})` : "";
    const failure = new ValidationError(
      `Federal Revenue WebDAV request failed before receiving a response: ${reason}${underlying}.`,
      { baseUrl, pathSegments }
    );
    // Keep the original error reachable for debugging.
    failure.cause = error;
    throw failure;
  }
  if (!response.ok) {
    throw new ValidationError(
      `Federal Revenue WebDAV request failed with status ${response.status} ${response.statusText}.`,
      { status: response.status, statusText: response.statusText }
    );
  }
  const xml = await response.text();
  return {
    entries: parsePropfindXml(xml),
    baseUrl,
    shareToken
  };
}
|
|
220
|
+
/**
 * Ensures a reference matches the YYYY-MM pattern.
 *
 * @param {string} reference - Reference to check.
 * @returns {void}
 * @throws {ValidationError} When the reference does not match.
 */
function validateFederalRevenueReference(reference) {
  if (REFERENCE_PATTERN.test(reference)) {
    return;
  }
  throw new ValidationError(
    `Federal Revenue reference is invalid: ${reference}. Expected YYYY-MM.`
  );
}
|
|
227
|
+
/**
 * Formats a date as `<year>-<two-digit month>` using the date's local-time
 * year and month.
 *
 * @param {Date} [date] - Date to format; defaults to the current moment.
 * @returns {string} The reference string, e.g. "2024-01".
 */
function getCurrentFederalRevenueReference(date = /* @__PURE__ */ new Date()) {
  const monthNumber = date.getMonth() + 1; // getMonth() is zero-based
  const paddedMonth = `${monthNumber}`.padStart(2, "0");
  return [date.getFullYear(), paddedMonth].join("-");
}
|
|
232
|
+
/**
 * Lists the reference folders found at the root of the share.
 *
 * @param {object} [options={}] - Client options forwarded to `propfind`.
 * @returns {Promise<{ references: Array<{ reference: string, href: string }>, remoteBaseUrl: string }>}
 *   Collections whose name matches the reference pattern, sorted by name,
 *   plus the base URL that was queried.
 */
async function listFederalRevenueReferences(options = {}) {
  const { entries, baseUrl } = await propfind([], options);
  const references = [];
  for (const entry of entries) {
    // Keep only folders named like a reference.
    if (entry.isCollection && REFERENCE_PATTERN.test(entry.name)) {
      references.push({ reference: entry.name, href: entry.href });
    }
  }
  references.sort((a, b) => a.reference.localeCompare(b.reference));
  return { references, remoteBaseUrl: baseUrl };
}
|
|
243
|
+
/**
 * Chooses which reference to use among those available in the share.
 *
 * Selection order: an explicit `input2.reference`, then the current month
 * when `input2.current` is truthy, otherwise the last reference of the
 * sorted list.
 *
 * @param {object} [input2={}] - Client options plus optional `reference`
 *   and `current` selectors.
 * @returns {Promise<{ mode: "explicit" | "current" | "latest", selectedReference: string, availableReferences: string[] }>}
 * @throws {ValidationError} When no reference exists, the explicit reference
 *   is malformed or unavailable, or the current month is unavailable.
 */
async function resolveFederalRevenueReference(input2 = {}) {
  const listing = await listFederalRevenueReferences(input2);
  const availableReferences = listing.references.map(({ reference }) => reference);
  const latest = availableReferences.at(-1);
  if (!latest) {
    throw new ValidationError(
      "Federal Revenue reference discovery failed: no monthly references were found in the public share."
    );
  }

  if (input2.reference) {
    validateFederalRevenueReference(input2.reference);
    if (availableReferences.includes(input2.reference)) {
      return {
        mode: "explicit",
        selectedReference: input2.reference,
        availableReferences
      };
    }
    throw new ValidationError(
      `Federal Revenue reference not found: ${input2.reference}. Latest available reference is ${latest}.`,
      {
        requestedReference: input2.reference,
        latestAvailableReference: latest,
        availableReferences
      }
    );
  }

  if (!input2.current) {
    return {
      mode: "latest",
      selectedReference: latest,
      availableReferences
    };
  }

  const currentReference = getCurrentFederalRevenueReference();
  if (availableReferences.includes(currentReference)) {
    return {
      mode: "current",
      selectedReference: currentReference,
      availableReferences
    };
  }
  throw new ValidationError(
    `Federal Revenue current reference is not available yet: ${currentReference}. Latest available reference is ${latest}.`,
    {
      requestedReference: currentReference,
      latestAvailableReference: latest,
      availableReferences
    }
  );
}
|
|
294
|
+
/**
 * Lists the `.zip` files stored under a reference folder.
 *
 * @param {string} reference - Reference in YYYY-MM form.
 * @param {object} [options={}] - Client options forwarded to `propfind`.
 * @returns {Promise<{ files: Array<object>, remoteBaseUrl: string }>} Files
 *   sorted by name, each with `name`, `href`, `downloadUrl` and, when known,
 *   `sizeInBytes`, `lastModified` and `etag`.
 * @throws {ValidationError} When the reference is malformed or the request fails.
 */
async function listFederalRevenueFiles(reference, options = {}) {
  validateFederalRevenueReference(reference);
  const { entries, baseUrl } = await propfind([reference], options);

  const isZipFile = (entry) => !entry.isCollection && entry.name.toLowerCase().endsWith(".zip");
  const toFile = (entry) => {
    const file = {
      name: entry.name,
      href: entry.href,
      downloadUrl: buildUrl(baseUrl, [reference, entry.name])
    };
    if (entry.sizeInBytes !== void 0) {
      file.sizeInBytes = entry.sizeInBytes;
    }
    if (entry.lastModified) {
      file.lastModified = entry.lastModified;
    }
    if (entry.etag) {
      file.etag = entry.etag;
    }
    return file;
  };

  const files = entries.filter(isZipFile).map(toFile);
  files.sort((a, b) => a.name.localeCompare(b.name));
  return { files, remoteBaseUrl: baseUrl };
}
|
|
312
|
+
/**
 * Builds the request headers used to download a file from the share.
 *
 * @param {{ shareToken?: string, userAgent?: string }} [options={}]
 * @returns {{ Authorization: string, "User-Agent": string }}
 * @throws {ValidationError} When no share token is available.
 */
function buildFederalRevenueDownloadHeaders(options = {}) {
  const shareToken = getShareToken(options.shareToken);
  const userAgent = options.userAgent ?? DEFAULT_FEDERAL_REVENUE_USER_AGENT;
  return {
    Authorization: getAuthHeader(shareToken),
    "User-Agent": userAgent
  };
}
|
|
318
|
+
|
|
103
319
|
// src/services/config.service.ts
|
|
104
320
|
async function readDatabaseConfig() {
|
|
105
321
|
const raw = await safeReadText(getConfigFilePath());
|
|
@@ -127,12 +343,149 @@ function assertPostgresUrl(url) {
|
|
|
127
343
|
);
|
|
128
344
|
}
|
|
129
345
|
}
|
|
346
|
+
/**
 * Ensures a string parses as a URL using the http or https protocol.
 *
 * @param {string} url - Value to check.
 * @param {string} label - Name of the setting, used in error messages.
 * @returns {void}
 * @throws {ValidationError} When the value is not a URL or uses another protocol.
 */
function assertHttpUrl(url, label) {
  let protocol;
  try {
    ({ protocol } = new URL(url));
  } catch {
    throw new ValidationError(`${label} is not a valid URL.`, { url });
  }
  if (protocol !== "http:" && protocol !== "https:") {
    throw new ValidationError(`${label} must use the http or https protocol.`, {
      url
    });
  }
}
|
|
359
|
+
/**
 * Trims a string and rejects it when nothing is left.
 *
 * @param {string} value - Text to check.
 * @param {string} label - Name of the setting, used in the error message.
 * @returns {string} The trimmed text.
 * @throws {ValidationError} When the trimmed text is empty.
 */
function assertNonEmpty(value, label) {
  const normalized = value.trim();
  if (normalized === "") {
    throw new ValidationError(`${label} cannot be empty.`);
  }
  return normalized;
}
|
|
366
|
+
/**
 * Maps a user-typed config key, or one of its aliases, to its canonical name.
 * Matching ignores case and surrounding whitespace.
 *
 * @param {string} key - Key as typed by the user.
 * @returns {"share-token" | "webdav-url" | "user-agent"} Canonical key.
 * @throws {ValidationError} When the key matches no known alias.
 */
function normalizeFederalRevenueConfigKey(key) {
  const aliasesByCanonicalKey = [
    ["share-token", ["share-token", "share_token", "token"]],
    ["webdav-url", ["webdav-url", "webdav_url", "base-url", "base_url", "url"]],
    ["user-agent", ["user-agent", "user_agent"]]
  ];
  const lookup = key.trim().toLowerCase();
  for (const [canonicalKey, aliases] of aliasesByCanonicalKey) {
    if (aliases.includes(lookup)) {
      return canonicalKey;
    }
  }
  throw new ValidationError(
    `Unknown Federal Revenue config key: ${key}. Expected share-token, webdav-url, or user-agent.`
  );
}
|
|
383
|
+
/**
 * Returns a copy of the config with one validated value set.
 *
 * @param {object} config - Current Federal Revenue config; not modified.
 * @param {string} key - Canonical key; anything other than "share-token" or
 *   "webdav-url" is stored as the user agent.
 * @param {string} value - New value; trimmed before being stored.
 * @returns {object} New config object containing the updated value.
 * @throws {ValidationError} When the value is empty, or is not an http(s)
 *   URL for "webdav-url".
 */
function assignFederalRevenueConfigValue(config, key, value) {
  switch (key) {
    case "share-token":
      return {
        ...config,
        shareToken: assertNonEmpty(value, "Federal Revenue share token")
      };
    case "webdav-url": {
      const webdavUrl = assertNonEmpty(value, "Federal Revenue WebDAV URL");
      assertHttpUrl(webdavUrl, "Federal Revenue WebDAV URL");
      return { ...config, webdavUrl };
    }
    default:
      return {
        ...config,
        userAgent: assertNonEmpty(value, "Federal Revenue user agent")
      };
  }
}
|
|
400
|
+
/**
 * Returns a copy of the config without the property behind a canonical key.
 *
 * @param {object} config - Current Federal Revenue config; not modified.
 * @param {string} key - Canonical key ("share-token", "webdav-url" or
 *   "user-agent"); any other key leaves the copy untouched.
 * @returns {object} New config object without the selected property.
 */
function deleteFederalRevenueConfigValue(config, key) {
  const propertyByKey = new Map([
    ["share-token", "shareToken"],
    ["webdav-url", "webdavUrl"],
    ["user-agent", "userAgent"]
  ]);
  const nextConfig = { ...config };
  const property = propertyByKey.get(key);
  if (property !== void 0) {
    delete nextConfig[property];
  }
  return nextConfig;
}
|
|
413
|
+
/**
 * Tells whether none of the three Federal Revenue settings holds a truthy value.
 *
 * @param {object} config - Federal Revenue config.
 * @returns {boolean} True when shareToken, webdavUrl and userAgent are all falsy.
 */
function isFederalRevenueConfigEmpty(config) {
  const settings = ["shareToken", "webdavUrl", "userAgent"];
  return settings.every((setting) => !config[setting]);
}
|
|
130
416
|
/**
 * Validates a PostgreSQL URL and stores it as `defaultDbUrl`, keeping every
 * other value already present in the stored config.
 *
 * @param {string} url - Database URL to store.
 * @returns {Promise<void>}
 */
async function setDefaultDbUrl(url) {
  assertPostgresUrl(url);
  const storedConfig = await readDatabaseConfig();
  const updatedConfig = { ...storedConfig, defaultDbUrl: url };
  await writeDatabaseConfig(updatedConfig);
}
|
|
134
421
|
/**
 * Removes `defaultDbUrl` from the stored config and writes back the rest.
 *
 * @returns {Promise<void>}
 */
async function resetDefaultDbUrl() {
  const nextConfig = { ...(await readDatabaseConfig()) };
  delete nextConfig.defaultDbUrl;
  await writeDatabaseConfig(nextConfig);
}
|
|
427
|
+
/**
 * Stores one Federal Revenue setting and returns the resulting effective config.
 *
 * @param {string} key - Setting name or alias.
 * @param {string} value - New value.
 * @returns {Promise<object>} Effective config computed from the updated settings.
 * @throws {ValidationError} When the key is unknown or the value is invalid.
 */
async function setFederalRevenueConfigValue(key, value) {
  const canonicalKey = normalizeFederalRevenueConfigKey(key);
  const storedConfig = await readDatabaseConfig();
  const federalRevenue = assignFederalRevenueConfigValue(
    storedConfig.federalRevenue ?? {},
    canonicalKey,
    value
  );
  await writeDatabaseConfig({ ...storedConfig, federalRevenue });
  return getFederalRevenueEffectiveConfig(federalRevenue);
}
|
|
441
|
+
/**
 * Clears one Federal Revenue setting, or all of them when no key is given,
 * and returns the resulting effective config.
 *
 * The `federalRevenue` section is removed from the stored config whenever
 * nothing remains in it.
 *
 * @param {string} [key] - Setting name or alias; falsy clears every setting.
 * @returns {Promise<object>} Effective config computed from what remains.
 * @throws {ValidationError} When a key is given but is unknown.
 */
async function resetFederalRevenueConfig(key) {
  const storedConfig = await readDatabaseConfig();
  let remaining = {};
  if (key) {
    const canonicalKey = normalizeFederalRevenueConfigKey(key);
    remaining = deleteFederalRevenueConfigValue(
      storedConfig.federalRevenue ?? {},
      canonicalKey
    );
  }

  const nextConfig = { ...storedConfig };
  if (!key || isFederalRevenueConfigEmpty(remaining)) {
    delete nextConfig.federalRevenue;
  } else {
    nextConfig.federalRevenue = remaining;
  }
  await writeDatabaseConfig(nextConfig);
  return getFederalRevenueEffectiveConfig(remaining);
}
|
|
463
|
+
/**
 * Combines stored Federal Revenue settings with the built-in defaults.
 *
 * @param {{ webdavUrl?: string, userAgent?: string, shareToken?: string }} [config={}]
 *   Stored settings.
 * @returns {object} `webdavUrl` and `userAgent` (stored value or default),
 *   `shareToken` only when one is stored, and a `configured` object of
 *   booleans telling which settings came from the stored config.
 */
function getFederalRevenueEffectiveConfig(config = {}) {
  const { webdavUrl, userAgent, shareToken } = config;
  const effective = {
    webdavUrl: webdavUrl ?? DEFAULT_FEDERAL_REVENUE_WEBDAV_URL,
    userAgent: userAgent ?? DEFAULT_FEDERAL_REVENUE_USER_AGENT
  };
  if (shareToken) {
    effective.shareToken = shareToken;
  }
  effective.configured = {
    webdavUrl: Boolean(webdavUrl),
    userAgent: Boolean(userAgent),
    shareToken: Boolean(shareToken)
  };
  return effective;
}
|
|
475
|
+
/**
 * Reads the stored config and returns the effective Federal Revenue settings.
 *
 * @returns {Promise<object>} Effective config built from the stored
 *   `federalRevenue` section, or from an empty object when it is absent.
 */
async function readFederalRevenueEffectiveConfig() {
  const { federalRevenue } = await readDatabaseConfig();
  return getFederalRevenueEffectiveConfig(federalRevenue ?? {});
}
|
|
479
|
+
/**
 * Builds client options, preferring explicit overrides over stored settings
 * (which in turn fall back to the built-in defaults).
 *
 * @param {{ baseUrl?: string, shareToken?: string, userAgent?: string }} [overrides={}]
 *   Values that take precedence when not null or undefined.
 * @returns {Promise<{ baseUrl: string, shareToken: string | undefined, userAgent: string }>}
 */
async function resolveFederalRevenueClientOptions(overrides = {}) {
  const storedConfig = await readDatabaseConfig();
  const stored = getFederalRevenueEffectiveConfig(storedConfig.federalRevenue ?? {});
  const baseUrl = overrides.baseUrl ?? stored.webdavUrl;
  const shareToken = overrides.shareToken ?? stored.shareToken;
  const userAgent = overrides.userAgent ?? stored.userAgent;
  return { baseUrl, shareToken, userAgent };
}
|
|
137
490
|
|
|
138
491
|
// src/services/database.service.ts
|
|
@@ -6685,217 +7038,6 @@ async function showQuarantineRow(id, options) {
|
|
|
6685
7038
|
return record;
|
|
6686
7039
|
}
|
|
6687
7040
|
|
|
6688
|
-
// src/services/federal-revenue/client.ts
|
|
6689
|
-
var DEFAULT_FEDERAL_REVENUE_SHARE_TOKEN = "YggdBLfdninEJX9";
|
|
6690
|
-
var DEFAULT_FEDERAL_REVENUE_WEBDAV_URL = "https://arquivos.receitafederal.gov.br/public.php/webdav";
|
|
6691
|
-
var DEFAULT_FEDERAL_REVENUE_USER_AGENT = "cnpj-db-loader federal-revenue-client";
|
|
6692
|
-
var REFERENCE_PATTERN = /^\d{4}-\d{2}$/;
|
|
6693
|
-
function trimTrailingSlash(value) {
|
|
6694
|
-
return value.replace(/\/+$/g, "");
|
|
6695
|
-
}
|
|
6696
|
-
function normalizeBaseUrl(value) {
|
|
6697
|
-
return trimTrailingSlash(value ?? DEFAULT_FEDERAL_REVENUE_WEBDAV_URL);
|
|
6698
|
-
}
|
|
6699
|
-
function getShareToken(value) {
|
|
6700
|
-
return value ?? DEFAULT_FEDERAL_REVENUE_SHARE_TOKEN;
|
|
6701
|
-
}
|
|
6702
|
-
function encodePathSegment(value) {
|
|
6703
|
-
return encodeURIComponent(value).replace(/%2F/gi, "/");
|
|
6704
|
-
}
|
|
6705
|
-
function decodeXml(value) {
|
|
6706
|
-
return value.replace(/&/g, "&").replace(/</g, "<").replace(/>/g, ">").replace(/"/g, '"').replace(/'/g, "'");
|
|
6707
|
-
}
|
|
6708
|
-
function decodeHrefSegment(value) {
|
|
6709
|
-
try {
|
|
6710
|
-
return decodeURIComponent(value);
|
|
6711
|
-
} catch {
|
|
6712
|
-
return value;
|
|
6713
|
-
}
|
|
6714
|
-
}
|
|
6715
|
-
function getAuthHeader(shareToken) {
|
|
6716
|
-
return `Basic ${Buffer.from(`${shareToken}:`).toString("base64")}`;
|
|
6717
|
-
}
|
|
6718
|
-
function buildUrl(baseUrl, segments = []) {
|
|
6719
|
-
if (segments.length === 0) {
|
|
6720
|
-
return `${baseUrl}/`;
|
|
6721
|
-
}
|
|
6722
|
-
return `${baseUrl}/${segments.map(encodePathSegment).join("/")}`;
|
|
6723
|
-
}
|
|
6724
|
-
function extractFirst(block, tagName) {
|
|
6725
|
-
const pattern = new RegExp(
|
|
6726
|
-
`<(?:[a-zA-Z0-9_-]+:)?${tagName}\\b[^>]*>([\\s\\S]*?)<\\/(?:[a-zA-Z0-9_-]+:)?${tagName}>`,
|
|
6727
|
-
"i"
|
|
6728
|
-
);
|
|
6729
|
-
const match = block.match(pattern);
|
|
6730
|
-
return match?.[1] ? decodeXml(match[1].trim()) : void 0;
|
|
6731
|
-
}
|
|
6732
|
-
function isCollectionResponse(block) {
|
|
6733
|
-
return /<(?:[a-zA-Z0-9_-]+:)?collection\b/i.test(block);
|
|
6734
|
-
}
|
|
6735
|
-
function getNameFromHref(href) {
|
|
6736
|
-
const cleanHref = href.split("?")[0] ?? href;
|
|
6737
|
-
const withoutTrailingSlash = cleanHref.replace(/\/+$/g, "");
|
|
6738
|
-
const rawName = withoutTrailingSlash.split("/").pop() ?? withoutTrailingSlash;
|
|
6739
|
-
return decodeHrefSegment(rawName);
|
|
6740
|
-
}
|
|
6741
|
-
function parsePropfindXml(xml) {
|
|
6742
|
-
const responseBlocks = xml.match(
|
|
6743
|
-
/<(?:[a-zA-Z0-9_-]+:)?response\b[\s\S]*?<\/(?:[a-zA-Z0-9_-]+:)?response>/gi
|
|
6744
|
-
);
|
|
6745
|
-
if (!responseBlocks) {
|
|
6746
|
-
return [];
|
|
6747
|
-
}
|
|
6748
|
-
return responseBlocks.map((block) => {
|
|
6749
|
-
const href = extractFirst(block, "href");
|
|
6750
|
-
if (!href) {
|
|
6751
|
-
return void 0;
|
|
6752
|
-
}
|
|
6753
|
-
const size = extractFirst(block, "getcontentlength");
|
|
6754
|
-
const parsedSize = size ? Number.parseInt(size, 10) : void 0;
|
|
6755
|
-
const lastModified = extractFirst(block, "getlastmodified");
|
|
6756
|
-
const etag = extractFirst(block, "getetag");
|
|
6757
|
-
return {
|
|
6758
|
-
href,
|
|
6759
|
-
name: getNameFromHref(href),
|
|
6760
|
-
isCollection: isCollectionResponse(block),
|
|
6761
|
-
...Number.isFinite(parsedSize) ? { sizeInBytes: parsedSize } : {},
|
|
6762
|
-
...lastModified ? { lastModified } : {},
|
|
6763
|
-
...etag ? { etag } : {}
|
|
6764
|
-
};
|
|
6765
|
-
}).filter((entry) => entry !== void 0);
|
|
6766
|
-
}
|
|
6767
|
-
async function propfind(pathSegments, options = {}) {
|
|
6768
|
-
const baseUrl = normalizeBaseUrl(options.baseUrl);
|
|
6769
|
-
const shareToken = getShareToken(options.shareToken);
|
|
6770
|
-
let response;
|
|
6771
|
-
try {
|
|
6772
|
-
response = await fetch(buildUrl(baseUrl, pathSegments), {
|
|
6773
|
-
method: "PROPFIND",
|
|
6774
|
-
headers: {
|
|
6775
|
-
Accept: "application/xml,text/xml,*/*",
|
|
6776
|
-
Authorization: getAuthHeader(shareToken),
|
|
6777
|
-
Depth: "1",
|
|
6778
|
-
"User-Agent": options.userAgent ?? DEFAULT_FEDERAL_REVENUE_USER_AGENT
|
|
6779
|
-
}
|
|
6780
|
-
});
|
|
6781
|
-
} catch (error) {
|
|
6782
|
-
throw new ValidationError(
|
|
6783
|
-
`Federal Revenue WebDAV request failed before receiving a response: ${error instanceof Error ? error.message : String(error)}.`,
|
|
6784
|
-
{ baseUrl, pathSegments }
|
|
6785
|
-
);
|
|
6786
|
-
}
|
|
6787
|
-
if (!response.ok) {
|
|
6788
|
-
throw new ValidationError(
|
|
6789
|
-
`Federal Revenue WebDAV request failed with status ${response.status} ${response.statusText}.`,
|
|
6790
|
-
{ status: response.status, statusText: response.statusText }
|
|
6791
|
-
);
|
|
6792
|
-
}
|
|
6793
|
-
const xml = await response.text();
|
|
6794
|
-
return {
|
|
6795
|
-
entries: parsePropfindXml(xml),
|
|
6796
|
-
baseUrl,
|
|
6797
|
-
shareToken
|
|
6798
|
-
};
|
|
6799
|
-
}
|
|
6800
|
-
function validateFederalRevenueReference(reference) {
|
|
6801
|
-
if (!REFERENCE_PATTERN.test(reference)) {
|
|
6802
|
-
throw new ValidationError(
|
|
6803
|
-
`Federal Revenue reference is invalid: ${reference}. Expected YYYY-MM.`
|
|
6804
|
-
);
|
|
6805
|
-
}
|
|
6806
|
-
}
|
|
6807
|
-
function getCurrentFederalRevenueReference(date = /* @__PURE__ */ new Date()) {
|
|
6808
|
-
const year = date.getFullYear();
|
|
6809
|
-
const month = String(date.getMonth() + 1).padStart(2, "0");
|
|
6810
|
-
return `${year}-${month}`;
|
|
6811
|
-
}
|
|
6812
|
-
async function listFederalRevenueReferences(options = {}) {
|
|
6813
|
-
const result = await propfind([], options);
|
|
6814
|
-
const references = result.entries.filter((entry) => entry.isCollection && REFERENCE_PATTERN.test(entry.name)).map((entry) => ({
|
|
6815
|
-
reference: entry.name,
|
|
6816
|
-
href: entry.href
|
|
6817
|
-
})).sort((left, right) => left.reference.localeCompare(right.reference));
|
|
6818
|
-
return {
|
|
6819
|
-
references,
|
|
6820
|
-
remoteBaseUrl: result.baseUrl
|
|
6821
|
-
};
|
|
6822
|
-
}
|
|
6823
|
-
async function resolveFederalRevenueReference(input2 = {}) {
|
|
6824
|
-
const { references } = await listFederalRevenueReferences(input2);
|
|
6825
|
-
const availableReferences = references.map((item) => item.reference);
|
|
6826
|
-
const latest = availableReferences.at(-1);
|
|
6827
|
-
if (!latest) {
|
|
6828
|
-
throw new ValidationError(
|
|
6829
|
-
"Federal Revenue reference discovery failed: no monthly references were found in the public share."
|
|
6830
|
-
);
|
|
6831
|
-
}
|
|
6832
|
-
if (input2.reference) {
|
|
6833
|
-
validateFederalRevenueReference(input2.reference);
|
|
6834
|
-
if (!availableReferences.includes(input2.reference)) {
|
|
6835
|
-
throw new ValidationError(
|
|
6836
|
-
`Federal Revenue reference not found: ${input2.reference}. Latest available reference is ${latest}.`,
|
|
6837
|
-
{
|
|
6838
|
-
requestedReference: input2.reference,
|
|
6839
|
-
latestAvailableReference: latest,
|
|
6840
|
-
availableReferences
|
|
6841
|
-
}
|
|
6842
|
-
);
|
|
6843
|
-
}
|
|
6844
|
-
return {
|
|
6845
|
-
mode: "explicit",
|
|
6846
|
-
selectedReference: input2.reference,
|
|
6847
|
-
availableReferences
|
|
6848
|
-
};
|
|
6849
|
-
}
|
|
6850
|
-
if (input2.current) {
|
|
6851
|
-
const currentReference = getCurrentFederalRevenueReference();
|
|
6852
|
-
if (!availableReferences.includes(currentReference)) {
|
|
6853
|
-
throw new ValidationError(
|
|
6854
|
-
`Federal Revenue current reference is not available yet: ${currentReference}. Latest available reference is ${latest}.`,
|
|
6855
|
-
{
|
|
6856
|
-
requestedReference: currentReference,
|
|
6857
|
-
latestAvailableReference: latest,
|
|
6858
|
-
availableReferences
|
|
6859
|
-
}
|
|
6860
|
-
);
|
|
6861
|
-
}
|
|
6862
|
-
return {
|
|
6863
|
-
mode: "current",
|
|
6864
|
-
selectedReference: currentReference,
|
|
6865
|
-
availableReferences
|
|
6866
|
-
};
|
|
6867
|
-
}
|
|
6868
|
-
return {
|
|
6869
|
-
mode: "latest",
|
|
6870
|
-
selectedReference: latest,
|
|
6871
|
-
availableReferences
|
|
6872
|
-
};
|
|
6873
|
-
}
|
|
6874
|
-
async function listFederalRevenueFiles(reference, options = {}) {
|
|
6875
|
-
validateFederalRevenueReference(reference);
|
|
6876
|
-
const result = await propfind([reference], options);
|
|
6877
|
-
const files = result.entries.filter(
|
|
6878
|
-
(entry) => !entry.isCollection && entry.name.toLowerCase().endsWith(".zip")
|
|
6879
|
-
).map((entry) => ({
|
|
6880
|
-
name: entry.name,
|
|
6881
|
-
href: entry.href,
|
|
6882
|
-
downloadUrl: buildUrl(result.baseUrl, [reference, entry.name]),
|
|
6883
|
-
...entry.sizeInBytes !== void 0 ? { sizeInBytes: entry.sizeInBytes } : {},
|
|
6884
|
-
...entry.lastModified ? { lastModified: entry.lastModified } : {},
|
|
6885
|
-
...entry.etag ? { etag: entry.etag } : {}
|
|
6886
|
-
})).sort((left, right) => left.name.localeCompare(right.name));
|
|
6887
|
-
return {
|
|
6888
|
-
files,
|
|
6889
|
-
remoteBaseUrl: result.baseUrl
|
|
6890
|
-
};
|
|
6891
|
-
}
|
|
6892
|
-
function buildFederalRevenueDownloadHeaders(options = {}) {
|
|
6893
|
-
return {
|
|
6894
|
-
Authorization: getAuthHeader(getShareToken(options.shareToken)),
|
|
6895
|
-
"User-Agent": options.userAgent ?? DEFAULT_FEDERAL_REVENUE_USER_AGENT
|
|
6896
|
-
};
|
|
6897
|
-
}
|
|
6898
|
-
|
|
6899
7041
|
// src/services/federal-revenue/download.ts
|
|
6900
7042
|
import { createWriteStream } from "fs";
|
|
6901
7043
|
import { mkdir as mkdir5, rename, stat as stat5, unlink } from "fs/promises";
|
|
@@ -8379,6 +8521,18 @@ var STAGING_TABLE_BY_DATASET3 = {
|
|
|
8379
8521
|
partners: "staging_partners",
|
|
8380
8522
|
simples_options: "staging_simples_options"
|
|
8381
8523
|
};
|
|
8524
|
+
// Step identifiers, listed from "setup" to "analyze".
// NOTE(review): presumably the order in which the steps run — confirm
// against the code that consumes this list.
var STEP_ORDER = [
  "setup",
  "load-domains",
  "load-companies", "load-establishments", "load-partners", "load-simples",
  "materialize", "materialize-secondary-cnaes",
  "indexes",
  "analyze"
];
|
|
8382
8536
|
/**
 * Wraps text in single quotes, doubling every single quote inside it.
 *
 * @param {string} value - Text to quote.
 * @returns {string} The single-quoted literal.
 */
function quoteSqlLiteral(value) {
  const escaped = value.split("'").join("''");
  return `'${escaped}'`;
}
|
|
@@ -8396,6 +8550,9 @@ function receitaCopyCommand(tableName, columns, filePath) {
|
|
|
8396
8550
|
const normalizedFilePath = normalizePathForPsql(filePath);
|
|
8397
8551
|
return `\\copy ${tableName} (${columns.join(", ")}) from ${quoteSqlLiteral(normalizedFilePath)} with (format csv, header false, delimiter ';', quote '"', escape '"')`;
|
|
8398
8552
|
}
|
|
8553
|
+
/**
 * Builds an `\echo` line whose argument is the message as a quoted literal.
 *
 * @param {string} message - Text to print.
 * @returns {string} The `\echo '<message>'` line.
 */
function echo(message) {
  const literal = quoteSqlLiteral(message);
  return `\\echo ${literal}`;
}
|
|
8399
8556
|
/**
 * Lists the column names declared by a dataset's layout, in field order.
 *
 * @param {string} dataset - Key into `DATASET_LAYOUTS`.
 * @returns {string[]} The `columnName` of every field in the layout.
 */
function datasetColumns(dataset) {
  const { fields } = DATASET_LAYOUTS[dataset];
  return fields.map(({ columnName }) => columnName);
}
|
|
@@ -8422,7 +8579,7 @@ function partnerDedupeExpression(alias) {
|
|
|
8422
8579
|
function materializeCompaniesSql() {
|
|
8423
8580
|
const columns = companiesLayout.fields.map((field) => field.columnName);
|
|
8424
8581
|
return [
|
|
8425
|
-
"
|
|
8582
|
+
echo("[materialize] Materializing companies..."),
|
|
8426
8583
|
"with source as (",
|
|
8427
8584
|
" select",
|
|
8428
8585
|
` ${columns.map((column) => `source.${column}`).join(",\n ")},`,
|
|
@@ -8436,7 +8593,8 @@ function materializeCompaniesSql() {
|
|
|
8436
8593
|
`select ${columns.join(", ")}`,
|
|
8437
8594
|
"from deduped",
|
|
8438
8595
|
"on conflict (cnpj_root) do update set",
|
|
8439
|
-
` ${updateAssignments(columns, ["cnpj_root"])}
|
|
8596
|
+
` ${updateAssignments(columns, ["cnpj_root"])};`,
|
|
8597
|
+
echo("[materialize] Companies materialization completed.")
|
|
8440
8598
|
].join("\n");
|
|
8441
8599
|
}
|
|
8442
8600
|
function materializeEstablishmentsSql() {
|
|
@@ -8445,7 +8603,7 @@ function materializeEstablishmentsSql() {
|
|
|
8445
8603
|
);
|
|
8446
8604
|
const insertColumns = [...baseColumns, "cnpj_full"];
|
|
8447
8605
|
return [
|
|
8448
|
-
"
|
|
8606
|
+
echo("[materialize] Materializing establishments..."),
|
|
8449
8607
|
"with source as (",
|
|
8450
8608
|
" select",
|
|
8451
8609
|
` ${baseColumns.map((column) => `source.${column}`).join(",\n ")},`,
|
|
@@ -8455,14 +8613,29 @@ function materializeEstablishmentsSql() {
|
|
|
8455
8613
|
"),",
|
|
8456
8614
|
"deduped as (",
|
|
8457
8615
|
" select * from source where dedupe_rank = 1",
|
|
8616
|
+
")",
|
|
8617
|
+
`insert into establishments (${insertColumns.join(", ")})`,
|
|
8618
|
+
`select ${insertColumns.join(", ")}`,
|
|
8619
|
+
"from deduped",
|
|
8620
|
+
"on conflict (cnpj_full) do update set",
|
|
8621
|
+
` ${updateAssignments(insertColumns, ["cnpj_root", "cnpj_order", "cnpj_check_digits", "cnpj_full"])};`,
|
|
8622
|
+
echo("[materialize] Establishments materialization completed.")
|
|
8623
|
+
].join("\n");
|
|
8624
|
+
}
|
|
8625
|
+
function materializeSecondaryCnaesSql() {
|
|
8626
|
+
return [
|
|
8627
|
+
echo(
|
|
8628
|
+
"[materialize-secondary-cnaes] Materializing establishment secondary CNAEs..."
|
|
8629
|
+
),
|
|
8630
|
+
"with source as (",
|
|
8631
|
+
" select",
|
|
8632
|
+
" staging.cnpj_root || staging.cnpj_order || staging.cnpj_check_digits as cnpj_full,",
|
|
8633
|
+
" staging.secondary_cnaes_raw,",
|
|
8634
|
+
" row_number() over (partition by staging.cnpj_root || staging.cnpj_order || staging.cnpj_check_digits order by staging.staging_id desc) as dedupe_rank",
|
|
8635
|
+
" from staging_establishments staging",
|
|
8458
8636
|
"),",
|
|
8459
|
-
"
|
|
8460
|
-
|
|
8461
|
-
` select ${insertColumns.join(", ")}`,
|
|
8462
|
-
" from deduped",
|
|
8463
|
-
" on conflict (cnpj_full) do update set",
|
|
8464
|
-
` ${updateAssignments(insertColumns, ["cnpj_root", "cnpj_order", "cnpj_check_digits", "cnpj_full"])}`,
|
|
8465
|
-
" returning cnpj_full",
|
|
8637
|
+
"deduped as (",
|
|
8638
|
+
" select * from source where dedupe_rank = 1",
|
|
8466
8639
|
"),",
|
|
8467
8640
|
"deleted_secondary_cnaes as (",
|
|
8468
8641
|
" delete from establishment_secondary_cnaes target",
|
|
@@ -8483,14 +8656,17 @@ function materializeEstablishmentsSql() {
|
|
|
8483
8656
|
"insert into establishment_secondary_cnaes (cnpj_full, cnae_code)",
|
|
8484
8657
|
"select cnpj_full, cnae_code",
|
|
8485
8658
|
"from secondary_cnaes_source",
|
|
8486
|
-
"on conflict (cnpj_full, cnae_code) do nothing;"
|
|
8659
|
+
"on conflict (cnpj_full, cnae_code) do nothing;",
|
|
8660
|
+
echo(
|
|
8661
|
+
"[materialize-secondary-cnaes] Secondary CNAEs materialization completed."
|
|
8662
|
+
)
|
|
8487
8663
|
].join("\n");
|
|
8488
8664
|
}
|
|
8489
8665
|
function materializePartnersSql() {
|
|
8490
8666
|
const baseColumns = partnersLayout.fields.map((field) => field.columnName);
|
|
8491
8667
|
const insertColumns = [...baseColumns, "partner_dedupe_key"];
|
|
8492
8668
|
return [
|
|
8493
|
-
"
|
|
8669
|
+
echo("[materialize] Materializing partners..."),
|
|
8494
8670
|
"with source as (",
|
|
8495
8671
|
" select",
|
|
8496
8672
|
` ${baseColumns.map((column) => `source.${column}`).join(",\n ")},`,
|
|
@@ -8510,13 +8686,14 @@ function materializePartnersSql() {
|
|
|
8510
8686
|
`select ${insertColumns.join(", ")}`,
|
|
8511
8687
|
"from deduped",
|
|
8512
8688
|
"on conflict (partner_dedupe_key) do update set",
|
|
8513
|
-
` ${updateAssignments(insertColumns, ["partner_dedupe_key"])}
|
|
8689
|
+
` ${updateAssignments(insertColumns, ["partner_dedupe_key"])};`,
|
|
8690
|
+
echo("[materialize] Partners materialization completed.")
|
|
8514
8691
|
].join("\n");
|
|
8515
8692
|
}
|
|
8516
8693
|
function materializeSimplesSql() {
|
|
8517
8694
|
const columns = simplesLayout.fields.map((field) => field.columnName);
|
|
8518
8695
|
return [
|
|
8519
|
-
"
|
|
8696
|
+
echo("[materialize] Materializing simples options..."),
|
|
8520
8697
|
"with source as (",
|
|
8521
8698
|
" select",
|
|
8522
8699
|
` ${columns.map((column) => `source.${column}`).join(",\n ")},`,
|
|
@@ -8530,7 +8707,8 @@ function materializeSimplesSql() {
|
|
|
8530
8707
|
`select ${columns.join(", ")}`,
|
|
8531
8708
|
"from deduped",
|
|
8532
8709
|
"on conflict (cnpj_root) do update set",
|
|
8533
|
-
` ${updateAssignments(columns, ["cnpj_root"])}
|
|
8710
|
+
` ${updateAssignments(columns, ["cnpj_root"])};`,
|
|
8711
|
+
echo("[materialize] Simples options materialization completed.")
|
|
8534
8712
|
].join("\n");
|
|
8535
8713
|
}
|
|
8536
8714
|
function copyDomainSql(dataset, files) {
|
|
@@ -8540,12 +8718,20 @@ function copyDomainSql(dataset, files) {
|
|
|
8540
8718
|
const columns = datasetColumns(dataset);
|
|
8541
8719
|
const tempTable = `tmp_hybrid_${dataset}`;
|
|
8542
8720
|
const lines = [
|
|
8543
|
-
|
|
8721
|
+
echo(`[load-domains] Loading ${dataset} lookup data...`),
|
|
8544
8722
|
`drop table if exists ${tempTable};`,
|
|
8545
8723
|
`create temporary table ${tempTable} (code text, description text);`
|
|
8546
8724
|
];
|
|
8547
|
-
for (const file of files) {
|
|
8548
|
-
lines.push(
|
|
8725
|
+
for (const [index, file] of files.entries()) {
|
|
8726
|
+
lines.push(
|
|
8727
|
+
echo(
|
|
8728
|
+
`[load-domains] Loading ${dataset} file ${index + 1} of ${files.length}: ${file.relativePath}`
|
|
8729
|
+
),
|
|
8730
|
+
csvCopyCommand(tempTable, columns, file.absolutePath),
|
|
8731
|
+
echo(
|
|
8732
|
+
`[load-domains] Loaded ${dataset} file ${index + 1} of ${files.length}.`
|
|
8733
|
+
)
|
|
8734
|
+
);
|
|
8549
8735
|
}
|
|
8550
8736
|
lines.push(
|
|
8551
8737
|
`insert into ${dataset} (${columns.join(", ")})`,
|
|
@@ -8566,12 +8752,17 @@ function copyStagingSql(dataset, files) {
|
|
|
8566
8752
|
return [];
|
|
8567
8753
|
}
|
|
8568
8754
|
const columns = datasetColumns(dataset);
|
|
8569
|
-
|
|
8570
|
-
|
|
8571
|
-
|
|
8572
|
-
(
|
|
8573
|
-
|
|
8574
|
-
|
|
8755
|
+
const lines = [echo(`[load-${dataset}] Loading ${dataset} staging data...`)];
|
|
8756
|
+
for (const [index, file] of files.entries()) {
|
|
8757
|
+
lines.push(
|
|
8758
|
+
echo(
|
|
8759
|
+
`[load-${dataset}] Loading file ${index + 1} of ${files.length}: ${file.relativePath}`
|
|
8760
|
+
),
|
|
8761
|
+
csvCopyCommand(tableName, columns, file.absolutePath),
|
|
8762
|
+
echo(`[load-${dataset}] Loaded file ${index + 1} of ${files.length}.`)
|
|
8763
|
+
);
|
|
8764
|
+
}
|
|
8765
|
+
return lines;
|
|
8575
8766
|
}
|
|
8576
8767
|
function csvFilesByDataset(files) {
|
|
8577
8768
|
const grouped = {};
|
|
@@ -8597,7 +8788,9 @@ function rawTableName(dataset) {
|
|
|
8597
8788
|
function createRawTempTableSql(dataset) {
|
|
8598
8789
|
const columns = DATASET_LAYOUTS[dataset].fields.map((field) => ` ${quoteIdentifier(field.columnName)} text`).join(",\n");
|
|
8599
8790
|
return [
|
|
8791
|
+
"set client_min_messages to warning;",
|
|
8600
8792
|
`drop table if exists ${rawTableName(dataset)};`,
|
|
8793
|
+
"reset client_min_messages;",
|
|
8601
8794
|
`create temporary table ${rawTableName(dataset)} (`,
|
|
8602
8795
|
columns,
|
|
8603
8796
|
");"
|
|
@@ -8679,11 +8872,21 @@ function rawDomainSql(dataset, files) {
|
|
|
8679
8872
|
const columns = layout.fields.map((field) => field.columnName);
|
|
8680
8873
|
const tableName = rawTableName(dataset);
|
|
8681
8874
|
const lines = [
|
|
8682
|
-
|
|
8875
|
+
echo(
|
|
8876
|
+
`[load-domains] Loading ${dataset} lookup data directly from sanitized Receita files...`
|
|
8877
|
+
),
|
|
8683
8878
|
createRawTempTableSql(dataset)
|
|
8684
8879
|
];
|
|
8685
|
-
for (const file of files) {
|
|
8686
|
-
lines.push(
|
|
8880
|
+
for (const [index, file] of files.entries()) {
|
|
8881
|
+
lines.push(
|
|
8882
|
+
echo(
|
|
8883
|
+
`[load-domains] Loading ${dataset} file ${index + 1} of ${files.length}: ${file.relativePath}`
|
|
8884
|
+
),
|
|
8885
|
+
receitaCopyCommand(tableName, columns, file.absolutePath),
|
|
8886
|
+
echo(
|
|
8887
|
+
`[load-domains] Loaded ${dataset} file ${index + 1} of ${files.length}.`
|
|
8888
|
+
)
|
|
8889
|
+
);
|
|
8687
8890
|
}
|
|
8688
8891
|
lines.push(
|
|
8689
8892
|
`insert into ${dataset} (${columns.join(", ")})`,
|
|
@@ -8693,7 +8896,8 @@ function rawDomainSql(dataset, files) {
|
|
|
8693
8896
|
`from ${tableName}`,
|
|
8694
8897
|
"where nullif(btrim(code), '') is not null",
|
|
8695
8898
|
"order by code",
|
|
8696
|
-
"on conflict (code) do update set description = excluded.description;"
|
|
8899
|
+
"on conflict (code) do update set description = excluded.description;",
|
|
8900
|
+
echo(`[load-domains] ${dataset} lookup data completed.`)
|
|
8697
8901
|
);
|
|
8698
8902
|
return lines;
|
|
8699
8903
|
}
|
|
@@ -8712,70 +8916,363 @@ function rawStagingSql(dataset, files) {
|
|
|
8712
8916
|
const expressions = layout.fields.map(
|
|
8713
8917
|
(field) => ` ${fieldExpression(dataset, field, alias)} as ${field.columnName}`
|
|
8714
8918
|
);
|
|
8919
|
+
const stepName = loadStepName(dataset);
|
|
8715
8920
|
const lines = [
|
|
8716
|
-
|
|
8921
|
+
echo(
|
|
8922
|
+
`[${stepName}] Loading ${dataset} staging data directly from sanitized Receita files...`
|
|
8923
|
+
),
|
|
8924
|
+
`truncate table ${targetTable} restart identity;`,
|
|
8717
8925
|
createRawTempTableSql(dataset)
|
|
8718
8926
|
];
|
|
8719
|
-
for (const file of files) {
|
|
8720
|
-
lines.push(
|
|
8927
|
+
for (const [index, file] of files.entries()) {
|
|
8928
|
+
lines.push(
|
|
8929
|
+
echo(
|
|
8930
|
+
`[${stepName}] Loading file ${index + 1} of ${files.length}: ${file.relativePath}`
|
|
8931
|
+
),
|
|
8932
|
+
receitaCopyCommand(tableName, columns, file.absolutePath),
|
|
8933
|
+
echo(`[${stepName}] Loaded file ${index + 1} of ${files.length}.`)
|
|
8934
|
+
);
|
|
8721
8935
|
}
|
|
8722
8936
|
lines.push(
|
|
8937
|
+
echo(
|
|
8938
|
+
`[${stepName}] Transforming ${dataset} raw rows into ${targetTable}...`
|
|
8939
|
+
),
|
|
8723
8940
|
`insert into ${targetTable} (${columns.join(", ")})`,
|
|
8724
8941
|
"select",
|
|
8725
8942
|
expressions.join(",\n"),
|
|
8726
|
-
`from ${tableName} ${alias}
|
|
8943
|
+
`from ${tableName} ${alias};`,
|
|
8944
|
+
echo(`[${stepName}] ${dataset} staging load completed.`)
|
|
8727
8945
|
);
|
|
8728
8946
|
return lines;
|
|
8729
8947
|
}
|
|
8730
|
-
function
|
|
8731
|
-
|
|
8732
|
-
|
|
8733
|
-
|
|
8734
|
-
|
|
8735
|
-
|
|
8736
|
-
|
|
8737
|
-
|
|
8948
|
+
/**
 * Resolves the pipeline step name used as the log prefix for a staging dataset.
 *
 * @param {string} dataset - Dataset identifier (for example "companies").
 * @returns {string} The dedicated step name, or `load-<dataset>` when none is registered.
 */
function loadStepName(dataset) {
  // A Map keeps the strict (non-coercing) key match the step lookup relies on.
  const stepNames = new Map([
    ["companies", "load-companies"],
    ["establishments", "load-establishments"],
    ["partners", "load-partners"],
    ["simples_options", "load-simples"]
  ]);
  return stepNames.get(dataset) ?? `load-${dataset}`;
}
|
|
8962
|
+
/**
 * Builds the leading lines shared by every generated psql step script.
 *
 * @param {string} title - Text placed in the first SQL comment line.
 * @param {string} [sourceEncoding] - When truthy, an echo line and a
 *   `set client_encoding` statement for this encoding are appended.
 * @returns {string[]} Header lines, ending with an empty line.
 */
function scriptHeader(title, sourceEncoding) {
  const header = [
    `-- ${title}`,
    "-- Generated by cnpj-db-loader postgres generate-script.",
    "\\set ON_ERROR_STOP on"
  ];
  if (sourceEncoding) {
    header.push(
      echo(
        `Using source file encoding ${sourceEncoding} for psql copy operations...`
      ),
      `set client_encoding to ${quoteSqlLiteral(sourceEncoding)};`
    );
  }
  header.push("");
  return header;
}
|
|
8976
|
+
/**
 * Optionally surrounds a step's lines with `begin;` / `commit;`.
 *
 * Wrapping happens only when the caller asks for it and the transaction mode
 * is "phase"; otherwise a shallow copy of the lines is returned.
 *
 * @param {string[]} lines - Script lines for one step.
 * @param {string} mode - Transaction mode.
 * @param {boolean} shouldWrap - Whether this step is eligible for wrapping.
 * @returns {string[]} A new array of lines.
 */
function wrapTransaction(lines, mode, shouldWrap) {
  const wrapsPhase = shouldWrap && mode === "phase";
  return wrapsPhase ? ["begin;", "", ...lines, "", "commit;"] : [...lines];
}
|
|
8982
|
+
/**
 * Assembles the full text of one step script: header, body (optionally wrapped
 * in a per-phase transaction) and a trailing newline.
 *
 * @param {string} title - Title passed to the script header.
 * @param {string[]} body - Body lines of the step.
 * @param {object} input2 - Generator input; `sourceEncoding` and `transactionMode` are read.
 * @param {boolean} wrapInPhaseTransaction - Whether the body may be wrapped in "phase" mode.
 * @returns {string} Script text joined with "\n".
 */
function buildStepScript(title, body, input2, wrapInPhaseTransaction) {
  const headerLines = scriptHeader(title, input2.sourceEncoding);
  const bodyLines = wrapTransaction(
    body,
    input2.transactionMode,
    wrapInPhaseTransaction
  );
  return [...headerLines, ...bodyLines, ""].join("\n");
}
|
|
8989
|
+
/**
 * Converts the requested include list into a Set and removes the targets
 * disabled through the skip flags.
 *
 * @param {object} input2 - Generator input; reads `include`, `skipIndexes`, `skipAnalyze`.
 * @returns {Set<string>} Selected include targets.
 */
function includeSet(input2) {
  const targets = new Set(input2.include);
  const skipFlags = [
    ["skipIndexes", "indexes"],
    ["skipAnalyze", "analyze"]
  ];
  for (const [flag, target] of skipFlags) {
    if (input2[flag]) {
      targets.delete(target);
    }
  }
  return targets;
}
|
|
8999
|
+
/**
 * Tells whether at least one final-table target is selected.
 *
 * @param {Set<string>} selected - Selected include targets.
 * @returns {boolean} True when companies, establishments, partners or simples is selected.
 */
function hasAnyFinalMaterialization(selected) {
  const finalTargets = ["companies", "establishments", "partners", "simples"];
  return finalTargets.some((target) => selected.has(target));
}
|
|
8760
|
-
function
|
|
9002
|
+
/**
 * Builds the body of the materialization step for the selected final tables.
 *
 * @param {Set<string>} selected - Selected include targets.
 * @returns {string[]} Echo lines around one SQL chunk (plus a blank line) per selected target.
 */
function materializeSql(selected) {
  // Order matters: it defines the order of the statements in the script.
  const builders = [
    ["companies", materializeCompaniesSql],
    ["establishments", materializeEstablishmentsSql],
    ["partners", materializePartnersSql],
    ["simples", materializeSimplesSql]
  ];
  const lines = [echo("[materialize] Starting final table materialization...")];
  for (const [target, buildSql] of builders) {
    if (selected.has(target)) {
      lines.push(buildSql(), "");
    }
  }
  lines.push(echo("[materialize] Final table materialization completed."));
  return lines;
}
|
|
9019
|
+
/**
 * Builds the body of the indexes step. It currently contains only an echo line
 * and explanatory SQL comments; no index statements are generated.
 *
 * @returns {string[]} Script lines for the indexes step.
 */
function indexesSql() {
  const notice = echo(
    "[indexes] No additional index operations are generated in this beta."
  );
  const remarks = [
    "-- Indexes are expected to be managed by the schema generated by cnpj-db-loader schema generate.",
    "-- A future fast-rebuild mode may generate DROP/CREATE INDEX operations here."
  ];
  return [notice, ...remarks];
}
|
|
9028
|
+
/**
 * Builds the body of the analyze step: one `analyze <table>;` statement per
 * table that belongs to a selected include target.
 *
 * @param {Set<string>} selected - Selected include targets.
 * @returns {string[]} Echo lines surrounding the analyze statements.
 */
function analyzeSql(selected) {
  // Include target -> table it populates, in emission order.
  const tableByTarget = [
    ["companies", "companies"],
    ["establishments", "establishments"],
    ["secondary-cnaes", "establishment_secondary_cnaes"],
    ["partners", "partners"],
    ["simples", "simples_options"]
  ];
  const tables = /* @__PURE__ */ new Set(
    tableByTarget.filter(([target]) => selected.has(target)).map(([, table]) => table)
  );
  if (selected.has("domains")) {
    for (const dataset of DOMAIN_DATASETS) {
      tables.add(dataset);
    }
  }
  return [
    echo("[analyze] Refreshing planner statistics..."),
    ...Array.from(tables, (table) => `analyze ${table};`),
    echo("[analyze] Planner statistics refreshed.")
  ];
}
|
|
9056
|
+
/**
 * Creates a step descriptor for the generated pipeline.
 *
 * @param {string} name - Step name.
 * @param {string} file - Script file name for the step.
 * @param {string[]} dependsOn - Names of the steps this one depends on.
 * @param {boolean} included - Whether the step is part of the generated run.
 * @returns {{name: string, file: string, dependsOn: string[], included: boolean}}
 */
function step(name, file, dependsOn, included) {
  const descriptor = { name, file, dependsOn, included };
  return descriptor;
}
|
|
9059
|
+
/**
 * Generates the modular psql script set for the direct import of sanitized
 * Receita files.
 *
 * One SQL script is produced per included pipeline step, plus the
 * `import-postgres-direct.sql` orchestrator, which `\ir`-includes every
 * included step following STEP_ORDER. Steps that are not selected, or whose
 * dataset has no source files, are still listed in `steps` (with
 * `included: false`) but get no script.
 *
 * Transactions: in "single" mode the orchestrator itself emits `begin;` /
 * `commit;`; step scripts are wrapped individually only in "phase" mode.
 *
 * @param {object} input2 - Generator input. Reads `files`, `sourceEncoding`,
 *   `transactionMode`, `include`, `skipIndexes` and `skipAnalyze`.
 * @returns {{scripts: Object<string, string>, steps: object[]}} Script text by
 *   file name (in generation order) and the step descriptors.
 */
function generatePostgresDirectScriptFiles(input2) {
  const grouped = directFilesByDataset(input2.files);
  const filesFor = (dataset) => grouped[dataset] ?? [];
  const selected = includeSet(input2);

  // Drop every target that has no source files, so the rest of the function
  // can rely on `selected` alone to decide what gets generated.
  if (!DOMAIN_DATASETS.some((dataset) => filesFor(dataset).length > 0)) {
    selected.delete("domains");
  }
  if (filesFor("companies").length === 0) {
    selected.delete("companies");
  }
  if (filesFor("establishments").length === 0) {
    selected.delete("establishments");
    // Secondary CNAEs are derived from establishments data.
    selected.delete("secondary-cnaes");
  }
  if (filesFor("partners").length === 0) {
    selected.delete("partners");
  }
  if (filesFor("simples_options").length === 0) {
    selected.delete("simples");
  }

  const scripts = {};
  const steps = [];

  // Setup is always part of the run.
  steps.push(step("setup", "setup.sql", [], true));
  scripts["setup.sql"] = [
    ...scriptHeader(
      "CNPJ DB Loader PostgreSQL direct import setup",
      input2.sourceEncoding
    ),
    echo("[setup] Preparing PostgreSQL direct import session..."),
    "-- The database schema must be applied before running these scripts.",
    "-- This setup script configures the psql session used by the generated orchestrator.",
    echo("[setup] Setup completed."),
    ""
  ].join("\n");

  // Lookup (domain) tables.
  const domainsIncluded = selected.has("domains");
  steps.push(
    step("load-domains", "load-domains.sql", ["setup"], domainsIncluded)
  );
  if (domainsIncluded) {
    const lines = [echo("[load-domains] Starting domain tables load...")];
    for (const dataset of DOMAIN_DATASETS) {
      lines.push(...rawDomainSql(dataset, filesFor(dataset)), "");
    }
    lines.push(echo("[load-domains] Domain tables load completed."));
    scripts["load-domains.sql"] = buildStepScript(
      "CNPJ DB Loader PostgreSQL direct import domains step",
      lines,
      input2,
      true
    );
  }

  // Staging loads, one step per dataset.
  const datasetSteps = [
    {
      dataset: "companies",
      name: "load-companies",
      file: "load-companies.sql",
      include: "companies"
    },
    {
      dataset: "establishments",
      name: "load-establishments",
      file: "load-establishments.sql",
      include: "establishments"
    },
    {
      dataset: "partners",
      name: "load-partners",
      file: "load-partners.sql",
      include: "partners"
    },
    {
      dataset: "simples_options",
      name: "load-simples",
      file: "load-simples.sql",
      include: "simples"
    }
  ];
  for (const item of datasetSteps) {
    // Targets without files were already removed from `selected` above.
    const included = selected.has(item.include);
    steps.push(step(item.name, item.file, ["setup"], included));
    if (included) {
      scripts[item.file] = buildStepScript(
        `CNPJ DB Loader PostgreSQL direct import ${item.name} step`,
        rawStagingSql(item.dataset, filesFor(item.dataset)),
        input2,
        true
      );
    }
  }

  // Final table materialization depends on every selected staging load.
  const materializeIncluded = hasAnyFinalMaterialization(selected);
  steps.push(
    step(
      "materialize",
      "materialize.sql",
      datasetSteps.filter((item) => selected.has(item.include)).map((item) => item.name),
      materializeIncluded
    )
  );
  if (materializeIncluded) {
    scripts["materialize.sql"] = buildStepScript(
      "CNPJ DB Loader PostgreSQL direct import materialization step",
      materializeSql(selected),
      input2,
      true
    );
  }

  const secondaryIncluded = selected.has("secondary-cnaes") && selected.has("establishments");
  steps.push(
    step(
      "materialize-secondary-cnaes",
      "materialize-secondary-cnaes.sql",
      ["load-establishments"],
      secondaryIncluded
    )
  );
  if (secondaryIncluded) {
    scripts["materialize-secondary-cnaes.sql"] = buildStepScript(
      "CNPJ DB Loader PostgreSQL direct import secondary CNAEs step",
      [materializeSecondaryCnaesSql()],
      input2,
      true
    );
  }

  const indexesIncluded = selected.has("indexes");
  steps.push(
    step(
      "indexes",
      "indexes.sql",
      materializeIncluded ? ["materialize"] : ["setup"],
      indexesIncluded
    )
  );
  if (indexesIncluded) {
    scripts["indexes.sql"] = buildStepScript(
      "CNPJ DB Loader PostgreSQL direct import indexes step",
      indexesSql(),
      input2,
      true
    );
  }

  const analyzeIncluded = selected.has("analyze");
  const analyzeDependencies = [
    ...domainsIncluded ? ["load-domains"] : [],
    ...materializeIncluded ? ["materialize"] : [],
    ...secondaryIncluded ? ["materialize-secondary-cnaes"] : []
  ];
  steps.push(
    step(
      "analyze",
      "analyze.sql",
      analyzeDependencies.length > 0 ? analyzeDependencies : ["setup"],
      analyzeIncluded
    )
  );
  if (analyzeIncluded) {
    scripts["analyze.sql"] = buildStepScript(
      "CNPJ DB Loader PostgreSQL direct import analyze step",
      analyzeSql(selected),
      input2,
      true
    );
  }

  // Orchestrator: includes every generated step script in STEP_ORDER.
  const singleTransaction = input2.transactionMode === "single";
  const orchestratorLines = [
    "-- CNPJ DB Loader direct PostgreSQL import orchestrator",
    "-- Generated from sanitized Receita files by cnpj-db-loader postgres generate-script.",
    "-- Execute with psql, for example:",
    '-- psql -d "postgres://postgres:postgres@localhost:5432/cnpj" -f import-postgres-direct.sql',
    "",
    "\\set ON_ERROR_STOP on",
    echo(
      `Using source file encoding ${input2.sourceEncoding} for psql copy operations...`
    ),
    `set client_encoding to ${quoteSqlLiteral(input2.sourceEncoding)};`,
    echo(
      `Starting CNPJ DB Loader direct PostgreSQL import using transaction mode ${input2.transactionMode}...`
    ),
    "",
    ...singleTransaction ? ["begin;", ""] : []
  ];
  for (const name of STEP_ORDER) {
    const currentStep = steps.find((item) => item.name === name);
    if (!currentStep?.included) {
      continue;
    }
    orchestratorLines.push(
      echo(
        `[orchestrator] Running ${currentStep.name} (${currentStep.file})...`
      ),
      // \ir resolves the step script relative to the orchestrator file.
      `\\ir ${currentStep.file}`,
      echo(`[orchestrator] Completed ${currentStep.name}.`),
      ""
    );
  }
  orchestratorLines.push(
    ...singleTransaction ? ["commit;", ""] : [],
    // Matches the "direct" wording of the start message; the previous text
    // said "hybrid", which is the name of the CSV-based import script.
    echo("CNPJ DB Loader direct PostgreSQL import completed."),
    ""
  );
  scripts["import-postgres-direct.sql"] = orchestratorLines.join("\n");
  return { scripts, steps };
}
|
|
9261
|
+
function generatePostgresDirectImportScript(input2) {
|
|
9262
|
+
const grouped = csvFilesByDataset(input2.files);
|
|
9263
|
+
const lines = [
|
|
9264
|
+
"-- CNPJ DB Loader hybrid PostgreSQL import script",
|
|
9265
|
+
"-- Generated from PostgreSQL-ready CSV files exported by cnpj-db-loader postgres export-csv.",
|
|
9266
|
+
"-- Execute with psql, for example:",
|
|
9267
|
+
'-- psql -d "postgres://postgres:postgres@localhost:5432/cnpj" -f import-postgres-direct.sql',
|
|
9268
|
+
"",
|
|
9269
|
+
"\\set ON_ERROR_STOP on",
|
|
9270
|
+
echo("Starting CNPJ DB Loader hybrid PostgreSQL import..."),
|
|
8773
9271
|
"",
|
|
8774
9272
|
"begin;",
|
|
8775
9273
|
"",
|
|
8776
9274
|
"-- Keep the final schema and seed data managed by sql/schema.sql.",
|
|
8777
|
-
"-- This script
|
|
8778
|
-
"-- transforms values inside PostgreSQL, resets staging tables and upserts final data.",
|
|
9275
|
+
"-- This script only resets staging tables and then upserts final data.",
|
|
8779
9276
|
"truncate table staging_companies restart identity;",
|
|
8780
9277
|
"truncate table staging_establishments restart identity;",
|
|
8781
9278
|
"truncate table staging_partners restart identity;",
|
|
@@ -8783,10 +9280,10 @@ function generatePostgresSanitizedDirectImportScript(input2) {
|
|
|
8783
9280
|
""
|
|
8784
9281
|
];
|
|
8785
9282
|
for (const dataset of DOMAIN_DATASETS) {
|
|
8786
|
-
lines.push(...
|
|
9283
|
+
lines.push(...copyDomainSql(dataset, grouped[dataset] ?? []), "");
|
|
8787
9284
|
}
|
|
8788
9285
|
for (const dataset of STAGING_DATASETS) {
|
|
8789
|
-
lines.push(...
|
|
9286
|
+
lines.push(...copyStagingSql(dataset, grouped[dataset] ?? []), "");
|
|
8790
9287
|
}
|
|
8791
9288
|
lines.push(...materializationAndAnalyzeSql());
|
|
8792
9289
|
return lines.join("\n");
|
|
@@ -8797,11 +9294,13 @@ function materializationAndAnalyzeSql() {
|
|
|
8797
9294
|
"",
|
|
8798
9295
|
materializeEstablishmentsSql(),
|
|
8799
9296
|
"",
|
|
9297
|
+
materializeSecondaryCnaesSql(),
|
|
9298
|
+
"",
|
|
8800
9299
|
materializePartnersSql(),
|
|
8801
9300
|
"",
|
|
8802
9301
|
materializeSimplesSql(),
|
|
8803
9302
|
"",
|
|
8804
|
-
"
|
|
9303
|
+
echo("Refreshing planner statistics..."),
|
|
8805
9304
|
"analyze companies;",
|
|
8806
9305
|
"analyze establishments;",
|
|
8807
9306
|
"analyze establishment_secondary_cnaes;",
|
|
@@ -8816,7 +9315,7 @@ function materializationAndAnalyzeSql() {
|
|
|
8816
9315
|
"",
|
|
8817
9316
|
"commit;",
|
|
8818
9317
|
"",
|
|
8819
|
-
"
|
|
9318
|
+
echo("CNPJ DB Loader hybrid PostgreSQL import completed."),
|
|
8820
9319
|
""
|
|
8821
9320
|
];
|
|
8822
9321
|
}
|
|
@@ -9023,6 +9522,29 @@ async function exportPostgresCsvDataset(inputPath, options = {}) {
|
|
|
9023
9522
|
import { mkdir as mkdir9, stat as stat7, writeFile as writeFile6 } from "fs/promises";
|
|
9024
9523
|
import path17 from "path";
|
|
9025
9524
|
var DEFAULT_SOURCE_ENCODING = "UTF8";
|
|
9525
|
+
// Transaction mode used when the caller does not provide one.
var DEFAULT_TRANSACTION_MODE = "single";
// Include targets accepted by the direct script generator.
var ALL_INCLUDE_TARGETS = [
  "domains",
  "companies",
  "establishments",
  "partners",
  "simples",
  "secondary-cnaes",
  "indexes",
  "analyze"
];
// Maps each import dataset to the include target that covers it; every lookup
// dataset is covered by the single "domains" target.
var INCLUDE_TARGETS_BY_DATASET = {
  companies: "companies",
  establishments: "establishments",
  partners: "partners",
  simples_options: "simples",
  ...Object.fromEntries(
    [
      "countries",
      "cities",
      "partner_qualifications",
      "legal_natures",
      "reasons",
      "cnaes"
    ].map((dataset) => [dataset, "domains"])
  )
};
|
|
9026
9548
|
function defaultPostgresDirectOutputPath(inputPath) {
|
|
9027
9549
|
const baseName = path17.basename(inputPath);
|
|
9028
9550
|
if (baseName.toLowerCase() === "sanitized") {
|
|
@@ -9031,7 +9553,7 @@ function defaultPostgresDirectOutputPath(inputPath) {
|
|
|
9031
9553
|
return path17.join(path17.dirname(inputPath), `${baseName}-postgres-direct`);
|
|
9032
9554
|
}
|
|
9033
9555
|
/**
 * Builds the suggested psql command shown to the user after script generation.
 *
 * @param {string} scriptPath - Path of the generated orchestrator script.
 * @returns {string} Example psql invocation with backslashes in the path turned into forward slashes.
 */
function inferNextStep5(scriptPath) {
  const portablePath = scriptPath.split("\\").join("/");
  return `psql -d "postgres://postgres:postgres@localhost:5432/cnpj" -f ${portablePath}`;
}
|
|
9036
9558
|
function normalizeSourceEncoding(value) {
|
|
9037
9559
|
const encoding = (value ?? DEFAULT_SOURCE_ENCODING).trim();
|
|
@@ -9042,6 +9564,41 @@ function normalizeSourceEncoding(value) {
|
|
|
9042
9564
|
}
|
|
9043
9565
|
return encoding.toUpperCase();
|
|
9044
9566
|
}
|
|
9567
|
+
/**
 * Validates the requested transaction mode, falling back to the default when
 * none is given.
 *
 * @param {string} [value] - Requested mode; null/undefined selects the default.
 * @returns {string} One of "single", "phase" or "none".
 * @throws {ValidationError} When the mode is not one of the supported values.
 */
function normalizeTransactionMode(value) {
  const mode = value ?? DEFAULT_TRANSACTION_MODE;
  switch (mode) {
    case "single":
    case "phase":
    case "none":
      return mode;
    default:
      throw new ValidationError(
        `Invalid transaction mode: ${String(value)}. Use single, phase or none.`
      );
  }
}
|
|
9576
|
+
/**
 * Checks whether a value is one of the known include targets.
 *
 * @param {*} value - Candidate value.
 * @returns {boolean} True when the value appears in ALL_INCLUDE_TARGETS.
 */
function isIncludeTarget(value) {
  return ALL_INCLUDE_TARGETS.some((target) => target === value);
}
|
|
9579
|
+
/**
 * Resolves the list of include targets for a script generation run.
 *
 * Precedence: an explicit non-empty `include` list (deduplicated and
 * validated) wins; otherwise a single `dataset` is expanded to its target plus
 * "analyze" (establishments also pull in "secondary-cnaes"); otherwise every
 * target is included.
 *
 * @param {string[]} [include] - Explicit include targets.
 * @param {string} [dataset] - Dataset filter used when no include list is given.
 * @returns {string[]} Include targets; empty when the dataset maps to no target.
 * @throws {ValidationError} When the include list contains an unknown target.
 */
function normalizeIncludeTargets(include, dataset) {
  const hasExplicitList = Boolean(include) && include.length > 0;
  if (!hasExplicitList) {
    if (!dataset) {
      return [...ALL_INCLUDE_TARGETS];
    }
    const target = INCLUDE_TARGETS_BY_DATASET[dataset];
    if (!target) {
      return [];
    }
    return target === "establishments" ? ["establishments", "secondary-cnaes", "analyze"] : [target, "analyze"];
  }
  const unique = [...new Set(include)];
  const invalid = unique.filter((item) => !isIncludeTarget(item));
  if (invalid.length === 0) {
    return unique;
  }
  throw new ValidationError(
    `Invalid include target(s): ${invalid.join(", ")}. Use ${ALL_INCLUDE_TARGETS.join(", ")}.`
  );
}
|
|
9045
9602
|
async function generatePostgresDirectScript(inputPath, options = {}) {
|
|
9046
9603
|
if (options.dataset && !isImportDatasetType(options.dataset)) {
|
|
9047
9604
|
throw new ValidationError(`Unsupported dataset type: ${options.dataset}.`);
|
|
@@ -9057,6 +9614,10 @@ async function generatePostgresDirectScript(inputPath, options = {}) {
|
|
|
9057
9614
|
options.outputPath ?? defaultPostgresDirectOutputPath(validatedPath)
|
|
9058
9615
|
);
|
|
9059
9616
|
const sourceEncoding = normalizeSourceEncoding(options.sourceEncoding);
|
|
9617
|
+
const transactionMode = normalizeTransactionMode(options.transactionMode);
|
|
9618
|
+
const include = normalizeIncludeTargets(options.include, options.dataset);
|
|
9619
|
+
const skipIndexes = options.skipIndexes ?? false;
|
|
9620
|
+
const skipAnalyze = options.skipAnalyze ?? false;
|
|
9060
9621
|
const inspected = await inspectFiles(validatedPath);
|
|
9061
9622
|
const recognizedFiles = inspected.entries.filter((entry) => entry.entryKind === "file").flatMap((entry) => {
|
|
9062
9623
|
if (!isImportDatasetType(entry.inferredType)) {
|
|
@@ -9084,7 +9645,11 @@ async function generatePostgresDirectScript(inputPath, options = {}) {
|
|
|
9084
9645
|
outputPath,
|
|
9085
9646
|
totalFiles: recognizedFiles.length,
|
|
9086
9647
|
datasets,
|
|
9087
|
-
sourceEncoding
|
|
9648
|
+
sourceEncoding,
|
|
9649
|
+
transactionMode,
|
|
9650
|
+
include,
|
|
9651
|
+
skipIndexes,
|
|
9652
|
+
skipAnalyze
|
|
9088
9653
|
});
|
|
9089
9654
|
await mkdir9(outputPath, { recursive: true });
|
|
9090
9655
|
const sourceFiles = [];
|
|
@@ -9120,11 +9685,21 @@ async function generatePostgresDirectScript(inputPath, options = {}) {
|
|
|
9120
9685
|
}
|
|
9121
9686
|
const scriptName = options.scriptName ?? "import-postgres-direct.sql";
|
|
9122
9687
|
const scriptPath = path17.join(outputPath, scriptName);
|
|
9123
|
-
const
|
|
9688
|
+
const generated = generatePostgresDirectScriptFiles({
|
|
9124
9689
|
files: sourceFiles,
|
|
9125
|
-
sourceEncoding
|
|
9690
|
+
sourceEncoding,
|
|
9691
|
+
transactionMode,
|
|
9692
|
+
include,
|
|
9693
|
+
skipIndexes,
|
|
9694
|
+
skipAnalyze
|
|
9126
9695
|
});
|
|
9127
|
-
|
|
9696
|
+
const scriptFiles = [];
|
|
9697
|
+
for (const [fileName, script] of Object.entries(generated.scripts)) {
|
|
9698
|
+
const outputFileName = fileName === "import-postgres-direct.sql" ? scriptName : fileName;
|
|
9699
|
+
const outputFilePath = path17.join(outputPath, outputFileName);
|
|
9700
|
+
await writeFile6(outputFilePath, script, "utf8");
|
|
9701
|
+
scriptFiles.push(outputFilePath);
|
|
9702
|
+
}
|
|
9128
9703
|
const manifestPath = path17.join(outputPath, "manifest.json");
|
|
9129
9704
|
const summaryDatasets = [...summariesByDataset.values()].sort(
|
|
9130
9705
|
(left, right) => IMPORT_ORDER.indexOf(left.dataset) - IMPORT_ORDER.indexOf(right.dataset)
|
|
@@ -9136,13 +9711,19 @@ async function generatePostgresDirectScript(inputPath, options = {}) {
|
|
|
9136
9711
|
const manifest = {
|
|
9137
9712
|
generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
9138
9713
|
mode: "direct-sanitized-script",
|
|
9714
|
+
transactionMode,
|
|
9715
|
+
include,
|
|
9716
|
+
skipIndexes,
|
|
9717
|
+
skipAnalyze,
|
|
9139
9718
|
inputPath: path17.resolve(inputPath),
|
|
9140
9719
|
validatedPath,
|
|
9141
9720
|
outputPath,
|
|
9142
9721
|
scriptPath,
|
|
9722
|
+
scriptFiles,
|
|
9143
9723
|
sourceEncoding,
|
|
9144
9724
|
totalFiles: sourceFiles.length,
|
|
9145
9725
|
totalBytes,
|
|
9726
|
+
steps: generated.steps,
|
|
9146
9727
|
datasets: summaryDatasets
|
|
9147
9728
|
};
|
|
9148
9729
|
await writeFile6(
|
|
@@ -9165,15 +9746,19 @@ async function generatePostgresDirectScript(inputPath, options = {}) {
|
|
|
9165
9746
|
scriptPath,
|
|
9166
9747
|
manifestPath,
|
|
9167
9748
|
sourceEncoding,
|
|
9749
|
+
transactionMode,
|
|
9168
9750
|
totalFiles: sourceFiles.length,
|
|
9169
9751
|
totalBytes,
|
|
9170
9752
|
datasets: summaryDatasets,
|
|
9753
|
+
scriptFiles,
|
|
9754
|
+
steps: generated.steps,
|
|
9171
9755
|
warnings: [
|
|
9172
9756
|
...validation.ok ? [] : validation.errors,
|
|
9173
9757
|
"This script imports sanitized Receita files directly with psql \\copy. It avoids rewriting the full dataset into a second CSV tree.",
|
|
9174
|
-
"The generated
|
|
9758
|
+
"The generated scripts expect the database schema generated by cnpj-db-loader to be applied before execution.",
|
|
9175
9759
|
"The direct PostgreSQL script now defaults to UTF8 because the sanitize command writes clean UTF-8 files.",
|
|
9176
|
-
"Use --source-encoding WIN1252 or LATIN1 only when generating scripts for legacy sanitized files produced by older loader versions."
|
|
9760
|
+
"Use --source-encoding WIN1252 or LATIN1 only when generating scripts for legacy sanitized files produced by older loader versions.",
|
|
9761
|
+
"The generated import is now modular. Use import-postgres-direct.sql as the orchestrator or run individual phase scripts manually."
|
|
9177
9762
|
],
|
|
9178
9763
|
nextStep: inferNextStep5(scriptPath)
|
|
9179
9764
|
};
|
|
@@ -9409,6 +9994,28 @@ function printDatabaseConfigSummary(config, logFilePath) {
|
|
|
9409
9994
|
);
|
|
9410
9995
|
console.log(`${theme.muted("Log file:")} ${resolveLogFilePath(logFilePath)}`);
|
|
9411
9996
|
}
|
|
9997
|
+
function printFederalRevenueConfigSummary(config, logFilePath) {
|
|
9998
|
+
console.log(
|
|
9999
|
+
theme.successLabel("FEDERAL REVENUE"),
|
|
10000
|
+
"Federal Revenue configuration loaded."
|
|
10001
|
+
);
|
|
10002
|
+
console.log(
|
|
10003
|
+
formatKeyValue(
|
|
10004
|
+
"WebDAV URL",
|
|
10005
|
+
`${config.webdavUrl}${config.configured.webdavUrl ? "" : " (default)"}`
|
|
10006
|
+
)
|
|
10007
|
+
);
|
|
10008
|
+
console.log(
|
|
10009
|
+
formatKeyValue(
|
|
10010
|
+
"User agent",
|
|
10011
|
+
`${config.userAgent}${config.configured.userAgent ? "" : " (default)"}`
|
|
10012
|
+
)
|
|
10013
|
+
);
|
|
10014
|
+
console.log(
|
|
10015
|
+
formatKeyValue("Share token", config.shareToken ?? "not configured")
|
|
10016
|
+
);
|
|
10017
|
+
console.log(`${theme.muted("Log file:")} ${resolveLogFilePath(logFilePath)}`);
|
|
10018
|
+
}
|
|
9412
10019
|
function printDatabaseCleanupSummary(summary, logFilePath) {
|
|
9413
10020
|
console.log(
|
|
9414
10021
|
theme.successLabel("DATABASE"),
|
|
@@ -9821,6 +10428,16 @@ function printPostgresDirectScriptSummary(summary, logFilePath) {
|
|
|
9821
10428
|
console.log(formatKeyValue("Generated script", summary.scriptPath));
|
|
9822
10429
|
console.log(formatKeyValue("Manifest", summary.manifestPath));
|
|
9823
10430
|
console.log(formatKeyValue("Source encoding", summary.sourceEncoding));
|
|
10431
|
+
console.log(formatKeyValue("Transaction mode", summary.transactionMode));
|
|
10432
|
+
console.log(
|
|
10433
|
+
formatKeyValue("Generated SQL files", summary.scriptFiles.length)
|
|
10434
|
+
);
|
|
10435
|
+
console.log(
|
|
10436
|
+
formatKeyValue(
|
|
10437
|
+
"Included steps",
|
|
10438
|
+
summary.steps.filter((step2) => step2.included).map((step2) => step2.name).join(", ")
|
|
10439
|
+
)
|
|
10440
|
+
);
|
|
9824
10441
|
console.log(formatKeyValue("Source files", summary.totalFiles));
|
|
9825
10442
|
console.log(formatKeyValue("Source bytes", formatBytes(summary.totalBytes)));
|
|
9826
10443
|
if (summary.datasets.length > 0) {
|
|
@@ -10511,6 +11128,14 @@ function createPostgresDirectScriptProgressReporter() {
|
|
|
10511
11128
|
console.log(formatKeyValue("Validated path", event.validatedPath));
|
|
10512
11129
|
console.log(formatKeyValue("Output path", event.outputPath));
|
|
10513
11130
|
console.log(formatKeyValue("Source encoding", event.sourceEncoding));
|
|
11131
|
+
console.log(formatKeyValue("Transaction mode", event.transactionMode));
|
|
11132
|
+
console.log(formatKeyValue("Included steps", event.include.join(", ")));
|
|
11133
|
+
console.log(
|
|
11134
|
+
formatKeyValue("Skip indexes", event.skipIndexes ? "yes" : "no")
|
|
11135
|
+
);
|
|
11136
|
+
console.log(
|
|
11137
|
+
formatKeyValue("Skip analyze", event.skipAnalyze ? "yes" : "no")
|
|
11138
|
+
);
|
|
10514
11139
|
console.log(formatKeyValue("Files queued", event.totalFiles));
|
|
10515
11140
|
return;
|
|
10516
11141
|
}
|
|
@@ -10916,8 +11541,21 @@ function applySharedOptions(options, target) {
|
|
|
10916
11541
|
if (options.shareToken) {
|
|
10917
11542
|
target.shareToken = options.shareToken;
|
|
10918
11543
|
}
|
|
11544
|
+
if (options.userAgent) {
|
|
11545
|
+
target.userAgent = options.userAgent;
|
|
11546
|
+
}
|
|
10919
11547
|
return target;
|
|
10920
11548
|
}
|
|
11549
|
+
async function resolveSharedOptions(referenceArgument, options) {
|
|
11550
|
+
const mergedOptions = mergeSharedOptions(referenceArgument, options);
|
|
11551
|
+
const clientOptions = await resolveFederalRevenueClientOptions(mergedOptions);
|
|
11552
|
+
return {
|
|
11553
|
+
...mergedOptions,
|
|
11554
|
+
...clientOptions.baseUrl ? { baseUrl: clientOptions.baseUrl } : {},
|
|
11555
|
+
...clientOptions.shareToken ? { shareToken: clientOptions.shareToken } : {},
|
|
11556
|
+
...clientOptions.userAgent ? { userAgent: clientOptions.userAgent } : {}
|
|
11557
|
+
};
|
|
11558
|
+
}
|
|
10921
11559
|
function buildDownloadOptions(options) {
|
|
10922
11560
|
const downloadOptions = applySharedOptions(
|
|
10923
11561
|
options,
|
|
@@ -11002,6 +11640,9 @@ function registerSharedOptions(command) {
|
|
|
11002
11640
|
).option(
|
|
11003
11641
|
"--share-token <token>",
|
|
11004
11642
|
"Override the public Federal Revenue share token."
|
|
11643
|
+
).option(
|
|
11644
|
+
"--user-agent <value>",
|
|
11645
|
+
"Override the Federal Revenue HTTP user agent."
|
|
11005
11646
|
);
|
|
11006
11647
|
}
|
|
11007
11648
|
function registerDownloadOptions(command) {
|
|
@@ -11036,6 +11677,70 @@ function registerFederalRevenueCommands(program) {
|
|
|
11036
11677
|
const federalRevenue = program.command("federal-revenue").alias("revenue").description(
|
|
11037
11678
|
"Check, download, sync, and maintain CNPJ monthly files from the Federal Revenue public share."
|
|
11038
11679
|
);
|
|
11680
|
+
const config = federalRevenue.command("config").description(
|
|
11681
|
+
"Read, persist, test, or reset Federal Revenue public share settings."
|
|
11682
|
+
);
|
|
11683
|
+
config.command("set").argument(
|
|
11684
|
+
"<key>",
|
|
11685
|
+
"Configuration key: share-token, webdav-url, or user-agent."
|
|
11686
|
+
).argument("<value>", "Configuration value to persist.").description(
|
|
11687
|
+
"Persist a Federal Revenue setting in the local CNPJ DB Loader config file."
|
|
11688
|
+
).action(async (key, value) => {
|
|
11689
|
+
const effectiveConfig = await setFederalRevenueConfigValue(key, value);
|
|
11690
|
+
const logFilePath = await writeCommandLog("federal-revenue-config-set", {
|
|
11691
|
+
key,
|
|
11692
|
+
effectiveConfig
|
|
11693
|
+
});
|
|
11694
|
+
printFederalRevenueConfigSummary(effectiveConfig, logFilePath);
|
|
11695
|
+
});
|
|
11696
|
+
config.command("show").description("Show the currently persisted Federal Revenue configuration.").action(async () => {
|
|
11697
|
+
const effectiveConfig = await readFederalRevenueEffectiveConfig();
|
|
11698
|
+
const logFilePath = await writeCommandLog(
|
|
11699
|
+
"federal-revenue-config-show",
|
|
11700
|
+
effectiveConfig
|
|
11701
|
+
);
|
|
11702
|
+
printFederalRevenueConfigSummary(effectiveConfig, logFilePath);
|
|
11703
|
+
});
|
|
11704
|
+
config.command("test").description("Test the configured Federal Revenue WebDAV connection.").action(async () => {
|
|
11705
|
+
const clientOptions = await resolveFederalRevenueClientOptions();
|
|
11706
|
+
const result = await listFederalRevenueReferences(clientOptions);
|
|
11707
|
+
const references = result.references.map((item) => item.reference);
|
|
11708
|
+
const latestReference = references.at(-1) ?? "not found";
|
|
11709
|
+
const logFilePath = await writeCommandLog("federal-revenue-config-test", {
|
|
11710
|
+
remoteBaseUrl: result.remoteBaseUrl,
|
|
11711
|
+
referencesFound: references.length,
|
|
11712
|
+
latestReference
|
|
11713
|
+
});
|
|
11714
|
+
printFederalRevenueConfigSummary(
|
|
11715
|
+
await readFederalRevenueEffectiveConfig(),
|
|
11716
|
+
logFilePath
|
|
11717
|
+
);
|
|
11718
|
+
console.log(
|
|
11719
|
+
`Federal Revenue WebDAV connection succeeded. References found: ${references.length}. Latest reference: ${latestReference}.`
|
|
11720
|
+
);
|
|
11721
|
+
});
|
|
11722
|
+
config.command("reset").argument(
|
|
11723
|
+
"[key]",
|
|
11724
|
+
"Optional key to reset: share-token, webdav-url, or user-agent. When omitted, all Federal Revenue settings are reset."
|
|
11725
|
+
).option("-f, --force", "Skip the confirmation prompt.").description(
|
|
11726
|
+
"Reset one Federal Revenue setting or all persisted Federal Revenue settings."
|
|
11727
|
+
).action(async (key, options) => {
|
|
11728
|
+
const target = key ? `Federal Revenue ${key}` : "all Federal Revenue";
|
|
11729
|
+
const confirmed = await confirmFederalRevenueAction(
|
|
11730
|
+
`Reset ${target} configuration?`,
|
|
11731
|
+
options.force
|
|
11732
|
+
);
|
|
11733
|
+
if (!confirmed) {
|
|
11734
|
+
console.log("Federal Revenue config reset cancelled.");
|
|
11735
|
+
return;
|
|
11736
|
+
}
|
|
11737
|
+
const effectiveConfig = await resetFederalRevenueConfig(key);
|
|
11738
|
+
const logFilePath = await writeCommandLog(
|
|
11739
|
+
"federal-revenue-config-reset",
|
|
11740
|
+
{ key: key ?? "all", effectiveConfig }
|
|
11741
|
+
);
|
|
11742
|
+
printFederalRevenueConfigSummary(effectiveConfig, logFilePath);
|
|
11743
|
+
});
|
|
11039
11744
|
registerSharedOptions(
|
|
11040
11745
|
federalRevenue.command("check").argument(
|
|
11041
11746
|
"[reference]",
|
|
@@ -11045,7 +11750,10 @@ function registerFederalRevenueCommands(program) {
|
|
|
11045
11750
|
)
|
|
11046
11751
|
).action(
|
|
11047
11752
|
async (referenceArgument, options) => {
|
|
11048
|
-
const resolvedOptions =
|
|
11753
|
+
const resolvedOptions = await resolveSharedOptions(
|
|
11754
|
+
referenceArgument,
|
|
11755
|
+
options
|
|
11756
|
+
);
|
|
11049
11757
|
const summary = await checkFederalRevenueDataset(
|
|
11050
11758
|
applySharedOptions(resolvedOptions, {})
|
|
11051
11759
|
);
|
|
@@ -11065,7 +11773,10 @@ function registerFederalRevenueCommands(program) {
|
|
|
11065
11773
|
)
|
|
11066
11774
|
).action(
|
|
11067
11775
|
async (referenceArgument, options) => {
|
|
11068
|
-
const resolvedOptions =
|
|
11776
|
+
const resolvedOptions = await resolveSharedOptions(
|
|
11777
|
+
referenceArgument,
|
|
11778
|
+
options
|
|
11779
|
+
);
|
|
11069
11780
|
const confirmed = await confirmFederalRevenueAction(
|
|
11070
11781
|
"Download Federal Revenue CNPJ ZIP files now? Existing completed files are skipped unless --overwrite is used.",
|
|
11071
11782
|
options.force
|
|
@@ -11098,7 +11809,10 @@ function registerFederalRevenueCommands(program) {
|
|
|
11098
11809
|
)
|
|
11099
11810
|
).action(
|
|
11100
11811
|
async (referenceArgument, options) => {
|
|
11101
|
-
const resolvedOptions =
|
|
11812
|
+
const resolvedOptions = await resolveSharedOptions(
|
|
11813
|
+
referenceArgument,
|
|
11814
|
+
options
|
|
11815
|
+
);
|
|
11102
11816
|
const summary = await getFederalRevenueStatus(
|
|
11103
11817
|
buildStatusOptions({ ...options, ...resolvedOptions })
|
|
11104
11818
|
);
|
|
@@ -11121,7 +11835,10 @@ function registerFederalRevenueCommands(program) {
|
|
|
11121
11835
|
)
|
|
11122
11836
|
).action(
|
|
11123
11837
|
async (referenceArgument, options) => {
|
|
11124
|
-
const resolvedOptions =
|
|
11838
|
+
const resolvedOptions = await resolveSharedOptions(
|
|
11839
|
+
referenceArgument,
|
|
11840
|
+
options
|
|
11841
|
+
);
|
|
11125
11842
|
const confirmed = await confirmFederalRevenueAction(
|
|
11126
11843
|
"Retry incomplete Federal Revenue files now? Completed files are kept.",
|
|
11127
11844
|
options.force
|
|
@@ -11154,7 +11871,10 @@ function registerFederalRevenueCommands(program) {
|
|
|
11154
11871
|
)
|
|
11155
11872
|
).action(
|
|
11156
11873
|
async (referenceArgument, options) => {
|
|
11157
|
-
const resolvedOptions =
|
|
11874
|
+
const resolvedOptions = await resolveSharedOptions(
|
|
11875
|
+
referenceArgument,
|
|
11876
|
+
options
|
|
11877
|
+
);
|
|
11158
11878
|
const actionLabel = options.all ? "remove the entire selected Federal Revenue reference folder" : options.failed ? "remove failed and partial Federal Revenue files" : "remove Federal Revenue .part files";
|
|
11159
11879
|
const confirmed = await confirmFederalRevenueAction(
|
|
11160
11880
|
`This will ${actionLabel}. Continue?`,
|
|
@@ -11209,7 +11929,10 @@ function registerFederalRevenueCommands(program) {
|
|
|
11209
11929
|
)
|
|
11210
11930
|
).action(
|
|
11211
11931
|
async (referenceArgument, options) => {
|
|
11212
|
-
const resolvedOptions =
|
|
11932
|
+
const resolvedOptions = await resolveSharedOptions(
|
|
11933
|
+
referenceArgument,
|
|
11934
|
+
options
|
|
11935
|
+
);
|
|
11213
11936
|
const confirmed = await confirmFederalRevenueAction(
|
|
11214
11937
|
"Run the full Federal Revenue sync now? This downloads files, extracts archives, sanitizes the dataset, and imports it into PostgreSQL.",
|
|
11215
11938
|
options.force
|
|
@@ -11438,7 +12161,13 @@ function registerPostgresCommands(program) {
|
|
|
11438
12161
|
).option(
|
|
11439
12162
|
"--source-encoding <encoding>",
|
|
11440
12163
|
"PostgreSQL client encoding used while reading sanitized Receita files. Defaults to UTF8."
|
|
11441
|
-
).option(
|
|
12164
|
+
).option(
|
|
12165
|
+
"--transaction-mode <mode>",
|
|
12166
|
+
"Transaction mode for generated scripts: single, phase or none. Defaults to single."
|
|
12167
|
+
).option(
|
|
12168
|
+
"--include <items>",
|
|
12169
|
+
"Comma-separated steps to include: domains,companies,establishments,partners,simples,secondary-cnaes,indexes,analyze."
|
|
12170
|
+
).option("--skip-indexes", "Do not generate the indexes step.").option("--skip-analyze", "Do not generate the analyze step.").option("-f, --force", "Skip the confirmation prompt.").description(
|
|
11442
12171
|
"Generate a direct psql import script that loads sanitized Receita files without rewriting them into new CSV files."
|
|
11443
12172
|
).action(
|
|
11444
12173
|
async (input2, options) => {
|
|
@@ -11467,6 +12196,18 @@ function registerPostgresCommands(program) {
|
|
|
11467
12196
|
if (options.sourceEncoding) {
|
|
11468
12197
|
generateOptions.sourceEncoding = options.sourceEncoding;
|
|
11469
12198
|
}
|
|
12199
|
+
if (options.transactionMode) {
|
|
12200
|
+
generateOptions.transactionMode = options.transactionMode;
|
|
12201
|
+
}
|
|
12202
|
+
if (options.include) {
|
|
12203
|
+
generateOptions.include = options.include.split(",").map((item) => item.trim()).filter(Boolean);
|
|
12204
|
+
}
|
|
12205
|
+
if (options.skipIndexes) {
|
|
12206
|
+
generateOptions.skipIndexes = true;
|
|
12207
|
+
}
|
|
12208
|
+
if (options.skipAnalyze) {
|
|
12209
|
+
generateOptions.skipAnalyze = true;
|
|
12210
|
+
}
|
|
11470
12211
|
const summary = await generatePostgresDirectScript(
|
|
11471
12212
|
input2,
|
|
11472
12213
|
generateOptions
|