@danielarndt0/cnpj-db-loader 2.4.0-beta.2 → 2.4.0-beta.4
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/README.md +10 -6
- package/dist/cli.js +1037 -296
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +337 -297
- package/dist/index.js +879 -290
- package/dist/index.js.map +1 -1
- package/docs/commands.md +11 -1
- package/docs/federal-revenue.md +36 -2
- package/docs/postgres-direct.md +235 -41
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -7,7 +7,7 @@ CNPJ DB Loader is a practical CLI for preparing Brazilian Federal Revenue CNPJ d
|
|
|
7
7
|
This version focuses on the real loading workflow:
|
|
8
8
|
|
|
9
9
|
- inspect a downloaded directory
|
|
10
|
-
- check, download, retry, clean, and inspect the latest Federal Revenue CNPJ monthly ZIP archives from the public share
|
|
10
|
+
- configure, check, download, retry, clean, and inspect the latest Federal Revenue CNPJ monthly ZIP archives from the public share
|
|
11
11
|
- extract Receita Federal ZIP archives
|
|
12
12
|
- validate an extracted tree
|
|
13
13
|
- sanitize validated files into clean UTF-8 before import, removing NUL bytes, invalid bytes and problematic control characters
|
|
@@ -39,6 +39,8 @@ npm run cli -- --help
|
|
|
39
39
|
## Quick start
|
|
40
40
|
|
|
41
41
|
```bash
|
|
42
|
+
cnpj-db-loader federal-revenue config set share-token "<public-share-token>"
|
|
43
|
+
cnpj-db-loader federal-revenue config test
|
|
42
44
|
cnpj-db-loader federal-revenue check
|
|
43
45
|
cnpj-db-loader federal-revenue download --output ./downloads
|
|
44
46
|
cnpj-db-loader federal-revenue status --output ./downloads
|
|
@@ -51,13 +53,15 @@ cnpj-db-loader schema generate --profile full
|
|
|
51
53
|
cnpj-db-loader import ./downloads/<reference>/sanitized --load-batch-size 500 --materialize-batch-size 50000 --verbose-progress
|
|
52
54
|
|
|
53
55
|
# Optional hybrid path for PostgreSQL direct loading
|
|
54
|
-
cnpj-db-loader postgres generate-script ./downloads/<reference>/sanitized --output ./downloads/<reference>/postgres-direct --source-encoding UTF8 --force
|
|
55
|
-
psql "postgres://postgres:postgres@localhost:5432/cnpj" -f ./downloads/<reference>/postgres-direct/import-postgres-direct.sql
|
|
56
|
+
cnpj-db-loader postgres generate-script ./downloads/<reference>/sanitized --output ./downloads/<reference>/postgres-direct --source-encoding UTF8 --transaction-mode phase --force
|
|
57
|
+
psql -d "postgres://postgres:postgres@localhost:5432/cnpj" -f ./downloads/<reference>/postgres-direct/import-postgres-direct.sql
|
|
56
58
|
```
|
|
57
59
|
|
|
58
60
|
## Stable commands
|
|
59
61
|
|
|
60
62
|
```bash
|
|
63
|
+
cnpj-db-loader federal-revenue config set share-token "<public-share-token>"
|
|
64
|
+
cnpj-db-loader federal-revenue config test
|
|
61
65
|
cnpj-db-loader federal-revenue check [reference] [--reference <yyyy-mm>] [--current]
|
|
62
66
|
cnpj-db-loader federal-revenue download [reference] [--reference <yyyy-mm>] [--current] [--output <path>] [--retries <number>] [--overwrite] [-f]
|
|
63
67
|
cnpj-db-loader federal-revenue status [reference] [--reference <yyyy-mm>] [--current] [--output <path>]
|
|
@@ -78,7 +82,7 @@ cnpj-db-loader database cleanup staging [--db-url <url>] [--dataset <name>] [--v
|
|
|
78
82
|
cnpj-db-loader database cleanup materialized [--db-url <url>] [--dataset <name>] [--force]
|
|
79
83
|
cnpj-db-loader database cleanup checkpoints [--db-url <url>] [--phase <phase>] [--dataset <name>] [--validated-path <path>] [--plan-id <id>] [--force]
|
|
80
84
|
cnpj-db-loader database cleanup plans [--db-url <url>] [--validated-path <path>] [--plan-id <id>] [--force]
|
|
81
|
-
cnpj-db-loader postgres generate-script <input> [--output <path>] [--dataset <name>] [--script-name <name>] [--source-encoding <encoding>] [-f]
|
|
85
|
+
cnpj-db-loader postgres generate-script <input> [--output <path>] [--dataset <name>] [--script-name <name>] [--source-encoding <encoding>] [--transaction-mode <mode>] [--include <items>] [--skip-indexes] [--skip-analyze] [-f]
|
|
82
86
|
cnpj-db-loader postgres export-csv <input> [--output <path>] [--dataset <name>] [--script-name <name>] [-f]
|
|
83
87
|
cnpj-db-loader import <input> [--db-url <url>] [--dataset <name>] [--load-batch-size <size>] [--materialize-batch-size <size>] [--verbose-progress] [-f]
|
|
84
88
|
cnpj-db-loader import load <input> [--db-url <url>] [--dataset <name>] [--load-batch-size <size>] [--verbose-progress] [-f]
|
|
@@ -95,8 +99,8 @@ For local benchmarks or controlled full loads, the CLI can now generate a direct
|
|
|
95
99
|
|
|
96
100
|
```bash
|
|
97
101
|
cnpj-db-loader sanitize ./downloads/<reference>/extracted
|
|
98
|
-
cnpj-db-loader postgres generate-script ./downloads/<reference>/sanitized --output ./downloads/<reference>/postgres-direct --source-encoding UTF8 --force
|
|
99
|
-
psql "postgres://postgres:postgres@localhost:5432/cnpj" -f ./downloads/<reference>/postgres-direct/import-postgres-direct.sql
|
|
102
|
+
cnpj-db-loader postgres generate-script ./downloads/<reference>/sanitized --output ./downloads/<reference>/postgres-direct --source-encoding UTF8 --transaction-mode phase --force
|
|
103
|
+
psql -d "postgres://postgres:postgres@localhost:5432/cnpj" -f ./downloads/<reference>/postgres-direct/import-postgres-direct.sql
|
|
100
104
|
```
|
|
101
105
|
|
|
102
106
|
This path keeps download, extraction, validation and robust UTF-8 sanitization inside the loader, then lets PostgreSQL load the sanitized Receita files directly through `\copy`, convert values into staging tables and materialize the final tables with set-based SQL. The standard `import` command remains the safest path when checkpoint resume and quarantine recovery are required.
|