@danielarndt0/cnpj-db-loader 2.4.0-beta.1 → 2.4.0-beta.3
This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +8 -8
- package/dist/cli.js +856 -137
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +33 -1
- package/dist/index.js +782 -133
- package/dist/index.js.map +1 -1
- package/docs/architecture.md +1 -1
- package/docs/cli.md +1 -1
- package/docs/commands.md +6 -2
- package/docs/postgres-direct.md +239 -45
- package/docs/releases/v2.4.0-beta.3.md +42 -0
- package/docs/sanitize.md +52 -16
- package/package.json +3 -3
- package/docs/releases/v2.4.0.md +0 -40
package/README.md
CHANGED
|
@@ -10,7 +10,7 @@ This version focuses on the real loading workflow:
|
|
|
10
10
|
- check, download, retry, clean, and inspect the latest Federal Revenue CNPJ monthly ZIP archives from the public share
|
|
11
11
|
- extract Receita Federal ZIP archives
|
|
12
12
|
- validate an extracted tree
|
|
13
|
-
- sanitize validated files before import
|
|
13
|
+
- sanitize validated files into clean UTF-8 before import, removing NUL bytes, invalid bytes, and problematic control characters
|
|
14
14
|
- print or generate final, staging, or combined SQL schemas
|
|
15
15
|
- configure and test the default PostgreSQL URL
|
|
16
16
|
- import validated dataset files into PostgreSQL with:
|
|
@@ -51,8 +51,8 @@ cnpj-db-loader schema generate --profile full
|
|
|
51
51
|
cnpj-db-loader import ./downloads/<reference>/sanitized --load-batch-size 500 --materialize-batch-size 50000 --verbose-progress
|
|
52
52
|
|
|
53
53
|
# Optional hybrid path for PostgreSQL direct loading
|
|
54
|
-
cnpj-db-loader postgres generate-script ./downloads/<reference>/sanitized --output ./downloads/<reference>/postgres-direct --force
|
|
55
|
-
psql "postgres://postgres:postgres@localhost:5432/cnpj" -f ./downloads/<reference>/postgres-direct/import-postgres-direct.sql
|
|
54
|
+
cnpj-db-loader postgres generate-script ./downloads/<reference>/sanitized --output ./downloads/<reference>/postgres-direct --source-encoding UTF8 --transaction-mode phase --force
|
|
55
|
+
psql -d "postgres://postgres:postgres@localhost:5432/cnpj" -f ./downloads/<reference>/postgres-direct/import-postgres-direct.sql
|
|
56
56
|
```
|
|
57
57
|
|
|
58
58
|
## Stable commands
|
|
@@ -67,7 +67,7 @@ cnpj-db-loader federal-revenue sync [reference] [--reference <yyyy-mm>] [--curre
|
|
|
67
67
|
cnpj-db-loader inspect <input>
|
|
68
68
|
cnpj-db-loader extract <input> [--output <path>]
|
|
69
69
|
cnpj-db-loader validate <input>
|
|
70
|
-
cnpj-db-loader sanitize <input> [--output <path>] [--dataset <name>] [-f]
|
|
70
|
+
cnpj-db-loader sanitize <input> [--output <path>] [--dataset <name>] [--source-encoding <encoding>] [-f]
|
|
71
71
|
cnpj-db-loader schema print [--profile <profile>]
|
|
72
72
|
cnpj-db-loader schema generate [--name <name>] [--output <path>] [--profile <profile>]
|
|
73
73
|
cnpj-db-loader database config set <url>
|
|
@@ -78,7 +78,7 @@ cnpj-db-loader database cleanup staging [--db-url <url>] [--dataset <name>] [--v
|
|
|
78
78
|
cnpj-db-loader database cleanup materialized [--db-url <url>] [--dataset <name>] [--force]
|
|
79
79
|
cnpj-db-loader database cleanup checkpoints [--db-url <url>] [--phase <phase>] [--dataset <name>] [--validated-path <path>] [--plan-id <id>] [--force]
|
|
80
80
|
cnpj-db-loader database cleanup plans [--db-url <url>] [--validated-path <path>] [--plan-id <id>] [--force]
|
|
81
|
-
cnpj-db-loader postgres generate-script <input> [--output <path>] [--dataset <name>] [--script-name <name>] [--source-encoding <encoding>] [-f]
|
|
81
|
+
cnpj-db-loader postgres generate-script <input> [--output <path>] [--dataset <name>] [--script-name <name>] [--source-encoding <encoding>] [--transaction-mode <mode>] [--include <items>] [--skip-indexes] [--skip-analyze] [-f]
|
|
82
82
|
cnpj-db-loader postgres export-csv <input> [--output <path>] [--dataset <name>] [--script-name <name>] [-f]
|
|
83
83
|
cnpj-db-loader import <input> [--db-url <url>] [--dataset <name>] [--load-batch-size <size>] [--materialize-batch-size <size>] [--verbose-progress] [-f]
|
|
84
84
|
cnpj-db-loader import load <input> [--db-url <url>] [--dataset <name>] [--load-batch-size <size>] [--verbose-progress] [-f]
|
|
@@ -95,11 +95,11 @@ For local benchmarks or controlled full loads, the CLI can now generate a direct
|
|
|
95
95
|
|
|
96
96
|
```bash
|
|
97
97
|
cnpj-db-loader sanitize ./downloads/<reference>/extracted
|
|
98
|
-
cnpj-db-loader postgres generate-script ./downloads/<reference>/sanitized --output ./downloads/<reference>/postgres-direct --force
|
|
99
|
-
psql "postgres://postgres:postgres@localhost:5432/cnpj" -f ./downloads/<reference>/postgres-direct/import-postgres-direct.sql
|
|
98
|
+
cnpj-db-loader postgres generate-script ./downloads/<reference>/sanitized --output ./downloads/<reference>/postgres-direct --source-encoding UTF8 --transaction-mode phase --force
|
|
99
|
+
psql -d "postgres://postgres:postgres@localhost:5432/cnpj" -f ./downloads/<reference>/postgres-direct/import-postgres-direct.sql
|
|
100
100
|
```
|
|
101
101
|
|
|
102
|
-
This path keeps download, extraction, validation and sanitization inside the loader, then lets PostgreSQL load the sanitized Receita files directly through `\copy`, convert values into staging tables and materialize the final tables with set-based SQL. The standard `import` command remains the safest path when checkpoint resume and quarantine recovery are required.
|
|
102
|
+
This path keeps download, extraction, validation, and robust UTF-8 sanitization inside the loader, then lets PostgreSQL load the sanitized Receita files directly through `\copy`, convert values into staging tables, and materialize the final tables with set-based SQL. The standard `import` command remains the safest path when checkpoint resume and quarantine recovery are required.
|
|
103
103
|
|
|
104
104
|
## Logs
|
|
105
105
|
|