dumpling-cli 0.7.0a0__tar.gz → 0.7.0b0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/AGENTS.md +3 -0
  2. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/CHANGELOG.md +15 -1
  3. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/Cargo.lock +132 -3
  4. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/Cargo.toml +5 -1
  5. dumpling_cli-0.7.0b0/LICENSE +21 -0
  6. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/PKG-INFO +17 -9
  7. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/README.md +13 -8
  8. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/docs/src/configuration.md +41 -9
  9. dumpling_cli-0.7.0b0/docs/src/getting-started.md +63 -0
  10. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/docs/src/index.md +2 -2
  11. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/pyproject.toml +4 -1
  12. dumpling_cli-0.7.0b0/src/compressed_input.rs +340 -0
  13. dumpling_cli-0.7.0b0/src/dump_input_detect.rs +171 -0
  14. dumpling_cli-0.7.0b0/src/dump_input_resolve.rs +224 -0
  15. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/src/filter.rs +12 -0
  16. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/src/lint.rs +2 -0
  17. dumpling_cli-0.7.0b0/src/log_sanitize.rs +29 -0
  18. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/src/main.rs +131 -178
  19. dumpling_cli-0.7.0b0/src/pg_restore_decode.rs +221 -0
  20. dumpling_cli-0.7.0b0/src/scaffold.rs +255 -0
  21. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/src/seal.rs +14 -1
  22. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/src/settings.rs +62 -2
  23. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/src/sql.rs +52 -0
  24. dumpling_cli-0.7.0a0/docs/src/getting-started.md +0 -45
  25. dumpling_cli-0.7.0a0/src/scaffold.rs +0 -280
  26. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/.dumplingconf.example +0 -0
  27. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/.github/workflows/ci.yml +0 -0
  28. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/.github/workflows/docs-pr.yml +0 -0
  29. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/.github/workflows/docs.yml +0 -0
  30. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/.github/workflows/platform-compat-latest.yml +0 -0
  31. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/.github/workflows/platform-compat-matrix.yml +0 -0
  32. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/.github/workflows/policy-lint.yml +0 -0
  33. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/.github/workflows/publish.yml +0 -0
  34. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/.github/workflows/release.yml +0 -0
  35. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/.github/workflows/tests.yml +0 -0
  36. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/.gitignore +0 -0
  37. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/CONTRIBUTING.md +0 -0
  38. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/CONTRIBUTORS.md +0 -0
  39. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/MAINTENANCE.md +0 -0
  40. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/assets/logo.svg +0 -0
  41. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/book.toml +0 -0
  42. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/datetime_out.sql +0 -0
  43. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/datetime_sample.sql +0 -0
  44. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/docs/src/SUMMARY.md +0 -0
  45. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/docs/src/ci-guardrails.md +0 -0
  46. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/docs/src/releasing.md +0 -0
  47. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/rust-toolchain.toml +0 -0
  48. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/scripts/setup-dev.sh +0 -0
  49. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/src/faker_dispatch.rs +0 -0
  50. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/src/report.rs +0 -0
  51. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/src/scan.rs +0 -0
  52. {dumpling_cli-0.7.0a0 → dumpling_cli-0.7.0b0}/src/transform.rs +0 -0
@@ -28,6 +28,9 @@ src/
28
28
  filter.rs — Row-filter predicate evaluation (eq/neq/like/regex/JSON-path/…)
29
29
  scan.rs — Post-transform residual PII scanner (email/SSN/PAN/token regex)
30
30
  report.rs — JSON report data structures and Reporter helper
31
+ compressed_input.rs — gzip/ZIP wrappers; streaming vs temp materialization
32
+ dump_input_resolve.rs — shared `--input` file resolution for anonymize + scaffold-config
33
+ dump_input_detect.rs — PGDMP / directory dumps / MSSQL sniff helpers
31
34
  docs/src/ — mdBook documentation source
32
35
  .github/ — CI/CD GitHub Actions workflows
33
36
  Cargo.toml — Rust package manifest
@@ -7,9 +7,21 @@ and this project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.ht
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.7.0-beta] - 2026-05-07
11
+
12
+ Second **0.7.x** prerelease toward stable **0.7.0**.
13
+
14
+ ### Added
15
+
16
+ - **Gzip and ZIP inputs**: plain-SQL payloads inside **gzip** are decompressed **in-process** (streamed) when possible—no temporary file. Dumpling still materializes to the temp directory when required: **ZIP** archives (random-access central directory), **gzip wrapping `PGDMP`** or an inner **ZIP** (nested wrappers), or other cases where a filesystem path is needed for `pg_restore`. Temporary files are registered for removal when processing finishes. **`--in-place` is rejected** only when Dumpling had to write a **temporary** decompressed/extracted file (not when gzip plain-SQL streaming was used). Full multi-file ZIP packages (for example BACPAC) are still not supported as SQL input.
17
+
18
+ ### Changed
19
+
20
+ - **CLI**: removed **`--dump-decode`**. PostgreSQL **custom-format** (`PGDMP`) and **directory-format** (`toc.dat`) inputs are auto-detected when `--format postgres` (default) and decoded with `pg_restore -f -`. Options renamed: **`--pg-restore-arg`** (repeatable, was `--dump-decode-arg`), **`--keep-original`** (was `--dump-decode-keep-input` / `--pg-restore-keep-input`). **`--keep-original` is incompatible with `--in-place`** (use `--output` or stdout). Optional `[pg_restore]` table in config for default path/args.
21
+
10
22
  ## [0.7.0-alpha] - 2026-05-04
11
23
 
12
- Pre-release toward **0.7.0** (stable **0.7.0** is not published yet; crates use the **0.7.0-alpha** prerelease identifier until then).
24
+ First **0.7.x** prerelease toward stable **0.7.0** (superseded by **0.7.0-beta** for ongoing development builds).
13
25
 
14
26
  ### Removed
15
27
 
@@ -113,6 +125,8 @@ Pre-release toward **0.7.0** (stable **0.7.0** is not published yet; crates use
113
125
  - Configurable output scan severities and per-category thresholds via `[output_scan]`.
114
126
  - JSON report section for output scan findings including category, count, threshold, severity, and sample locations.
115
127
 
128
+ [0.7.0-beta]: https://github.com/ababic/dumpling/compare/v0.7.0-alpha...v0.7.0-beta
129
+ [0.7.0-alpha]: https://github.com/ababic/dumpling/compare/v0.6.0...v0.7.0-alpha
116
130
  [0.6.0]: https://github.com/ababic/dumpling/compare/v0.5.0...v0.6.0
117
131
  [0.5.0]: https://github.com/ababic/dumpling/compare/v0.4.3...v0.5.0
118
132
  [0.4.3]: https://github.com/ababic/dumpling/compare/v0.4.2...v0.4.3
@@ -2,6 +2,12 @@
2
2
  # It is not intended for manual editing.
3
3
  version = 4
4
4
 
5
+ [[package]]
6
+ name = "adler2"
7
+ version = "2.0.1"
8
+ source = "registry+https://github.com/rust-lang/crates.io-index"
9
+ checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
10
+
5
11
  [[package]]
6
12
  name = "aho-corasick"
7
13
  version = "1.1.4"
@@ -76,6 +82,15 @@ version = "1.0.102"
76
82
  source = "registry+https://github.com/rust-lang/crates.io-index"
77
83
  checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
78
84
 
85
+ [[package]]
86
+ name = "arbitrary"
87
+ version = "1.4.2"
88
+ source = "registry+https://github.com/rust-lang/crates.io-index"
89
+ checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1"
90
+ dependencies = [
91
+ "derive_arbitrary",
92
+ ]
93
+
79
94
  [[package]]
80
95
  name = "autocfg"
81
96
  version = "1.5.0"
@@ -187,6 +202,21 @@ dependencies = [
187
202
  "libc",
188
203
  ]
189
204
 
205
+ [[package]]
206
+ name = "crc32fast"
207
+ version = "1.5.0"
208
+ source = "registry+https://github.com/rust-lang/crates.io-index"
209
+ checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
210
+ dependencies = [
211
+ "cfg-if",
212
+ ]
213
+
214
+ [[package]]
215
+ name = "crossbeam-utils"
216
+ version = "0.8.21"
217
+ source = "registry+https://github.com/rust-lang/crates.io-index"
218
+ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
219
+
190
220
  [[package]]
191
221
  name = "crypto-common"
192
222
  version = "0.1.7"
@@ -231,6 +261,17 @@ dependencies = [
231
261
  "syn",
232
262
  ]
233
263
 
264
+ [[package]]
265
+ name = "derive_arbitrary"
266
+ version = "1.4.2"
267
+ source = "registry+https://github.com/rust-lang/crates.io-index"
268
+ checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a"
269
+ dependencies = [
270
+ "proc-macro2",
271
+ "quote",
272
+ "syn",
273
+ ]
274
+
234
275
  [[package]]
235
276
  name = "deunicode"
236
277
  version = "1.6.2"
@@ -248,6 +289,17 @@ dependencies = [
248
289
  "subtle",
249
290
  ]
250
291
 
292
+ [[package]]
293
+ name = "displaydoc"
294
+ version = "0.2.5"
295
+ source = "registry+https://github.com/rust-lang/crates.io-index"
296
+ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
297
+ dependencies = [
298
+ "proc-macro2",
299
+ "quote",
300
+ "syn",
301
+ ]
302
+
251
303
  [[package]]
252
304
  name = "dummy"
253
305
  version = "0.11.0"
@@ -262,12 +314,13 @@ dependencies = [
262
314
 
263
315
  [[package]]
264
316
  name = "dumpling"
265
- version = "0.7.0-alpha"
317
+ version = "0.7.0-beta"
266
318
  dependencies = [
267
319
  "anyhow",
268
320
  "chrono",
269
321
  "clap",
270
322
  "fake",
323
+ "flate2",
271
324
  "getrandom 0.2.17",
272
325
  "hmac",
273
326
  "lazy_static",
@@ -276,8 +329,9 @@ dependencies = [
276
329
  "serde",
277
330
  "serde_json",
278
331
  "sha2",
279
- "thiserror",
332
+ "thiserror 1.0.69",
280
333
  "toml",
334
+ "zip",
281
335
  ]
282
336
 
283
337
  [[package]]
@@ -310,6 +364,16 @@ version = "0.1.9"
310
364
  source = "registry+https://github.com/rust-lang/crates.io-index"
311
365
  checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
312
366
 
367
+ [[package]]
368
+ name = "flate2"
369
+ version = "1.1.9"
370
+ source = "registry+https://github.com/rust-lang/crates.io-index"
371
+ checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c"
372
+ dependencies = [
373
+ "crc32fast",
374
+ "miniz_oxide",
375
+ ]
376
+
313
377
  [[package]]
314
378
  name = "fnv"
315
379
  version = "1.0.7"
@@ -456,6 +520,16 @@ version = "2.8.0"
456
520
  source = "registry+https://github.com/rust-lang/crates.io-index"
457
521
  checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
458
522
 
523
+ [[package]]
524
+ name = "miniz_oxide"
525
+ version = "0.8.9"
526
+ source = "registry+https://github.com/rust-lang/crates.io-index"
527
+ checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
528
+ dependencies = [
529
+ "adler2",
530
+ "simd-adler32",
531
+ ]
532
+
459
533
  [[package]]
460
534
  name = "num-traits"
461
535
  version = "0.2.19"
@@ -643,6 +717,12 @@ version = "1.3.0"
643
717
  source = "registry+https://github.com/rust-lang/crates.io-index"
644
718
  checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
645
719
 
720
+ [[package]]
721
+ name = "simd-adler32"
722
+ version = "0.3.9"
723
+ source = "registry+https://github.com/rust-lang/crates.io-index"
724
+ checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214"
725
+
646
726
  [[package]]
647
727
  name = "strsim"
648
728
  version = "0.11.1"
@@ -672,7 +752,16 @@ version = "1.0.69"
672
752
  source = "registry+https://github.com/rust-lang/crates.io-index"
673
753
  checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
674
754
  dependencies = [
675
- "thiserror-impl",
755
+ "thiserror-impl 1.0.69",
756
+ ]
757
+
758
+ [[package]]
759
+ name = "thiserror"
760
+ version = "2.0.18"
761
+ source = "registry+https://github.com/rust-lang/crates.io-index"
762
+ checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4"
763
+ dependencies = [
764
+ "thiserror-impl 2.0.18",
676
765
  ]
677
766
 
678
767
  [[package]]
@@ -686,6 +775,17 @@ dependencies = [
686
775
  "syn",
687
776
  ]
688
777
 
778
+ [[package]]
779
+ name = "thiserror-impl"
780
+ version = "2.0.18"
781
+ source = "registry+https://github.com/rust-lang/crates.io-index"
782
+ checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5"
783
+ dependencies = [
784
+ "proc-macro2",
785
+ "quote",
786
+ "syn",
787
+ ]
788
+
689
789
  [[package]]
690
790
  name = "toml"
691
791
  version = "0.8.23"
@@ -914,8 +1014,37 @@ dependencies = [
914
1014
  "syn",
915
1015
  ]
916
1016
 
1017
+ [[package]]
1018
+ name = "zip"
1019
+ version = "2.4.2"
1020
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1021
+ checksum = "fabe6324e908f85a1c52063ce7aa26b68dcb7eb6dbc83a2d148403c9bc3eba50"
1022
+ dependencies = [
1023
+ "arbitrary",
1024
+ "crc32fast",
1025
+ "crossbeam-utils",
1026
+ "displaydoc",
1027
+ "flate2",
1028
+ "indexmap",
1029
+ "memchr",
1030
+ "thiserror 2.0.18",
1031
+ "zopfli",
1032
+ ]
1033
+
917
1034
  [[package]]
918
1035
  name = "zmij"
919
1036
  version = "1.0.21"
920
1037
  source = "registry+https://github.com/rust-lang/crates.io-index"
921
1038
  checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
1039
+
1040
+ [[package]]
1041
+ name = "zopfli"
1042
+ version = "0.8.3"
1043
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1044
+ checksum = "f05cd8797d63865425ff89b5c4a48804f35ba0ce8d125800027ad6017d2b5249"
1045
+ dependencies = [
1046
+ "bumpalo",
1047
+ "crc32fast",
1048
+ "log",
1049
+ "simd-adler32",
1050
+ ]
@@ -1,7 +1,9 @@
1
1
  [package]
2
2
  name = "dumpling"
3
- version = "0.7.0-alpha"
3
+ version = "0.7.0-beta"
4
4
  edition = "2021"
5
+ license = "MIT"
6
+ authors = ["Andy Babic"]
5
7
  readme = "README.md"
6
8
 
7
9
  [dependencies]
@@ -19,3 +21,5 @@ chrono = { version = "0.4" }
19
21
  lazy_static = "1"
20
22
  fake = { version = "4", features = ["derive"] }
21
23
  rand = "0.9"
24
+ flate2 = "1"
25
+ zip = { version = "2", default-features = false, features = ["deflate"] }
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Andy Babic
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dumpling-cli
3
- Version: 0.7.0a0
3
+ Version: 0.7.0b0
4
4
  Classifier: Development Status :: 4 - Beta
5
5
  Classifier: Environment :: Console
6
6
  Classifier: Intended Audience :: Developers
@@ -14,8 +14,11 @@ Classifier: Topic :: Database
14
14
  Classifier: Topic :: Security
15
15
  Classifier: Topic :: Software Development :: Libraries
16
16
  Classifier: Topic :: Utilities
17
+ License-File: LICENSE
17
18
  Summary: Static anonymizer for plain SQL dumps (PostgreSQL, SQLite, SQL Server).
18
19
  Keywords: postgres,sqlite,mssql,sql,anonymization,cli,rust
20
+ Author: Andy Babic
21
+ License-Expression: MIT
19
22
  Requires-Python: >=3.8
20
23
  Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
21
24
 
@@ -101,12 +104,11 @@ dumpling --help
101
104
 
102
105
  Follow these steps once; you will have a working path from “raw dump” to “first sanitized output,” then you can deepen coverage using the rest of this README and the [documentation site](https://ababic.github.io/dumpling/).
103
106
 
104
- 1. **Start from the example policy** — Copy [`.dumplingconf.example`](.dumplingconf.example) to `.dumplingconf` in your project root (or merge the same keys under `[tool.dumpling]` in `pyproject.toml`). Set environment variables for `salt` and any `${…}` references so Dumpling can resolve secrets at startup.
105
- 2. **Name your tables and columns** — Open your dump next to the config. `CREATE TABLE`, `COPY … ()` and `INSERT INTO … (…)` lines list the identifiers you need for `[rules."table"]` or `[rules."schema.table"]` (see [Configuration (TOML)](#configuration-toml) below). Trim the example rules down to the tables you care about first, then add columns and strategies as you go.
106
- 3. **Run Dumpling** — `dumpling -i dump.sql -o sanitized.sql` (add `-c path` if the config is not in the default search path). Use `dumpling --check -i dump.sql` when you only want to know whether anything would change.
107
- 4. **Tighten the policy** — Run `dumpling lint-policy` on your config. When you are ready for stricter gates, add `[sensitive_columns]` and use `--strict-coverage` / `--report` / `--scan-output` as described under [Usage](#usage).
108
-
109
- **Draft policy generation (planned)** — A future command will stream a dump and emit a **draft** starter TOML so you spend less time hunting table and column names and basic DDL hints (for example `varchar(N)` lengths). Output will be explicitly **draft**: always review and edit before production or compliance workflows; it is a time-saver, not a full policy.
107
+ 1. **Generate a draft policy (recommended)** — Run `dumpling scaffold-config -i dump.sql -o .dumplingconf` to emit a **beta** starter TOML with inferred `[rules]` from column names in `CREATE TABLE`, `INSERT`, and (PostgreSQL) `COPY` headers. Heuristics are **English-oriented**; treat the file as **draft only**—review every rule before production or compliance workflows. Add a global `salt` (for example `salt = "${DUMPLING_SALT}"`) and resolve `${…}` references before anonymizing. Optionally pass **`--infer-json-paths`** to sample up to **five rows per table** (reservoir) and suggest nested JSON keys as `column.path.to.leaf`; use **`--max-json-depth`** if you need a different walk depth (default 24). For PostgreSQL **custom-format** or **directory-format** archives, pass **`--input`** pointing at the archive with **`--format postgres`** (default); Dumpling auto-detects and runs **`pg_restore`** (optional **`--pg-restore-path`** / **`--pg-restore-arg`**). See `dumpling scaffold-config --help`.
108
+ 2. **Or start from the example policy** — Copy [`.dumplingconf.example`](.dumplingconf.example) to `.dumplingconf` (or merge under `[tool.dumpling]` in `pyproject.toml`) and edit `[rules]` by hand. Set environment variables for `salt` and any `${…}` references so Dumpling can resolve secrets at startup.
109
+ 3. **Align rules with your dump** — If you did not use `scaffold-config`, open the dump beside the config: `CREATE TABLE`, `COPY … (…)`, and `INSERT INTO … (…)` lines list identifiers for `[rules."table"]` or `[rules."schema.table"]` (see [Configuration (TOML)](#configuration-toml)). Trim rules to the tables you care about first, then extend columns and strategies as you go.
110
+ 4. **Run Dumpling** — `dumpling -i dump.sql -o sanitized.sql` (add `-c path` if the config is not in the default search path). Use `dumpling --check -i dump.sql` when you only want to know whether anything would change.
111
+ 5. **Tighten the policy** — Run `dumpling lint-policy` on your config. When you are ready for stricter gates, add `[sensitive_columns]` and use `--strict-coverage` / `--report` / `--scan-output` as described under [Usage](#usage).
110
112
 
111
113
  The same flow is spelled out in the docs: [Getting started](https://ababic.github.io/dumpling/getting-started.html).
112
114
 
@@ -131,6 +133,8 @@ dumpling --format sqlite -i data.db.sql -o out.sql # process a SQLite .dump fil
131
133
  dumpling --format mssql -i backup.sql -o out.sql # process a SQL Server plain-SQL dump
132
134
  dumpling lint-policy # lint the anonymization policy config
133
135
  dumpling lint-policy --config .dumplingconf # lint with explicit config path
136
+ dumpling scaffold-config -i dump.sql -o .dumplingconf # draft [rules] from column names (beta)
137
+ dumpling scaffold-config -i dump.sql -o draft.toml --infer-json-paths # include JSON path hints (beta)
134
138
  ```
135
139
 
136
140
  Configuration is loaded in this order:
@@ -493,12 +497,16 @@ Produced by `pg_dump --format=plain`. Handles:
493
497
  Binary, custom, and directory formats from `pg_dump` are not parsed directly — Dumpling’s SQL pipeline expects plain text. Use either:
494
498
 
495
499
  - **`pg_dump --format=plain`** when you control capture, or
496
- - **`dumpling --dump-decode`** with `--input` set to a **custom-format** (`.dump`) or **directory-format** folder: Dumpling runs `pg_restore -f -` and streams the resulting SQL (same as a manual `pg_restore` “script” output, no database required). Requires PostgreSQL client tools on `PATH` (`pg_restore`), or set `--pg-restore-path`. Use `--dump-decode-arg` to pass extra flags (e.g. `--no-owner --no-acl`). **By default** the archive is removed after a fully successful run; pass **`--dump-decode-keep-input`** to retain it. **`--check`** requires **`--dump-decode-keep-input`** so the archive still exists if changes would be detected.
500
+ - **Auto-detected PostgreSQL archives** with `--format postgres` (default): if `--input` is a **custom-format** file (begins with `PGDMP`) or a **directory-format** dump (folder containing `toc.dat`), Dumpling runs **`pg_restore -f -`** and streams the resulting SQL (same as a manual `pg_restore` “script” output; no database required). Requires PostgreSQL client tools on **`PATH`** (`pg_restore`), or **`--pg-restore-path`**. Extra flags: **`--pg-restore-arg`** (repeatable), or defaults from **`[pg_restore]`** in `.dumplingconf` / `pyproject.toml` (CLI wins when set).
501
+
502
+ **Compressed inputs:** **`.gz`** files whose payload is plain SQL are **decompressed in-process** (no temporary file). **ZIP** archives (and gzip wrapping `PGDMP` or an inner ZIP) are expanded under the system temp directory; those paths are removed when the run finishes. **`--in-place`** is rejected when Dumpling had to materialize a temp file to decompress or extract the input, or when the input is a PostgreSQL archive path that must go through `pg_restore` (use **`--output`** or stdout instead).
503
+
504
+ **Keeping archives:** **By default** the `--input` archive path (file or directory-format folder) is **removed** after a fully successful run. Pass **`--keep-original`** or set **`keep_original = true`** in config to retain it. **`--check`** against an archive requires an effective keep-original (CLI or config); **`--keep-original` cannot be combined with `--in-place`**.
497
505
 
498
506
  Example (e.g. after `heroku pg:backups:download`):
499
507
 
500
508
  ```bash
501
- dumpling --dump-decode -i latest.dump -c .dumplingconf -o anonymized.sql
509
+ dumpling -i latest.dump -c .dumplingconf -o anonymized.sql
502
510
  ```
503
511
 
504
512
  ### SQLite (`--format sqlite`)
@@ -80,12 +80,11 @@ dumpling --help
80
80
 
81
81
  Follow these steps once; you will have a working path from “raw dump” to “first sanitized output,” then you can deepen coverage using the rest of this README and the [documentation site](https://ababic.github.io/dumpling/).
82
82
 
83
- 1. **Start from the example policy** — Copy [`.dumplingconf.example`](.dumplingconf.example) to `.dumplingconf` in your project root (or merge the same keys under `[tool.dumpling]` in `pyproject.toml`). Set environment variables for `salt` and any `${…}` references so Dumpling can resolve secrets at startup.
84
- 2. **Name your tables and columns** — Open your dump next to the config. `CREATE TABLE`, `COPY … ()` and `INSERT INTO … (…)` lines list the identifiers you need for `[rules."table"]` or `[rules."schema.table"]` (see [Configuration (TOML)](#configuration-toml) below). Trim the example rules down to the tables you care about first, then add columns and strategies as you go.
85
- 3. **Run Dumpling** — `dumpling -i dump.sql -o sanitized.sql` (add `-c path` if the config is not in the default search path). Use `dumpling --check -i dump.sql` when you only want to know whether anything would change.
86
- 4. **Tighten the policy** — Run `dumpling lint-policy` on your config. When you are ready for stricter gates, add `[sensitive_columns]` and use `--strict-coverage` / `--report` / `--scan-output` as described under [Usage](#usage).
87
-
88
- **Draft policy generation (planned)** — A future command will stream a dump and emit a **draft** starter TOML so you spend less time hunting table and column names and basic DDL hints (for example `varchar(N)` lengths). Output will be explicitly **draft**: always review and edit before production or compliance workflows; it is a time-saver, not a full policy.
83
+ 1. **Generate a draft policy (recommended)** — Run `dumpling scaffold-config -i dump.sql -o .dumplingconf` to emit a **beta** starter TOML with inferred `[rules]` from column names in `CREATE TABLE`, `INSERT`, and (PostgreSQL) `COPY` headers. Heuristics are **English-oriented**; treat the file as **draft only**—review every rule before production or compliance workflows. Add a global `salt` (for example `salt = "${DUMPLING_SALT}"`) and resolve `${…}` references before anonymizing. Optionally pass **`--infer-json-paths`** to sample up to **five rows per table** (reservoir) and suggest nested JSON keys as `column.path.to.leaf`; use **`--max-json-depth`** if you need a different walk depth (default 24). For PostgreSQL **custom-format** or **directory-format** archives, pass **`--input`** pointing at the archive with **`--format postgres`** (default); Dumpling auto-detects and runs **`pg_restore`** (optional **`--pg-restore-path`** / **`--pg-restore-arg`**). See `dumpling scaffold-config --help`.
84
+ 2. **Or start from the example policy** — Copy [`.dumplingconf.example`](.dumplingconf.example) to `.dumplingconf` (or merge under `[tool.dumpling]` in `pyproject.toml`) and edit `[rules]` by hand. Set environment variables for `salt` and any `${…}` references so Dumpling can resolve secrets at startup.
85
+ 3. **Align rules with your dump** — If you did not use `scaffold-config`, open the dump beside the config: `CREATE TABLE`, `COPY … (…)`, and `INSERT INTO … (…)` lines list identifiers for `[rules."table"]` or `[rules."schema.table"]` (see [Configuration (TOML)](#configuration-toml)). Trim rules to the tables you care about first, then extend columns and strategies as you go.
86
+ 4. **Run Dumpling** — `dumpling -i dump.sql -o sanitized.sql` (add `-c path` if the config is not in the default search path). Use `dumpling --check -i dump.sql` when you only want to know whether anything would change.
87
+ 5. **Tighten the policy** — Run `dumpling lint-policy` on your config. When you are ready for stricter gates, add `[sensitive_columns]` and use `--strict-coverage` / `--report` / `--scan-output` as described under [Usage](#usage).
89
88
 
90
89
  The same flow is spelled out in the docs: [Getting started](https://ababic.github.io/dumpling/getting-started.html).
91
90
 
@@ -110,6 +109,8 @@ dumpling --format sqlite -i data.db.sql -o out.sql # process a SQLite .dump fil
110
109
  dumpling --format mssql -i backup.sql -o out.sql # process a SQL Server plain-SQL dump
111
110
  dumpling lint-policy # lint the anonymization policy config
112
111
  dumpling lint-policy --config .dumplingconf # lint with explicit config path
112
+ dumpling scaffold-config -i dump.sql -o .dumplingconf # draft [rules] from column names (beta)
113
+ dumpling scaffold-config -i dump.sql -o draft.toml --infer-json-paths # include JSON path hints (beta)
113
114
  ```
114
115
 
115
116
  Configuration is loaded in this order:
@@ -472,12 +473,16 @@ Produced by `pg_dump --format=plain`. Handles:
472
473
  Binary, custom, and directory formats from `pg_dump` are not parsed directly — Dumpling’s SQL pipeline expects plain text. Use either:
473
474
 
474
475
  - **`pg_dump --format=plain`** when you control capture, or
475
- - **`dumpling --dump-decode`** with `--input` set to a **custom-format** (`.dump`) or **directory-format** folder: Dumpling runs `pg_restore -f -` and streams the resulting SQL (same as a manual `pg_restore` “script” output, no database required). Requires PostgreSQL client tools on `PATH` (`pg_restore`), or set `--pg-restore-path`. Use `--dump-decode-arg` to pass extra flags (e.g. `--no-owner --no-acl`). **By default** the archive is removed after a fully successful run; pass **`--dump-decode-keep-input`** to retain it. **`--check`** requires **`--dump-decode-keep-input`** so the archive still exists if changes would be detected.
476
+ - **Auto-detected PostgreSQL archives** with `--format postgres` (default): if `--input` is a **custom-format** file (begins with `PGDMP`) or a **directory-format** dump (folder containing `toc.dat`), Dumpling runs **`pg_restore -f -`** and streams the resulting SQL (same as a manual `pg_restore` “script” output; no database required). Requires PostgreSQL client tools on **`PATH`** (`pg_restore`), or **`--pg-restore-path`**. Extra flags: **`--pg-restore-arg`** (repeatable), or defaults from **`[pg_restore]`** in `.dumplingconf` / `pyproject.toml` (CLI wins when set).
477
+
478
+ **Compressed inputs:** **`.gz`** files whose payload is plain SQL are **decompressed in-process** (no temporary file). **ZIP** archives (and gzip wrapping `PGDMP` or an inner ZIP) are expanded under the system temp directory; those paths are removed when the run finishes. **`--in-place`** is rejected when Dumpling had to materialize a temp file to decompress or extract the input, or when the input is a PostgreSQL archive path that must go through `pg_restore` (use **`--output`** or stdout instead).
479
+
480
+ **Keeping archives:** **By default** the `--input` archive path (file or directory-format folder) is **removed** after a fully successful run. Pass **`--keep-original`** or set **`keep_original = true`** in config to retain it. **`--check`** against an archive requires an effective keep-original (CLI or config); **`--keep-original` cannot be combined with `--in-place`**.
476
481
 
477
482
  Example (e.g. after `heroku pg:backups:download`):
478
483
 
479
484
  ```bash
480
- dumpling --dump-decode -i latest.dump -c .dumplingconf -o anonymized.sql
485
+ dumpling -i latest.dump -c .dumplingconf -o anonymized.sql
481
486
  ```
482
487
 
483
488
  ### SQLite (`--format sqlite`)
@@ -6,7 +6,7 @@ Use `--format` to declare the SQL dialect of your input file:
6
6
 
7
7
  | Value | Description |
8
8
  |---|---|
9
- | `postgres` (default) | PostgreSQL `pg_dump` plain-text format. Supports `COPY … FROM stdin` blocks, `"double-quoted"` identifiers, `''`-escaped strings. Custom-format (`-Fc`) or directory dumps can be decoded on the fly with `dumpling --dump-decode` (wraps `pg_restore -f -`; requires client tools). By default the archive is deleted after success; use `--dump-decode-keep-input` to retain it. |
9
+ | `postgres` (default) | PostgreSQL `pg_dump` plain-text format. Supports `COPY … FROM stdin` blocks, `"double-quoted"` identifiers, `''`-escaped strings. **Custom-format** (`PGDMP`) and **directory-format** (`toc.dat`) dumps are **auto-detected** and decoded with `pg_restore -f -` (requires client tools). **Gzip**-wrapped plain SQL is decompressed in-process; **ZIP** (or gzip wrapping `PGDMP`/nested ZIP) uses a temp file that is removed after the run. By default the archive is deleted after success; use **`--keep-original`** or **`keep_original`** in config to retain it. |
10
10
  | `sqlite` | SQLite `.dump` format. Adds `INSERT OR REPLACE INTO` / `INSERT OR IGNORE INTO` support. No COPY blocks. |
11
11
  | `mssql` | SQL Server / MSSQL plain SQL. Adds `[bracket]` identifier quoting, `N'…'` Unicode string literals, and `nvarchar(n)` / `nchar(n)` length extraction. No COPY blocks. |
12
12
 
@@ -17,26 +17,58 @@ dumpling --format sqlite -i data.db.sql -o anonymized.sql
17
17
  dumpling --format mssql -i backup.sql -o anonymized.sql
18
18
  ```
19
19
 
20
- ### PostgreSQL custom-format archives (`--dump-decode`)
20
+ ### PostgreSQL archives and compressed inputs
21
21
 
22
- Heroku PGBackups and many pipelines ship **`pg_dump` custom format** (`-Fc`) or **directory-format** dumps to save bandwidth. Dumpling’s SQL engine still expects **plain text**; use **`--dump-decode`** so Dumpling runs **`pg_restore -f -`** (script to stdout, no database) and pipes the result through the same anonymizer as a normal plain-SQL file.
22
+ Heroku PGBackups and many pipelines ship **`pg_dump` custom format** (`-Fc`), **directory-format** dumps, or **gzip**/**ZIP**-wrapped files. Dumpling’s SQL engine still expects **plain text** at the parser; anything else is normalized first.
23
23
 
24
- **Requirements:** PostgreSQL client tools on `PATH` (`pg_restore`), or set **`--pg-restore-path`**. Use **`--dump-decode-arg`** (repeatable) for extra `pg_restore` flags, e.g. `--dump-decode-arg=--no-owner --dump-decode-arg=--no-acl`.
24
+ #### Custom-format and directory dumps (auto-detected)
25
25
 
26
- **Input deletion:** After a **fully successful** run, Dumpling **removes** the `--input` path (single file or directory-format folder) by default so only the anonymized output remains. Pass **`--dump-decode-keep-input`** to retain the archive.
26
+ With **`--format postgres`** (default), Dumpling detects:
27
27
 
28
- **Check mode:** **`--check`** with **`--dump-decode`** requires **`--dump-decode-keep-input`**. Otherwise the default would delete the dump before you can iterate on config.
28
+ - **Custom-format** files (magic `PGDMP` at the start of the file), and
29
+ - **Directory-format** folders (a `toc.dat` beside table blobs),
29
30
 
30
- Example (e.g. after `heroku pg:backups:download`):
31
+ then runs **`pg_restore -f -`** (script to stdout inside the process — no database) and pipes the result through the same anonymizer as a normal plain-SQL file. Detection from **`--input`** is automatic.
32
+
33
+ **Requirements:** PostgreSQL client tools on **`PATH`** (`pg_restore`), or **`--pg-restore-path`**.
34
+
35
+ **Extra `pg_restore` arguments:**
36
+
37
+ - CLI: **`--pg-restore-arg`** (repeatable), e.g. `--pg-restore-arg=--no-owner --pg-restore-arg=--no-acl`
38
+ - Config (optional): **`[pg_restore]`** — CLI overrides these when you pass path or args:
39
+
40
+ ```toml
41
+ [pg_restore]
42
+ path = "/usr/bin/pg_restore"
43
+ args = ["--no-owner", "--no-acl"]
44
+ ```
45
+
46
+ #### Gzip and ZIP wrappers
47
+
48
+ - **Gzip (`.gz`)** whose decompressed payload is **plain SQL**: decompressed **in-process** (streamed); no temporary dump file.
49
+ - **ZIP** containing a single dump file (or a single `.sql` when multiple files exist), **gzip wrapping `PGDMP`**, or **gzip wrapping an inner ZIP**: Dumpling writes under the system temp directory and **removes** those paths when the run completes (including after errors — cleanup runs on drop).
50
+
51
+ **`--in-place`** is **rejected** when Dumpling had to **materialize** a temp file for compression **or** when the resolved input is a PostgreSQL archive decoded via **`pg_restore`** (use **`--output`** or stdout).
52
+
53
+ #### Keeping inputs and `--check`
54
+
55
+ After a **fully successful** run, Dumpling **removes** the `--input` archive path (single file or directory-format folder) **by default**. To keep it:
56
+
57
+ - **`--keep-original`**, or
58
+ - **`keep_original = true`** at the top level of `.dumplingconf` / `[tool.dumpling]` (merged with CLI; **`--keep-original` cannot be used with `--in-place`**).
59
+
60
+ **`--check`** with a PostgreSQL archive requires an **effective** keep-original (CLI or config); otherwise the default deletion would remove the dump before you iterate on policy.
61
+
62
+ Examples (e.g. after `heroku pg:backups:download`):
31
63
 
32
64
  ```bash
33
- dumpling --dump-decode -i latest.dump -c .dumplingconf -o anonymized.sql
65
+ dumpling -i latest.dump -c .dumplingconf -o anonymized.sql
34
66
  ```
35
67
 
36
68
  Dry run while keeping the downloaded file:
37
69
 
38
70
  ```bash
39
- dumpling --dump-decode --dump-decode-keep-input --check -i latest.dump -c .dumplingconf
71
+ dumpling --keep-original --check -i latest.dump -c .dumplingconf
40
72
  ```
41
73
 
42
74
  ---
@@ -0,0 +1,63 @@
1
+ # Getting started
2
+
3
+ This page is the **shortest path** from zero to a first successful run. For strategy details, row filters, dump seals, and CI patterns, continue with the [configuration guide](configuration.md) and the repository `README.md`.
4
+
5
+ ## Prerequisites
6
+
7
+ - Rust **stable** toolchain (`rustup` recommended). The repo includes `rust-toolchain.toml` (stable + `rustfmt` + `clippy`) so CI and local `cargo` stay aligned.
8
+ - `cargo` on your `PATH`
9
+
10
+ Optional: run **`./scripts/setup-dev.sh`** once from the repo root — it installs toolchain components, runs **`cargo fetch`**, and installs a pinned **mdBook** under `.tools/` for the same docs build that CI uses.
11
+
12
+ ## Build
13
+
14
+ ```bash
15
+ cargo build --release
16
+ ./target/release/dumpling --help
17
+ ```
18
+
19
+ ### Python / pip (`dumpling-cli`)
20
+
21
+ ```bash
22
+ pip install dumpling-cli
23
+ dumpling --help
24
+ ```
25
+
26
+ ## First anonymization
27
+
28
+ 1. **Generate a draft policy (recommended)** — From your project root (or anywhere you keep config):
29
+
30
+ ```bash
31
+ dumpling scaffold-config -i dump.sql -o .dumplingconf
32
+ ```
33
+
34
+ This **beta** subcommand streams the dump once and writes inferred `[rules]` from SQL column names (`CREATE TABLE`, `INSERT`, and PostgreSQL `COPY` column lists). Heuristics are **English-oriented**; output is **draft only**—review and edit every rule, add a top-level **`salt`** (for hashing) and any **`${…}`** secret placeholders before production use.
35
+
36
+ Useful flags:
37
+
38
+ - **`--infer-json-paths`** — Keep up to **five sampled rows per table** (reservoir) and suggest nested JSON rules as `column.path.leaf`.
39
+ - **`--max-json-depth`** — Cap JSON walking depth when using `--infer-json-paths` (default 24).
40
+ - **`--format`** — `postgres` (default), `sqlite`, or `mssql`.
41
+ - **`--pg-restore-path`** / **`--pg-restore-arg`** — Optional **`pg_restore`** binary and extra arguments when **`--input`** is a PostgreSQL custom-format or directory-format archive (auto-detected with **`--format postgres`**); see [PostgreSQL archives and compressed inputs](configuration.md#postgresql-archives-and-compressed-inputs).
42
+
43
+ Run `dumpling scaffold-config --help` for the full flag list.
44
+
45
+ 2. **Or start from the example policy** — Copy [`.dumplingconf.example`](https://github.com/ababic/dumpling/blob/main/.dumplingconf.example) to `.dumplingconf` (or merge under `[tool.dumpling]` in `pyproject.toml`) and author `[rules]` by hand. Set environment variables for `salt` and any `${…}` references.
46
+
47
+ 3. **Align rules with your dump (manual path only)** — If you skipped `scaffold-config`, use `CREATE TABLE`, `COPY … (…)`, and `INSERT INTO … (…)` lines to name `[rules."table"]` or `[rules."schema.table"]` keys. Trim to the tables you care about first.
48
+
49
+ 4. **Run Dumpling** — `dumpling -i dump.sql -o sanitized.sql` (add `-c path` if the config is not in the default search path). Use `dumpling --check -i dump.sql` when you only want to know whether anything would change.
50
+
51
+ 5. **Tighten the policy** — Run `dumpling lint-policy` on your config. When you are ready for stricter gates, add `[sensitive_columns]` and use `--strict-coverage`, `--report`, and `--scan-output` as described in the [configuration guide](configuration.md) and the repository `README.md`.
52
+
53
+ ## PostgreSQL custom-format archives
54
+
55
+ If your input is a PostgreSQL **custom-format** file or **directory-format** folder (not plain SQL), use **`--format postgres`** (default): Dumpling **auto-detects** the archive and runs **`pg_restore -f -`** (needs `pg_restore` from PostgreSQL client tools). Gzip-wrapped plain SQL is streamed without a temp file; ZIP (or gzip wrapping `PGDMP`) uses a temp extract that is cleaned up afterward. See [PostgreSQL archives and compressed inputs](configuration.md#postgresql-archives-and-compressed-inputs) in the configuration guide.
56
+
57
+ ## Test locally (contributors)
58
+
59
+ ```bash
60
+ cargo fmt --all -- --check
61
+ cargo clippy --all-targets --all-features
62
+ cargo test --all-targets --all-features
63
+ ```
@@ -1,8 +1,8 @@
1
1
  # Dumpling documentation
2
2
 
3
- Dumpling is a streaming anonymizer for plain SQL dumps. It supports PostgreSQL (`pg_dump` plain format), SQLite (`.dump`), and SQL Server / MSSQL (SSMS / mssql-scripter plain SQL output). For PostgreSQL **custom-format** archives (e.g. Heroku `pg:backups:download`), use **`--dump-decode`** so Dumpling invokes `pg_restore` and streams plain SQL—see [Dump format](configuration.html#postgresql-custom-format-archives---dump-decode) in the configuration guide.
3
+ Dumpling is a streaming anonymizer for plain SQL dumps. It supports PostgreSQL (`pg_dump` plain format), SQLite (`.dump`), and SQL Server / MSSQL (SSMS / mssql-scripter plain SQL output). For PostgreSQL **custom-format** or **directory-format** archives (e.g. Heroku `pg:backups:download`), Dumpling **auto-detects** them when `--format postgres` (the default) is in effect and invokes `pg_restore -f -`—see [PostgreSQL archives and compressed inputs](configuration.html#postgresql-archives-and-compressed-inputs) in the configuration guide.
4
4
 
5
- **New here?** Start with [**Getting started**](getting-started.html): copy the example config, align rules with your dump, run Dumpling, then tighten with `lint-policy` and optional CI flags.
5
+ **New here?** Start with [**Getting started**](getting-started.html): generate a **draft** policy with `scaffold-config`, review and add secrets, run Dumpling, then tighten with `lint-policy` and optional CI flags.
6
6
 
7
7
  This documentation covers the operating model for day-to-day use:
8
8
 
@@ -4,9 +4,12 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "dumpling-cli"
7
- version = "0.7.0-alpha"
7
+ version = "0.7.0-beta"
8
8
  description = "Static anonymizer for plain SQL dumps (PostgreSQL, SQLite, SQL Server)."
9
9
  readme = "README.md"
10
+ license = "MIT"
11
+ license-files = ["LICENSE"]
12
+ authors = [{ name = "Andy Babic" }]
10
13
  requires-python = ">=3.8"
11
14
  keywords = ["postgres", "sqlite", "mssql", "sql", "anonymization", "cli", "rust"]
12
15
  classifiers = [