dumpling-cli 0.4.3__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/.dumplingconf.example +6 -6
  2. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/.github/workflows/ci.yml +1 -1
  3. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/.github/workflows/docs-pr.yml +1 -1
  4. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/.github/workflows/docs.yml +3 -3
  5. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/.github/workflows/platform-compat-latest.yml +1 -1
  6. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/.github/workflows/platform-compat-matrix.yml +1 -1
  7. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/.github/workflows/policy-lint.yml +1 -1
  8. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/.github/workflows/publish.yml +8 -8
  9. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/.github/workflows/release.yml +1 -1
  10. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/.github/workflows/tests.yml +1 -1
  11. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/CHANGELOG.md +29 -0
  12. dumpling_cli-0.6.0/CONTRIBUTORS.md +6 -0
  13. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/Cargo.lock +1 -1
  14. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/Cargo.toml +1 -1
  15. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/PKG-INFO +21 -7
  16. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/README.md +20 -6
  17. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/docs/src/releasing.md +1 -1
  18. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/pyproject.toml +1 -1
  19. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/src/faker_dispatch.rs +66 -6
  20. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/src/filter.rs +25 -36
  21. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/src/main.rs +213 -13
  22. dumpling_cli-0.6.0/src/seal.rs +482 -0
  23. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/src/settings.rs +35 -50
  24. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/src/sql.rs +133 -140
  25. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/src/transform.rs +82 -63
  26. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/.gitignore +0 -0
  27. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/AGENTS.md +0 -0
  28. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/CONTRIBUTING.md +0 -0
  29. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/MAINTENANCE.md +0 -0
  30. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/assets/logo.svg +0 -0
  31. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/book.toml +0 -0
  32. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/datetime_out.sql +0 -0
  33. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/datetime_sample.sql +0 -0
  34. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/docs/src/SUMMARY.md +0 -0
  35. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/docs/src/ci-guardrails.md +0 -0
  36. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/docs/src/configuration.md +0 -0
  37. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/docs/src/getting-started.md +0 -0
  38. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/docs/src/index.md +0 -0
  39. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/rust-toolchain.toml +0 -0
  40. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/scripts/setup-dev.sh +0 -0
  41. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/src/lint.rs +0 -0
  42. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/src/report.rs +0 -0
  43. {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/src/scan.rs +0 -0
@@ -31,12 +31,12 @@ salt = "${DUMPLING_GLOBAL_SALT}"
31
31
  # Faker modules: https://docs.rs/fake/latest/fake/faker/index.html
32
32
  # Upstream repo: https://github.com/cksac/fake-rs
33
33
  [rules."public.users"]
34
- # email — fake email via Rust `fake` crate; force quoted string output
35
- email = { strategy = "faker", faker = "internet::SafeEmail", domain = "customer_identity", unique_within_domain = true }
36
- # name — locale-aware full name (see `locale`); other generators use `faker = "module::Type"`
37
- full_name = { strategy = "faker", faker = "name::Name" }
38
- first_name = { strategy = "faker", faker = "name::FirstName" }
39
- last_name = { strategy = "faker", faker = "name::LastName" }
34
+ # email — safe fake email (built-in); force quoted string output
35
+ email = { strategy = "email", domain = "customer_identity", unique_within_domain = true }
36
+ # name — locale-aware full name (see `locale`)
37
+ full_name = { strategy = "name" }
38
+ first_name = { strategy = "first_name" }
39
+ last_name = { strategy = "last_name" }
40
40
  # phone — US-style (xxx) xxx-xxxx
41
41
  phone = { strategy = "phone" }
42
42
  # ssn — SHA-256 hex of original; use per-column salt for extra protection
@@ -15,7 +15,7 @@ jobs:
15
15
  runs-on: ubuntu-latest
16
16
  steps:
17
17
  - name: Checkout
18
- uses: actions/checkout@v4
18
+ uses: actions/checkout@v6
19
19
 
20
20
  - name: Install Rust toolchain
21
21
  uses: dtolnay/rust-toolchain@stable
@@ -24,7 +24,7 @@ jobs:
24
24
  runs-on: ubuntu-latest
25
25
  steps:
26
26
  - name: Checkout
27
- uses: actions/checkout@v4
27
+ uses: actions/checkout@v6
28
28
 
29
29
  - name: Install mdBook
30
30
  uses: peaceiris/actions-mdbook@v2
@@ -26,7 +26,7 @@ jobs:
26
26
  contents: read
27
27
  steps:
28
28
  - name: Checkout
29
- uses: actions/checkout@v4
29
+ uses: actions/checkout@v6
30
30
 
31
31
  - name: Install mdBook
32
32
  uses: peaceiris/actions-mdbook@v2
@@ -37,7 +37,7 @@ jobs:
37
37
  run: mdbook build
38
38
 
39
39
  - name: Upload Pages deployment artifact
40
- uses: actions/upload-pages-artifact@v3
40
+ uses: actions/upload-pages-artifact@v5
41
41
  with:
42
42
  path: docs/book
43
43
 
@@ -53,4 +53,4 @@ jobs:
53
53
  steps:
54
54
  - name: Deploy docs to GitHub Pages
55
55
  id: deployment
56
- uses: actions/deploy-pages@v4
56
+ uses: actions/deploy-pages@v5
@@ -22,7 +22,7 @@ jobs:
22
22
  - windows-latest
23
23
  steps:
24
24
  - name: Checkout
25
- uses: actions/checkout@v4
25
+ uses: actions/checkout@v6
26
26
 
27
27
  - name: Install Rust toolchain
28
28
  uses: dtolnay/rust-toolchain@stable
@@ -20,7 +20,7 @@ jobs:
20
20
  - windows-2022
21
21
  steps:
22
22
  - name: Checkout
23
- uses: actions/checkout@v4
23
+ uses: actions/checkout@v6
24
24
 
25
25
  - name: Install Rust toolchain
26
26
  uses: dtolnay/rust-toolchain@stable
@@ -32,7 +32,7 @@ jobs:
32
32
 
33
33
  steps:
34
34
  - name: Checkout
35
- uses: actions/checkout@v4
35
+ uses: actions/checkout@v6
36
36
 
37
37
  - name: Install Rust toolchain
38
38
  uses: dtolnay/rust-toolchain@stable
@@ -32,7 +32,7 @@ jobs:
32
32
  - windows-latest
33
33
  steps:
34
34
  - name: Checkout
35
- uses: actions/checkout@v4
35
+ uses: actions/checkout@v6
36
36
 
37
37
  - name: Install Rust toolchain
38
38
  uses: dtolnay/rust-toolchain@stable
@@ -41,7 +41,7 @@ jobs:
41
41
  uses: Swatinem/rust-cache@v2
42
42
 
43
43
  - name: Set up Python
44
- uses: actions/setup-python@v5
44
+ uses: actions/setup-python@v6
45
45
  with:
46
46
  python-version: "3.12"
47
47
 
@@ -52,7 +52,7 @@ jobs:
52
52
  run: python -m maturin build --release --out dist
53
53
 
54
54
  - name: Upload wheel artifacts
55
- uses: actions/upload-artifact@v4
55
+ uses: actions/upload-artifact@v7
56
56
  with:
57
57
  name: wheels-${{ matrix.os }}
58
58
  path: dist/*.whl
@@ -63,10 +63,10 @@ jobs:
63
63
  runs-on: ubuntu-latest
64
64
  steps:
65
65
  - name: Checkout
66
- uses: actions/checkout@v4
66
+ uses: actions/checkout@v6
67
67
 
68
68
  - name: Set up Python
69
- uses: actions/setup-python@v5
69
+ uses: actions/setup-python@v6
70
70
  with:
71
71
  python-version: "3.12"
72
72
 
@@ -77,7 +77,7 @@ jobs:
77
77
  run: python -m maturin sdist --out dist
78
78
 
79
79
  - name: Upload sdist artifact
80
- uses: actions/upload-artifact@v4
80
+ uses: actions/upload-artifact@v7
81
81
  with:
82
82
  name: sdist
83
83
  path: dist/*.tar.gz
@@ -96,7 +96,7 @@ jobs:
96
96
  id-token: write
97
97
  steps:
98
98
  - name: Download built distributions
99
- uses: actions/download-artifact@v4
99
+ uses: actions/download-artifact@v8
100
100
  with:
101
101
  pattern: "*"
102
102
  path: dist
@@ -123,7 +123,7 @@ jobs:
123
123
  id-token: write
124
124
  steps:
125
125
  - name: Download built distributions
126
- uses: actions/download-artifact@v4
126
+ uses: actions/download-artifact@v8
127
127
  with:
128
128
  pattern: "*"
129
129
  path: dist
@@ -13,7 +13,7 @@ jobs:
13
13
  runs-on: ubuntu-latest
14
14
  steps:
15
15
  - name: Checkout
16
- uses: actions/checkout@v4
16
+ uses: actions/checkout@v6
17
17
 
18
18
  - name: Install Rust toolchain
19
19
  uses: dtolnay/rust-toolchain@stable
@@ -15,7 +15,7 @@ jobs:
15
15
  runs-on: ubuntu-latest
16
16
  steps:
17
17
  - name: Checkout
18
- uses: actions/checkout@v4
18
+ uses: actions/checkout@v6
19
19
 
20
20
  - name: Install Rust toolchain
21
21
  uses: dtolnay/rust-toolchain@stable
@@ -7,6 +7,33 @@ and this project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.ht
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.6.0] - 2026-05-03
11
+
12
+ ### Added
13
+
14
+ - **Dump seal** (leading `-- dumpling-seal:` SQL comment): records Dumpling version, security profile, a SHA-256 fingerprint of the resolved policy, and runtime CLI options that affect transforms (`--format`, sorted `--include-table` / `--exclude-table`, effective PRNG seed in standard profile). When the input already begins with a **matching** seal, the remainder is copied through unchanged; stale or unknown seal lines are stripped and the dump is re-processed. See README for full semantics ([#58](https://github.com/ababic/dumpling/pull/58)).
15
+ - **`--stats`**: prints `wall_ms` plus `domain_cache_hits` and `domain_cache_misses` for quick profiling of large runs ([#59](https://github.com/ababic/dumpling/pull/59)).
16
+ - **`CONTRIBUTORS.md`** ([#59](https://github.com/ababic/dumpling/pull/59)).
17
+
18
+ ### Changed
19
+
20
+ - **Domain-mapped replacement values** use shared `Arc<str>` storage so repeated lookups reuse the same allocation ([#59](https://github.com/ababic/dumpling/pull/59)).
21
+
22
+ ## [0.5.0] - 2026-05-03
23
+
24
+ ### Added
25
+
26
+ - **First-class strategies** `email`, `name`, `first_name`, `last_name`, and `phone` in config (same generators as `faker = "internet::SafeEmail"`, `name::Name`, `name::FirstName`, `name::LastName`, and locale-aware phone). Strategy names are normalized to lowercase at load.
27
+
28
+ ### Changed
29
+
30
+ - **Random-path faker/phone/PII**: one reused `StdRng` on `AnonymizerRegistry` instead of re-seeding per cell.
31
+ - **`faker` locale resolution**: `resolved_locale_key` avoids allocating a `String` per faker call when locale is `en` or absent.
32
+
33
+ ### Performance
34
+
35
+ - Larger default I/O buffers; fewer per-line and per-row allocations on the SQL stream path (INSERT/COPY parsing and row filters).
36
+
10
37
  ## [0.4.3] - 2026-05-03
11
38
 
12
39
  ### Fixed
@@ -74,6 +101,8 @@ and this project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.ht
74
101
  - Configurable output scan severities and per-category thresholds via `[output_scan]`.
75
102
  - JSON report section for output scan findings including category, count, threshold, severity, and sample locations.
76
103
 
104
+ [0.6.0]: https://github.com/ababic/dumpling/compare/v0.5.0...v0.6.0
105
+ [0.5.0]: https://github.com/ababic/dumpling/compare/v0.4.3...v0.5.0
77
106
  [0.4.3]: https://github.com/ababic/dumpling/compare/v0.4.2...v0.4.3
78
107
  [0.4.2]: https://github.com/ababic/dumpling/compare/v0.4.1...v0.4.2
79
108
  [0.4.1]: https://github.com/ababic/dumpling/compare/v0.4.0...v0.4.1
@@ -0,0 +1,6 @@
1
+ # Contributors
2
+
3
+ Thank you to everyone who has helped improve Dumpling.
4
+
5
+ - **Andy Babic** — creator and maintainer
6
+ - **Jordan Hale** — performance and observability (including AI-assisted patches)
@@ -262,7 +262,7 @@ dependencies = [
262
262
 
263
263
  [[package]]
264
264
  name = "dumpling"
265
- version = "0.4.3"
265
+ version = "0.6.0"
266
266
  dependencies = [
267
267
  "anyhow",
268
268
  "chrono",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "dumpling"
3
- version = "0.4.3"
3
+ version = "0.6.0"
4
4
  edition = "2021"
5
5
  readme = "README.md"
6
6
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dumpling-cli
3
- Version: 0.4.3
3
+ Version: 0.6.0
4
4
  Classifier: Development Status :: 4 - Beta
5
5
  Classifier: Environment :: Console
6
6
  Classifier: Intended Audience :: Developers
@@ -129,6 +129,16 @@ If no configuration is found, Dumpling fails closed by default and exits non-zer
129
129
  The error output lists every checked location. Use `--allow-noop` to explicitly
130
130
  permit no-op behavior.
131
131
 
132
+ ### Dump seal (always on)
133
+
134
+ Every successful run that writes output prefixes the stream with a single-line SQL comment:
135
+
136
+ `-- dumpling-seal: v=2 version=<semver> profile=<standard|hardened> sha256=<64 hex chars>`
137
+
138
+ The `sha256` is over canonical JSON that includes the Dumpling version, the active security profile, a stable encoding of the resolved policy (rules, row filters, column cases, sensitive columns, output scan, global salt), and **runtime options** that affect transforms: `--format`, sorted `--include-table` / `--exclude-table` patterns, and the effective `--seed` / `DUMPLING_SEED` value in standard profile (`null` in hardened, where seeds are ignored).
139
+
140
+ If the **input** already begins with a seal line and it **matches** the current run, Dumpling copies the rest of the file through unchanged. If the line looks like a seal but does **not** match (stale policy, different flags, or older `v=`), that line is **dropped** and the dump is re-processed so you do not end up with two seal lines. `--strict-coverage` cannot be combined with a matching seal (table definitions are not scanned in passthrough mode). `--check` writes no output and therefore emits no seal line.
141
+
132
142
  ---
133
143
 
134
144
  ## Configuration (TOML)
@@ -142,8 +152,8 @@ salt = "${DUMPLING_GLOBAL_SALT}"
142
152
 
143
153
  # Rules are keyed by either "table" or "schema.table"
144
154
  [rules."public.users"]
145
- email = { strategy = "faker", faker = "internet::SafeEmail", domain = "customer_identity", unique_within_domain = true }
146
- name = { strategy = "faker", faker = "name::Name", locale = "de_de" } # German-locale name
155
+ email = { strategy = "email", domain = "customer_identity", unique_within_domain = true }
156
+ name = { strategy = "name", locale = "de_de" } # German-locale name
147
157
  ssn = { strategy = "hash", salt = "${env:DUMPLING_USERS_SSN_SALT}", as_string = true } # SHA-256 of original (salted)
148
158
  age = { strategy = "int_range", min = 18, max = 90 }
149
159
 
@@ -183,8 +193,12 @@ token = "high"
183
193
  | `redact` | Replace with `REDACTED` (string) |
184
194
  | `uuid` | Random UUIDv4-like string |
185
195
  | `hash` | SHA-256 hex of original value; supports per-column `salt` and global `salt` |
186
- | `faker` | Values from the Rust [`fake`](https://crates.io/crates/fake) crate ([docs.rs](https://docs.rs/fake/latest/fake/), [`faker` modules](https://docs.rs/fake/latest/fake/faker/index.html)), chosen by a **string identifier** only (`faker = "module::Type"`, e.g. `internet::SafeEmail`). Config is **data only**: nothing from TOML is compiled or executed as Rust at runtime. Use `locale` for locale-aware generators; optional `min`/`max`, `length`, `format` as documented. Unsupported targets fail at config load. New generators require a **new Dumpling release** (or your own fork), not config-side code. |
196
+ | `email` | Safe email address (same generator as `faker = "internet::SafeEmail"`); supports `locale` |
197
+ | `name` | Full name (same as `faker = "name::Name"`); supports `locale` |
198
+ | `first_name` | First name (same as `faker = "name::FirstName"`); supports `locale` |
199
+ | `last_name` | Last name (same as `faker = "name::LastName"`); supports `locale` |
187
200
  | `phone` | Locale-aware fake phone number (configurable via `locale`); defaults to English format |
201
+ | `faker` | Values from the Rust [`fake`](https://crates.io/crates/fake) crate ([docs.rs](https://docs.rs/fake/latest/fake/), [`faker` modules](https://docs.rs/fake/latest/fake/faker/index.html)), chosen by a **string identifier** only (`faker = "module::Type"`, e.g. `internet::SafeEmail`). Config is **data only**: nothing from TOML is compiled or executed as Rust at runtime. Use `locale` for locale-aware generators; optional `min`/`max`, `length`, `format` as documented. Unsupported targets fail at config load. New generators require a **new Dumpling release** (or your own fork), not config-side code. |
188
202
  | `int_range` | Random integer in `[min, max]` |
189
203
  | `string` | Random alphanumeric string (`length = 12` by default) |
190
204
  | `date_fuzz` | Shifts a date by a random number of days in `[min_days, max_days]` (defaults: `-30..30`) |
@@ -244,7 +258,7 @@ dumpling --security-profile hardened --input dump.sql --check
244
258
  - `unique_within_domain`: when true, different source values are assigned unique pseudonyms within the configured `domain`. NULL values are unaffected and always remain NULL.
245
259
  - `min_days` / `max_days`: used by `date_fuzz`.
246
260
  - `min_seconds` / `max_seconds`: used by `time_fuzz` and `datetime_fuzz`.
247
- - `locale`: selects the language/regional format for the `faker` and `phone` strategies. Supported values: `en`, `fr_fr`, `de_de`, `it_it`, `pt_br`, `pt_pt`, `ar_sa`, `zh_cn`, `zh_tw`, `ja_jp`, `cy_gb`. Defaults to `en` when not specified.
261
+ - `locale`: selects the language/regional format for `email`, `name`, `first_name`, `last_name`, `faker`, and `phone`. Supported values: `en`, `fr_fr`, `de_de`, `it_it`, `pt_br`, `pt_pt`, `ar_sa`, `zh_cn`, `zh_tw`, `ja_jp`, `cy_gb`. Defaults to `en` when not specified.
248
262
  - `faker`: required when `strategy = "faker"`. A plain string `"module::Type"` (case-insensitive) that maps to a **built-in** generator compiled into Dumpling—not arbitrary Rust or expressions. Names follow [`fake::faker`](https://docs.rs/fake/latest/fake/faker/index.html) (e.g. `internet::SafeEmail` → `faker::internet::SafeEmail` in the crate).
249
263
  - `format`: used with `faker = "number::NumberWithFormat"`; pattern uses `#` (0–9) and `^` (1–9) per the [`fake` crate docs](https://docs.rs/fake/latest/fake/).
250
264
 
@@ -415,7 +429,7 @@ Define default strategies in `rules."<table>"` and add ordered per-column cases
415
429
  ```toml
416
430
  [rules."public.users"]
417
431
  email = { strategy = "hash", as_string = true } # default
418
- name = { strategy = "faker", faker = "name::Name" }
432
+ name = { strategy = "name" }
419
433
 
420
434
  [[column_cases."public.users".email]]
421
435
  when.any = [{ column = "is_admin", op = "eq", value = "true" }]
@@ -466,7 +480,7 @@ salt = "${DUMPLING_HMAC_KEY}"
466
480
 
467
481
  [rules."public.users"]
468
482
  ssn = { strategy = "hash", as_string = true }
469
- email = { strategy = "faker", faker = "internet::SafeEmail", domain = "users" }
483
+ email = { strategy = "email", domain = "users" }
470
484
  ```
471
485
 
472
486
  ```bash
@@ -108,6 +108,16 @@ If no configuration is found, Dumpling fails closed by default and exits non-zer
108
108
  The error output lists every checked location. Use `--allow-noop` to explicitly
109
109
  permit no-op behavior.
110
110
 
111
+ ### Dump seal (always on)
112
+
113
+ Every successful run that writes output prefixes the stream with a single-line SQL comment:
114
+
115
+ `-- dumpling-seal: v=2 version=<semver> profile=<standard|hardened> sha256=<64 hex chars>`
116
+
117
+ The `sha256` is over canonical JSON that includes the Dumpling version, the active security profile, a stable encoding of the resolved policy (rules, row filters, column cases, sensitive columns, output scan, global salt), and **runtime options** that affect transforms: `--format`, sorted `--include-table` / `--exclude-table` patterns, and the effective `--seed` / `DUMPLING_SEED` value in standard profile (`null` in hardened, where seeds are ignored).
118
+
119
+ If the **input** already begins with a seal line and it **matches** the current run, Dumpling copies the rest of the file through unchanged. If the line looks like a seal but does **not** match (stale policy, different flags, or older `v=`), that line is **dropped** and the dump is re-processed so you do not end up with two seal lines. `--strict-coverage` cannot be combined with a matching seal (table definitions are not scanned in passthrough mode). `--check` writes no output and therefore emits no seal line.
120
+
111
121
  ---
112
122
 
113
123
  ## Configuration (TOML)
@@ -121,8 +131,8 @@ salt = "${DUMPLING_GLOBAL_SALT}"
121
131
 
122
132
  # Rules are keyed by either "table" or "schema.table"
123
133
  [rules."public.users"]
124
- email = { strategy = "faker", faker = "internet::SafeEmail", domain = "customer_identity", unique_within_domain = true }
125
- name = { strategy = "faker", faker = "name::Name", locale = "de_de" } # German-locale name
134
+ email = { strategy = "email", domain = "customer_identity", unique_within_domain = true }
135
+ name = { strategy = "name", locale = "de_de" } # German-locale name
126
136
  ssn = { strategy = "hash", salt = "${env:DUMPLING_USERS_SSN_SALT}", as_string = true } # SHA-256 of original (salted)
127
137
  age = { strategy = "int_range", min = 18, max = 90 }
128
138
 
@@ -162,8 +172,12 @@ token = "high"
162
172
  | `redact` | Replace with `REDACTED` (string) |
163
173
  | `uuid` | Random UUIDv4-like string |
164
174
  | `hash` | SHA-256 hex of original value; supports per-column `salt` and global `salt` |
165
- | `faker` | Values from the Rust [`fake`](https://crates.io/crates/fake) crate ([docs.rs](https://docs.rs/fake/latest/fake/), [`faker` modules](https://docs.rs/fake/latest/fake/faker/index.html)), chosen by a **string identifier** only (`faker = "module::Type"`, e.g. `internet::SafeEmail`). Config is **data only**: nothing from TOML is compiled or executed as Rust at runtime. Use `locale` for locale-aware generators; optional `min`/`max`, `length`, `format` as documented. Unsupported targets fail at config load. New generators require a **new Dumpling release** (or your own fork), not config-side code. |
175
+ | `email` | Safe email address (same generator as `faker = "internet::SafeEmail"`); supports `locale` |
176
+ | `name` | Full name (same as `faker = "name::Name"`); supports `locale` |
177
+ | `first_name` | First name (same as `faker = "name::FirstName"`); supports `locale` |
178
+ | `last_name` | Last name (same as `faker = "name::LastName"`); supports `locale` |
166
179
  | `phone` | Locale-aware fake phone number (configurable via `locale`); defaults to English format |
180
+ | `faker` | Values from the Rust [`fake`](https://crates.io/crates/fake) crate ([docs.rs](https://docs.rs/fake/latest/fake/), [`faker` modules](https://docs.rs/fake/latest/fake/faker/index.html)), chosen by a **string identifier** only (`faker = "module::Type"`, e.g. `internet::SafeEmail`). Config is **data only**: nothing from TOML is compiled or executed as Rust at runtime. Use `locale` for locale-aware generators; optional `min`/`max`, `length`, `format` as documented. Unsupported targets fail at config load. New generators require a **new Dumpling release** (or your own fork), not config-side code. |
167
181
  | `int_range` | Random integer in `[min, max]` |
168
182
  | `string` | Random alphanumeric string (`length = 12` by default) |
169
183
  | `date_fuzz` | Shifts a date by a random number of days in `[min_days, max_days]` (defaults: `-30..30`) |
@@ -223,7 +237,7 @@ dumpling --security-profile hardened --input dump.sql --check
223
237
  - `unique_within_domain`: when true, different source values are assigned unique pseudonyms within the configured `domain`. NULL values are unaffected and always remain NULL.
224
238
  - `min_days` / `max_days`: used by `date_fuzz`.
225
239
  - `min_seconds` / `max_seconds`: used by `time_fuzz` and `datetime_fuzz`.
226
- - `locale`: selects the language/regional format for the `faker` and `phone` strategies. Supported values: `en`, `fr_fr`, `de_de`, `it_it`, `pt_br`, `pt_pt`, `ar_sa`, `zh_cn`, `zh_tw`, `ja_jp`, `cy_gb`. Defaults to `en` when not specified.
240
+ - `locale`: selects the language/regional format for `email`, `name`, `first_name`, `last_name`, `faker`, and `phone`. Supported values: `en`, `fr_fr`, `de_de`, `it_it`, `pt_br`, `pt_pt`, `ar_sa`, `zh_cn`, `zh_tw`, `ja_jp`, `cy_gb`. Defaults to `en` when not specified.
227
241
  - `faker`: required when `strategy = "faker"`. A plain string `"module::Type"` (case-insensitive) that maps to a **built-in** generator compiled into Dumpling—not arbitrary Rust or expressions. Names follow [`fake::faker`](https://docs.rs/fake/latest/fake/faker/index.html) (e.g. `internet::SafeEmail` → `faker::internet::SafeEmail` in the crate).
228
242
  - `format`: used with `faker = "number::NumberWithFormat"`; pattern uses `#` (0–9) and `^` (1–9) per the [`fake` crate docs](https://docs.rs/fake/latest/fake/).
229
243
 
@@ -394,7 +408,7 @@ Define default strategies in `rules."<table>"` and add ordered per-column cases
394
408
  ```toml
395
409
  [rules."public.users"]
396
410
  email = { strategy = "hash", as_string = true } # default
397
- name = { strategy = "faker", faker = "name::Name" }
411
+ name = { strategy = "name" }
398
412
 
399
413
  [[column_cases."public.users".email]]
400
414
  when.any = [{ column = "is_admin", op = "eq", value = "true" }]
@@ -445,7 +459,7 @@ salt = "${DUMPLING_HMAC_KEY}"
445
459
 
446
460
  [rules."public.users"]
447
461
  ssn = { strategy = "hash", as_string = true }
448
- email = { strategy = "faker", faker = "internet::SafeEmail", domain = "users" }
462
+ email = { strategy = "email", domain = "users" }
449
463
  ```
450
464
 
451
465
  ```bash
@@ -11,7 +11,7 @@ This project uses **tag-driven releases**.
11
11
  ## Maintainer checklist
12
12
 
13
13
  1. Ensure `main` is green in CI.
14
- 2. Update `Cargo.toml` version and `CHANGELOG.md`.
14
+ 2. Update `Cargo.toml` and `pyproject.toml` versions and `CHANGELOG.md`.
15
15
  3. Open and merge a release preparation PR.
16
16
  4. Create and push a tag from `main`:
17
17
 
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "dumpling-cli"
7
- version = "0.4.3"
7
+ version = "0.6.0"
8
8
  description = "Static anonymizer for plain SQL dumps (PostgreSQL, SQLite, SQL Server)."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -89,6 +89,71 @@ pub fn parse_faker_path(faker: &str) -> Option<(&str, &str)> {
89
89
  Some((module, typ))
90
90
  }
91
91
 
92
+ /// Normalized locale key for `faker`, `phone`, and built-in PII strategies (`email`, `name`, …).
93
+ /// Uses ASCII case-insensitive matching without allocating.
94
+ pub fn resolved_locale_key(spec: &AnonymizerSpec) -> &'static str {
95
+ let s = spec.locale.as_deref().map(str::trim).unwrap_or("");
96
+ if s.is_empty() || s.eq_ignore_ascii_case("en") {
97
+ return "en";
98
+ }
99
+ if s.eq_ignore_ascii_case("fr_fr") {
100
+ return "fr_fr";
101
+ }
102
+ if s.eq_ignore_ascii_case("de_de") {
103
+ return "de_de";
104
+ }
105
+ if s.eq_ignore_ascii_case("it_it") {
106
+ return "it_it";
107
+ }
108
+ if s.eq_ignore_ascii_case("pt_br") {
109
+ return "pt_br";
110
+ }
111
+ if s.eq_ignore_ascii_case("pt_pt") {
112
+ return "pt_pt";
113
+ }
114
+ if s.eq_ignore_ascii_case("ar_sa") {
115
+ return "ar_sa";
116
+ }
117
+ if s.eq_ignore_ascii_case("zh_cn") {
118
+ return "zh_cn";
119
+ }
120
+ if s.eq_ignore_ascii_case("zh_tw") {
121
+ return "zh_tw";
122
+ }
123
+ if s.eq_ignore_ascii_case("ja_jp") {
124
+ return "ja_jp";
125
+ }
126
+ if s.eq_ignore_ascii_case("cy_gb") {
127
+ return "cy_gb";
128
+ }
129
+ "en"
130
+ }
131
+
132
+ /// Built-in `strategy = "email"` — same generator as `faker = "internet::SafeEmail"`.
133
+ pub fn pii_safe_email(loc: &str, rng: &mut StdRng) -> String {
134
+ fl!(loc, rng, SafeEmail)
135
+ }
136
+
137
+ /// Built-in `strategy = "name"` — full name.
138
+ pub fn pii_full_name(loc: &str, rng: &mut StdRng) -> String {
139
+ fl!(loc, rng, Name)
140
+ }
141
+
142
+ /// Built-in `strategy = "first_name"`.
143
+ pub fn pii_first_name(loc: &str, rng: &mut StdRng) -> String {
144
+ fl!(loc, rng, FirstName)
145
+ }
146
+
147
+ /// Built-in `strategy = "last_name"`.
148
+ pub fn pii_last_name(loc: &str, rng: &mut StdRng) -> String {
149
+ fl!(loc, rng, LastName)
150
+ }
151
+
152
+ /// Built-in `strategy = "phone"` — same generator as `faker` phone_number fakers.
153
+ pub fn pii_phone_number(loc: &str, rng: &mut StdRng) -> String {
154
+ fl!(loc, rng, PhoneNumber)
155
+ }
156
+
92
157
  pub fn faker_string_with_rng(spec: &AnonymizerSpec, rng: &mut StdRng) -> Option<String> {
93
158
  let faker = spec.faker.as_deref()?.trim();
94
159
  if faker.is_empty() {
@@ -97,12 +162,7 @@ pub fn faker_string_with_rng(spec: &AnonymizerSpec, rng: &mut StdRng) -> Option<
97
162
  let (module, typ) = parse_faker_path(faker)?;
98
163
  let module_lc = module.to_ascii_lowercase();
99
164
  let typ_lc = typ.to_ascii_lowercase();
100
- let locale = spec
101
- .locale
102
- .as_deref()
103
- .map(|l| l.trim().to_ascii_lowercase())
104
- .unwrap_or_else(|| "en".to_string());
105
- let loc = locale.as_str();
165
+ let loc = resolved_locale_key(spec);
106
166
 
107
167
  let s: String = match (module_lc.as_str(), typ_lc.as_str()) {
108
168
  ("name", "firstname") => fl!(loc, rng, FirstName),