dumpling-cli 0.4.3__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/.dumplingconf.example +6 -6
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/.github/workflows/ci.yml +1 -1
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/.github/workflows/docs-pr.yml +1 -1
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/.github/workflows/docs.yml +3 -3
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/.github/workflows/platform-compat-latest.yml +1 -1
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/.github/workflows/platform-compat-matrix.yml +1 -1
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/.github/workflows/policy-lint.yml +1 -1
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/.github/workflows/publish.yml +8 -8
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/.github/workflows/release.yml +1 -1
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/.github/workflows/tests.yml +1 -1
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/CHANGELOG.md +29 -0
- dumpling_cli-0.6.0/CONTRIBUTORS.md +6 -0
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/Cargo.lock +1 -1
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/Cargo.toml +1 -1
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/PKG-INFO +21 -7
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/README.md +20 -6
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/docs/src/releasing.md +1 -1
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/pyproject.toml +1 -1
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/src/faker_dispatch.rs +66 -6
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/src/filter.rs +25 -36
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/src/main.rs +213 -13
- dumpling_cli-0.6.0/src/seal.rs +482 -0
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/src/settings.rs +35 -50
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/src/sql.rs +133 -140
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/src/transform.rs +82 -63
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/.gitignore +0 -0
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/AGENTS.md +0 -0
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/CONTRIBUTING.md +0 -0
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/MAINTENANCE.md +0 -0
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/assets/logo.svg +0 -0
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/book.toml +0 -0
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/datetime_out.sql +0 -0
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/datetime_sample.sql +0 -0
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/docs/src/SUMMARY.md +0 -0
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/docs/src/ci-guardrails.md +0 -0
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/docs/src/configuration.md +0 -0
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/docs/src/getting-started.md +0 -0
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/docs/src/index.md +0 -0
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/rust-toolchain.toml +0 -0
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/scripts/setup-dev.sh +0 -0
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/src/lint.rs +0 -0
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/src/report.rs +0 -0
- {dumpling_cli-0.4.3 → dumpling_cli-0.6.0}/src/scan.rs +0 -0
|
@@ -31,12 +31,12 @@ salt = "${DUMPLING_GLOBAL_SALT}"
|
|
|
31
31
|
# Faker modules: https://docs.rs/fake/latest/fake/faker/index.html
|
|
32
32
|
# Upstream repo: https://github.com/cksac/fake-rs
|
|
33
33
|
[rules."public.users"]
|
|
34
|
-
# email — fake email
|
|
35
|
-
email = { strategy = "
|
|
36
|
-
# name — locale-aware full name (see `locale`)
|
|
37
|
-
full_name = { strategy = "
|
|
38
|
-
first_name = { strategy = "
|
|
39
|
-
last_name = { strategy = "
|
|
34
|
+
# email — safe fake email (built-in); each distinct source value gets a unique pseudonym within the "customer_identity" domain
|
|
35
|
+
email = { strategy = "email", domain = "customer_identity", unique_within_domain = true }
|
|
36
|
+
# name — locale-aware full name (see `locale`)
|
|
37
|
+
full_name = { strategy = "name" }
|
|
38
|
+
first_name = { strategy = "first_name" }
|
|
39
|
+
last_name = { strategy = "last_name" }
|
|
40
40
|
# phone — US-style (xxx) xxx-xxxx
|
|
41
41
|
phone = { strategy = "phone" }
|
|
42
42
|
# ssn — SHA-256 hex of original; use per-column salt for extra protection
|
|
@@ -26,7 +26,7 @@ jobs:
|
|
|
26
26
|
contents: read
|
|
27
27
|
steps:
|
|
28
28
|
- name: Checkout
|
|
29
|
-
uses: actions/checkout@
|
|
29
|
+
uses: actions/checkout@v6
|
|
30
30
|
|
|
31
31
|
- name: Install mdBook
|
|
32
32
|
uses: peaceiris/actions-mdbook@v2
|
|
@@ -37,7 +37,7 @@ jobs:
|
|
|
37
37
|
run: mdbook build
|
|
38
38
|
|
|
39
39
|
- name: Upload Pages deployment artifact
|
|
40
|
-
uses: actions/upload-pages-artifact@
|
|
40
|
+
uses: actions/upload-pages-artifact@v5
|
|
41
41
|
with:
|
|
42
42
|
path: docs/book
|
|
43
43
|
|
|
@@ -53,4 +53,4 @@ jobs:
|
|
|
53
53
|
steps:
|
|
54
54
|
- name: Deploy docs to GitHub Pages
|
|
55
55
|
id: deployment
|
|
56
|
-
uses: actions/deploy-pages@
|
|
56
|
+
uses: actions/deploy-pages@v5
|
|
@@ -32,7 +32,7 @@ jobs:
|
|
|
32
32
|
- windows-latest
|
|
33
33
|
steps:
|
|
34
34
|
- name: Checkout
|
|
35
|
-
uses: actions/checkout@
|
|
35
|
+
uses: actions/checkout@v6
|
|
36
36
|
|
|
37
37
|
- name: Install Rust toolchain
|
|
38
38
|
uses: dtolnay/rust-toolchain@stable
|
|
@@ -41,7 +41,7 @@ jobs:
|
|
|
41
41
|
uses: Swatinem/rust-cache@v2
|
|
42
42
|
|
|
43
43
|
- name: Set up Python
|
|
44
|
-
uses: actions/setup-python@
|
|
44
|
+
uses: actions/setup-python@v6
|
|
45
45
|
with:
|
|
46
46
|
python-version: "3.12"
|
|
47
47
|
|
|
@@ -52,7 +52,7 @@ jobs:
|
|
|
52
52
|
run: python -m maturin build --release --out dist
|
|
53
53
|
|
|
54
54
|
- name: Upload wheel artifacts
|
|
55
|
-
uses: actions/upload-artifact@
|
|
55
|
+
uses: actions/upload-artifact@v7
|
|
56
56
|
with:
|
|
57
57
|
name: wheels-${{ matrix.os }}
|
|
58
58
|
path: dist/*.whl
|
|
@@ -63,10 +63,10 @@ jobs:
|
|
|
63
63
|
runs-on: ubuntu-latest
|
|
64
64
|
steps:
|
|
65
65
|
- name: Checkout
|
|
66
|
-
uses: actions/checkout@
|
|
66
|
+
uses: actions/checkout@v6
|
|
67
67
|
|
|
68
68
|
- name: Set up Python
|
|
69
|
-
uses: actions/setup-python@
|
|
69
|
+
uses: actions/setup-python@v6
|
|
70
70
|
with:
|
|
71
71
|
python-version: "3.12"
|
|
72
72
|
|
|
@@ -77,7 +77,7 @@ jobs:
|
|
|
77
77
|
run: python -m maturin sdist --out dist
|
|
78
78
|
|
|
79
79
|
- name: Upload sdist artifact
|
|
80
|
-
uses: actions/upload-artifact@
|
|
80
|
+
uses: actions/upload-artifact@v7
|
|
81
81
|
with:
|
|
82
82
|
name: sdist
|
|
83
83
|
path: dist/*.tar.gz
|
|
@@ -96,7 +96,7 @@ jobs:
|
|
|
96
96
|
id-token: write
|
|
97
97
|
steps:
|
|
98
98
|
- name: Download built distributions
|
|
99
|
-
uses: actions/download-artifact@
|
|
99
|
+
uses: actions/download-artifact@v8
|
|
100
100
|
with:
|
|
101
101
|
pattern: "*"
|
|
102
102
|
path: dist
|
|
@@ -123,7 +123,7 @@ jobs:
|
|
|
123
123
|
id-token: write
|
|
124
124
|
steps:
|
|
125
125
|
- name: Download built distributions
|
|
126
|
-
uses: actions/download-artifact@
|
|
126
|
+
uses: actions/download-artifact@v8
|
|
127
127
|
with:
|
|
128
128
|
pattern: "*"
|
|
129
129
|
path: dist
|
|
@@ -7,6 +7,33 @@ and this project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.ht
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.6.0] - 2026-05-03
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- **Dump seal** (leading `-- dumpling-seal:` SQL comment): records Dumpling version, security profile, a SHA-256 fingerprint of the resolved policy, and runtime CLI options that affect transforms (`--format`, sorted `--include-table` / `--exclude-table`, effective PRNG seed in standard profile). When the input already begins with a **matching** seal, the remainder is copied through unchanged; stale or unknown seal lines are stripped and the dump is re-processed. See README for full semantics ([#58](https://github.com/ababic/dumpling/pull/58)).
|
|
15
|
+
- **`--stats`**: prints `wall_ms` plus `domain_cache_hits` and `domain_cache_misses` for quick profiling of large runs ([#59](https://github.com/ababic/dumpling/pull/59)).
|
|
16
|
+
- **`CONTRIBUTORS.md`** ([#59](https://github.com/ababic/dumpling/pull/59)).
|
|
17
|
+
|
|
18
|
+
### Changed
|
|
19
|
+
|
|
20
|
+
- **Domain-mapped replacement values** use shared `Arc<str>` storage so repeated lookups reuse the same allocation ([#59](https://github.com/ababic/dumpling/pull/59)).
|
|
21
|
+
|
|
22
|
+
## [0.5.0] - 2026-05-03
|
|
23
|
+
|
|
24
|
+
### Added
|
|
25
|
+
|
|
26
|
+
- **First-class strategies** `email`, `name`, `first_name`, `last_name`, and `phone` in config (same generators as `faker = "internet::SafeEmail"`, `name::Name`, `name::FirstName`, `name::LastName`, and locale-aware phone). Strategy names are normalized to lowercase at load.
|
|
27
|
+
|
|
28
|
+
### Changed
|
|
29
|
+
|
|
30
|
+
- **Random-path faker/phone/PII**: one reused `StdRng` on `AnonymizerRegistry` instead of re-seeding per cell.
|
|
31
|
+
- **`faker` locale resolution**: `resolved_locale_key` avoids allocating a `String` per faker call when locale is `en` or absent.
|
|
32
|
+
|
|
33
|
+
### Performance
|
|
34
|
+
|
|
35
|
+
- Larger default I/O buffers; fewer per-line and per-row allocations on the SQL stream path (INSERT/COPY parsing and row filters).
|
|
36
|
+
|
|
10
37
|
## [0.4.3] - 2026-05-03
|
|
11
38
|
|
|
12
39
|
### Fixed
|
|
@@ -74,6 +101,8 @@ and this project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.ht
|
|
|
74
101
|
- Configurable output scan severities and per-category thresholds via `[output_scan]`.
|
|
75
102
|
- JSON report section for output scan findings including category, count, threshold, severity, and sample locations.
|
|
76
103
|
|
|
104
|
+
[0.6.0]: https://github.com/ababic/dumpling/compare/v0.5.0...v0.6.0
|
|
105
|
+
[0.5.0]: https://github.com/ababic/dumpling/compare/v0.4.3...v0.5.0
|
|
77
106
|
[0.4.3]: https://github.com/ababic/dumpling/compare/v0.4.2...v0.4.3
|
|
78
107
|
[0.4.2]: https://github.com/ababic/dumpling/compare/v0.4.1...v0.4.2
|
|
79
108
|
[0.4.1]: https://github.com/ababic/dumpling/compare/v0.4.0...v0.4.1
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dumpling-cli
|
|
3
|
-
Version: 0.4.3
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Classifier: Development Status :: 4 - Beta
|
|
5
5
|
Classifier: Environment :: Console
|
|
6
6
|
Classifier: Intended Audience :: Developers
|
|
@@ -129,6 +129,16 @@ If no configuration is found, Dumpling fails closed by default and exits non-zer
|
|
|
129
129
|
The error output lists every checked location. Use `--allow-noop` to explicitly
|
|
130
130
|
permit no-op behavior.
|
|
131
131
|
|
|
132
|
+
### Dump seal (always on)
|
|
133
|
+
|
|
134
|
+
Every successful run that writes output prefixes the stream with a single-line SQL comment:
|
|
135
|
+
|
|
136
|
+
`-- dumpling-seal: v=2 version=<semver> profile=<standard|hardened> sha256=<64 hex chars>`
|
|
137
|
+
|
|
138
|
+
The `sha256` is over canonical JSON that includes the Dumpling version, the active security profile, a stable encoding of the resolved policy (rules, row filters, column cases, sensitive columns, output scan, global salt), and **runtime options** that affect transforms: `--format`, sorted `--include-table` / `--exclude-table` patterns, and the effective `--seed` / `DUMPLING_SEED` value in standard profile (`null` in hardened, where seeds are ignored).
|
|
139
|
+
|
|
140
|
+
If the **input** already begins with a seal line and it **matches** the current run, Dumpling copies the rest of the file through unchanged. If the line looks like a seal but does **not** match (stale policy, different flags, or older `v=`), that line is **dropped** and the dump is re-processed so you do not end up with two seal lines. `--strict-coverage` cannot be combined with a matching seal (table definitions are not scanned in passthrough mode). `--check` writes no output and therefore emits no seal line.
|
|
141
|
+
|
|
132
142
|
---
|
|
133
143
|
|
|
134
144
|
## Configuration (TOML)
|
|
@@ -142,8 +152,8 @@ salt = "${DUMPLING_GLOBAL_SALT}"
|
|
|
142
152
|
|
|
143
153
|
# Rules are keyed by either "table" or "schema.table"
|
|
144
154
|
[rules."public.users"]
|
|
145
|
-
email = { strategy = "
|
|
146
|
-
name = { strategy = "
|
|
155
|
+
email = { strategy = "email", domain = "customer_identity", unique_within_domain = true }
|
|
156
|
+
name = { strategy = "name", locale = "de_de" } # German-locale name
|
|
147
157
|
ssn = { strategy = "hash", salt = "${env:DUMPLING_USERS_SSN_SALT}", as_string = true } # SHA-256 of original (salted)
|
|
148
158
|
age = { strategy = "int_range", min = 18, max = 90 }
|
|
149
159
|
|
|
@@ -183,8 +193,12 @@ token = "high"
|
|
|
183
193
|
| `redact` | Replace with `REDACTED` (string) |
|
|
184
194
|
| `uuid` | Random UUIDv4-like string |
|
|
185
195
|
| `hash` | SHA-256 hex of original value; supports per-column `salt` and global `salt` |
|
|
186
|
-
| `
|
|
196
|
+
| `email` | Safe email address (same generator as `faker = "internet::SafeEmail"`); supports `locale` |
|
|
197
|
+
| `name` | Full name (same as `faker = "name::Name"`); supports `locale` |
|
|
198
|
+
| `first_name` | First name (same as `faker = "name::FirstName"`); supports `locale` |
|
|
199
|
+
| `last_name` | Last name (same as `faker = "name::LastName"`); supports `locale` |
|
|
187
200
|
| `phone` | Locale-aware fake phone number (configurable via `locale`); defaults to English format |
|
|
201
|
+
| `faker` | Values from the Rust [`fake`](https://crates.io/crates/fake) crate ([docs.rs](https://docs.rs/fake/latest/fake/), [`faker` modules](https://docs.rs/fake/latest/fake/faker/index.html)), chosen by a **string identifier** only (`faker = "module::Type"`, e.g. `internet::SafeEmail`). Config is **data only**: nothing from TOML is compiled or executed as Rust at runtime. Use `locale` for locale-aware generators; optional `min`/`max`, `length`, `format` as documented. Unsupported targets fail at config load. New generators require a **new Dumpling release** (or your own fork), not config-side code. |
|
|
188
202
|
| `int_range` | Random integer in `[min, max]` |
|
|
189
203
|
| `string` | Random alphanumeric string (`length = 12` by default) |
|
|
190
204
|
| `date_fuzz` | Shifts a date by a random number of days in `[min_days, max_days]` (defaults: `-30..30`) |
|
|
@@ -244,7 +258,7 @@ dumpling --security-profile hardened --input dump.sql --check
|
|
|
244
258
|
- `unique_within_domain`: when true, different source values are assigned unique pseudonyms within the configured `domain`. NULL values are unaffected and always remain NULL.
|
|
245
259
|
- `min_days` / `max_days`: used by `date_fuzz`.
|
|
246
260
|
- `min_seconds` / `max_seconds`: used by `time_fuzz` and `datetime_fuzz`.
|
|
247
|
-
- `locale`: selects the language/regional format for
|
|
261
|
+
- `locale`: selects the language/regional format for `email`, `name`, `first_name`, `last_name`, `faker`, and `phone`. Supported values: `en`, `fr_fr`, `de_de`, `it_it`, `pt_br`, `pt_pt`, `ar_sa`, `zh_cn`, `zh_tw`, `ja_jp`, `cy_gb`. Defaults to `en` when not specified.
|
|
248
262
|
- `faker`: required when `strategy = "faker"`. A plain string `"module::Type"` (case-insensitive) that maps to a **built-in** generator compiled into Dumpling—not arbitrary Rust or expressions. Names follow [`fake::faker`](https://docs.rs/fake/latest/fake/faker/index.html) (e.g. `internet::SafeEmail` → `faker::internet::SafeEmail` in the crate).
|
|
249
263
|
- `format`: used with `faker = "number::NumberWithFormat"`; pattern uses `#` (0–9) and `^` (1–9) per the [`fake` crate docs](https://docs.rs/fake/latest/fake/).
|
|
250
264
|
|
|
@@ -415,7 +429,7 @@ Define default strategies in `rules."<table>"` and add ordered per-column cases
|
|
|
415
429
|
```toml
|
|
416
430
|
[rules."public.users"]
|
|
417
431
|
email = { strategy = "hash", as_string = true } # default
|
|
418
|
-
name = { strategy = "
|
|
432
|
+
name = { strategy = "name" }
|
|
419
433
|
|
|
420
434
|
[[column_cases."public.users".email]]
|
|
421
435
|
when.any = [{ column = "is_admin", op = "eq", value = "true" }]
|
|
@@ -466,7 +480,7 @@ salt = "${DUMPLING_HMAC_KEY}"
|
|
|
466
480
|
|
|
467
481
|
[rules."public.users"]
|
|
468
482
|
ssn = { strategy = "hash", as_string = true }
|
|
469
|
-
email = { strategy = "
|
|
483
|
+
email = { strategy = "email", domain = "users" }
|
|
470
484
|
```
|
|
471
485
|
|
|
472
486
|
```bash
|
|
@@ -108,6 +108,16 @@ If no configuration is found, Dumpling fails closed by default and exits non-zer
|
|
|
108
108
|
The error output lists every checked location. Use `--allow-noop` to explicitly
|
|
109
109
|
permit no-op behavior.
|
|
110
110
|
|
|
111
|
+
### Dump seal (always on)
|
|
112
|
+
|
|
113
|
+
Every successful run that writes output prefixes the stream with a single-line SQL comment:
|
|
114
|
+
|
|
115
|
+
`-- dumpling-seal: v=2 version=<semver> profile=<standard|hardened> sha256=<64 hex chars>`
|
|
116
|
+
|
|
117
|
+
The `sha256` is over canonical JSON that includes the Dumpling version, the active security profile, a stable encoding of the resolved policy (rules, row filters, column cases, sensitive columns, output scan, global salt), and **runtime options** that affect transforms: `--format`, sorted `--include-table` / `--exclude-table` patterns, and the effective `--seed` / `DUMPLING_SEED` value in standard profile (`null` in hardened, where seeds are ignored).
|
|
118
|
+
|
|
119
|
+
If the **input** already begins with a seal line and it **matches** the current run, Dumpling copies the rest of the file through unchanged. If the line looks like a seal but does **not** match (stale policy, different flags, or older `v=`), that line is **dropped** and the dump is re-processed so you do not end up with two seal lines. `--strict-coverage` cannot be combined with a matching seal (table definitions are not scanned in passthrough mode). `--check` writes no output and therefore emits no seal line.
|
|
120
|
+
|
|
111
121
|
---
|
|
112
122
|
|
|
113
123
|
## Configuration (TOML)
|
|
@@ -121,8 +131,8 @@ salt = "${DUMPLING_GLOBAL_SALT}"
|
|
|
121
131
|
|
|
122
132
|
# Rules are keyed by either "table" or "schema.table"
|
|
123
133
|
[rules."public.users"]
|
|
124
|
-
email = { strategy = "
|
|
125
|
-
name = { strategy = "
|
|
134
|
+
email = { strategy = "email", domain = "customer_identity", unique_within_domain = true }
|
|
135
|
+
name = { strategy = "name", locale = "de_de" } # German-locale name
|
|
126
136
|
ssn = { strategy = "hash", salt = "${env:DUMPLING_USERS_SSN_SALT}", as_string = true } # SHA-256 of original (salted)
|
|
127
137
|
age = { strategy = "int_range", min = 18, max = 90 }
|
|
128
138
|
|
|
@@ -162,8 +172,12 @@ token = "high"
|
|
|
162
172
|
| `redact` | Replace with `REDACTED` (string) |
|
|
163
173
|
| `uuid` | Random UUIDv4-like string |
|
|
164
174
|
| `hash` | SHA-256 hex of original value; supports per-column `salt` and global `salt` |
|
|
165
|
-
| `
|
|
175
|
+
| `email` | Safe email address (same generator as `faker = "internet::SafeEmail"`); supports `locale` |
|
|
176
|
+
| `name` | Full name (same as `faker = "name::Name"`); supports `locale` |
|
|
177
|
+
| `first_name` | First name (same as `faker = "name::FirstName"`); supports `locale` |
|
|
178
|
+
| `last_name` | Last name (same as `faker = "name::LastName"`); supports `locale` |
|
|
166
179
|
| `phone` | Locale-aware fake phone number (configurable via `locale`); defaults to English format |
|
|
180
|
+
| `faker` | Values from the Rust [`fake`](https://crates.io/crates/fake) crate ([docs.rs](https://docs.rs/fake/latest/fake/), [`faker` modules](https://docs.rs/fake/latest/fake/faker/index.html)), chosen by a **string identifier** only (`faker = "module::Type"`, e.g. `internet::SafeEmail`). Config is **data only**: nothing from TOML is compiled or executed as Rust at runtime. Use `locale` for locale-aware generators; optional `min`/`max`, `length`, `format` as documented. Unsupported targets fail at config load. New generators require a **new Dumpling release** (or your own fork), not config-side code. |
|
|
167
181
|
| `int_range` | Random integer in `[min, max]` |
|
|
168
182
|
| `string` | Random alphanumeric string (`length = 12` by default) |
|
|
169
183
|
| `date_fuzz` | Shifts a date by a random number of days in `[min_days, max_days]` (defaults: `-30..30`) |
|
|
@@ -223,7 +237,7 @@ dumpling --security-profile hardened --input dump.sql --check
|
|
|
223
237
|
- `unique_within_domain`: when true, different source values are assigned unique pseudonyms within the configured `domain`. NULL values are unaffected and always remain NULL.
|
|
224
238
|
- `min_days` / `max_days`: used by `date_fuzz`.
|
|
225
239
|
- `min_seconds` / `max_seconds`: used by `time_fuzz` and `datetime_fuzz`.
|
|
226
|
-
- `locale`: selects the language/regional format for
|
|
240
|
+
- `locale`: selects the language/regional format for `email`, `name`, `first_name`, `last_name`, `faker`, and `phone`. Supported values: `en`, `fr_fr`, `de_de`, `it_it`, `pt_br`, `pt_pt`, `ar_sa`, `zh_cn`, `zh_tw`, `ja_jp`, `cy_gb`. Defaults to `en` when not specified.
|
|
227
241
|
- `faker`: required when `strategy = "faker"`. A plain string `"module::Type"` (case-insensitive) that maps to a **built-in** generator compiled into Dumpling—not arbitrary Rust or expressions. Names follow [`fake::faker`](https://docs.rs/fake/latest/fake/faker/index.html) (e.g. `internet::SafeEmail` → `faker::internet::SafeEmail` in the crate).
|
|
228
242
|
- `format`: used with `faker = "number::NumberWithFormat"`; pattern uses `#` (0–9) and `^` (1–9) per the [`fake` crate docs](https://docs.rs/fake/latest/fake/).
|
|
229
243
|
|
|
@@ -394,7 +408,7 @@ Define default strategies in `rules."<table>"` and add ordered per-column cases
|
|
|
394
408
|
```toml
|
|
395
409
|
[rules."public.users"]
|
|
396
410
|
email = { strategy = "hash", as_string = true } # default
|
|
397
|
-
name = { strategy = "
|
|
411
|
+
name = { strategy = "name" }
|
|
398
412
|
|
|
399
413
|
[[column_cases."public.users".email]]
|
|
400
414
|
when.any = [{ column = "is_admin", op = "eq", value = "true" }]
|
|
@@ -445,7 +459,7 @@ salt = "${DUMPLING_HMAC_KEY}"
|
|
|
445
459
|
|
|
446
460
|
[rules."public.users"]
|
|
447
461
|
ssn = { strategy = "hash", as_string = true }
|
|
448
|
-
email = { strategy = "
|
|
462
|
+
email = { strategy = "email", domain = "users" }
|
|
449
463
|
```
|
|
450
464
|
|
|
451
465
|
```bash
|
|
@@ -11,7 +11,7 @@ This project uses **tag-driven releases**.
|
|
|
11
11
|
## Maintainer checklist
|
|
12
12
|
|
|
13
13
|
1. Ensure `main` is green in CI.
|
|
14
|
-
2. Update `Cargo.toml`
|
|
14
|
+
2. Update the version in `Cargo.toml` and `pyproject.toml`, and update `CHANGELOG.md`.
|
|
15
15
|
3. Open and merge a release preparation PR.
|
|
16
16
|
4. Create and push a tag from `main`:
|
|
17
17
|
|
|
@@ -89,6 +89,71 @@ pub fn parse_faker_path(faker: &str) -> Option<(&str, &str)> {
|
|
|
89
89
|
Some((module, typ))
|
|
90
90
|
}
|
|
91
91
|
|
|
92
|
+
/// Normalized locale key for `faker`, `phone`, and built-in PII strategies (`email`, `name`, …).
|
|
93
|
+
/// Matches ASCII case-insensitively without allocating; surrounding whitespace is trimmed, and a missing, empty, or unrecognized locale resolves to `"en"`.
|
|
94
|
+
pub fn resolved_locale_key(spec: &AnonymizerSpec) -> &'static str {
|
|
95
|
+
let s = spec.locale.as_deref().map(str::trim).unwrap_or("");
|
|
96
|
+
if s.is_empty() || s.eq_ignore_ascii_case("en") {
|
|
97
|
+
return "en";
|
|
98
|
+
}
|
|
99
|
+
if s.eq_ignore_ascii_case("fr_fr") {
|
|
100
|
+
return "fr_fr";
|
|
101
|
+
}
|
|
102
|
+
if s.eq_ignore_ascii_case("de_de") {
|
|
103
|
+
return "de_de";
|
|
104
|
+
}
|
|
105
|
+
if s.eq_ignore_ascii_case("it_it") {
|
|
106
|
+
return "it_it";
|
|
107
|
+
}
|
|
108
|
+
if s.eq_ignore_ascii_case("pt_br") {
|
|
109
|
+
return "pt_br";
|
|
110
|
+
}
|
|
111
|
+
if s.eq_ignore_ascii_case("pt_pt") {
|
|
112
|
+
return "pt_pt";
|
|
113
|
+
}
|
|
114
|
+
if s.eq_ignore_ascii_case("ar_sa") {
|
|
115
|
+
return "ar_sa";
|
|
116
|
+
}
|
|
117
|
+
if s.eq_ignore_ascii_case("zh_cn") {
|
|
118
|
+
return "zh_cn";
|
|
119
|
+
}
|
|
120
|
+
if s.eq_ignore_ascii_case("zh_tw") {
|
|
121
|
+
return "zh_tw";
|
|
122
|
+
}
|
|
123
|
+
if s.eq_ignore_ascii_case("ja_jp") {
|
|
124
|
+
return "ja_jp";
|
|
125
|
+
}
|
|
126
|
+
if s.eq_ignore_ascii_case("cy_gb") {
|
|
127
|
+
return "cy_gb";
|
|
128
|
+
}
|
|
129
|
+
"en"
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
/// Built-in `strategy = "email"` — same generator as `faker = "internet::SafeEmail"`.
|
|
133
|
+
pub fn pii_safe_email(loc: &str, rng: &mut StdRng) -> String {
|
|
134
|
+
fl!(loc, rng, SafeEmail)
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
/// Built-in `strategy = "name"` — full name; same generator as `faker = "name::Name"`.
|
|
138
|
+
pub fn pii_full_name(loc: &str, rng: &mut StdRng) -> String {
|
|
139
|
+
fl!(loc, rng, Name)
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
/// Built-in `strategy = "first_name"` — same generator as `faker = "name::FirstName"`.
|
|
143
|
+
pub fn pii_first_name(loc: &str, rng: &mut StdRng) -> String {
|
|
144
|
+
fl!(loc, rng, FirstName)
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
/// Built-in `strategy = "last_name"` — same generator as `faker = "name::LastName"`.
|
|
148
|
+
pub fn pii_last_name(loc: &str, rng: &mut StdRng) -> String {
|
|
149
|
+
fl!(loc, rng, LastName)
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
/// Built-in `strategy = "phone"` — locale-aware phone number from the `PhoneNumber` faker.
|
|
153
|
+
pub fn pii_phone_number(loc: &str, rng: &mut StdRng) -> String {
|
|
154
|
+
fl!(loc, rng, PhoneNumber)
|
|
155
|
+
}
|
|
156
|
+
|
|
92
157
|
pub fn faker_string_with_rng(spec: &AnonymizerSpec, rng: &mut StdRng) -> Option<String> {
|
|
93
158
|
let faker = spec.faker.as_deref()?.trim();
|
|
94
159
|
if faker.is_empty() {
|
|
@@ -97,12 +162,7 @@ pub fn faker_string_with_rng(spec: &AnonymizerSpec, rng: &mut StdRng) -> Option<
|
|
|
97
162
|
let (module, typ) = parse_faker_path(faker)?;
|
|
98
163
|
let module_lc = module.to_ascii_lowercase();
|
|
99
164
|
let typ_lc = typ.to_ascii_lowercase();
|
|
100
|
-
let locale = spec
|
|
101
|
-
.locale
|
|
102
|
-
.as_deref()
|
|
103
|
-
.map(|l| l.trim().to_ascii_lowercase())
|
|
104
|
-
.unwrap_or_else(|| "en".to_string());
|
|
105
|
-
let loc = locale.as_str();
|
|
165
|
+
let loc = resolved_locale_key(spec);
|
|
106
166
|
|
|
107
167
|
let s: String = match (module_lc.as_str(), typ_lc.as_str()) {
|
|
108
168
|
("name", "firstname") => fl!(loc, rng, FirstName),
|