dumpling-cli 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/.dumplingconf.example +11 -7
- dumpling_cli-0.3.0/.github/workflows/docs-pr.yml +35 -0
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/.github/workflows/docs.yml +9 -11
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/.gitignore +1 -0
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/AGENTS.md +17 -1
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/CHANGELOG.md +14 -0
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/CONTRIBUTING.md +8 -0
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/Cargo.lock +1 -1
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/Cargo.toml +1 -1
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/PKG-INFO +15 -8
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/README.md +14 -7
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/docs/src/ci-guardrails.md +1 -1
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/docs/src/configuration.md +21 -2
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/docs/src/getting-started.md +3 -1
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/docs/src/index.md +3 -3
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/pyproject.toml +1 -1
- dumpling_cli-0.3.0/rust-toolchain.toml +3 -0
- dumpling_cli-0.3.0/scripts/setup-dev.sh +89 -0
- dumpling_cli-0.3.0/src/faker_dispatch.rs +521 -0
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/src/filter.rs +115 -41
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/src/lint.rs +6 -0
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/src/main.rs +1 -0
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/src/scan.rs +1 -0
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/src/settings.rs +245 -29
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/src/sql.rs +276 -95
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/src/transform.rs +48 -119
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/.github/workflows/ci.yml +0 -0
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/.github/workflows/platform-compat-latest.yml +0 -0
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/.github/workflows/platform-compat-matrix.yml +0 -0
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/.github/workflows/policy-lint.yml +0 -0
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/.github/workflows/publish.yml +0 -0
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/.github/workflows/release.yml +0 -0
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/.github/workflows/tests.yml +0 -0
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/MAINTENANCE.md +0 -0
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/book.toml +0 -0
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/datetime_out.sql +0 -0
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/datetime_sample.sql +0 -0
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/docs/src/SUMMARY.md +0 -0
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/docs/src/releasing.md +0 -0
- {dumpling_cli-0.2.0 → dumpling_cli-0.3.0}/src/report.rs +0 -0
|
@@ -26,13 +26,17 @@ salt = "${DUMPLING_GLOBAL_SALT}"
|
|
|
26
26
|
#
|
|
27
27
|
# Each column maps to an anonymizer spec: { strategy = "…", <options> }
|
|
28
28
|
# ---------------------------------------------------------------------------
|
|
29
|
+
# Faker strategy: `faker = "module::Type"` matches the Rust `fake` crate layout.
|
|
30
|
+
# Crate docs: https://docs.rs/fake/latest/fake/
|
|
31
|
+
# Faker modules: https://docs.rs/fake/latest/fake/faker/index.html
|
|
32
|
+
# Upstream repo: https://github.com/cksac/fake-rs
|
|
29
33
|
[rules."public.users"]
|
|
30
|
-
# email —
|
|
31
|
-
email = { strategy = "
|
|
32
|
-
# name —
|
|
33
|
-
full_name = { strategy = "name" }
|
|
34
|
-
first_name = { strategy = "
|
|
35
|
-
last_name = { strategy = "
|
|
34
|
+
# email — fake email via Rust `fake` crate; shared `domain` keeps the mapping stable across tables, `unique_within_domain` keeps pseudonyms distinct
|
|
35
|
+
email = { strategy = "faker", faker = "internet::SafeEmail", domain = "customer_identity", unique_within_domain = true }
|
|
36
|
+
# name — locale-aware full name (see `locale`); other generators use `faker = "module::Type"`
|
|
37
|
+
full_name = { strategy = "faker", faker = "name::Name" }
|
|
38
|
+
first_name = { strategy = "faker", faker = "name::FirstName" }
|
|
39
|
+
last_name = { strategy = "faker", faker = "name::LastName" }
|
|
36
40
|
# phone — US-style (xxx) xxx-xxxx
|
|
37
41
|
phone = { strategy = "phone" }
|
|
38
42
|
# ssn — SHA-256 hex of original; use per-column salt for extra protection
|
|
@@ -58,7 +62,7 @@ wake_time = { strategy = "time_fuzz", min_seconds = -3600, max_seconds = 360
|
|
|
58
62
|
# credit card — redact entirely; force as quoted string
|
|
59
63
|
credit_card = { strategy = "redact", as_string = true }
|
|
60
64
|
# keep the same anonymized email as users table via shared domain
|
|
61
|
-
customer_email = { strategy = "
|
|
65
|
+
customer_email = { strategy = "faker", faker = "internet::SafeEmail", domain = "customer_identity" }
|
|
62
66
|
|
|
63
67
|
[rules."public.audit_log"]
|
|
64
68
|
# unqualified table name also works (matches any schema)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# mdBook verification on pull requests only (no GitHub Pages upload or deploy).
|
|
2
|
+
# Pages build + deploy live in docs.yml and run on pushes to main.
|
|
3
|
+
name: Docs (PR)
|
|
4
|
+
|
|
5
|
+
on:
|
|
6
|
+
pull_request:
|
|
7
|
+
paths:
|
|
8
|
+
- "README.md"
|
|
9
|
+
- "book.toml"
|
|
10
|
+
- "docs/**"
|
|
11
|
+
- ".github/workflows/docs.yml"
|
|
12
|
+
- ".github/workflows/docs-pr.yml"
|
|
13
|
+
|
|
14
|
+
permissions:
|
|
15
|
+
contents: read
|
|
16
|
+
|
|
17
|
+
concurrency:
|
|
18
|
+
group: docs-pr-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
|
19
|
+
cancel-in-progress: true
|
|
20
|
+
|
|
21
|
+
jobs:
|
|
22
|
+
verify:
|
|
23
|
+
name: Build mdBook (verify)
|
|
24
|
+
runs-on: ubuntu-latest
|
|
25
|
+
steps:
|
|
26
|
+
- name: Checkout
|
|
27
|
+
uses: actions/checkout@v4
|
|
28
|
+
|
|
29
|
+
- name: Install mdBook
|
|
30
|
+
uses: peaceiris/actions-mdbook@v2
|
|
31
|
+
with:
|
|
32
|
+
mdbook-version: "0.4.52"
|
|
33
|
+
|
|
34
|
+
- name: Build documentation site
|
|
35
|
+
run: mdbook build
|
|
@@ -1,12 +1,8 @@
|
|
|
1
|
+
# Build and deploy the mdBook site to GitHub Pages (main branch only).
|
|
2
|
+
# Pull-request verification runs in docs-pr.yml — this workflow does not run on PRs.
|
|
1
3
|
name: Docs
|
|
2
4
|
|
|
3
5
|
on:
|
|
4
|
-
pull_request:
|
|
5
|
-
paths:
|
|
6
|
-
- "README.md"
|
|
7
|
-
- "book.toml"
|
|
8
|
-
- "docs/**"
|
|
9
|
-
- ".github/workflows/docs.yml"
|
|
10
6
|
push:
|
|
11
7
|
branches:
|
|
12
8
|
- main
|
|
@@ -18,16 +14,16 @@ on:
|
|
|
18
14
|
|
|
19
15
|
permissions:
|
|
20
16
|
contents: read
|
|
21
|
-
pages: write
|
|
22
|
-
id-token: write
|
|
23
17
|
|
|
24
18
|
concurrency:
|
|
25
|
-
group: docs-${{ github.ref }}
|
|
19
|
+
group: docs-pages-${{ github.ref }}
|
|
26
20
|
cancel-in-progress: true
|
|
27
21
|
|
|
28
22
|
jobs:
|
|
29
23
|
build:
|
|
30
24
|
runs-on: ubuntu-latest
|
|
25
|
+
permissions:
|
|
26
|
+
contents: read
|
|
31
27
|
steps:
|
|
32
28
|
- name: Checkout
|
|
33
29
|
uses: actions/checkout@v4
|
|
@@ -40,15 +36,17 @@ jobs:
|
|
|
40
36
|
- name: Build documentation site
|
|
41
37
|
run: mdbook build
|
|
42
38
|
|
|
43
|
-
- name: Upload
|
|
39
|
+
- name: Upload Pages deployment artifact
|
|
44
40
|
uses: actions/upload-pages-artifact@v3
|
|
45
41
|
with:
|
|
46
42
|
path: docs/book
|
|
47
43
|
|
|
48
44
|
deploy:
|
|
49
|
-
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
|
50
45
|
needs: build
|
|
51
46
|
runs-on: ubuntu-latest
|
|
47
|
+
permissions:
|
|
48
|
+
pages: write
|
|
49
|
+
id-token: write
|
|
52
50
|
environment:
|
|
53
51
|
name: github-pages
|
|
54
52
|
url: ${{ steps.deployment.outputs.page_url }}
|
|
@@ -223,6 +223,8 @@ Follow these steps in order. Do not skip any step.
|
|
|
223
223
|
|
|
224
224
|
8. **`README.md`**: Add a row to the "Anonymization strategies" table.
|
|
225
225
|
|
|
226
|
+
**`faker` strategy:** Config only carries string identifiers; Dumpling never evaluates user Rust from config. To ship a new generator, add dispatch in `src/faker_dispatch.rs` and validation in `validate_anonymizer_spec` for the `faker` branch. Upstream reference: [`fake` on docs.rs](https://docs.rs/fake/latest/fake/), [`fake::faker` module index](https://docs.rs/fake/latest/fake/faker/index.html), [source on GitHub](https://github.com/cksac/fake-rs).
|
|
227
|
+
|
|
226
228
|
---
|
|
227
229
|
|
|
228
230
|
## How to Add a New Row Filter Predicate Operator
|
|
@@ -274,15 +276,29 @@ Follow these steps in order. Do not skip any step.
|
|
|
274
276
|
|
|
275
277
|
This is a pure Rust CLI project with **no external services** (no database, Docker, or network dependencies). The Rust stable toolchain (rustc + cargo) is the only prerequisite.
|
|
276
278
|
|
|
279
|
+
### One-shot environment (agents and humans)
|
|
280
|
+
|
|
281
|
+
From the repository root:
|
|
282
|
+
|
|
283
|
+
```bash
|
|
284
|
+
./scripts/setup-dev.sh
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
This installs the **stable** toolchain with **rustfmt** and **clippy** (via `rustup` when available), runs **`cargo fetch`**, and installs a pinned **mdBook** binary under `.tools/` (same version as the Docs CI workflow) so you can run `mdbook build` without a global install. Add `.tools` to `PATH` for convenience, or invoke `.tools/mdbook build` directly.
|
|
288
|
+
|
|
289
|
+
The repo root **`rust-toolchain.toml`** pins **stable** and the **components** CI uses, so `cargo` automatically selects the right toolchain in fresh checkouts.
|
|
290
|
+
|
|
277
291
|
### Quick reference
|
|
278
292
|
|
|
279
293
|
| Task | Command |
|
|
280
294
|
|------|---------|
|
|
295
|
+
| Setup (toolchain + fetch + mdbook) | `./scripts/setup-dev.sh` |
|
|
281
296
|
| Build | `cargo build` |
|
|
282
297
|
| Test | `cargo test --all-targets --all-features` |
|
|
283
298
|
| Lint | `cargo clippy --all-targets --all-features` |
|
|
284
299
|
| Format check | `cargo fmt --all -- --check` |
|
|
285
300
|
| Auto-format | `cargo fmt` |
|
|
301
|
+
| Docs site (mdBook) | `mdbook build` or `.tools/mdbook build` after setup |
|
|
286
302
|
| Run CLI | `./target/debug/dumpling --help` |
|
|
287
303
|
|
|
288
304
|
### Running the CLI
|
|
@@ -295,6 +311,6 @@ Dumpling is fail-closed by default — it exits non-zero without a config file.
|
|
|
295
311
|
|
|
296
312
|
### Notes
|
|
297
313
|
|
|
298
|
-
- All
|
|
314
|
+
- All tests are inline `#[cfg(test)]` modules; there are no separate test files or fixtures to manage.
|
|
299
315
|
- The update script uses `cargo fetch` to pre-download crate dependencies. A full `cargo build` or `cargo test` will then compile from the local cache without network access.
|
|
300
316
|
- No environment variables or secrets are required for building, testing, or running the CLI locally.
|
|
@@ -7,6 +7,19 @@ and this project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.ht
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.3.0] - 2026-05-02
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- **`faker` anonymization strategy** backed by the Rust [`fake`](https://crates.io/crates/fake) crate: select generators with `faker = "module::Type"` (for example `internet::SafeEmail`, `name::Name`). Unsupported targets fail at config load with a clear error; extending the allowlist requires a Dumpling release (see `src/faker_dispatch.rs`).
|
|
15
|
+
- **JSON path rules in `[rules]`**: column keys such as `payload.profile.email` or `payload__profile__email` apply strategies to nested fields inside JSON text columns while preserving document structure. Conflicts between a whole-column rule and JSON path rules for the same base column are rejected at validation.
|
|
16
|
+
- **`format` on `AnonymizerSpec`** for pattern-based faker generators such as `number::NumberWithFormat`.
|
|
17
|
+
|
|
18
|
+
### Changed
|
|
19
|
+
|
|
20
|
+
- **Legacy strategy names** `email`, `name`, `first_name`, and `last_name` in config are normalized at load time to `strategy = "faker"` with the same defaults as before (`internet::SafeEmail`, `name::Name`, `name::FirstName`, `name::LastName`), so existing configs keep working.
|
|
21
|
+
- **`locale`** applies to both `faker` and `phone` strategies.
|
|
22
|
+
|
|
10
23
|
## [0.2.0] - 2026-05-02
|
|
11
24
|
|
|
12
25
|
### Added
|
|
@@ -30,4 +43,5 @@ and this project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.ht
|
|
|
30
43
|
- Configurable output scan severities and per-category thresholds via `[output_scan]`.
|
|
31
44
|
- JSON report section for output scan findings including category, count, threshold, severity, and sample locations.
|
|
32
45
|
|
|
46
|
+
[0.3.0]: https://github.com/ababic/dumpling/compare/v0.2.0...v0.3.0
|
|
33
47
|
[0.2.0]: https://github.com/ababic/dumpling/compare/v0.1.0...v0.2.0
|
|
@@ -13,6 +13,14 @@ For AI coding agents: also read `AGENTS.md`, which contains more detailed techni
|
|
|
13
13
|
- **Rust stable toolchain** — install via [rustup.rs](https://rustup.rs/).
|
|
14
14
|
- No database, Docker, or external services are required. Dumpling is a pure CLI tool.
|
|
15
15
|
|
|
16
|
+
### One-shot setup (recommended)
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
./scripts/setup-dev.sh
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Installs stable + `rustfmt` + `clippy`, prefetches crates, and downloads a pinned **mdBook** under `.tools/` (for `mdbook build`, same version as CI). Optional: `export PATH="$PWD/.tools:$PATH"`.
|
|
23
|
+
|
|
16
24
|
### Build and run
|
|
17
25
|
|
|
18
26
|
```bash
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dumpling-cli
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Classifier: Development Status :: 4 - Beta
|
|
5
5
|
Classifier: Environment :: Console
|
|
6
6
|
Classifier: Intended Audience :: Developers
|
|
@@ -112,8 +112,8 @@ salt = "${DUMPLING_GLOBAL_SALT}"
|
|
|
112
112
|
|
|
113
113
|
# Rules are keyed by either "table" or "schema.table"
|
|
114
114
|
[rules."public.users"]
|
|
115
|
-
email = { strategy = "
|
|
116
|
-
name = { strategy = "name", locale = "de_de" } # German-locale name
|
|
115
|
+
email = { strategy = "faker", faker = "internet::SafeEmail", domain = "customer_identity", unique_within_domain = true }
|
|
116
|
+
name = { strategy = "faker", faker = "name::Name", locale = "de_de" } # German-locale name
|
|
117
117
|
ssn = { strategy = "hash", salt = "${env:DUMPLING_USERS_SSN_SALT}", as_string = true } # SHA-256 of original (salted)
|
|
118
118
|
age = { strategy = "int_range", min = 18, max = 90 }
|
|
119
119
|
|
|
@@ -153,8 +153,7 @@ token = "high"
|
|
|
153
153
|
| `redact` | Replace with `REDACTED` (string) |
|
|
154
154
|
| `uuid` | Random UUIDv4-like string |
|
|
155
155
|
| `hash` | SHA-256 hex of original value; supports per-column `salt` and global `salt` |
|
|
156
|
-
| `
|
|
157
|
-
| `name` / `first_name` / `last_name` | Locale-aware fake name (configurable via `locale`); defaults to English |
|
|
156
|
+
| `faker` | Values from the Rust [`fake`](https://crates.io/crates/fake) crate ([docs.rs](https://docs.rs/fake/latest/fake/), [`faker` modules](https://docs.rs/fake/latest/fake/faker/index.html)), chosen by a **string identifier** only (`faker = "module::Type"`, e.g. `internet::SafeEmail`). Config is **data only**: nothing from TOML is compiled or executed as Rust at runtime. Use `locale` for locale-aware generators; optional `min`/`max`, `length`, `format` as documented. Unsupported targets fail at config load. New generators require a **new Dumpling release** (or your own fork), not config-side code. |
|
|
158
157
|
| `phone` | Locale-aware fake phone number (configurable via `locale`); defaults to English format |
|
|
159
158
|
| `int_range` | Random integer in `[min, max]` |
|
|
160
159
|
| `string` | Random alphanumeric string (`length = 12` by default) |
|
|
@@ -162,6 +161,12 @@ token = "high"
|
|
|
162
161
|
| `time_fuzz` | Shifts a time-of-day by a random number of seconds in `[min_seconds, max_seconds]` with 24h wraparound (defaults: `-300..300`) |
|
|
163
162
|
| `datetime_fuzz` | Shifts a timestamp/timestamptz by a random number of seconds in `[min_seconds, max_seconds]` (defaults: `-86400..86400`) |
|
|
164
163
|
|
|
164
|
+
**`faker` reference (upstream `fake` crate):** Dumpling’s `faker = "module::Type"` strings mirror the Rust [`fake`](https://crates.io/crates/fake) crate’s [`faker`](https://docs.rs/fake/latest/fake/faker/index.html) module layout. Use these when picking or extending generators:
|
|
165
|
+
|
|
166
|
+
- [docs.rs — `fake` crate root](https://docs.rs/fake/latest/fake/) (overview, `Fake` / `Dummy` traits, locales)
|
|
167
|
+
- [docs.rs — `fake::faker` module index](https://docs.rs/fake/latest/fake/faker/index.html) (per-domain submodules: `address`, `internet`, `name`, …)
|
|
168
|
+
- [GitHub — `cksac/fake-rs`](https://github.com/cksac/fake-rs) (source, README with the CLI’s generator name list)
|
|
169
|
+
|
|
165
170
|
### Secret references
|
|
166
171
|
|
|
167
172
|
Dumpling resolves secret references in string config fields so plaintext salts/keys
|
|
@@ -209,7 +214,9 @@ dumpling --security-profile hardened --input dump.sql --check
|
|
|
209
214
|
- `unique_within_domain`: when true, different source values are assigned unique pseudonyms within the configured `domain`. NULL values are unaffected and always remain NULL.
|
|
210
215
|
- `min_days` / `max_days`: used by `date_fuzz`.
|
|
211
216
|
- `min_seconds` / `max_seconds`: used by `time_fuzz` and `datetime_fuzz`.
|
|
212
|
-
- `locale`: selects the language/regional format for the `
|
|
217
|
+
- `locale`: selects the language/regional format for the `faker` and `phone` strategies. Supported values: `en`, `fr_fr`, `de_de`, `it_it`, `pt_br`, `pt_pt`, `ar_sa`, `zh_cn`, `zh_tw`, `ja_jp`, `cy_gb`. Defaults to `en` when not specified.
|
|
218
|
+
- `faker`: required when `strategy = "faker"`. A plain string `"module::Type"` (case-insensitive) that maps to a **built-in** generator compiled into Dumpling—not arbitrary Rust or expressions. Names follow [`fake::faker`](https://docs.rs/fake/latest/fake/faker/index.html) (e.g. `internet::SafeEmail` → `fake::faker::internet::raw::SafeEmail` in the crate).
|
|
219
|
+
- `format`: used with `faker = "number::NumberWithFormat"`; pattern uses `#` (0–9) and `^` (1–9) per the [`fake` crate docs](https://docs.rs/fake/latest/fake/).
|
|
213
220
|
|
|
214
221
|
> **Note:** `table_options` are no longer supported; use explicit `rules` and optional `column_cases`.
|
|
215
222
|
|
|
@@ -353,7 +360,7 @@ Define default strategies in `rules."<table>"` and add ordered per-column cases
|
|
|
353
360
|
```toml
|
|
354
361
|
[rules."public.users"]
|
|
355
362
|
email = { strategy = "hash", as_string = true } # default
|
|
356
|
-
name = { strategy = "name" }
|
|
363
|
+
name = { strategy = "faker", faker = "name::Name" }
|
|
357
364
|
|
|
358
365
|
[[column_cases."public.users".email]]
|
|
359
366
|
when.any = [{ column = "is_admin", op = "eq", value = "true" }]
|
|
@@ -404,7 +411,7 @@ salt = "${DUMPLING_HMAC_KEY}"
|
|
|
404
411
|
|
|
405
412
|
[rules."public.users"]
|
|
406
413
|
ssn = { strategy = "hash", as_string = true }
|
|
407
|
-
email = { strategy = "
|
|
414
|
+
email = { strategy = "faker", faker = "internet::SafeEmail", domain = "users" }
|
|
408
415
|
```
|
|
409
416
|
|
|
410
417
|
```bash
|
|
@@ -91,8 +91,8 @@ salt = "${DUMPLING_GLOBAL_SALT}"
|
|
|
91
91
|
|
|
92
92
|
# Rules are keyed by either "table" or "schema.table"
|
|
93
93
|
[rules."public.users"]
|
|
94
|
-
email = { strategy = "
|
|
95
|
-
name = { strategy = "name", locale = "de_de" } # German-locale name
|
|
94
|
+
email = { strategy = "faker", faker = "internet::SafeEmail", domain = "customer_identity", unique_within_domain = true }
|
|
95
|
+
name = { strategy = "faker", faker = "name::Name", locale = "de_de" } # German-locale name
|
|
96
96
|
ssn = { strategy = "hash", salt = "${env:DUMPLING_USERS_SSN_SALT}", as_string = true } # SHA-256 of original (salted)
|
|
97
97
|
age = { strategy = "int_range", min = 18, max = 90 }
|
|
98
98
|
|
|
@@ -132,8 +132,7 @@ token = "high"
|
|
|
132
132
|
| `redact` | Replace with `REDACTED` (string) |
|
|
133
133
|
| `uuid` | Random UUIDv4-like string |
|
|
134
134
|
| `hash` | SHA-256 hex of original value; supports per-column `salt` and global `salt` |
|
|
135
|
-
| `
|
|
136
|
-
| `name` / `first_name` / `last_name` | Locale-aware fake name (configurable via `locale`); defaults to English |
|
|
135
|
+
| `faker` | Values from the Rust [`fake`](https://crates.io/crates/fake) crate ([docs.rs](https://docs.rs/fake/latest/fake/), [`faker` modules](https://docs.rs/fake/latest/fake/faker/index.html)), chosen by a **string identifier** only (`faker = "module::Type"`, e.g. `internet::SafeEmail`). Config is **data only**: nothing from TOML is compiled or executed as Rust at runtime. Use `locale` for locale-aware generators; optional `min`/`max`, `length`, `format` as documented. Unsupported targets fail at config load. New generators require a **new Dumpling release** (or your own fork), not config-side code. |
|
|
137
136
|
| `phone` | Locale-aware fake phone number (configurable via `locale`); defaults to English format |
|
|
138
137
|
| `int_range` | Random integer in `[min, max]` |
|
|
139
138
|
| `string` | Random alphanumeric string (`length = 12` by default) |
|
|
@@ -141,6 +140,12 @@ token = "high"
|
|
|
141
140
|
| `time_fuzz` | Shifts a time-of-day by a random number of seconds in `[min_seconds, max_seconds]` with 24h wraparound (defaults: `-300..300`) |
|
|
142
141
|
| `datetime_fuzz` | Shifts a timestamp/timestamptz by a random number of seconds in `[min_seconds, max_seconds]` (defaults: `-86400..86400`) |
|
|
143
142
|
|
|
143
|
+
**`faker` reference (upstream `fake` crate):** Dumpling’s `faker = "module::Type"` strings mirror the Rust [`fake`](https://crates.io/crates/fake) crate’s [`faker`](https://docs.rs/fake/latest/fake/faker/index.html) module layout. Use these when picking or extending generators:
|
|
144
|
+
|
|
145
|
+
- [docs.rs — `fake` crate root](https://docs.rs/fake/latest/fake/) (overview, `Fake` / `Dummy` traits, locales)
|
|
146
|
+
- [docs.rs — `fake::faker` module index](https://docs.rs/fake/latest/fake/faker/index.html) (per-domain submodules: `address`, `internet`, `name`, …)
|
|
147
|
+
- [GitHub — `cksac/fake-rs`](https://github.com/cksac/fake-rs) (source, README with the CLI’s generator name list)
|
|
148
|
+
|
|
144
149
|
### Secret references
|
|
145
150
|
|
|
146
151
|
Dumpling resolves secret references in string config fields so plaintext salts/keys
|
|
@@ -188,7 +193,9 @@ dumpling --security-profile hardened --input dump.sql --check
|
|
|
188
193
|
- `unique_within_domain`: when true, different source values are assigned unique pseudonyms within the configured `domain`. NULL values are unaffected and always remain NULL.
|
|
189
194
|
- `min_days` / `max_days`: used by `date_fuzz`.
|
|
190
195
|
- `min_seconds` / `max_seconds`: used by `time_fuzz` and `datetime_fuzz`.
|
|
191
|
-
- `locale`: selects the language/regional format for the `
|
|
196
|
+
- `locale`: selects the language/regional format for the `faker` and `phone` strategies. Supported values: `en`, `fr_fr`, `de_de`, `it_it`, `pt_br`, `pt_pt`, `ar_sa`, `zh_cn`, `zh_tw`, `ja_jp`, `cy_gb`. Defaults to `en` when not specified.
|
|
197
|
+
- `faker`: required when `strategy = "faker"`. A plain string `"module::Type"` (case-insensitive) that maps to a **built-in** generator compiled into Dumpling—not arbitrary Rust or expressions. Names follow [`fake::faker`](https://docs.rs/fake/latest/fake/faker/index.html) (e.g. `internet::SafeEmail` → `fake::faker::internet::raw::SafeEmail` in the crate).
|
|
198
|
+
- `format`: used with `faker = "number::NumberWithFormat"`; pattern uses `#` (0–9) and `^` (1–9) per the [`fake` crate docs](https://docs.rs/fake/latest/fake/).
|
|
192
199
|
|
|
193
200
|
> **Note:** `table_options` are no longer supported; use explicit `rules` and optional `column_cases`.
|
|
194
201
|
|
|
@@ -332,7 +339,7 @@ Define default strategies in `rules."<table>"` and add ordered per-column cases
|
|
|
332
339
|
```toml
|
|
333
340
|
[rules."public.users"]
|
|
334
341
|
email = { strategy = "hash", as_string = true } # default
|
|
335
|
-
name = { strategy = "name" }
|
|
342
|
+
name = { strategy = "faker", faker = "name::Name" }
|
|
336
343
|
|
|
337
344
|
[[column_cases."public.users".email]]
|
|
338
345
|
when.any = [{ column = "is_admin", op = "eq", value = "true" }]
|
|
@@ -383,7 +390,7 @@ salt = "${DUMPLING_HMAC_KEY}"
|
|
|
383
390
|
|
|
384
391
|
[rules."public.users"]
|
|
385
392
|
ssn = { strategy = "hash", as_string = true }
|
|
386
|
-
email = { strategy = "
|
|
393
|
+
email = { strategy = "faker", faker = "internet::SafeEmail", domain = "users" }
|
|
387
394
|
```
|
|
388
395
|
|
|
389
396
|
```bash
|
|
@@ -29,7 +29,7 @@ violations to stderr, and exits:
|
|
|
29
29
|
| `empty-rules-table` | warning | A `[rules]` entry has no column rules. Likely a stale or incomplete config section. |
|
|
30
30
|
| `empty-column-cases-table` | warning | A `[column_cases]` entry has no column cases. |
|
|
31
31
|
| `unsalted-hash` | warning | A `hash` strategy is used with no salt (neither per-column `salt` nor global `salt`). Unsalted hashes are reversible via precomputed lookup tables for low-entropy inputs (names, emails, common IDs). |
|
|
32
|
-
| `inconsistent-domain-strategy` | error | The same domain name is used with two or more different strategies. This breaks referential integrity: a domain shared between `
|
|
32
|
+
| `inconsistent-domain-strategy` | error | The same domain name is used with two or more different strategies. This breaks referential integrity: a domain shared between incompatible generators (for example `faker` with different `faker` targets, or `faker` vs `hash`) cannot maintain a single stable mapping. |
|
|
33
33
|
| `uncovered-sensitive-column` | error | A column listed in `[sensitive_columns]` has no matching anonymization rule or case. The column will pass through unmodified, making the sensitive declaration misleading. |
|
|
34
34
|
|
|
35
35
|
---
|
|
@@ -31,6 +31,16 @@ If no configuration is found, Dumpling fails closed by default and exits non-zer
|
|
|
31
31
|
Error output includes every checked location. If you intentionally want a no-op
|
|
32
32
|
run, pass `--allow-noop`.
|
|
33
33
|
|
|
34
|
+
## Faker strategy and the `fake` crate
|
|
35
|
+
|
|
36
|
+
When you use `strategy = "faker"` with `faker = "module::Type"`, those names align with the Rust [**`fake`**](https://crates.io/crates/fake) crate’s [`faker`](https://docs.rs/fake/latest/fake/faker/index.html) modules (for example `name::FirstName` ↔ `fake::faker::name::raw::FirstName`). Use the upstream docs to discover available generators and options:
|
|
37
|
+
|
|
38
|
+
- [docs.rs — `fake` (crate overview)](https://docs.rs/fake/latest/fake/)
|
|
39
|
+
- [docs.rs — `fake::faker` (all faker submodules)](https://docs.rs/fake/latest/fake/faker/index.html)
|
|
40
|
+
- [GitHub — `cksac/fake-rs` (source + README)](https://github.com/cksac/fake-rs)
|
|
41
|
+
|
|
42
|
+
Dumpling only exposes a **subset** wired in `src/faker_dispatch.rs`; unsupported `module::Type` pairs fail at config load.
|
|
43
|
+
|
|
34
44
|
## Baseline config template
|
|
35
45
|
|
|
36
46
|
```toml
|
|
@@ -38,7 +48,7 @@ salt = "${DUMPLING_GLOBAL_SALT}"
|
|
|
38
48
|
|
|
39
49
|
[rules."public.users"]
|
|
40
50
|
email = { strategy = "hash", salt = "${env:DUMPLING_USERS_EMAIL_SALT}", as_string = true }
|
|
41
|
-
|
|
51
|
+
full_name = { strategy = "faker", faker = "name::Name" }
|
|
42
52
|
|
|
43
53
|
[sensitive_columns]
|
|
44
54
|
"public.users" = ["employee_number", "tax_id"]
|
|
@@ -184,6 +194,15 @@ Nested JSON targeting is supported in predicate `column` values via either:
|
|
|
184
194
|
When a JSON path traverses an array, Dumpling checks each element (useful for
|
|
185
195
|
list-of-dicts JSON structures).
|
|
186
196
|
|
|
197
|
+
### JSON path rules (`json` / `jsonb` columns)
|
|
198
|
+
|
|
199
|
+
You can anonymize values **inside** a text column that holds JSON using the same path syntax as row-filter predicates, but on **`[rules]` keys**:
|
|
200
|
+
|
|
201
|
+
- Dot notation: `"payload.profile.email" = { strategy = "faker", faker = "internet::SafeEmail", domain = "orders_email", as_string = true }`
|
|
202
|
+
- Django-style: `"payload__profile__email" = { strategy = "hash", salt = "${env:ORDER_SECRET_SALT}", as_string = true }`
|
|
203
|
+
|
|
204
|
+
The part before the first dot or `__` is the **SQL column name**; the rest is the path inside the parsed JSON document. Use **quoted** keys in TOML when the name contains dots. For a given table, you can use **either** path-level rules for a column **or** one whole-column rule for that column’s base name, not both (Dumpling rejects the conflict at startup). If a path is missing in a given row, that rule is skipped for that row. When only path rules apply (no whole-column rule), the rest of the JSON is left unchanged. Path rules are applied in **longest-path-first** order. `column_cases` still match the SQL column name only; use `when` predicates with nested `column` paths to branch on JSON content.
|
|
205
|
+
|
|
187
206
|
## Safety recommendations
|
|
188
207
|
|
|
189
208
|
- Prefer deterministic runs in CI by passing `--seed` (or `DUMPLING_SEED`).
|
|
@@ -202,7 +221,7 @@ list-of-dicts JSON structures).
|
|
|
202
221
|
- Sensitive columns are detected by:
|
|
203
222
|
1. built-in column-name patterns, and
|
|
204
223
|
2. explicit per-table lists under `[sensitive_columns]`.
|
|
205
|
-
- A sensitive column is considered covered only if it has an explicit `rules` or `column_cases` entry.
|
|
224
|
+
- A sensitive column is considered covered only if it has an explicit `rules` or `column_cases` entry (including JSON path rules whose base name is that column, e.g. `payload.x.y` covers `payload`).
|
|
206
225
|
- If uncovered sensitive columns are found, Dumpling exits non-zero.
|
|
207
226
|
|
|
208
227
|
When `--report` is enabled, coverage fields are added to JSON output:
|
|
@@ -2,9 +2,11 @@
|
|
|
2
2
|
|
|
3
3
|
## Prerequisites
|
|
4
4
|
|
|
5
|
-
- Rust stable toolchain (
|
|
5
|
+
- Rust **stable** toolchain (`rustup` recommended). The repo includes `rust-toolchain.toml` (stable + `rustfmt` + `clippy`) so CI and local `cargo` stay aligned.
|
|
6
6
|
- `cargo` on your `PATH`
|
|
7
7
|
|
|
8
|
+
Optional: run **`./scripts/setup-dev.sh`** once from the repo root — it installs toolchain components, **`cargo fetch`**, and a pinned **mdBook** under `.tools/` for the same docs build CI uses.
|
|
9
|
+
|
|
8
10
|
## Build
|
|
9
11
|
|
|
10
12
|
```bash
|
|
@@ -11,9 +11,9 @@ This documentation covers the operating model for day-to-day use:
|
|
|
11
11
|
|
|
12
12
|
## Documentation quality gate
|
|
13
13
|
|
|
14
|
-
|
|
14
|
+
The mdBook site is built in CI as follows:
|
|
15
15
|
|
|
16
|
-
-
|
|
17
|
-
-
|
|
16
|
+
- **Pull requests:** the **Docs (PR)** workflow runs `mdbook build` when docs-related paths change (no deploy).
|
|
17
|
+
- **`main`:** the **Docs** workflow builds and deploys to GitHub Pages when docs-related paths change.
|
|
18
18
|
|
|
19
19
|
This keeps the docs in a continuously deployable state instead of drifting from the codebase.
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# One-shot dev environment for Dumpling (Rust CLI + optional mdBook for docs).
|
|
3
|
+
# Safe to re-run; skips work that is already done.
|
|
4
|
+
#
|
|
5
|
+
# Usage: from repo root — ./scripts/setup-dev.sh
|
|
6
|
+
#
|
|
7
|
+
# Environment:
|
|
8
|
+
# MDBOOK_VERSION — mdBook version number without the leading "v" (default: 0.4.52, matches CI docs workflow)
|
|
9
|
+
|
|
10
|
+
set -euo pipefail
|
|
11
|
+
|
|
12
|
+
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
|
13
|
+
cd "$ROOT"
|
|
14
|
+
|
|
15
|
+
MDBOOK_VERSION="${MDBOOK_VERSION:-0.4.52}"
|
|
16
|
+
TOOLS_DIR="${ROOT}/.tools"
|
|
17
|
+
MDBOOK_BIN="${TOOLS_DIR}/mdbook"
|
|
18
|
+
|
|
19
|
+
require_rust() {
|
|
20
|
+
if ! command -v rustc >/dev/null 2>&1 || ! command -v cargo >/dev/null 2>&1; then
|
|
21
|
+
echo "error: rustc/cargo not found. Install Rust: https://rustup.rs/" >&2
|
|
22
|
+
exit 1
|
|
23
|
+
fi
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
mdbook_download_url() {
|
|
27
|
+
local arch
|
|
28
|
+
case "$(uname -sm)" in
|
|
29
|
+
Linux\ x86_64) arch="x86_64-unknown-linux-gnu" ;;
|
|
30
|
+
Darwin\ x86_64) arch="x86_64-apple-darwin" ;;
|
|
31
|
+
Darwin\ arm64) arch="aarch64-apple-darwin" ;;
|
|
32
|
+
*)
|
|
33
|
+
echo "error: unsupported OS/arch for prebuilt mdbook: $(uname -sm)" >&2
|
|
34
|
+
echo "Install mdbook yourself: https://github.com/rust-lang/mdBook/releases" >&2
|
|
35
|
+
exit 1
|
|
36
|
+
;;
|
|
37
|
+
esac
|
|
38
|
+
echo "https://github.com/rust-lang/mdBook/releases/download/v${MDBOOK_VERSION}/mdbook-v${MDBOOK_VERSION}-${arch}.tar.gz"
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
ensure_mdbook() {
|
|
42
|
+
if [[ -x "${MDBOOK_BIN}" ]]; then
|
|
43
|
+
installed="$("${MDBOOK_BIN}" --version 2>/dev/null | awk '{print $2}' || true)"
|
|
44
|
+
if [[ "${installed}" == "${MDBOOK_VERSION}" ]]; then
|
|
45
|
+
return 0
|
|
46
|
+
fi
|
|
47
|
+
fi
|
|
48
|
+
|
|
49
|
+
mkdir -p "${TOOLS_DIR}"
|
|
50
|
+
local url tmp
|
|
51
|
+
url="$(mdbook_download_url)"
|
|
52
|
+
tmp="$(mktemp -d)"
|
|
53
|
+
trap 'rm -rf "${tmp}"' EXIT
|
|
54
|
+
echo "Downloading mdbook v${MDBOOK_VERSION}…"
|
|
55
|
+
curl -fsSL "${url}" | tar xz -C "${tmp}"
|
|
56
|
+
mv "${tmp}/mdbook" "${MDBOOK_BIN}"
|
|
57
|
+
chmod +x "${MDBOOK_BIN}"
|
|
58
|
+
trap - EXIT
|
|
59
|
+
rm -rf "${tmp}"
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
main() {
|
|
63
|
+
require_rust
|
|
64
|
+
|
|
65
|
+
if command -v rustup >/dev/null 2>&1; then
|
|
66
|
+
echo "Installing stable toolchain + rustfmt + clippy (rustup)…"
|
|
67
|
+
rustup toolchain install stable
|
|
68
|
+
rustup component add rustfmt clippy --toolchain stable
|
|
69
|
+
else
|
|
70
|
+
echo "warning: rustup not found; ensure rustfmt and clippy are installed for stable CI parity." >&2
|
|
71
|
+
fi
|
|
72
|
+
|
|
73
|
+
echo "Prefetching crates (cargo fetch)…"
|
|
74
|
+
cargo fetch
|
|
75
|
+
|
|
76
|
+
ensure_mdbook
|
|
77
|
+
echo "mdbook: ${MDBOOK_BIN} ($("${MDBOOK_BIN}" --version))"
|
|
78
|
+
|
|
79
|
+
echo
|
|
80
|
+
echo "Done. Typical checks:"
|
|
81
|
+
echo " cargo fmt --all -- --check"
|
|
82
|
+
echo " cargo clippy --all-targets --all-features"
|
|
83
|
+
echo " cargo test --all-targets --all-features"
|
|
84
|
+
echo " ${MDBOOK_BIN} build # same as Docs CI (book.toml → docs/book)"
|
|
85
|
+
echo
|
|
86
|
+
echo "Tip: add ${TOOLS_DIR} to PATH for this shell: export PATH=\"${TOOLS_DIR}:\${PATH}\""
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
main "$@"
|