dumpling-cli 0.5.0__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/.github/workflows/ci.yml +1 -1
  2. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/.github/workflows/docs-pr.yml +1 -1
  3. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/.github/workflows/docs.yml +3 -3
  4. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/.github/workflows/platform-compat-latest.yml +1 -1
  5. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/.github/workflows/platform-compat-matrix.yml +1 -1
  6. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/.github/workflows/policy-lint.yml +1 -1
  7. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/.github/workflows/publish.yml +8 -8
  8. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/.github/workflows/release.yml +1 -1
  9. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/.github/workflows/tests.yml +1 -1
  10. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/CHANGELOG.md +13 -0
  11. dumpling_cli-0.6.0/CONTRIBUTORS.md +6 -0
  12. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/Cargo.lock +1 -1
  13. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/Cargo.toml +1 -1
  14. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/PKG-INFO +11 -1
  15. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/README.md +10 -0
  16. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/pyproject.toml +1 -1
  17. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/src/filter.rs +12 -12
  18. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/src/main.rs +190 -8
  19. dumpling_cli-0.6.0/src/seal.rs +482 -0
  20. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/src/settings.rs +9 -9
  21. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/src/sql.rs +14 -4
  22. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/src/transform.rs +33 -10
  23. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/.dumplingconf.example +0 -0
  24. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/.gitignore +0 -0
  25. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/AGENTS.md +0 -0
  26. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/CONTRIBUTING.md +0 -0
  27. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/MAINTENANCE.md +0 -0
  28. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/assets/logo.svg +0 -0
  29. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/book.toml +0 -0
  30. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/datetime_out.sql +0 -0
  31. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/datetime_sample.sql +0 -0
  32. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/docs/src/SUMMARY.md +0 -0
  33. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/docs/src/ci-guardrails.md +0 -0
  34. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/docs/src/configuration.md +0 -0
  35. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/docs/src/getting-started.md +0 -0
  36. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/docs/src/index.md +0 -0
  37. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/docs/src/releasing.md +0 -0
  38. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/rust-toolchain.toml +0 -0
  39. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/scripts/setup-dev.sh +0 -0
  40. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/src/faker_dispatch.rs +0 -0
  41. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/src/lint.rs +0 -0
  42. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/src/report.rs +0 -0
  43. {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/src/scan.rs +0 -0
@@ -15,7 +15,7 @@ jobs:
15
15
  runs-on: ubuntu-latest
16
16
  steps:
17
17
  - name: Checkout
18
- uses: actions/checkout@v4
18
+ uses: actions/checkout@v6
19
19
 
20
20
  - name: Install Rust toolchain
21
21
  uses: dtolnay/rust-toolchain@stable
@@ -24,7 +24,7 @@ jobs:
24
24
  runs-on: ubuntu-latest
25
25
  steps:
26
26
  - name: Checkout
27
- uses: actions/checkout@v4
27
+ uses: actions/checkout@v6
28
28
 
29
29
  - name: Install mdBook
30
30
  uses: peaceiris/actions-mdbook@v2
@@ -26,7 +26,7 @@ jobs:
26
26
  contents: read
27
27
  steps:
28
28
  - name: Checkout
29
- uses: actions/checkout@v4
29
+ uses: actions/checkout@v6
30
30
 
31
31
  - name: Install mdBook
32
32
  uses: peaceiris/actions-mdbook@v2
@@ -37,7 +37,7 @@ jobs:
37
37
  run: mdbook build
38
38
 
39
39
  - name: Upload Pages deployment artifact
40
- uses: actions/upload-pages-artifact@v3
40
+ uses: actions/upload-pages-artifact@v5
41
41
  with:
42
42
  path: docs/book
43
43
 
@@ -53,4 +53,4 @@ jobs:
53
53
  steps:
54
54
  - name: Deploy docs to GitHub Pages
55
55
  id: deployment
56
- uses: actions/deploy-pages@v4
56
+ uses: actions/deploy-pages@v5
@@ -22,7 +22,7 @@ jobs:
22
22
  - windows-latest
23
23
  steps:
24
24
  - name: Checkout
25
- uses: actions/checkout@v4
25
+ uses: actions/checkout@v6
26
26
 
27
27
  - name: Install Rust toolchain
28
28
  uses: dtolnay/rust-toolchain@stable
@@ -20,7 +20,7 @@ jobs:
20
20
  - windows-2022
21
21
  steps:
22
22
  - name: Checkout
23
- uses: actions/checkout@v4
23
+ uses: actions/checkout@v6
24
24
 
25
25
  - name: Install Rust toolchain
26
26
  uses: dtolnay/rust-toolchain@stable
@@ -32,7 +32,7 @@ jobs:
32
32
 
33
33
  steps:
34
34
  - name: Checkout
35
- uses: actions/checkout@v4
35
+ uses: actions/checkout@v6
36
36
 
37
37
  - name: Install Rust toolchain
38
38
  uses: dtolnay/rust-toolchain@stable
@@ -32,7 +32,7 @@ jobs:
32
32
  - windows-latest
33
33
  steps:
34
34
  - name: Checkout
35
- uses: actions/checkout@v4
35
+ uses: actions/checkout@v6
36
36
 
37
37
  - name: Install Rust toolchain
38
38
  uses: dtolnay/rust-toolchain@stable
@@ -41,7 +41,7 @@ jobs:
41
41
  uses: Swatinem/rust-cache@v2
42
42
 
43
43
  - name: Set up Python
44
- uses: actions/setup-python@v5
44
+ uses: actions/setup-python@v6
45
45
  with:
46
46
  python-version: "3.12"
47
47
 
@@ -52,7 +52,7 @@ jobs:
52
52
  run: python -m maturin build --release --out dist
53
53
 
54
54
  - name: Upload wheel artifacts
55
- uses: actions/upload-artifact@v4
55
+ uses: actions/upload-artifact@v7
56
56
  with:
57
57
  name: wheels-${{ matrix.os }}
58
58
  path: dist/*.whl
@@ -63,10 +63,10 @@ jobs:
63
63
  runs-on: ubuntu-latest
64
64
  steps:
65
65
  - name: Checkout
66
- uses: actions/checkout@v4
66
+ uses: actions/checkout@v6
67
67
 
68
68
  - name: Set up Python
69
- uses: actions/setup-python@v5
69
+ uses: actions/setup-python@v6
70
70
  with:
71
71
  python-version: "3.12"
72
72
 
@@ -77,7 +77,7 @@ jobs:
77
77
  run: python -m maturin sdist --out dist
78
78
 
79
79
  - name: Upload sdist artifact
80
- uses: actions/upload-artifact@v4
80
+ uses: actions/upload-artifact@v7
81
81
  with:
82
82
  name: sdist
83
83
  path: dist/*.tar.gz
@@ -96,7 +96,7 @@ jobs:
96
96
  id-token: write
97
97
  steps:
98
98
  - name: Download built distributions
99
- uses: actions/download-artifact@v4
99
+ uses: actions/download-artifact@v8
100
100
  with:
101
101
  pattern: "*"
102
102
  path: dist
@@ -123,7 +123,7 @@ jobs:
123
123
  id-token: write
124
124
  steps:
125
125
  - name: Download built distributions
126
- uses: actions/download-artifact@v4
126
+ uses: actions/download-artifact@v8
127
127
  with:
128
128
  pattern: "*"
129
129
  path: dist
@@ -13,7 +13,7 @@ jobs:
13
13
  runs-on: ubuntu-latest
14
14
  steps:
15
15
  - name: Checkout
16
- uses: actions/checkout@v4
16
+ uses: actions/checkout@v6
17
17
 
18
18
  - name: Install Rust toolchain
19
19
  uses: dtolnay/rust-toolchain@stable
@@ -15,7 +15,7 @@ jobs:
15
15
  runs-on: ubuntu-latest
16
16
  steps:
17
17
  - name: Checkout
18
- uses: actions/checkout@v4
18
+ uses: actions/checkout@v6
19
19
 
20
20
  - name: Install Rust toolchain
21
21
  uses: dtolnay/rust-toolchain@stable
@@ -7,6 +7,18 @@ and this project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.ht
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.6.0] - 2026-05-03
11
+
12
+ ### Added
13
+
14
+ - **Dump seal** (leading `-- dumpling-seal:` SQL comment): records Dumpling version, security profile, a SHA-256 fingerprint of the resolved policy, and runtime CLI options that affect transforms (`--format`, sorted `--include-table` / `--exclude-table`, effective PRNG seed in standard profile). When the input already begins with a **matching** seal, the remainder is copied through unchanged; stale or unknown seal lines are stripped and the dump is re-processed. See README for full semantics ([#58](https://github.com/ababic/dumpling/pull/58)).
15
+ - **`--stats`**: output now also includes `wall_ms`, `domain_cache_hits`, and `domain_cache_misses` (alongside the existing row and cell counters) for quick profiling of large runs ([#59](https://github.com/ababic/dumpling/pull/59)).
16
+ - **`CONTRIBUTORS.md`** ([#59](https://github.com/ababic/dumpling/pull/59)).
17
+
18
+ ### Changed
19
+
20
+ - **Domain-mapped replacement values** use shared `Arc<str>` storage so repeated lookups reuse the same allocation ([#59](https://github.com/ababic/dumpling/pull/59)).
21
+
10
22
  ## [0.5.0] - 2026-05-03
11
23
 
12
24
  ### Added
@@ -89,6 +101,7 @@ and this project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.ht
89
101
  - Configurable output scan severities and per-category thresholds via `[output_scan]`.
90
102
  - JSON report section for output scan findings including category, count, threshold, severity, and sample locations.
91
103
 
104
+ [0.6.0]: https://github.com/ababic/dumpling/compare/v0.5.0...v0.6.0
92
105
  [0.5.0]: https://github.com/ababic/dumpling/compare/v0.4.3...v0.5.0
93
106
  [0.4.3]: https://github.com/ababic/dumpling/compare/v0.4.2...v0.4.3
94
107
  [0.4.2]: https://github.com/ababic/dumpling/compare/v0.4.1...v0.4.2
@@ -0,0 +1,6 @@
1
+ # Contributors
2
+
3
+ Thank you to everyone who has helped improve Dumpling.
4
+
5
+ - **Andy Babic** — creator and maintainer
6
+ - **Jordan Hale** — performance and observability (including AI-assisted patches)
@@ -262,7 +262,7 @@ dependencies = [
262
262
 
263
263
  [[package]]
264
264
  name = "dumpling"
265
- version = "0.5.0"
265
+ version = "0.6.0"
266
266
  dependencies = [
267
267
  "anyhow",
268
268
  "chrono",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "dumpling"
3
- version = "0.5.0"
3
+ version = "0.6.0"
4
4
  edition = "2021"
5
5
  readme = "README.md"
6
6
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dumpling-cli
3
- Version: 0.5.0
3
+ Version: 0.6.0
4
4
  Classifier: Development Status :: 4 - Beta
5
5
  Classifier: Environment :: Console
6
6
  Classifier: Intended Audience :: Developers
@@ -129,6 +129,16 @@ If no configuration is found, Dumpling fails closed by default and exits non-zer
129
129
  The error output lists every checked location. Use `--allow-noop` to explicitly
130
130
  permit no-op behavior.
131
131
 
132
+ ### Dump seal (always on)
133
+
134
+ Every successful run that writes output prefixes the stream with a single-line SQL comment:
135
+
136
+ `-- dumpling-seal: v=2 version=<semver> profile=<standard|hardened> sha256=<64 hex chars>`
137
+
138
+ The `sha256` is over canonical JSON that includes the Dumpling version, the active security profile, a stable encoding of the resolved policy (rules, row filters, column cases, sensitive columns, output scan, global salt), and **runtime options** that affect transforms: `--format`, sorted `--include-table` / `--exclude-table` patterns, and the effective `--seed` / `DUMPLING_SEED` value in standard profile (`null` in hardened, where seeds are ignored).
139
+
140
+ If the **input** already begins with a seal line and it **matches** the current run, Dumpling copies the rest of the file through unchanged. If the line looks like a seal but does **not** match (stale policy, different flags, or older `v=`), that line is **dropped** and the dump is re-processed so you do not end up with two seal lines. `--strict-coverage` cannot be combined with a matching seal: because table definitions are not scanned in passthrough mode, the run aborts with an error instead of passing the dump through. `--check` writes no output and therefore emits no seal line.
141
+
132
142
  ---
133
143
 
134
144
  ## Configuration (TOML)
@@ -108,6 +108,16 @@ If no configuration is found, Dumpling fails closed by default and exits non-zer
108
108
  The error output lists every checked location. Use `--allow-noop` to explicitly
109
109
  permit no-op behavior.
110
110
 
111
+ ### Dump seal (always on)
112
+
113
+ Every successful run that writes output prefixes the stream with a single-line SQL comment:
114
+
115
+ `-- dumpling-seal: v=2 version=<semver> profile=<standard|hardened> sha256=<64 hex chars>`
116
+
117
+ The `sha256` is over canonical JSON that includes the Dumpling version, the active security profile, a stable encoding of the resolved policy (rules, row filters, column cases, sensitive columns, output scan, global salt), and **runtime options** that affect transforms: `--format`, sorted `--include-table` / `--exclude-table` patterns, and the effective `--seed` / `DUMPLING_SEED` value in standard profile (`null` in hardened, where seeds are ignored).
118
+
119
+ If the **input** already begins with a seal line and it **matches** the current run, Dumpling copies the rest of the file through unchanged. If the line looks like a seal but does **not** match (stale policy, different flags, or older `v=`), that line is **dropped** and the dump is re-processed so you do not end up with two seal lines. `--strict-coverage` cannot be combined with a matching seal: because table definitions are not scanned in passthrough mode, the run aborts with an error instead of passing the dump through. `--check` writes no output and therefore emits no seal line.
120
+
111
121
  ---
112
122
 
113
123
  ## Configuration (TOML)
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "dumpling-cli"
7
- version = "0.5.0"
7
+ version = "0.6.0"
8
8
  description = "Static anonymizer for plain SQL dumps (PostgreSQL, SQLite, SQL Server)."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -219,10 +219,10 @@ fn replacement_to_json_value(repl: &Replacement) -> serde_json::Value {
219
219
  return serde_json::Value::Null;
220
220
  }
221
221
  if repl.force_quoted {
222
- return serde_json::Value::String(repl.value.clone());
222
+ return serde_json::Value::String(repl.value.as_ref().to_string());
223
223
  }
224
- serde_json::from_str(&repl.value)
225
- .unwrap_or_else(|_| serde_json::Value::String(repl.value.clone()))
224
+ serde_json::from_str(repl.value.as_ref())
225
+ .unwrap_or_else(|_| serde_json::Value::String(repl.value.as_ref().to_string()))
226
226
  }
227
227
 
228
228
  /// When rewriting JSON at a path, map `Replacement` back into [`serde_json::Value`] while keeping
@@ -238,11 +238,11 @@ fn coerce_json_path_replacement(
238
238
  }
239
239
  match original {
240
240
  serde_json::Value::Bool(_) => {
241
- if let Some(b) = parse_loose_json_bool(&repl.value) {
241
+ if let Some(b) = parse_loose_json_bool(repl.value.as_ref()) {
242
242
  return serde_json::Value::Bool(b);
243
243
  }
244
244
  if !repl.force_quoted {
245
- if let Ok(v) = serde_json::from_str::<serde_json::Value>(&repl.value) {
245
+ if let Ok(v) = serde_json::from_str::<serde_json::Value>(repl.value.as_ref()) {
246
246
  match v {
247
247
  serde_json::Value::Bool(b) => return serde_json::Value::Bool(b),
248
248
  serde_json::Value::Number(n) => {
@@ -257,27 +257,27 @@ fn coerce_json_path_replacement(
257
257
  }
258
258
  }
259
259
  }
260
- serde_json::Value::String(repl.value.clone())
260
+ serde_json::Value::String(repl.value.as_ref().to_string())
261
261
  }
262
262
  serde_json::Value::Number(_) => {
263
- if let Some(n) = parse_loose_json_number(&repl.value) {
263
+ if let Some(n) = parse_loose_json_number(repl.value.as_ref()) {
264
264
  return serde_json::Value::Number(n);
265
265
  }
266
266
  if !repl.force_quoted {
267
267
  if let Ok(serde_json::Value::Number(n)) =
268
- serde_json::from_str::<serde_json::Value>(&repl.value)
268
+ serde_json::from_str::<serde_json::Value>(repl.value.as_ref())
269
269
  {
270
270
  return serde_json::Value::Number(n);
271
271
  }
272
272
  }
273
- serde_json::Value::String(repl.value.clone())
273
+ serde_json::Value::String(repl.value.as_ref().to_string())
274
274
  }
275
275
  serde_json::Value::String(_) => {
276
276
  if repl.force_quoted {
277
- serde_json::Value::String(repl.value.clone())
277
+ serde_json::Value::String(repl.value.as_ref().to_string())
278
278
  } else {
279
- serde_json::from_str(&repl.value)
280
- .unwrap_or_else(|_| serde_json::Value::String(repl.value.clone()))
279
+ serde_json::from_str(repl.value.as_ref())
280
+ .unwrap_or_else(|_| serde_json::Value::String(repl.value.as_ref().to_string()))
281
281
  }
282
282
  }
283
283
  serde_json::Value::Null => replacement_to_json_value(repl),
@@ -1,10 +1,12 @@
1
1
  use std::fs::File;
2
2
  use std::io::{self, BufRead, BufReader, BufWriter, Write};
3
+ use std::sync::atomic::Ordering;
3
4
 
4
5
  /// Larger than default 8 KiB to reduce syscall overhead on big dumps.
5
6
  const IO_BUF_CAPACITY: usize = 256 * 1024;
6
7
  use std::path::{Path, PathBuf};
7
8
  use std::process::{Command, Stdio};
9
+ use std::time::Instant;
8
10
 
9
11
  use clap::{ArgAction, Parser, Subcommand};
10
12
 
@@ -13,6 +15,7 @@ mod filter;
13
15
  mod lint;
14
16
  mod report;
15
17
  mod scan;
18
+ mod seal;
16
19
  mod settings;
17
20
  mod sql;
18
21
  mod transform;
@@ -21,9 +24,16 @@ use anyhow::Context;
21
24
  use regex::Regex;
22
25
  use report::Reporter;
23
26
  use scan::{OutputScanner, ScanningWriter};
27
+ use seal::{
28
+ compute_seal_digest, format_seal_line, read_first_line_for_seal, FirstLineReplayBufRead,
29
+ SealFirstLine, SealRuntimeParams,
30
+ };
24
31
  use settings::ResolvedConfig;
25
32
  use sql::{DumpFormat, SqlStreamProcessor};
26
- use transform::{set_hardened_profile, set_random_seed, AnonymizerRegistry, SecurityProfile};
33
+ use transform::{
34
+ prng_seed_override_for_fingerprint, set_hardened_profile, set_random_seed, AnonymizerRegistry,
35
+ SecurityProfile,
36
+ };
27
37
 
28
38
  #[derive(Parser, Debug)]
29
39
  #[command(
@@ -290,6 +300,13 @@ fn run_anonymize(cli: Cli) -> anyhow::Result<()> {
290
300
  let include_res = compile_patterns(&cli.include_table)?;
291
301
  let exclude_res = compile_patterns(&cli.exclude_table)?;
292
302
 
303
+ let seal_runtime = SealRuntimeParams::new(
304
+ dump_format,
305
+ &cli.include_table,
306
+ &cli.exclude_table,
307
+ prng_seed_override_for_fingerprint(),
308
+ );
309
+
293
310
  // Determine IO (optional pg_restore child when --dump-decode)
294
311
  let mut pg_restore_child: Option<std::process::Child> = None;
295
312
  let (mut reader, input_path_for_inplace): (Box<dyn BufRead>, Option<PathBuf>) = if cli
@@ -429,7 +446,6 @@ fn run_anonymize(cli: Cli) -> anyhow::Result<()> {
429
446
  .unwrap_or_else(|| Reporter::new(false));
430
447
  reporter.report.security_profile = security_profile_name.to_string();
431
448
 
432
- // Process SQL stream
433
449
  let mut processor = SqlStreamProcessor::new(
434
450
  anonymizers,
435
451
  resolved_config,
@@ -438,12 +454,65 @@ fn run_anonymize(cli: Cli) -> anyhow::Result<()> {
438
454
  Some(&mut reporter),
439
455
  dump_format,
440
456
  );
457
+
458
+ let seal_digest = if cli.check {
459
+ None
460
+ } else {
461
+ Some(compute_seal_digest(
462
+ processor.config_snapshot(),
463
+ security_profile_name,
464
+ &seal_runtime,
465
+ )?)
466
+ };
467
+
441
468
  let mut writer = output;
442
- let proc_res = if let Some(scanner) = output_scanner.as_mut() {
443
- let mut scanning_writer = ScanningWriter::new(&mut writer, scanner);
444
- processor.process(&mut reader, &mut scanning_writer)
469
+
470
+ let seal_first = read_first_line_for_seal(
471
+ reader.as_mut(),
472
+ processor.config_snapshot(),
473
+ security_profile_name,
474
+ &seal_runtime,
475
+ )?;
476
+
477
+ if matches!(seal_first, SealFirstLine::TrustedPassthrough) && cli.strict_coverage {
478
+ anyhow::bail!(
479
+ "--strict-coverage cannot be used when the input begins with a matching seal; \
480
+ the dump is passed through without parsing table definitions"
481
+ );
482
+ }
483
+
484
+ let replay_first = match &seal_first {
485
+ SealFirstLine::TrustedPassthrough | SealFirstLine::StaleSealStripped => None,
486
+ SealFirstLine::Replay(v) if v.is_empty() => None,
487
+ SealFirstLine::Replay(v) => Some(v.clone()),
488
+ };
489
+ let mut adapted_reader = FirstLineReplayBufRead::new(reader.as_mut(), replay_first);
490
+
491
+ let run_started = Instant::now();
492
+ let proc_res: anyhow::Result<()> = if matches!(seal_first, SealFirstLine::TrustedPassthrough) {
493
+ if let Some(ref digest) = seal_digest {
494
+ writer.write_all(format_seal_line(security_profile_name, digest).as_bytes())?;
495
+ }
496
+ if let Some(scanner) = output_scanner.as_mut() {
497
+ let mut scanning_writer = ScanningWriter::new(&mut writer, scanner);
498
+ std::io::copy(&mut adapted_reader, &mut scanning_writer)
499
+ .map(|_| ())
500
+ .map_err(anyhow::Error::from)
501
+ } else {
502
+ std::io::copy(&mut adapted_reader, &mut writer)
503
+ .map(|_| ())
504
+ .map_err(anyhow::Error::from)
505
+ }
445
506
  } else {
446
- processor.process(&mut reader, &mut writer)
507
+ if let Some(ref digest) = seal_digest {
508
+ writer.write_all(format_seal_line(security_profile_name, digest).as_bytes())?;
509
+ }
510
+ if let Some(scanner) = output_scanner.as_mut() {
511
+ let mut scanning_writer = ScanningWriter::new(&mut writer, scanner);
512
+ processor.process(&mut adapted_reader, &mut scanning_writer)
513
+ } else {
514
+ processor.process(&mut adapted_reader, &mut writer)
515
+ }
447
516
  };
448
517
 
449
518
  if let Some(mut child) = pg_restore_child {
@@ -509,11 +578,23 @@ fn run_anonymize(cli: Cli) -> anyhow::Result<()> {
509
578
 
510
579
  // Emit stats or report if requested
511
580
  if cli.stats {
581
+ let elapsed_ms = run_started.elapsed().as_millis();
582
+ let domain_hits = processor
583
+ .anonymizers()
584
+ .domain_cache_hits
585
+ .load(Ordering::Relaxed);
586
+ let domain_misses = processor
587
+ .anonymizers()
588
+ .domain_cache_misses
589
+ .load(Ordering::Relaxed);
512
590
  eprintln!(
513
- "dumpling: rows processed={}, rows dropped={}, cells changed={}",
591
+ "dumpling: rows processed={}, rows dropped={}, cells changed={}, wall_ms={}, domain_cache_hits={}, domain_cache_misses={}",
514
592
  reporter.report.total_rows_processed,
515
593
  reporter.report.total_rows_dropped,
516
- reporter.report.total_cells_changed
594
+ reporter.report.total_cells_changed,
595
+ elapsed_ms,
596
+ domain_hits,
597
+ domain_misses
517
598
  );
518
599
  }
519
600
  if let Some(path) = cli.report.as_ref() {
@@ -591,7 +672,108 @@ fn has_allowed_extension(path: &Path, allow_exts: &[String]) -> bool {
591
672
  mod tests_main {
592
673
  use super::{has_allowed_extension, Cli, Commands};
593
674
  use clap::Parser;
675
+ use std::fs;
676
+ use std::io::Read;
594
677
  use std::path::PathBuf;
678
+ use std::process::Command;
679
+
680
+ #[test]
681
+ fn seal_emit_then_trust_roundtrip() {
682
+ let exe = match option_env!("CARGO_BIN_EXE_dumpling") {
683
+ Some(p) => PathBuf::from(p),
684
+ None => return,
685
+ };
686
+ let base =
687
+ std::env::temp_dir().join(format!("dumpling_seal_integration_{}", std::process::id()));
688
+ let conf = base.with_extension("toml");
689
+ let pass1_in = base.with_extension("p1.sql");
690
+ let pass1_out = base.with_extension("p2.sql");
691
+ let pass2_out = base.with_extension("p3.sql");
692
+
693
+ fs::write(
694
+ &conf,
695
+ r#"
696
+ [rules."public.users"]
697
+ email = { strategy = "email" }
698
+ "#,
699
+ )
700
+ .unwrap();
701
+ fs::write(
702
+ &pass1_in,
703
+ "INSERT INTO public.users (email) VALUES ('alice@example.com');\n",
704
+ )
705
+ .unwrap();
706
+
707
+ let s1 = Command::new(&exe)
708
+ .args([
709
+ "-c",
710
+ conf.to_str().unwrap(),
711
+ "-i",
712
+ pass1_in.to_str().unwrap(),
713
+ "-o",
714
+ pass1_out.to_str().unwrap(),
715
+ "--seed",
716
+ "42",
717
+ ])
718
+ .output()
719
+ .unwrap();
720
+ assert!(
721
+ s1.status.success(),
722
+ "pass1 stderr={}",
723
+ String::from_utf8_lossy(&s1.stderr)
724
+ );
725
+
726
+ let mut sealed = String::new();
727
+ fs::File::open(&pass1_out)
728
+ .unwrap()
729
+ .read_to_string(&mut sealed)
730
+ .unwrap();
731
+ let first = sealed.lines().next().unwrap_or("");
732
+ assert!(
733
+ first.starts_with("-- dumpling-seal:"),
734
+ "expected seal prefix, got: {first:?}"
735
+ );
736
+ assert!(
737
+ !sealed.contains("alice@example.com"),
738
+ "expected anonymization in pass1"
739
+ );
740
+
741
+ let s2 = Command::new(&exe)
742
+ .args([
743
+ "-c",
744
+ conf.to_str().unwrap(),
745
+ "-i",
746
+ pass1_out.to_str().unwrap(),
747
+ "-o",
748
+ pass2_out.to_str().unwrap(),
749
+ "--seed",
750
+ "42",
751
+ ])
752
+ .output()
753
+ .unwrap();
754
+ assert!(
755
+ s2.status.success(),
756
+ "pass2 stderr={}",
757
+ String::from_utf8_lossy(&s2.stderr)
758
+ );
759
+
760
+ let mut final_out = String::new();
761
+ fs::File::open(&pass2_out)
762
+ .unwrap()
763
+ .read_to_string(&mut final_out)
764
+ .unwrap();
765
+ let rest_mid: String = sealed.lines().skip(1).collect::<Vec<_>>().join("\n");
766
+ let rest_out: String = final_out.lines().skip(1).collect::<Vec<_>>().join("\n");
767
+ assert_eq!(
768
+ rest_mid, rest_out,
769
+ "trusted pass-through should preserve dump body after seal line"
770
+ );
771
+
772
+ let _ = fs::remove_file(&conf);
773
+ let _ = fs::remove_file(&pass1_in);
774
+ let _ = fs::remove_file(&pass1_out);
775
+ let _ = fs::remove_file(&pass2_out);
776
+ }
595
777
 
596
778
  #[test]
597
779
  fn test_allowed_extensions() {