dumpling-cli 0.5.0__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/.github/workflows/ci.yml +1 -1
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/.github/workflows/docs-pr.yml +1 -1
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/.github/workflows/docs.yml +3 -3
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/.github/workflows/platform-compat-latest.yml +1 -1
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/.github/workflows/platform-compat-matrix.yml +1 -1
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/.github/workflows/policy-lint.yml +1 -1
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/.github/workflows/publish.yml +8 -8
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/.github/workflows/release.yml +1 -1
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/.github/workflows/tests.yml +1 -1
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/CHANGELOG.md +13 -0
- dumpling_cli-0.6.0/CONTRIBUTORS.md +6 -0
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/Cargo.lock +1 -1
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/Cargo.toml +1 -1
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/PKG-INFO +11 -1
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/README.md +10 -0
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/pyproject.toml +1 -1
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/src/filter.rs +12 -12
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/src/main.rs +190 -8
- dumpling_cli-0.6.0/src/seal.rs +482 -0
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/src/settings.rs +9 -9
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/src/sql.rs +14 -4
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/src/transform.rs +33 -10
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/.dumplingconf.example +0 -0
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/.gitignore +0 -0
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/AGENTS.md +0 -0
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/CONTRIBUTING.md +0 -0
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/MAINTENANCE.md +0 -0
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/assets/logo.svg +0 -0
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/book.toml +0 -0
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/datetime_out.sql +0 -0
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/datetime_sample.sql +0 -0
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/docs/src/SUMMARY.md +0 -0
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/docs/src/ci-guardrails.md +0 -0
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/docs/src/configuration.md +0 -0
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/docs/src/getting-started.md +0 -0
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/docs/src/index.md +0 -0
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/docs/src/releasing.md +0 -0
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/rust-toolchain.toml +0 -0
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/scripts/setup-dev.sh +0 -0
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/src/faker_dispatch.rs +0 -0
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/src/lint.rs +0 -0
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/src/report.rs +0 -0
- {dumpling_cli-0.5.0 → dumpling_cli-0.6.0}/src/scan.rs +0 -0
|
@@ -26,7 +26,7 @@ jobs:
|
|
|
26
26
|
contents: read
|
|
27
27
|
steps:
|
|
28
28
|
- name: Checkout
|
|
29
|
-
uses: actions/checkout@
|
|
29
|
+
uses: actions/checkout@v6
|
|
30
30
|
|
|
31
31
|
- name: Install mdBook
|
|
32
32
|
uses: peaceiris/actions-mdbook@v2
|
|
@@ -37,7 +37,7 @@ jobs:
|
|
|
37
37
|
run: mdbook build
|
|
38
38
|
|
|
39
39
|
- name: Upload Pages deployment artifact
|
|
40
|
-
uses: actions/upload-pages-artifact@
|
|
40
|
+
uses: actions/upload-pages-artifact@v5
|
|
41
41
|
with:
|
|
42
42
|
path: docs/book
|
|
43
43
|
|
|
@@ -53,4 +53,4 @@ jobs:
|
|
|
53
53
|
steps:
|
|
54
54
|
- name: Deploy docs to GitHub Pages
|
|
55
55
|
id: deployment
|
|
56
|
-
uses: actions/deploy-pages@
|
|
56
|
+
uses: actions/deploy-pages@v5
|
|
@@ -32,7 +32,7 @@ jobs:
|
|
|
32
32
|
- windows-latest
|
|
33
33
|
steps:
|
|
34
34
|
- name: Checkout
|
|
35
|
-
uses: actions/checkout@
|
|
35
|
+
uses: actions/checkout@v6
|
|
36
36
|
|
|
37
37
|
- name: Install Rust toolchain
|
|
38
38
|
uses: dtolnay/rust-toolchain@stable
|
|
@@ -41,7 +41,7 @@ jobs:
|
|
|
41
41
|
uses: Swatinem/rust-cache@v2
|
|
42
42
|
|
|
43
43
|
- name: Set up Python
|
|
44
|
-
uses: actions/setup-python@
|
|
44
|
+
uses: actions/setup-python@v6
|
|
45
45
|
with:
|
|
46
46
|
python-version: "3.12"
|
|
47
47
|
|
|
@@ -52,7 +52,7 @@ jobs:
|
|
|
52
52
|
run: python -m maturin build --release --out dist
|
|
53
53
|
|
|
54
54
|
- name: Upload wheel artifacts
|
|
55
|
-
uses: actions/upload-artifact@
|
|
55
|
+
uses: actions/upload-artifact@v7
|
|
56
56
|
with:
|
|
57
57
|
name: wheels-${{ matrix.os }}
|
|
58
58
|
path: dist/*.whl
|
|
@@ -63,10 +63,10 @@ jobs:
|
|
|
63
63
|
runs-on: ubuntu-latest
|
|
64
64
|
steps:
|
|
65
65
|
- name: Checkout
|
|
66
|
-
uses: actions/checkout@
|
|
66
|
+
uses: actions/checkout@v6
|
|
67
67
|
|
|
68
68
|
- name: Set up Python
|
|
69
|
-
uses: actions/setup-python@
|
|
69
|
+
uses: actions/setup-python@v6
|
|
70
70
|
with:
|
|
71
71
|
python-version: "3.12"
|
|
72
72
|
|
|
@@ -77,7 +77,7 @@ jobs:
|
|
|
77
77
|
run: python -m maturin sdist --out dist
|
|
78
78
|
|
|
79
79
|
- name: Upload sdist artifact
|
|
80
|
-
uses: actions/upload-artifact@
|
|
80
|
+
uses: actions/upload-artifact@v7
|
|
81
81
|
with:
|
|
82
82
|
name: sdist
|
|
83
83
|
path: dist/*.tar.gz
|
|
@@ -96,7 +96,7 @@ jobs:
|
|
|
96
96
|
id-token: write
|
|
97
97
|
steps:
|
|
98
98
|
- name: Download built distributions
|
|
99
|
-
uses: actions/download-artifact@
|
|
99
|
+
uses: actions/download-artifact@v8
|
|
100
100
|
with:
|
|
101
101
|
pattern: "*"
|
|
102
102
|
path: dist
|
|
@@ -123,7 +123,7 @@ jobs:
|
|
|
123
123
|
id-token: write
|
|
124
124
|
steps:
|
|
125
125
|
- name: Download built distributions
|
|
126
|
-
uses: actions/download-artifact@
|
|
126
|
+
uses: actions/download-artifact@v8
|
|
127
127
|
with:
|
|
128
128
|
pattern: "*"
|
|
129
129
|
path: dist
|
|
@@ -7,6 +7,18 @@ and this project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.ht
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.6.0] - 2026-05-03
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- **Dump seal** (leading `-- dumpling-seal:` SQL comment): records Dumpling version, security profile, a SHA-256 fingerprint of the resolved policy, and runtime CLI options that affect transforms (`--format`, sorted `--include-table` / `--exclude-table`, effective PRNG seed in standard profile). When the input already begins with a **matching** seal, the remainder is copied through unchanged; stale or unknown seal lines are stripped and the dump is re-processed. See README for full semantics ([#58](https://github.com/ababic/dumpling/pull/58)).
|
|
15
|
+
- **`--stats`**: prints `wall_ms` plus `domain_cache_hits` and `domain_cache_misses` for quick profiling of large runs ([#59](https://github.com/ababic/dumpling/pull/59)).
|
|
16
|
+
- **`CONTRIBUTORS.md`** ([#59](https://github.com/ababic/dumpling/pull/59)).
|
|
17
|
+
|
|
18
|
+
### Changed
|
|
19
|
+
|
|
20
|
+
- **Domain-mapped replacement values** use shared `Arc<str>` storage so repeated lookups reuse the same allocation ([#59](https://github.com/ababic/dumpling/pull/59)).
|
|
21
|
+
|
|
10
22
|
## [0.5.0] - 2026-05-03
|
|
11
23
|
|
|
12
24
|
### Added
|
|
@@ -89,6 +101,7 @@ and this project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.ht
|
|
|
89
101
|
- Configurable output scan severities and per-category thresholds via `[output_scan]`.
|
|
90
102
|
- JSON report section for output scan findings including category, count, threshold, severity, and sample locations.
|
|
91
103
|
|
|
104
|
+
[0.6.0]: https://github.com/ababic/dumpling/compare/v0.5.0...v0.6.0
|
|
92
105
|
[0.5.0]: https://github.com/ababic/dumpling/compare/v0.4.3...v0.5.0
|
|
93
106
|
[0.4.3]: https://github.com/ababic/dumpling/compare/v0.4.2...v0.4.3
|
|
94
107
|
[0.4.2]: https://github.com/ababic/dumpling/compare/v0.4.1...v0.4.2
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dumpling-cli
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Classifier: Development Status :: 4 - Beta
|
|
5
5
|
Classifier: Environment :: Console
|
|
6
6
|
Classifier: Intended Audience :: Developers
|
|
@@ -129,6 +129,16 @@ If no configuration is found, Dumpling fails closed by default and exits non-zer
|
|
|
129
129
|
The error output lists every checked location. Use `--allow-noop` to explicitly
|
|
130
130
|
permit no-op behavior.
|
|
131
131
|
|
|
132
|
+
### Dump seal (always on)
|
|
133
|
+
|
|
134
|
+
Every successful run that writes output prefixes the stream with a single-line SQL comment:
|
|
135
|
+
|
|
136
|
+
`-- dumpling-seal: v=2 version=<semver> profile=<standard|hardened> sha256=<64 hex chars>`
|
|
137
|
+
|
|
138
|
+
The `sha256` is over canonical JSON that includes the Dumpling version, the active security profile, a stable encoding of the resolved policy (rules, row filters, column cases, sensitive columns, output scan, global salt), and **runtime options** that affect transforms: `--format`, sorted `--include-table` / `--exclude-table` patterns, and the effective `--seed` / `DUMPLING_SEED` value in standard profile (`null` in hardened, where seeds are ignored).
|
|
139
|
+
|
|
140
|
+
If the **input** already begins with a seal line and it **matches** the current run, Dumpling copies the rest of the file through unchanged. If the line looks like a seal but does **not** match (stale policy, different flags, or older `v=`), that line is **dropped** and the dump is re-processed so you do not end up with two seal lines. `--strict-coverage` cannot be combined with a matching seal (table definitions are not scanned in passthrough mode). `--check` writes no output and therefore emits no seal line.
|
|
141
|
+
|
|
132
142
|
---
|
|
133
143
|
|
|
134
144
|
## Configuration (TOML)
|
|
@@ -108,6 +108,16 @@ If no configuration is found, Dumpling fails closed by default and exits non-zer
|
|
|
108
108
|
The error output lists every checked location. Use `--allow-noop` to explicitly
|
|
109
109
|
permit no-op behavior.
|
|
110
110
|
|
|
111
|
+
### Dump seal (always on)
|
|
112
|
+
|
|
113
|
+
Every successful run that writes output prefixes the stream with a single-line SQL comment:
|
|
114
|
+
|
|
115
|
+
`-- dumpling-seal: v=2 version=<semver> profile=<standard|hardened> sha256=<64 hex chars>`
|
|
116
|
+
|
|
117
|
+
The `sha256` is over canonical JSON that includes the Dumpling version, the active security profile, a stable encoding of the resolved policy (rules, row filters, column cases, sensitive columns, output scan, global salt), and **runtime options** that affect transforms: `--format`, sorted `--include-table` / `--exclude-table` patterns, and the effective `--seed` / `DUMPLING_SEED` value in standard profile (`null` in hardened, where seeds are ignored).
|
|
118
|
+
|
|
119
|
+
If the **input** already begins with a seal line and it **matches** the current run, Dumpling copies the rest of the file through unchanged. If the line looks like a seal but does **not** match (stale policy, different flags, or older `v=`), that line is **dropped** and the dump is re-processed so you do not end up with two seal lines. `--strict-coverage` cannot be combined with a matching seal (table definitions are not scanned in passthrough mode). `--check` writes no output and therefore emits no seal line.
|
|
120
|
+
|
|
111
121
|
---
|
|
112
122
|
|
|
113
123
|
## Configuration (TOML)
|
|
@@ -219,10 +219,10 @@ fn replacement_to_json_value(repl: &Replacement) -> serde_json::Value {
|
|
|
219
219
|
return serde_json::Value::Null;
|
|
220
220
|
}
|
|
221
221
|
if repl.force_quoted {
|
|
222
|
-
return serde_json::Value::String(repl.value.
|
|
222
|
+
return serde_json::Value::String(repl.value.as_ref().to_string());
|
|
223
223
|
}
|
|
224
|
-
serde_json::from_str(
|
|
225
|
-
.unwrap_or_else(|_| serde_json::Value::String(repl.value.
|
|
224
|
+
serde_json::from_str(repl.value.as_ref())
|
|
225
|
+
.unwrap_or_else(|_| serde_json::Value::String(repl.value.as_ref().to_string()))
|
|
226
226
|
}
|
|
227
227
|
|
|
228
228
|
/// When rewriting JSON at a path, map `Replacement` back into [`serde_json::Value`] while keeping
|
|
@@ -238,11 +238,11 @@ fn coerce_json_path_replacement(
|
|
|
238
238
|
}
|
|
239
239
|
match original {
|
|
240
240
|
serde_json::Value::Bool(_) => {
|
|
241
|
-
if let Some(b) = parse_loose_json_bool(
|
|
241
|
+
if let Some(b) = parse_loose_json_bool(repl.value.as_ref()) {
|
|
242
242
|
return serde_json::Value::Bool(b);
|
|
243
243
|
}
|
|
244
244
|
if !repl.force_quoted {
|
|
245
|
-
if let Ok(v) = serde_json::from_str::<serde_json::Value>(
|
|
245
|
+
if let Ok(v) = serde_json::from_str::<serde_json::Value>(repl.value.as_ref()) {
|
|
246
246
|
match v {
|
|
247
247
|
serde_json::Value::Bool(b) => return serde_json::Value::Bool(b),
|
|
248
248
|
serde_json::Value::Number(n) => {
|
|
@@ -257,27 +257,27 @@ fn coerce_json_path_replacement(
|
|
|
257
257
|
}
|
|
258
258
|
}
|
|
259
259
|
}
|
|
260
|
-
serde_json::Value::String(repl.value.
|
|
260
|
+
serde_json::Value::String(repl.value.as_ref().to_string())
|
|
261
261
|
}
|
|
262
262
|
serde_json::Value::Number(_) => {
|
|
263
|
-
if let Some(n) = parse_loose_json_number(
|
|
263
|
+
if let Some(n) = parse_loose_json_number(repl.value.as_ref()) {
|
|
264
264
|
return serde_json::Value::Number(n);
|
|
265
265
|
}
|
|
266
266
|
if !repl.force_quoted {
|
|
267
267
|
if let Ok(serde_json::Value::Number(n)) =
|
|
268
|
-
serde_json::from_str::<serde_json::Value>(
|
|
268
|
+
serde_json::from_str::<serde_json::Value>(repl.value.as_ref())
|
|
269
269
|
{
|
|
270
270
|
return serde_json::Value::Number(n);
|
|
271
271
|
}
|
|
272
272
|
}
|
|
273
|
-
serde_json::Value::String(repl.value.
|
|
273
|
+
serde_json::Value::String(repl.value.as_ref().to_string())
|
|
274
274
|
}
|
|
275
275
|
serde_json::Value::String(_) => {
|
|
276
276
|
if repl.force_quoted {
|
|
277
|
-
serde_json::Value::String(repl.value.
|
|
277
|
+
serde_json::Value::String(repl.value.as_ref().to_string())
|
|
278
278
|
} else {
|
|
279
|
-
serde_json::from_str(
|
|
280
|
-
.unwrap_or_else(|_| serde_json::Value::String(repl.value.
|
|
279
|
+
serde_json::from_str(repl.value.as_ref())
|
|
280
|
+
.unwrap_or_else(|_| serde_json::Value::String(repl.value.as_ref().to_string()))
|
|
281
281
|
}
|
|
282
282
|
}
|
|
283
283
|
serde_json::Value::Null => replacement_to_json_value(repl),
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
use std::fs::File;
|
|
2
2
|
use std::io::{self, BufRead, BufReader, BufWriter, Write};
|
|
3
|
+
use std::sync::atomic::Ordering;
|
|
3
4
|
|
|
4
5
|
/// Larger than default 8 KiB to reduce syscall overhead on big dumps.
|
|
5
6
|
const IO_BUF_CAPACITY: usize = 256 * 1024;
|
|
6
7
|
use std::path::{Path, PathBuf};
|
|
7
8
|
use std::process::{Command, Stdio};
|
|
9
|
+
use std::time::Instant;
|
|
8
10
|
|
|
9
11
|
use clap::{ArgAction, Parser, Subcommand};
|
|
10
12
|
|
|
@@ -13,6 +15,7 @@ mod filter;
|
|
|
13
15
|
mod lint;
|
|
14
16
|
mod report;
|
|
15
17
|
mod scan;
|
|
18
|
+
mod seal;
|
|
16
19
|
mod settings;
|
|
17
20
|
mod sql;
|
|
18
21
|
mod transform;
|
|
@@ -21,9 +24,16 @@ use anyhow::Context;
|
|
|
21
24
|
use regex::Regex;
|
|
22
25
|
use report::Reporter;
|
|
23
26
|
use scan::{OutputScanner, ScanningWriter};
|
|
27
|
+
use seal::{
|
|
28
|
+
compute_seal_digest, format_seal_line, read_first_line_for_seal, FirstLineReplayBufRead,
|
|
29
|
+
SealFirstLine, SealRuntimeParams,
|
|
30
|
+
};
|
|
24
31
|
use settings::ResolvedConfig;
|
|
25
32
|
use sql::{DumpFormat, SqlStreamProcessor};
|
|
26
|
-
use transform::{
|
|
33
|
+
use transform::{
|
|
34
|
+
prng_seed_override_for_fingerprint, set_hardened_profile, set_random_seed, AnonymizerRegistry,
|
|
35
|
+
SecurityProfile,
|
|
36
|
+
};
|
|
27
37
|
|
|
28
38
|
#[derive(Parser, Debug)]
|
|
29
39
|
#[command(
|
|
@@ -290,6 +300,13 @@ fn run_anonymize(cli: Cli) -> anyhow::Result<()> {
|
|
|
290
300
|
let include_res = compile_patterns(&cli.include_table)?;
|
|
291
301
|
let exclude_res = compile_patterns(&cli.exclude_table)?;
|
|
292
302
|
|
|
303
|
+
let seal_runtime = SealRuntimeParams::new(
|
|
304
|
+
dump_format,
|
|
305
|
+
&cli.include_table,
|
|
306
|
+
&cli.exclude_table,
|
|
307
|
+
prng_seed_override_for_fingerprint(),
|
|
308
|
+
);
|
|
309
|
+
|
|
293
310
|
// Determine IO (optional pg_restore child when --dump-decode)
|
|
294
311
|
let mut pg_restore_child: Option<std::process::Child> = None;
|
|
295
312
|
let (mut reader, input_path_for_inplace): (Box<dyn BufRead>, Option<PathBuf>) = if cli
|
|
@@ -429,7 +446,6 @@ fn run_anonymize(cli: Cli) -> anyhow::Result<()> {
|
|
|
429
446
|
.unwrap_or_else(|| Reporter::new(false));
|
|
430
447
|
reporter.report.security_profile = security_profile_name.to_string();
|
|
431
448
|
|
|
432
|
-
// Process SQL stream
|
|
433
449
|
let mut processor = SqlStreamProcessor::new(
|
|
434
450
|
anonymizers,
|
|
435
451
|
resolved_config,
|
|
@@ -438,12 +454,65 @@ fn run_anonymize(cli: Cli) -> anyhow::Result<()> {
|
|
|
438
454
|
Some(&mut reporter),
|
|
439
455
|
dump_format,
|
|
440
456
|
);
|
|
457
|
+
|
|
458
|
+
let seal_digest = if cli.check {
|
|
459
|
+
None
|
|
460
|
+
} else {
|
|
461
|
+
Some(compute_seal_digest(
|
|
462
|
+
processor.config_snapshot(),
|
|
463
|
+
security_profile_name,
|
|
464
|
+
&seal_runtime,
|
|
465
|
+
)?)
|
|
466
|
+
};
|
|
467
|
+
|
|
441
468
|
let mut writer = output;
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
469
|
+
|
|
470
|
+
let seal_first = read_first_line_for_seal(
|
|
471
|
+
reader.as_mut(),
|
|
472
|
+
processor.config_snapshot(),
|
|
473
|
+
security_profile_name,
|
|
474
|
+
&seal_runtime,
|
|
475
|
+
)?;
|
|
476
|
+
|
|
477
|
+
if matches!(seal_first, SealFirstLine::TrustedPassthrough) && cli.strict_coverage {
|
|
478
|
+
anyhow::bail!(
|
|
479
|
+
"--strict-coverage cannot be used when the input begins with a matching seal; \
|
|
480
|
+
the dump is passed through without parsing table definitions"
|
|
481
|
+
);
|
|
482
|
+
}
|
|
483
|
+
|
|
484
|
+
let replay_first = match &seal_first {
|
|
485
|
+
SealFirstLine::TrustedPassthrough | SealFirstLine::StaleSealStripped => None,
|
|
486
|
+
SealFirstLine::Replay(v) if v.is_empty() => None,
|
|
487
|
+
SealFirstLine::Replay(v) => Some(v.clone()),
|
|
488
|
+
};
|
|
489
|
+
let mut adapted_reader = FirstLineReplayBufRead::new(reader.as_mut(), replay_first);
|
|
490
|
+
|
|
491
|
+
let run_started = Instant::now();
|
|
492
|
+
let proc_res: anyhow::Result<()> = if matches!(seal_first, SealFirstLine::TrustedPassthrough) {
|
|
493
|
+
if let Some(ref digest) = seal_digest {
|
|
494
|
+
writer.write_all(format_seal_line(security_profile_name, digest).as_bytes())?;
|
|
495
|
+
}
|
|
496
|
+
if let Some(scanner) = output_scanner.as_mut() {
|
|
497
|
+
let mut scanning_writer = ScanningWriter::new(&mut writer, scanner);
|
|
498
|
+
std::io::copy(&mut adapted_reader, &mut scanning_writer)
|
|
499
|
+
.map(|_| ())
|
|
500
|
+
.map_err(anyhow::Error::from)
|
|
501
|
+
} else {
|
|
502
|
+
std::io::copy(&mut adapted_reader, &mut writer)
|
|
503
|
+
.map(|_| ())
|
|
504
|
+
.map_err(anyhow::Error::from)
|
|
505
|
+
}
|
|
445
506
|
} else {
|
|
446
|
-
|
|
507
|
+
if let Some(ref digest) = seal_digest {
|
|
508
|
+
writer.write_all(format_seal_line(security_profile_name, digest).as_bytes())?;
|
|
509
|
+
}
|
|
510
|
+
if let Some(scanner) = output_scanner.as_mut() {
|
|
511
|
+
let mut scanning_writer = ScanningWriter::new(&mut writer, scanner);
|
|
512
|
+
processor.process(&mut adapted_reader, &mut scanning_writer)
|
|
513
|
+
} else {
|
|
514
|
+
processor.process(&mut adapted_reader, &mut writer)
|
|
515
|
+
}
|
|
447
516
|
};
|
|
448
517
|
|
|
449
518
|
if let Some(mut child) = pg_restore_child {
|
|
@@ -509,11 +578,23 @@ fn run_anonymize(cli: Cli) -> anyhow::Result<()> {
|
|
|
509
578
|
|
|
510
579
|
// Emit stats or report if requested
|
|
511
580
|
if cli.stats {
|
|
581
|
+
let elapsed_ms = run_started.elapsed().as_millis();
|
|
582
|
+
let domain_hits = processor
|
|
583
|
+
.anonymizers()
|
|
584
|
+
.domain_cache_hits
|
|
585
|
+
.load(Ordering::Relaxed);
|
|
586
|
+
let domain_misses = processor
|
|
587
|
+
.anonymizers()
|
|
588
|
+
.domain_cache_misses
|
|
589
|
+
.load(Ordering::Relaxed);
|
|
512
590
|
eprintln!(
|
|
513
|
-
"dumpling: rows processed={}, rows dropped={}, cells changed={}",
|
|
591
|
+
"dumpling: rows processed={}, rows dropped={}, cells changed={}, wall_ms={}, domain_cache_hits={}, domain_cache_misses={}",
|
|
514
592
|
reporter.report.total_rows_processed,
|
|
515
593
|
reporter.report.total_rows_dropped,
|
|
516
|
-
reporter.report.total_cells_changed
|
|
594
|
+
reporter.report.total_cells_changed,
|
|
595
|
+
elapsed_ms,
|
|
596
|
+
domain_hits,
|
|
597
|
+
domain_misses
|
|
517
598
|
);
|
|
518
599
|
}
|
|
519
600
|
if let Some(path) = cli.report.as_ref() {
|
|
@@ -591,7 +672,108 @@ fn has_allowed_extension(path: &Path, allow_exts: &[String]) -> bool {
|
|
|
591
672
|
mod tests_main {
|
|
592
673
|
use super::{has_allowed_extension, Cli, Commands};
|
|
593
674
|
use clap::Parser;
|
|
675
|
+
use std::fs;
|
|
676
|
+
use std::io::Read;
|
|
594
677
|
use std::path::PathBuf;
|
|
678
|
+
use std::process::Command;
|
|
679
|
+
|
|
680
|
+
#[test]
|
|
681
|
+
fn seal_emit_then_trust_roundtrip() {
|
|
682
|
+
let exe = match option_env!("CARGO_BIN_EXE_dumpling") {
|
|
683
|
+
Some(p) => PathBuf::from(p),
|
|
684
|
+
None => return,
|
|
685
|
+
};
|
|
686
|
+
let base =
|
|
687
|
+
std::env::temp_dir().join(format!("dumpling_seal_integration_{}", std::process::id()));
|
|
688
|
+
let conf = base.with_extension("toml");
|
|
689
|
+
let pass1_in = base.with_extension("p1.sql");
|
|
690
|
+
let pass1_out = base.with_extension("p2.sql");
|
|
691
|
+
let pass2_out = base.with_extension("p3.sql");
|
|
692
|
+
|
|
693
|
+
fs::write(
|
|
694
|
+
&conf,
|
|
695
|
+
r#"
|
|
696
|
+
[rules."public.users"]
|
|
697
|
+
email = { strategy = "email" }
|
|
698
|
+
"#,
|
|
699
|
+
)
|
|
700
|
+
.unwrap();
|
|
701
|
+
fs::write(
|
|
702
|
+
&pass1_in,
|
|
703
|
+
"INSERT INTO public.users (email) VALUES ('alice@example.com');\n",
|
|
704
|
+
)
|
|
705
|
+
.unwrap();
|
|
706
|
+
|
|
707
|
+
let s1 = Command::new(&exe)
|
|
708
|
+
.args([
|
|
709
|
+
"-c",
|
|
710
|
+
conf.to_str().unwrap(),
|
|
711
|
+
"-i",
|
|
712
|
+
pass1_in.to_str().unwrap(),
|
|
713
|
+
"-o",
|
|
714
|
+
pass1_out.to_str().unwrap(),
|
|
715
|
+
"--seed",
|
|
716
|
+
"42",
|
|
717
|
+
])
|
|
718
|
+
.output()
|
|
719
|
+
.unwrap();
|
|
720
|
+
assert!(
|
|
721
|
+
s1.status.success(),
|
|
722
|
+
"pass1 stderr={}",
|
|
723
|
+
String::from_utf8_lossy(&s1.stderr)
|
|
724
|
+
);
|
|
725
|
+
|
|
726
|
+
let mut sealed = String::new();
|
|
727
|
+
fs::File::open(&pass1_out)
|
|
728
|
+
.unwrap()
|
|
729
|
+
.read_to_string(&mut sealed)
|
|
730
|
+
.unwrap();
|
|
731
|
+
let first = sealed.lines().next().unwrap_or("");
|
|
732
|
+
assert!(
|
|
733
|
+
first.starts_with("-- dumpling-seal:"),
|
|
734
|
+
"expected seal prefix, got: {first:?}"
|
|
735
|
+
);
|
|
736
|
+
assert!(
|
|
737
|
+
!sealed.contains("alice@example.com"),
|
|
738
|
+
"expected anonymization in pass1"
|
|
739
|
+
);
|
|
740
|
+
|
|
741
|
+
let s2 = Command::new(&exe)
|
|
742
|
+
.args([
|
|
743
|
+
"-c",
|
|
744
|
+
conf.to_str().unwrap(),
|
|
745
|
+
"-i",
|
|
746
|
+
pass1_out.to_str().unwrap(),
|
|
747
|
+
"-o",
|
|
748
|
+
pass2_out.to_str().unwrap(),
|
|
749
|
+
"--seed",
|
|
750
|
+
"42",
|
|
751
|
+
])
|
|
752
|
+
.output()
|
|
753
|
+
.unwrap();
|
|
754
|
+
assert!(
|
|
755
|
+
s2.status.success(),
|
|
756
|
+
"pass2 stderr={}",
|
|
757
|
+
String::from_utf8_lossy(&s2.stderr)
|
|
758
|
+
);
|
|
759
|
+
|
|
760
|
+
let mut final_out = String::new();
|
|
761
|
+
fs::File::open(&pass2_out)
|
|
762
|
+
.unwrap()
|
|
763
|
+
.read_to_string(&mut final_out)
|
|
764
|
+
.unwrap();
|
|
765
|
+
let rest_mid: String = sealed.lines().skip(1).collect::<Vec<_>>().join("\n");
|
|
766
|
+
let rest_out: String = final_out.lines().skip(1).collect::<Vec<_>>().join("\n");
|
|
767
|
+
assert_eq!(
|
|
768
|
+
rest_mid, rest_out,
|
|
769
|
+
"trusted pass-through should preserve dump body after seal line"
|
|
770
|
+
);
|
|
771
|
+
|
|
772
|
+
let _ = fs::remove_file(&conf);
|
|
773
|
+
let _ = fs::remove_file(&pass1_in);
|
|
774
|
+
let _ = fs::remove_file(&pass1_out);
|
|
775
|
+
let _ = fs::remove_file(&pass2_out);
|
|
776
|
+
}
|
|
595
777
|
|
|
596
778
|
#[test]
|
|
597
779
|
fn test_allowed_extensions() {
|