dumpling-cli 0.3.0__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/CHANGELOG.md +13 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/Cargo.lock +1 -1
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/Cargo.toml +1 -1
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/PKG-INFO +12 -3
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/README.md +11 -2
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/docs/src/configuration.md +23 -1
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/docs/src/getting-started.md +2 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/docs/src/index.md +1 -1
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/pyproject.toml +1 -1
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/src/main.rs +177 -26
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/.dumplingconf.example +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/.github/workflows/ci.yml +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/.github/workflows/docs-pr.yml +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/.github/workflows/docs.yml +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/.github/workflows/platform-compat-latest.yml +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/.github/workflows/platform-compat-matrix.yml +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/.github/workflows/policy-lint.yml +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/.github/workflows/publish.yml +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/.github/workflows/release.yml +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/.github/workflows/tests.yml +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/.gitignore +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/AGENTS.md +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/CONTRIBUTING.md +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/MAINTENANCE.md +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/book.toml +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/datetime_out.sql +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/datetime_sample.sql +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/docs/src/SUMMARY.md +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/docs/src/ci-guardrails.md +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/docs/src/releasing.md +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/rust-toolchain.toml +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/scripts/setup-dev.sh +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/src/faker_dispatch.rs +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/src/filter.rs +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/src/lint.rs +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/src/report.rs +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/src/scan.rs +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/src/settings.rs +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/src/sql.rs +0 -0
- {dumpling_cli-0.3.0 → dumpling_cli-0.4.0}/src/transform.rs +0 -0
|
@@ -7,6 +7,18 @@ and this project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.ht
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.4.0] - 2026-05-02
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- **`--dump-decode` CLI**: Decode PostgreSQL **custom-format** (`pg_dump -Fc`) or **directory-format** archives by running **`pg_restore -f -`** (plain SQL to stdout, no database), then anonymize—built for workflows such as **`heroku pg:backups:download`**. Requires PostgreSQL client tools (`pg_restore` on `PATH`, or **`--pg-restore-path`**).
|
|
15
|
+
- **`--dump-decode-arg`** (repeatable): Extra arguments forwarded to `pg_restore`.
|
|
16
|
+
- **`--dump-decode-keep-input`**: Keep the archive after a successful run. **By default** the `--input` path is **removed** after success so only anonymized output remains. **`--check`** with **`--dump-decode`** requires **`--dump-decode-keep-input`** (otherwise the dump would be deleted before config iteration).
|
|
17
|
+
|
|
18
|
+
### Changed
|
|
19
|
+
|
|
20
|
+
- README and mdBook documentation for PostgreSQL archive decoding and Heroku-style examples.
|
|
21
|
+
|
|
10
22
|
## [0.3.0] - 2026-05-02
|
|
11
23
|
|
|
12
24
|
### Added
|
|
@@ -43,5 +55,6 @@ and this project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.ht
|
|
|
43
55
|
- Configurable output scan severities and per-category thresholds via `[output_scan]`.
|
|
44
56
|
- JSON report section for output scan findings including category, count, threshold, severity, and sample locations.
|
|
45
57
|
|
|
58
|
+
[0.4.0]: https://github.com/ababic/dumpling/compare/v0.3.0...v0.4.0
|
|
46
59
|
[0.3.0]: https://github.com/ababic/dumpling/compare/v0.2.0...v0.3.0
|
|
47
60
|
[0.2.0]: https://github.com/ababic/dumpling/compare/v0.1.0...v0.2.0
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dumpling-cli
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Classifier: Development Status :: 4 - Beta
|
|
5
5
|
Classifier: Environment :: Console
|
|
6
6
|
Classifier: Intended Audience :: Developers
|
|
@@ -289,7 +289,16 @@ Produced by `pg_dump --format=plain`. Handles:
|
|
|
289
289
|
- `"double-quoted"` identifiers
|
|
290
290
|
- `''`-escaped string literals
|
|
291
291
|
|
|
292
|
-
Binary, custom, and directory formats from `pg_dump` are not
|
|
292
|
+
Binary, custom, and directory formats from `pg_dump` are not parsed directly — Dumpling’s SQL pipeline expects plain text. Use either:
|
|
293
|
+
|
|
294
|
+
- **`pg_dump --format=plain`** when you control capture, or
|
|
295
|
+
- **`dumpling --dump-decode`** with `--input` set to a **custom-format** (`.dump`) or **directory-format** folder: Dumpling runs `pg_restore -f -` and streams the resulting SQL (same as a manual `pg_restore` “script” output, no database required). Requires PostgreSQL client tools on `PATH` (`pg_restore`), or set `--pg-restore-path`. Use `--dump-decode-arg` to pass extra flags (e.g. `--no-owner --no-acl`). **By default** the archive is removed after a fully successful run; pass **`--dump-decode-keep-input`** to retain it. **`--check`** requires **`--dump-decode-keep-input`** so the archive still exists if changes would be detected.
|
|
296
|
+
|
|
297
|
+
Example (e.g. after `heroku pg:backups:download`):
|
|
298
|
+
|
|
299
|
+
```bash
|
|
300
|
+
dumpling --dump-decode -i latest.dump -c .dumplingconf -o anonymized.sql
|
|
301
|
+
```
|
|
293
302
|
|
|
294
303
|
### SQLite (`--format sqlite`)
|
|
295
304
|
|
|
@@ -477,5 +486,5 @@ See the [CI guardrails documentation](docs/src/ci-guardrails.md) for full pipeli
|
|
|
477
486
|
|
|
478
487
|
## Full documentation
|
|
479
488
|
|
|
480
|
-
Detailed docs, including the configuration reference and release process, are available at the project's [GitHub Pages site](https://github.
|
|
489
|
+
Detailed docs, including the configuration reference and release process, are available at the project's [GitHub Pages site](https://ababic.github.io/dumpling/) (built from `docs/src/`).
|
|
481
490
|
|
|
@@ -268,7 +268,16 @@ Produced by `pg_dump --format=plain`. Handles:
|
|
|
268
268
|
- `"double-quoted"` identifiers
|
|
269
269
|
- `''`-escaped string literals
|
|
270
270
|
|
|
271
|
-
Binary, custom, and directory formats from `pg_dump` are not
|
|
271
|
+
Binary, custom, and directory formats from `pg_dump` are not parsed directly — Dumpling’s SQL pipeline expects plain text. Use either:
|
|
272
|
+
|
|
273
|
+
- **`pg_dump --format=plain`** when you control capture, or
|
|
274
|
+
- **`dumpling --dump-decode`** with `--input` set to a **custom-format** (`.dump`) or **directory-format** folder: Dumpling runs `pg_restore -f -` and streams the resulting SQL (same as a manual `pg_restore` “script” output, no database required). Requires PostgreSQL client tools on `PATH` (`pg_restore`), or set `--pg-restore-path`. Use `--dump-decode-arg` to pass extra flags (e.g. `--no-owner --no-acl`). **By default** the archive is removed after a fully successful run; pass **`--dump-decode-keep-input`** to retain it. **`--check`** requires **`--dump-decode-keep-input`** so the archive still exists if changes would be detected.
|
|
275
|
+
|
|
276
|
+
Example (e.g. after `heroku pg:backups:download`):
|
|
277
|
+
|
|
278
|
+
```bash
|
|
279
|
+
dumpling --dump-decode -i latest.dump -c .dumplingconf -o anonymized.sql
|
|
280
|
+
```
|
|
272
281
|
|
|
273
282
|
### SQLite (`--format sqlite`)
|
|
274
283
|
|
|
@@ -456,4 +465,4 @@ See the [CI guardrails documentation](docs/src/ci-guardrails.md) for full pipeli
|
|
|
456
465
|
|
|
457
466
|
## Full documentation
|
|
458
467
|
|
|
459
|
-
Detailed docs, including the configuration reference and release process, are available at the project's [GitHub Pages site](https://github.
|
|
468
|
+
Detailed docs, including the configuration reference and release process, are available at the project's [GitHub Pages site](https://ababic.github.io/dumpling/) (built from `docs/src/`).
|
|
@@ -6,7 +6,7 @@ Use `--format` to declare the SQL dialect of your input file:
|
|
|
6
6
|
|
|
7
7
|
| Value | Description |
|
|
8
8
|
|---|---|
|
|
9
|
-
| `postgres` (default) | PostgreSQL `pg_dump` plain-text format. Supports `COPY … FROM stdin` blocks, `"double-quoted"` identifiers, `''`-escaped strings. |
|
|
9
|
+
| `postgres` (default) | PostgreSQL `pg_dump` plain-text format. Supports `COPY … FROM stdin` blocks, `"double-quoted"` identifiers, `''`-escaped strings. Custom-format (`-Fc`) or directory dumps can be decoded on the fly with `dumpling --dump-decode` (wraps `pg_restore -f -`; requires client tools). By default the archive is deleted after success; use `--dump-decode-keep-input` to retain it. |
|
|
10
10
|
| `sqlite` | SQLite `.dump` format. Adds `INSERT OR REPLACE INTO` / `INSERT OR IGNORE INTO` support. No COPY blocks. |
|
|
11
11
|
| `mssql` | SQL Server / MSSQL plain SQL. Adds `[bracket]` identifier quoting, `N'…'` Unicode string literals, and `nvarchar(n)` / `nchar(n)` length extraction. No COPY blocks. |
|
|
12
12
|
|
|
@@ -17,6 +17,28 @@ dumpling --format sqlite -i data.db.sql -o anonymized.sql
|
|
|
17
17
|
dumpling --format mssql -i backup.sql -o anonymized.sql
|
|
18
18
|
```
|
|
19
19
|
|
|
20
|
+
### PostgreSQL custom-format archives (`--dump-decode`)
|
|
21
|
+
|
|
22
|
+
Heroku PGBackups and many pipelines ship **`pg_dump` custom format** (`-Fc`) or **directory-format** dumps to save bandwidth. Dumpling’s SQL engine still expects **plain text**; use **`--dump-decode`** so Dumpling runs **`pg_restore -f -`** (script to stdout, no database) and pipes the result through the same anonymizer as a normal plain-SQL file.
|
|
23
|
+
|
|
24
|
+
**Requirements:** PostgreSQL client tools on `PATH` (`pg_restore`), or set **`--pg-restore-path`**. Use **`--dump-decode-arg`** (repeatable) for extra `pg_restore` flags, e.g. `--dump-decode-arg=--no-owner --dump-decode-arg=--no-acl`.
|
|
25
|
+
|
|
26
|
+
**Input deletion:** After a **fully successful** run, Dumpling **removes** the `--input` path (single file or directory-format folder) by default so only the anonymized output remains. Pass **`--dump-decode-keep-input`** to retain the archive.
|
|
27
|
+
|
|
28
|
+
**Check mode:** **`--check`** with **`--dump-decode`** requires **`--dump-decode-keep-input`**. Otherwise the default would delete the dump before you can iterate on config.
|
|
29
|
+
|
|
30
|
+
Example (e.g. after `heroku pg:backups:download`):
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
dumpling --dump-decode -i latest.dump -c .dumplingconf -o anonymized.sql
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Dry run while keeping the downloaded file:
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
dumpling --dump-decode --dump-decode-keep-input --check -i latest.dump -c .dumplingconf
|
|
40
|
+
```
|
|
41
|
+
|
|
20
42
|
---
|
|
21
43
|
|
|
22
44
|
## Configuration sources
|
|
@@ -28,4 +28,6 @@ cargo test --all-targets --all-features
|
|
|
28
28
|
dumpling -i dump.sql -o sanitized.sql
|
|
29
29
|
```
|
|
30
30
|
|
|
31
|
+
If your input is a PostgreSQL **custom-format** file (not plain SQL), decode and anonymize in one step with **`--dump-decode`** (needs `pg_restore` from PostgreSQL client tools). See [PostgreSQL custom-format archives](configuration.md#postgresql-custom-format-archives---dump-decode) in the configuration guide.
|
|
32
|
+
|
|
31
33
|
For full command examples and strategy options, see the repository `README.md`.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Dumpling documentation
|
|
2
2
|
|
|
3
|
-
Dumpling is a streaming anonymizer for plain SQL dumps. It supports PostgreSQL (`pg_dump` plain format), SQLite (`.dump`), and SQL Server / MSSQL (SSMS / mssql-scripter plain SQL output).
|
|
3
|
+
Dumpling is a streaming anonymizer for plain SQL dumps. It supports PostgreSQL (`pg_dump` plain format), SQLite (`.dump`), and SQL Server / MSSQL (SSMS / mssql-scripter plain SQL output). For PostgreSQL **custom-format** archives (e.g. Heroku `pg:backups:download`), use **`--dump-decode`** so Dumpling invokes `pg_restore` and streams plain SQL—see [Dump format](configuration.md#postgresql-custom-format-archives---dump-decode) in the configuration guide.
|
|
4
4
|
|
|
5
5
|
This documentation covers the operating model for day-to-day use:
|
|
6
6
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
use std::fs::File;
|
|
2
2
|
use std::io::{self, BufRead, BufReader, BufWriter, Write};
|
|
3
3
|
use std::path::{Path, PathBuf};
|
|
4
|
+
use std::process::{Command, Stdio};
|
|
4
5
|
|
|
5
6
|
use clap::{ArgAction, Parser, Subcommand};
|
|
6
7
|
|
|
@@ -13,6 +14,7 @@ mod settings;
|
|
|
13
14
|
mod sql;
|
|
14
15
|
mod transform;
|
|
15
16
|
|
|
17
|
+
use anyhow::Context;
|
|
16
18
|
use regex::Regex;
|
|
17
19
|
use report::Reporter;
|
|
18
20
|
use scan::{OutputScanner, ScanningWriter};
|
|
@@ -105,6 +107,26 @@ struct Cli {
|
|
|
105
107
|
#[arg(long = "security-profile", default_value = "standard")]
|
|
106
108
|
security_profile: String,
|
|
107
109
|
|
|
110
|
+
/// Decode PostgreSQL custom-format or directory-format dumps via `pg_restore -f -` before anonymizing.
|
|
111
|
+
/// Requires `--input` pointing at the archive file or directory and `--format postgres`. Requires a
|
|
112
|
+
/// PostgreSQL client install (`pg_restore` on PATH unless overridden by `--pg-restore-path`).
|
|
113
|
+
#[arg(long = "dump-decode", action = ArgAction::SetTrue)]
|
|
114
|
+
dump_decode: bool,
|
|
115
|
+
|
|
116
|
+
/// Keep the input archive after `--dump-decode` (default: delete file or directory after a fully
|
|
117
|
+
/// successful run). Required when `--dump-decode` is combined with `--check` (the default would delete the archive).
|
|
118
|
+
#[arg(long = "dump-decode-keep-input", action = ArgAction::SetTrue)]
|
|
119
|
+
dump_decode_keep_input: bool,
|
|
120
|
+
|
|
121
|
+
/// `pg_restore` executable to use with `--dump-decode` (default: `pg_restore` on PATH).
|
|
122
|
+
#[arg(long = "pg-restore-path", default_value = "pg_restore")]
|
|
123
|
+
pg_restore_path: PathBuf,
|
|
124
|
+
|
|
125
|
+
/// Extra arguments forwarded to `pg_restore` before the archive path (repeatable). Example:
|
|
126
|
+
/// `--dump-decode-arg=--no-owner` `--dump-decode-arg=--no-acl`
|
|
127
|
+
#[arg(long = "dump-decode-arg")]
|
|
128
|
+
dump_decode_arg: Vec<String>,
|
|
129
|
+
|
|
108
130
|
#[command(subcommand)]
|
|
109
131
|
command: Option<Commands>,
|
|
110
132
|
}
|
|
@@ -184,6 +206,14 @@ fn run_anonymize(cli: Cli) -> anyhow::Result<()> {
|
|
|
184
206
|
if cli.check && (cli.in_place || cli.output.is_some()) {
|
|
185
207
|
anyhow::bail!("--check cannot be used together with --output or --in-place");
|
|
186
208
|
}
|
|
209
|
+
if cli.dump_decode && !cli.dump_decode_keep_input && cli.check {
|
|
210
|
+
anyhow::bail!(
|
|
211
|
+
"--dump-decode removes the input archive on success by default; use --dump-decode-keep-input with --check"
|
|
212
|
+
);
|
|
213
|
+
}
|
|
214
|
+
if cli.dump_decode && cli.in_place {
|
|
215
|
+
anyhow::bail!("--dump-decode cannot be used with --in-place");
|
|
216
|
+
}
|
|
187
217
|
|
|
188
218
|
// Resolve config from provided path or discover in CWD
|
|
189
219
|
let resolved_config: ResolvedConfig =
|
|
@@ -247,36 +277,97 @@ fn run_anonymize(cli: Cli) -> anyhow::Result<()> {
|
|
|
247
277
|
other
|
|
248
278
|
),
|
|
249
279
|
};
|
|
280
|
+
if cli.dump_decode && dump_format != DumpFormat::Postgres {
|
|
281
|
+
anyhow::bail!(
|
|
282
|
+
"--dump-decode only applies to PostgreSQL dumps; use --format postgres (default)"
|
|
283
|
+
);
|
|
284
|
+
}
|
|
250
285
|
|
|
251
286
|
// Compile table include/exclude regex patterns
|
|
252
287
|
let include_res = compile_patterns(&cli.include_table)?;
|
|
253
288
|
let exclude_res = compile_patterns(&cli.exclude_table)?;
|
|
254
289
|
|
|
255
|
-
// Determine IO
|
|
256
|
-
let
|
|
290
|
+
// Determine IO (optional pg_restore child when --dump-decode)
|
|
291
|
+
let mut pg_restore_child: Option<std::process::Child> = None;
|
|
292
|
+
let (mut reader, input_path_for_inplace): (Box<dyn BufRead>, Option<PathBuf>) = if cli
|
|
293
|
+
.dump_decode
|
|
257
294
|
{
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
)
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
295
|
+
let archive_path = cli
|
|
296
|
+
.input
|
|
297
|
+
.as_ref()
|
|
298
|
+
.ok_or_else(|| {
|
|
299
|
+
anyhow::anyhow!(
|
|
300
|
+
"--dump-decode requires --input pointing at a pg_dump custom-format file or directory-format directory"
|
|
301
|
+
)
|
|
302
|
+
})?;
|
|
303
|
+
if !cli.allow_ext.is_empty() && !has_allowed_extension(archive_path, &cli.allow_ext) {
|
|
304
|
+
let actual = archive_path
|
|
305
|
+
.extension()
|
|
306
|
+
.and_then(|s| s.to_str())
|
|
307
|
+
.unwrap_or("<none>")
|
|
308
|
+
.to_string();
|
|
309
|
+
anyhow::bail!(
|
|
310
|
+
"input file extension '{}' is not in allowed set {:?}",
|
|
311
|
+
actual,
|
|
312
|
+
cli.allow_ext
|
|
313
|
+
);
|
|
314
|
+
}
|
|
315
|
+
if !archive_path.exists() {
|
|
316
|
+
anyhow::bail!(
|
|
317
|
+
"--dump-decode input path does not exist: {}",
|
|
318
|
+
archive_path.display()
|
|
319
|
+
);
|
|
320
|
+
}
|
|
321
|
+
eprintln!(
|
|
322
|
+
"dumpling: decoding PostgreSQL archive via {} -f - {}",
|
|
323
|
+
cli.pg_restore_path.display(),
|
|
324
|
+
archive_path.display()
|
|
325
|
+
);
|
|
326
|
+
let mut cmd = Command::new(&cli.pg_restore_path);
|
|
327
|
+
for a in &cli.dump_decode_arg {
|
|
328
|
+
cmd.arg(a);
|
|
274
329
|
}
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
330
|
+
cmd.arg("-f")
|
|
331
|
+
.arg("-")
|
|
332
|
+
.arg(archive_path)
|
|
333
|
+
.stdout(Stdio::piped())
|
|
334
|
+
.stderr(Stdio::inherit());
|
|
335
|
+
let mut child = cmd.spawn().with_context(|| {
|
|
336
|
+
format!(
|
|
337
|
+
"failed to spawn `{}`; install PostgreSQL client tools or set --pg-restore-path",
|
|
338
|
+
cli.pg_restore_path.display()
|
|
339
|
+
)
|
|
340
|
+
})?;
|
|
341
|
+
let stdout = child
|
|
342
|
+
.stdout
|
|
343
|
+
.take()
|
|
344
|
+
.ok_or_else(|| anyhow::anyhow!("pg_restore stdout missing"))?;
|
|
345
|
+
pg_restore_child = Some(child);
|
|
346
|
+
(Box::new(BufReader::new(stdout)), Some(archive_path.clone()))
|
|
347
|
+
} else {
|
|
348
|
+
match &cli.input {
|
|
349
|
+
Some(path) => {
|
|
350
|
+
if !cli.allow_ext.is_empty() && !has_allowed_extension(path, &cli.allow_ext) {
|
|
351
|
+
let actual = path
|
|
352
|
+
.extension()
|
|
353
|
+
.and_then(|s| s.to_str())
|
|
354
|
+
.unwrap_or("<none>")
|
|
355
|
+
.to_string();
|
|
356
|
+
anyhow::bail!(
|
|
357
|
+
"input file extension '{}' is not in allowed set {:?}",
|
|
358
|
+
actual,
|
|
359
|
+
cli.allow_ext
|
|
360
|
+
);
|
|
361
|
+
}
|
|
362
|
+
let f = File::open(path)?;
|
|
363
|
+
(Box::new(BufReader::new(f)), Some(path.clone()))
|
|
364
|
+
}
|
|
365
|
+
None => {
|
|
366
|
+
if !cli.allow_ext.is_empty() {
|
|
367
|
+
eprintln!("dumpling: --allow-ext provided but no --input file; extension check is ignored for stdin");
|
|
368
|
+
}
|
|
369
|
+
(Box::new(BufReader::new(io::stdin())), None)
|
|
278
370
|
}
|
|
279
|
-
(Box::new(BufReader::new(io::stdin())), None)
|
|
280
371
|
}
|
|
281
372
|
};
|
|
282
373
|
|
|
@@ -330,12 +421,30 @@ fn run_anonymize(cli: Cli) -> anyhow::Result<()> {
|
|
|
330
421
|
dump_format,
|
|
331
422
|
);
|
|
332
423
|
let mut writer = output;
|
|
333
|
-
if let Some(scanner) = output_scanner.as_mut() {
|
|
424
|
+
let proc_res = if let Some(scanner) = output_scanner.as_mut() {
|
|
334
425
|
let mut scanning_writer = ScanningWriter::new(&mut writer, scanner);
|
|
335
|
-
processor.process(&mut reader, &mut scanning_writer)
|
|
426
|
+
processor.process(&mut reader, &mut scanning_writer)
|
|
336
427
|
} else {
|
|
337
|
-
processor.process(&mut reader, &mut writer)
|
|
428
|
+
processor.process(&mut reader, &mut writer)
|
|
429
|
+
};
|
|
430
|
+
|
|
431
|
+
if let Some(mut child) = pg_restore_child {
|
|
432
|
+
if proc_res.is_err() {
|
|
433
|
+
let _ = child.kill();
|
|
434
|
+
}
|
|
435
|
+
let status = child
|
|
436
|
+
.wait()
|
|
437
|
+
.with_context(|| format!("waiting for `{}`", cli.pg_restore_path.display()))?;
|
|
438
|
+
if proc_res.is_ok() && !status.success() {
|
|
439
|
+
anyhow::bail!(
|
|
440
|
+
"`{}` exited with status {}",
|
|
441
|
+
cli.pg_restore_path.display(),
|
|
442
|
+
status
|
|
443
|
+
);
|
|
444
|
+
}
|
|
338
445
|
}
|
|
446
|
+
|
|
447
|
+
proc_res?;
|
|
339
448
|
let coverage = processor.sensitive_coverage_summary();
|
|
340
449
|
reporter.report.sensitive_columns_detected = coverage.detected.clone();
|
|
341
450
|
reporter.report.sensitive_columns_covered = coverage.covered.clone();
|
|
@@ -363,7 +472,10 @@ fn run_anonymize(cli: Cli) -> anyhow::Result<()> {
|
|
|
363
472
|
|
|
364
473
|
// If in-place, do the swap now
|
|
365
474
|
if cli.in_place {
|
|
366
|
-
let input_path = input_path_for_inplace
|
|
475
|
+
let input_path = input_path_for_inplace
|
|
476
|
+
.as_ref()
|
|
477
|
+
.ok_or_else(|| anyhow::anyhow!("--in-place requires an --input path"))?
|
|
478
|
+
.clone();
|
|
367
479
|
let mut tmp = input_path.clone();
|
|
368
480
|
tmp.set_extension("sql.dumpling.tmp");
|
|
369
481
|
writer.flush()?;
|
|
@@ -405,9 +517,30 @@ fn run_anonymize(cli: Cli) -> anyhow::Result<()> {
|
|
|
405
517
|
std::process::exit(1);
|
|
406
518
|
}
|
|
407
519
|
|
|
520
|
+
if cli.dump_decode && !cli.dump_decode_keep_input {
|
|
521
|
+
if let Some(ref p) = input_path_for_inplace {
|
|
522
|
+
match remove_pg_archive(p) {
|
|
523
|
+
Ok(()) => eprintln!("dumpling: removed input archive {}", p.display()),
|
|
524
|
+
Err(e) => eprintln!(
|
|
525
|
+
"dumpling: warning: could not remove input archive {}: {}",
|
|
526
|
+
p.display(),
|
|
527
|
+
e
|
|
528
|
+
),
|
|
529
|
+
}
|
|
530
|
+
}
|
|
531
|
+
}
|
|
532
|
+
|
|
408
533
|
Ok(())
|
|
409
534
|
}
|
|
410
535
|
|
|
536
|
+
fn remove_pg_archive(path: &Path) -> std::io::Result<()> {
|
|
537
|
+
if path.is_dir() {
|
|
538
|
+
std::fs::remove_dir_all(path)
|
|
539
|
+
} else {
|
|
540
|
+
std::fs::remove_file(path)
|
|
541
|
+
}
|
|
542
|
+
}
|
|
543
|
+
|
|
411
544
|
fn compile_patterns(patterns: &[String]) -> anyhow::Result<Vec<Regex>> {
|
|
412
545
|
let mut out = Vec::new();
|
|
413
546
|
for p in patterns {
|
|
@@ -494,6 +627,24 @@ mod tests_main {
|
|
|
494
627
|
}
|
|
495
628
|
}
|
|
496
629
|
|
|
630
|
+
#[test]
|
|
631
|
+
fn test_dump_decode_flags_parse() {
|
|
632
|
+
let cli = Cli::parse_from([
|
|
633
|
+
"dumpling",
|
|
634
|
+
"--dump-decode",
|
|
635
|
+
"--dump-decode-keep-input",
|
|
636
|
+
"--pg-restore-path",
|
|
637
|
+
"/usr/bin/pg_restore",
|
|
638
|
+
"--dump-decode-arg=--no-owner",
|
|
639
|
+
"-i",
|
|
640
|
+
"/tmp/latest.dump",
|
|
641
|
+
]);
|
|
642
|
+
assert!(cli.dump_decode);
|
|
643
|
+
assert!(cli.dump_decode_keep_input);
|
|
644
|
+
assert_eq!(cli.pg_restore_path, PathBuf::from("/usr/bin/pg_restore"));
|
|
645
|
+
assert_eq!(cli.dump_decode_arg, vec!["--no-owner"]);
|
|
646
|
+
}
|
|
647
|
+
|
|
497
648
|
#[test]
|
|
498
649
|
fn test_lint_policy_allow_noop_flag() {
|
|
499
650
|
let cli = Cli::parse_from(["dumpling", "lint-policy", "--allow-noop"]);
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|