dumpling-cli 0.4.0.tar.gz → 0.4.1.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/CHANGELOG.md +7 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/Cargo.lock +1 -1
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/Cargo.toml +1 -1
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/PKG-INFO +55 -9
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/README.md +54 -8
- dumpling_cli-0.4.1/assets/logo.svg +33 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/pyproject.toml +1 -1
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/src/sql.rs +124 -12
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/.dumplingconf.example +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/.github/workflows/ci.yml +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/.github/workflows/docs-pr.yml +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/.github/workflows/docs.yml +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/.github/workflows/platform-compat-latest.yml +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/.github/workflows/platform-compat-matrix.yml +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/.github/workflows/policy-lint.yml +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/.github/workflows/publish.yml +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/.github/workflows/release.yml +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/.github/workflows/tests.yml +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/.gitignore +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/AGENTS.md +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/CONTRIBUTING.md +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/MAINTENANCE.md +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/book.toml +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/datetime_out.sql +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/datetime_sample.sql +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/docs/src/SUMMARY.md +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/docs/src/ci-guardrails.md +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/docs/src/configuration.md +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/docs/src/getting-started.md +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/docs/src/index.md +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/docs/src/releasing.md +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/rust-toolchain.toml +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/scripts/setup-dev.sh +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/src/faker_dispatch.rs +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/src/filter.rs +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/src/lint.rs +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/src/main.rs +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/src/report.rs +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/src/scan.rs +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/src/settings.rs +0 -0
- {dumpling_cli-0.4.0 → dumpling_cli-0.4.1}/src/transform.rs +0 -0
|
@@ -7,6 +7,12 @@ and this project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.ht
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.4.1] - 2026-05-03
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
|
|
14
|
+
- **INSERT row parsing with JSON casts**: Values such as `'{"k":1}'::jsonb` are parsed so the cell’s unescaped payload is valid JSON for JSON path rules and anonymization; trailing casts like `::jsonb` / `::text` are preserved on output.
|
|
15
|
+
|
|
10
16
|
## [0.4.0] - 2026-05-02
|
|
11
17
|
|
|
12
18
|
### Added
|
|
@@ -55,6 +61,7 @@ and this project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.ht
|
|
|
55
61
|
- Configurable output scan severities and per-category thresholds via `[output_scan]`.
|
|
56
62
|
- JSON report section for output scan findings including category, count, threshold, severity, and sample locations.
|
|
57
63
|
|
|
64
|
+
[0.4.1]: https://github.com/ababic/dumpling/compare/v0.4.0...v0.4.1
|
|
58
65
|
[0.4.0]: https://github.com/ababic/dumpling/compare/v0.3.0...v0.4.0
|
|
59
66
|
[0.3.0]: https://github.com/ababic/dumpling/compare/v0.2.0...v0.3.0
|
|
60
67
|
[0.2.0]: https://github.com/ababic/dumpling/compare/v0.1.0...v0.2.0
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dumpling-cli
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.4.1
|
|
4
4
|
Classifier: Development Status :: 4 - Beta
|
|
5
5
|
Classifier: Environment :: Console
|
|
6
6
|
Classifier: Intended Audience :: Developers
|
|
@@ -19,18 +19,48 @@ Keywords: postgres,sqlite,mssql,sql,anonymization,cli,rust
|
|
|
19
19
|
Requires-Python: >=3.8
|
|
20
20
|
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
21
21
|
|
|
22
|
-
|
|
22
|
+
<p align="center">
|
|
23
|
+
<img src="assets/logo.svg" width="140" height="140" alt="Dumpling logo: a dumpling with steam" />
|
|
24
|
+
</p>
|
|
23
25
|
|
|
24
|
-
|
|
26
|
+
<h1 align="center">Dumpling</h1>
|
|
27
|
+
|
|
28
|
+
<p align="center">
|
|
29
|
+
<strong>Sanitize SQL dumps before they go anywhere.</strong><br />
|
|
30
|
+
Turn huge <code>pg_dump</code> / SQLite / SQL Server exports into shareable, test-friendly snapshots — no DB connection, no secrets left by accident.
|
|
31
|
+
</p>
|
|
32
|
+
|
|
33
|
+
<p align="center">
|
|
34
|
+
<a href="https://pypi.org/project/dumpling-cli/"><img src="https://img.shields.io/pypi/v/dumpling-cli.svg" alt="PyPI version" /></a>
|
|
35
|
+
<a href="https://pypi.org/project/dumpling-cli/"><img src="https://img.shields.io/pypi/pyversions/dumpling-cli.svg" alt="Python versions" /></a>
|
|
36
|
+
<a href="https://pypi.org/project/dumpling-cli/"><img src="https://img.shields.io/pypi/l/dumpling-cli.svg" alt="PyPI license" /></a>
|
|
37
|
+
<a href="https://github.com/ababic/dumpling/actions/workflows/tests.yml"><img src="https://github.com/ababic/dumpling/actions/workflows/tests.yml/badge.svg" alt="Tests" /></a>
|
|
38
|
+
<a href="https://github.com/ababic/dumpling/actions/workflows/ci.yml"><img src="https://github.com/ababic/dumpling/actions/workflows/ci.yml/badge.svg" alt="Lint" /></a>
|
|
39
|
+
<img src="https://img.shields.io/badge/rust-stable-orange?logo=rust" alt="Rust stable" />
|
|
40
|
+
</p>
|
|
41
|
+
|
|
42
|
+
<p align="center">
|
|
43
|
+
<a href="https://ababic.github.io/dumpling/"><strong>Documentation</strong></a>
|
|
44
|
+
·
|
|
45
|
+
<a href="https://github.com/ababic/dumpling"><strong>GitHub</strong></a>
|
|
46
|
+
</p>
|
|
47
|
+
|
|
48
|
+
<p align="center">
|
|
49
|
+
<sub><em>Disclaimer: This project is entirely vibe-coded, but with strong human guidance, review, and attention to quality and safety.</em></sub>
|
|
50
|
+
</p>
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
**Dumpling** reads plain-text SQL dumps (PostgreSQL `pg_dump`, SQLite `.dump`, SQL Server / MSSQL scripts) and rewrites sensitive columns using rules you define in TOML. Everything runs offline on files — ideal for CI, staging share-outs, and compliance-minded workflows.
|
|
25
55
|
|
|
26
56
|
## Why Dumpling?
|
|
27
57
|
|
|
28
|
-
- **
|
|
29
|
-
- **
|
|
30
|
-
- **
|
|
31
|
-
- **
|
|
32
|
-
- **
|
|
33
|
-
- **
|
|
58
|
+
- **Offline by design** — works on dump files only; nothing connects to your database.
|
|
59
|
+
- **Streams giant files** — line-by-line processing keeps multi‑GB dumps reasonable on modest hardware.
|
|
60
|
+
- **Fails loud, not silent** — missing config exits non‑zero and lists where Dumpling looked; use `--allow-noop` only when you mean it.
|
|
61
|
+
- **Stable pseudonyms** — optional domain mappings keep the same source value as the same fake value across tables (foreign keys stay consistent).
|
|
62
|
+
- **Pipeline-ready** — `--check`, strict coverage, JSON reports, and residual PII scans fit pre-merge gates and release automation.
|
|
63
|
+
- **Configure once** — `.dumplingconf` or `[tool.dumpling]` in `pyproject.toml`; install via **Rust** (`cargo`) or **`pip install dumpling-cli`**.
|
|
34
64
|
|
|
35
65
|
---
|
|
36
66
|
|
|
@@ -344,6 +374,22 @@ Supported predicate operators:
|
|
|
344
374
|
|
|
345
375
|
Predicates can target nested JSON values using dot notation (`payload.profile.tier`) or Django-style notation (`payload__profile__tier`). For JSON arrays, path segments are evaluated against each element, so list-of-dicts structures can be matched naturally.
|
|
346
376
|
|
|
377
|
+
### JSON path list targeting
|
|
378
|
+
|
|
379
|
+
JSON list/array traversal is automatic once a path segment resolves to an array.
|
|
380
|
+
|
|
381
|
+
- **All elements in an array**: use the next field name directly.
|
|
382
|
+
- `payload.items.kind` or `payload__items__kind`
|
|
383
|
+
- Matches/rewrites `kind` for every object in `items`.
|
|
384
|
+
- **Specific array index**: use a numeric segment.
|
|
385
|
+
- `payload.items.0.kind` or `payload__items__0__kind`
|
|
386
|
+
- Targets only the first element.
|
|
387
|
+
- **Nested arrays**: combine field and index segments as needed.
|
|
388
|
+
- `payload.groups.members.email`
|
|
389
|
+
- `payload.groups.1.members.0.email`
|
|
390
|
+
|
|
391
|
+
This path behavior is shared by both `row_filters` predicates and JSON-path anonymization rules in `[rules]`.
|
|
392
|
+
|
|
347
393
|
```toml
|
|
348
394
|
[row_filters."public.users"]
|
|
349
395
|
retain = [
|
|
@@ -1,15 +1,45 @@
|
|
|
1
|
-
|
|
1
|
+
<p align="center">
|
|
2
|
+
<img src="assets/logo.svg" width="140" height="140" alt="Dumpling logo: a dumpling with steam" />
|
|
3
|
+
</p>
|
|
4
|
+
|
|
5
|
+
<h1 align="center">Dumpling</h1>
|
|
6
|
+
|
|
7
|
+
<p align="center">
|
|
8
|
+
<strong>Sanitize SQL dumps before they go anywhere.</strong><br />
|
|
9
|
+
Turn huge <code>pg_dump</code> / SQLite / SQL Server exports into shareable, test-friendly snapshots — no DB connection, no secrets left by accident.
|
|
10
|
+
</p>
|
|
11
|
+
|
|
12
|
+
<p align="center">
|
|
13
|
+
<a href="https://pypi.org/project/dumpling-cli/"><img src="https://img.shields.io/pypi/v/dumpling-cli.svg" alt="PyPI version" /></a>
|
|
14
|
+
<a href="https://pypi.org/project/dumpling-cli/"><img src="https://img.shields.io/pypi/pyversions/dumpling-cli.svg" alt="Python versions" /></a>
|
|
15
|
+
<a href="https://pypi.org/project/dumpling-cli/"><img src="https://img.shields.io/pypi/l/dumpling-cli.svg" alt="PyPI license" /></a>
|
|
16
|
+
<a href="https://github.com/ababic/dumpling/actions/workflows/tests.yml"><img src="https://github.com/ababic/dumpling/actions/workflows/tests.yml/badge.svg" alt="Tests" /></a>
|
|
17
|
+
<a href="https://github.com/ababic/dumpling/actions/workflows/ci.yml"><img src="https://github.com/ababic/dumpling/actions/workflows/ci.yml/badge.svg" alt="Lint" /></a>
|
|
18
|
+
<img src="https://img.shields.io/badge/rust-stable-orange?logo=rust" alt="Rust stable" />
|
|
19
|
+
</p>
|
|
20
|
+
|
|
21
|
+
<p align="center">
|
|
22
|
+
<a href="https://ababic.github.io/dumpling/"><strong>Documentation</strong></a>
|
|
23
|
+
·
|
|
24
|
+
<a href="https://github.com/ababic/dumpling"><strong>GitHub</strong></a>
|
|
25
|
+
</p>
|
|
26
|
+
|
|
27
|
+
<p align="center">
|
|
28
|
+
<sub><em>Disclaimer: This project is entirely vibe-coded, but with strong human guidance, review, and attention to quality and safety.</em></sub>
|
|
29
|
+
</p>
|
|
2
30
|
|
|
3
|
-
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
**Dumpling** reads plain-text SQL dumps (PostgreSQL `pg_dump`, SQLite `.dump`, SQL Server / MSSQL scripts) and rewrites sensitive columns using rules you define in TOML. Everything runs offline on files — ideal for CI, staging share-outs, and compliance-minded workflows.
|
|
4
34
|
|
|
5
35
|
## Why Dumpling?
|
|
6
36
|
|
|
7
|
-
- **
|
|
8
|
-
- **
|
|
9
|
-
- **
|
|
10
|
-
- **
|
|
11
|
-
- **
|
|
12
|
-
- **
|
|
37
|
+
- **Offline by design** — works on dump files only; nothing connects to your database.
|
|
38
|
+
- **Streams giant files** — line-by-line processing keeps multi‑GB dumps reasonable on modest hardware.
|
|
39
|
+
- **Fails loud, not silent** — missing config exits non‑zero and lists where Dumpling looked; use `--allow-noop` only when you mean it.
|
|
40
|
+
- **Stable pseudonyms** — optional domain mappings keep the same source value as the same fake value across tables (foreign keys stay consistent).
|
|
41
|
+
- **Pipeline-ready** — `--check`, strict coverage, JSON reports, and residual PII scans fit pre-merge gates and release automation.
|
|
42
|
+
- **Configure once** — `.dumplingconf` or `[tool.dumpling]` in `pyproject.toml`; install via **Rust** (`cargo`) or **`pip install dumpling-cli`**.
|
|
13
43
|
|
|
14
44
|
---
|
|
15
45
|
|
|
@@ -323,6 +353,22 @@ Supported predicate operators:
|
|
|
323
353
|
|
|
324
354
|
Predicates can target nested JSON values using dot notation (`payload.profile.tier`) or Django-style notation (`payload__profile__tier`). For JSON arrays, path segments are evaluated against each element, so list-of-dicts structures can be matched naturally.
|
|
325
355
|
|
|
356
|
+
### JSON path list targeting
|
|
357
|
+
|
|
358
|
+
JSON list/array traversal is automatic once a path segment resolves to an array.
|
|
359
|
+
|
|
360
|
+
- **All elements in an array**: use the next field name directly.
|
|
361
|
+
- `payload.items.kind` or `payload__items__kind`
|
|
362
|
+
- Matches/rewrites `kind` for every object in `items`.
|
|
363
|
+
- **Specific array index**: use a numeric segment.
|
|
364
|
+
- `payload.items.0.kind` or `payload__items__0__kind`
|
|
365
|
+
- Targets only the first element.
|
|
366
|
+
- **Nested arrays**: combine field and index segments as needed.
|
|
367
|
+
- `payload.groups.members.email`
|
|
368
|
+
- `payload.groups.1.members.0.email`
|
|
369
|
+
|
|
370
|
+
This path behavior is shared by both `row_filters` predicates and JSON-path anonymization rules in `[rules]`.
|
|
371
|
+
|
|
326
372
|
```toml
|
|
327
373
|
[row_filters."public.users"]
|
|
328
374
|
retain = [
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 128 128" role="img" aria-label="Dumpling logo">
|
|
2
|
+
<defs>
|
|
3
|
+
<linearGradient id="steam" x1="0%" y1="100%" x2="0%" y2="0%">
|
|
4
|
+
<stop offset="0%" stop-color="#e8f4fc" stop-opacity="0"/>
|
|
5
|
+
<stop offset="50%" stop-color="#cfe9fb" stop-opacity="0.85"/>
|
|
6
|
+
<stop offset="100%" stop-color="#b8dff8" stop-opacity="0"/>
|
|
7
|
+
</linearGradient>
|
|
8
|
+
<linearGradient id="dough" x1="0%" y1="0%" x2="100%" y2="100%">
|
|
9
|
+
<stop offset="0%" stop-color="#fff8ef"/>
|
|
10
|
+
<stop offset="45%" stop-color="#f4dcc4"/>
|
|
11
|
+
<stop offset="100%" stop-color="#e8b896"/>
|
|
12
|
+
</linearGradient>
|
|
13
|
+
<linearGradient id="shadow" x1="0%" y1="0%" x2="0%" y2="100%">
|
|
14
|
+
<stop offset="0%" stop-color="#c4865a" stop-opacity="0.35"/>
|
|
15
|
+
<stop offset="100%" stop-color="#8b5a3c" stop-opacity="0.15"/>
|
|
16
|
+
</linearGradient>
|
|
17
|
+
</defs>
|
|
18
|
+
<!-- Steam -->
|
|
19
|
+
<path fill="url(#steam)" d="M44 18c2-6 8-10 14-8s8 10 4 15c-3 4-2 9 2 12 5 4 5 12 0 16-6 5-16 4-20-3-2-4 0-9 4-11 3-2 3-6 0-9-4-4-4-10 0-12z"/>
|
|
20
|
+
<path fill="url(#steam)" opacity="0.75" d="M64 14c3-5 9-7 14-4 5 3 6 10 2 14-4 4-3 10 2 13 6 4 7 13 1 18-7 6-19 4-24-5-2-5 1-11 6-13 4-2 4-7 1-10-5-4-5-11 0-13z"/>
|
|
21
|
+
<path fill="url(#steam)" opacity="0.6" d="M82 20c2-5 8-8 13-5 5 3 7 10 3 15-3 4-2 9 3 12 5 3 7 11 2 16-6 6-17 5-22-3-2-4 0-9 5-11 3-2 4-6 1-9-4-4-4-10 0-13z"/>
|
|
22
|
+
<!-- Plate -->
|
|
23
|
+
<ellipse cx="64" cy="108" rx="52" ry="10" fill="#dfe8ef"/>
|
|
24
|
+
<ellipse cx="64" cy="106" rx="48" ry="8" fill="#eef4f8"/>
|
|
25
|
+
<!-- Dumpling body -->
|
|
26
|
+
<ellipse cx="64" cy="82" rx="42" ry="28" fill="url(#dough)" stroke="#d4a574" stroke-width="2"/>
|
|
27
|
+
<ellipse cx="64" cy="96" rx="38" ry="12" fill="url(#shadow)"/>
|
|
28
|
+
<!-- Pleats -->
|
|
29
|
+
<path fill="none" stroke="#c9956a" stroke-width="1.8" stroke-linecap="round" d="M34 58c6 10 14 16 30 16s24-6 30-16"/>
|
|
30
|
+
<path fill="none" stroke="#d9b08a" stroke-width="1.2" stroke-linecap="round" opacity="0.9" d="M42 54c5 8 13 13 22 13s17-5 22-13"/>
|
|
31
|
+
<!-- Highlight -->
|
|
32
|
+
<ellipse cx="48" cy="76" rx="10" ry="6" fill="#ffffff" opacity="0.35"/>
|
|
33
|
+
</svg>
|
|
@@ -1155,20 +1155,22 @@ struct Cell {
|
|
|
1155
1155
|
original: Option<String>, // None for NULL
|
|
1156
1156
|
was_quoted: bool,
|
|
1157
1157
|
was_default: bool,
|
|
1158
|
+
trailing_expr: Option<String>,
|
|
1158
1159
|
}
|
|
1159
1160
|
|
|
1160
1161
|
impl Cell {
|
|
1161
1162
|
fn render_original(&self) -> String {
|
|
1163
|
+
let trailing = self.trailing_expr.as_deref().unwrap_or("");
|
|
1162
1164
|
if self.was_default {
|
|
1163
|
-
return "DEFAULT"
|
|
1165
|
+
return format!("DEFAULT{trailing}");
|
|
1164
1166
|
}
|
|
1165
1167
|
match &self.original {
|
|
1166
|
-
None => "NULL"
|
|
1168
|
+
None => format!("NULL{trailing}"),
|
|
1167
1169
|
Some(s) => {
|
|
1168
1170
|
if self.was_quoted {
|
|
1169
|
-
format!("'{}'", s.replace('\'', "''"))
|
|
1171
|
+
format!("'{}'{trailing}", s.replace('\'', "''"))
|
|
1170
1172
|
} else {
|
|
1171
|
-
s
|
|
1173
|
+
format!("{s}{trailing}")
|
|
1172
1174
|
}
|
|
1173
1175
|
}
|
|
1174
1176
|
}
|
|
@@ -1176,14 +1178,15 @@ impl Cell {
|
|
|
1176
1178
|
}
|
|
1177
1179
|
|
|
1178
1180
|
fn render_cell(repl: &Replacement, original: &Cell) -> String {
|
|
1181
|
+
let trailing = original.trailing_expr.as_deref().unwrap_or("");
|
|
1179
1182
|
if repl.is_null {
|
|
1180
|
-
return "NULL"
|
|
1183
|
+
return format!("NULL{trailing}");
|
|
1181
1184
|
}
|
|
1182
1185
|
let should_quote = repl.force_quoted || original.was_quoted;
|
|
1183
1186
|
if should_quote {
|
|
1184
|
-
format!("'{}'", repl.value.replace('\'', "''"))
|
|
1187
|
+
format!("'{}'{trailing}", repl.value.replace('\'', "''"))
|
|
1185
1188
|
} else {
|
|
1186
|
-
repl.value
|
|
1189
|
+
format!("{}{trailing}", repl.value)
|
|
1187
1190
|
}
|
|
1188
1191
|
}
|
|
1189
1192
|
|
|
@@ -1243,7 +1246,9 @@ fn parse_parenthesized_values(s: &str) -> anyhow::Result<(Vec<Cell>, usize)> {
|
|
|
1243
1246
|
let mut cells: Vec<Cell> = Vec::new();
|
|
1244
1247
|
let mut in_single = false;
|
|
1245
1248
|
let mut buf = String::new();
|
|
1249
|
+
let mut trailing_expr = String::new();
|
|
1246
1250
|
let mut was_quoted = false;
|
|
1251
|
+
let mut closed_quoted_literal = false;
|
|
1247
1252
|
while i < chs.len() {
|
|
1248
1253
|
let c = chs[i];
|
|
1249
1254
|
if in_single {
|
|
@@ -1255,6 +1260,7 @@ fn parse_parenthesized_values(s: &str) -> anyhow::Result<(Vec<Cell>, usize)> {
|
|
|
1255
1260
|
continue;
|
|
1256
1261
|
} else {
|
|
1257
1262
|
in_single = false;
|
|
1263
|
+
closed_quoted_literal = true;
|
|
1258
1264
|
i += 1;
|
|
1259
1265
|
continue;
|
|
1260
1266
|
}
|
|
@@ -1282,17 +1288,19 @@ fn parse_parenthesized_values(s: &str) -> anyhow::Result<(Vec<Cell>, usize)> {
|
|
|
1282
1288
|
}
|
|
1283
1289
|
')' => {
|
|
1284
1290
|
// end cell, end row
|
|
1285
|
-
let cell = finalize_cell(&buf, was_quoted);
|
|
1291
|
+
let cell = finalize_cell(&buf, was_quoted, &trailing_expr);
|
|
1286
1292
|
cells.push(cell);
|
|
1287
1293
|
i += 1;
|
|
1288
1294
|
return Ok((cells, i));
|
|
1289
1295
|
}
|
|
1290
1296
|
',' => {
|
|
1291
1297
|
// end cell
|
|
1292
|
-
let cell = finalize_cell(&buf, was_quoted);
|
|
1298
|
+
let cell = finalize_cell(&buf, was_quoted, &trailing_expr);
|
|
1293
1299
|
cells.push(cell);
|
|
1294
1300
|
buf.clear();
|
|
1301
|
+
trailing_expr.clear();
|
|
1295
1302
|
was_quoted = false;
|
|
1303
|
+
closed_quoted_literal = false;
|
|
1296
1304
|
i += 1;
|
|
1297
1305
|
// consume following spaces
|
|
1298
1306
|
while i < chs.len() && chs[i].is_whitespace() {
|
|
@@ -1300,11 +1308,19 @@ fn parse_parenthesized_values(s: &str) -> anyhow::Result<(Vec<Cell>, usize)> {
|
|
|
1300
1308
|
}
|
|
1301
1309
|
}
|
|
1302
1310
|
c if c.is_whitespace() => {
|
|
1303
|
-
//
|
|
1311
|
+
// Preserve whitespace after a quoted literal so explicit SQL casts stay intact.
|
|
1312
|
+
if was_quoted && closed_quoted_literal {
|
|
1313
|
+
trailing_expr.push(c);
|
|
1314
|
+
}
|
|
1315
|
+
// Skip insignificant whitespace between tokens when unquoted.
|
|
1304
1316
|
i += 1;
|
|
1305
1317
|
}
|
|
1306
1318
|
other => {
|
|
1307
|
-
|
|
1319
|
+
if was_quoted && closed_quoted_literal {
|
|
1320
|
+
trailing_expr.push(other);
|
|
1321
|
+
} else {
|
|
1322
|
+
buf.push(other);
|
|
1323
|
+
}
|
|
1308
1324
|
i += 1;
|
|
1309
1325
|
}
|
|
1310
1326
|
}
|
|
@@ -1313,12 +1329,21 @@ fn parse_parenthesized_values(s: &str) -> anyhow::Result<(Vec<Cell>, usize)> {
|
|
|
1313
1329
|
anyhow::bail!("unterminated values row")
|
|
1314
1330
|
}
|
|
1315
1331
|
|
|
1316
|
-
fn finalize_cell(buf: &str, was_quoted: bool) -> Cell {
|
|
1332
|
+
fn finalize_cell(buf: &str, was_quoted: bool, trailing_expr: &str) -> Cell {
|
|
1333
|
+
let trailing = {
|
|
1334
|
+
let t = trailing_expr.trim();
|
|
1335
|
+
if t.is_empty() {
|
|
1336
|
+
None
|
|
1337
|
+
} else {
|
|
1338
|
+
Some(t.to_string())
|
|
1339
|
+
}
|
|
1340
|
+
};
|
|
1317
1341
|
if was_quoted {
|
|
1318
1342
|
Cell {
|
|
1319
1343
|
original: Some(buf.to_string()),
|
|
1320
1344
|
was_quoted: true,
|
|
1321
1345
|
was_default: false,
|
|
1346
|
+
trailing_expr: trailing,
|
|
1322
1347
|
}
|
|
1323
1348
|
} else {
|
|
1324
1349
|
let t = buf.trim();
|
|
@@ -1327,18 +1352,21 @@ fn finalize_cell(buf: &str, was_quoted: bool) -> Cell {
|
|
|
1327
1352
|
original: None,
|
|
1328
1353
|
was_quoted: false,
|
|
1329
1354
|
was_default: false,
|
|
1355
|
+
trailing_expr: None,
|
|
1330
1356
|
}
|
|
1331
1357
|
} else if t.eq_ignore_ascii_case("default") {
|
|
1332
1358
|
Cell {
|
|
1333
1359
|
original: None,
|
|
1334
1360
|
was_quoted: false,
|
|
1335
1361
|
was_default: true,
|
|
1362
|
+
trailing_expr: None,
|
|
1336
1363
|
}
|
|
1337
1364
|
} else {
|
|
1338
1365
|
Cell {
|
|
1339
1366
|
original: Some(t.to_string()),
|
|
1340
1367
|
was_quoted: false,
|
|
1341
1368
|
was_default: false,
|
|
1369
|
+
trailing_expr: None,
|
|
1342
1370
|
}
|
|
1343
1371
|
}
|
|
1344
1372
|
}
|
|
@@ -2370,6 +2398,90 @@ COPY public.events (id, payload) FROM stdin;
|
|
|
2370
2398
|
);
|
|
2371
2399
|
}
|
|
2372
2400
|
|
|
2401
|
+
#[test]
|
|
2402
|
+
fn parse_values_rows_tracks_trailing_cast_for_quoted_literals() {
|
|
2403
|
+
let rows =
|
|
2404
|
+
parse_values_rows("(1, '{\"profile\":{\"secret\":\"alpha\"}}'::jsonb, 'note'::text)")
|
|
2405
|
+
.unwrap();
|
|
2406
|
+
assert_eq!(rows.len(), 1);
|
|
2407
|
+
assert_eq!(rows[0].len(), 3);
|
|
2408
|
+
assert_eq!(
|
|
2409
|
+
rows[0][1].original.as_deref(),
|
|
2410
|
+
Some("{\"profile\":{\"secret\":\"alpha\"}}")
|
|
2411
|
+
);
|
|
2412
|
+
assert_eq!(rows[0][1].trailing_expr.as_deref(), Some("::jsonb"));
|
|
2413
|
+
assert_eq!(rows[0][2].original.as_deref(), Some("note"));
|
|
2414
|
+
assert_eq!(rows[0][2].trailing_expr.as_deref(), Some("::text"));
|
|
2415
|
+
}
|
|
2416
|
+
|
|
2417
|
+
#[test]
|
|
2418
|
+
fn pipeline_anonymizes_nested_json_paths_for_jsonb_cast_insert_rows() {
|
|
2419
|
+
let mut rules: HashMap<String, HashMap<String, AnonymizerSpec>> = HashMap::new();
|
|
2420
|
+
let mut cols: HashMap<String, AnonymizerSpec> = HashMap::new();
|
|
2421
|
+
cols.insert(
|
|
2422
|
+
"payload.profile.secret".to_string(),
|
|
2423
|
+
AnonymizerSpec {
|
|
2424
|
+
strategy: "string".to_string(),
|
|
2425
|
+
salt: None,
|
|
2426
|
+
min: None,
|
|
2427
|
+
max: None,
|
|
2428
|
+
length: Some(8),
|
|
2429
|
+
min_days: None,
|
|
2430
|
+
max_days: None,
|
|
2431
|
+
min_seconds: None,
|
|
2432
|
+
max_seconds: None,
|
|
2433
|
+
domain: Some("secrets".to_string()),
|
|
2434
|
+
unique_within_domain: None,
|
|
2435
|
+
as_string: Some(true),
|
|
2436
|
+
locale: None,
|
|
2437
|
+
faker: None,
|
|
2438
|
+
format: None,
|
|
2439
|
+
},
|
|
2440
|
+
);
|
|
2441
|
+
rules.insert("public.events".to_string(), cols);
|
|
2442
|
+
let cfg = ResolvedConfig {
|
|
2443
|
+
salt: None,
|
|
2444
|
+
rules,
|
|
2445
|
+
row_filters: HashMap::new(),
|
|
2446
|
+
column_cases: HashMap::new(),
|
|
2447
|
+
sensitive_columns: HashMap::new(),
|
|
2448
|
+
output_scan: crate::settings::OutputScanConfig::default(),
|
|
2449
|
+
source_path: None,
|
|
2450
|
+
};
|
|
2451
|
+
let reg = AnonymizerRegistry::from_config(&cfg);
|
|
2452
|
+
let mut proc =
|
|
2453
|
+
SqlStreamProcessor::new(reg, cfg, Vec::new(), Vec::new(), None, DumpFormat::Postgres);
|
|
2454
|
+
let input = r#"
|
|
2455
|
+
CREATE TABLE public.events (id int, payload jsonb);
|
|
2456
|
+
INSERT INTO public.events (id, payload) VALUES
|
|
2457
|
+
(1, '{"profile":{"tier":"gold","secret":"alpha"}}'::jsonb),
|
|
2458
|
+
(2, '{"profile":{"tier":"gold","secret":"alpha"}}'::jsonb);
|
|
2459
|
+
"#;
|
|
2460
|
+
let mut reader = std::io::BufReader::new(input.as_bytes());
|
|
2461
|
+
let mut out = Vec::new();
|
|
2462
|
+
proc.process(&mut reader, &mut out).unwrap();
|
|
2463
|
+
let s = String::from_utf8(out).unwrap();
|
|
2464
|
+
assert!(!s.contains("alpha"), "nested secret should be anonymized");
|
|
2465
|
+
assert!(s.contains("::jsonb"), "jsonb cast should be preserved");
|
|
2466
|
+
|
|
2467
|
+
let insert_pos = s.find("INSERT INTO public.events").unwrap();
|
|
2468
|
+
let insert_tail = &s[insert_pos..];
|
|
2469
|
+
let insert_end = insert_tail.find(";\n").unwrap() + insert_pos;
|
|
2470
|
+
let ins_stmt = &s[insert_pos..=insert_end];
|
|
2471
|
+
let vals_idx = ins_stmt.to_uppercase().find("VALUES").unwrap();
|
|
2472
|
+
let ins_block = strip_trailing_semicolon(ins_stmt[vals_idx + "VALUES".len()..].trim());
|
|
2473
|
+
let ins_rows = parse_values_rows(ins_block).unwrap();
|
|
2474
|
+
assert_eq!(ins_rows[0][1].trailing_expr.as_deref(), Some("::jsonb"));
|
|
2475
|
+
assert_eq!(ins_rows[1][1].trailing_expr.as_deref(), Some("::jsonb"));
|
|
2476
|
+
let v0 =
|
|
2477
|
+
serde_json::from_str::<serde_json::Value>(ins_rows[0][1].original.as_ref().unwrap())
|
|
2478
|
+
.unwrap();
|
|
2479
|
+
let v1 =
|
|
2480
|
+
serde_json::from_str::<serde_json::Value>(ins_rows[1][1].original.as_ref().unwrap())
|
|
2481
|
+
.unwrap();
|
|
2482
|
+
assert_eq!(v0["profile"]["secret"], v1["profile"]["secret"]);
|
|
2483
|
+
}
|
|
2484
|
+
|
|
2373
2485
|
#[test]
|
|
2374
2486
|
fn generated_values_fit_length_restricted_columns_from_create_table() {
|
|
2375
2487
|
let mut rules: HashMap<String, HashMap<String, AnonymizerSpec>> = HashMap::new();
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|