dumpling-cli 0.4.1__tar.gz → 0.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/CHANGELOG.md +8 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/Cargo.lock +1 -1
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/Cargo.toml +1 -1
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/PKG-INFO +1 -1
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/pyproject.toml +1 -1
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/src/filter.rs +234 -5
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/src/sql.rs +155 -1
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/.dumplingconf.example +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/.github/workflows/ci.yml +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/.github/workflows/docs-pr.yml +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/.github/workflows/docs.yml +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/.github/workflows/platform-compat-latest.yml +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/.github/workflows/platform-compat-matrix.yml +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/.github/workflows/policy-lint.yml +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/.github/workflows/publish.yml +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/.github/workflows/release.yml +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/.github/workflows/tests.yml +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/.gitignore +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/AGENTS.md +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/CONTRIBUTING.md +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/MAINTENANCE.md +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/README.md +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/assets/logo.svg +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/book.toml +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/datetime_out.sql +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/datetime_sample.sql +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/docs/src/SUMMARY.md +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/docs/src/ci-guardrails.md +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/docs/src/configuration.md +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/docs/src/getting-started.md +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/docs/src/index.md +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/docs/src/releasing.md +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/rust-toolchain.toml +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/scripts/setup-dev.sh +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/src/faker_dispatch.rs +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/src/lint.rs +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/src/main.rs +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/src/report.rs +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/src/scan.rs +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/src/settings.rs +0 -0
- {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/src/transform.rs +0 -0
|
@@ -7,6 +7,13 @@ and this project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.ht
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.4.2] - 2026-05-03
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
|
|
14
|
+
- **JSON path rules on non-JSON cells**: Path-based `[rules]` anonymization is skipped when the cell is not strict JSON, leaving the original value unchanged (consistent with row-filter JSON path behavior).
|
|
15
|
+
- **JSON scalar types in path-based anonymization**: Replacements at JSON paths preserve number and boolean leaf types where possible (numeric and boolean coercion from generated text).
|
|
16
|
+
|
|
10
17
|
## [0.4.1] - 2026-05-03
|
|
11
18
|
|
|
12
19
|
### Fixed
|
|
@@ -61,6 +68,7 @@ and this project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.ht
|
|
|
61
68
|
- Configurable output scan severities and per-category thresholds via `[output_scan]`.
|
|
62
69
|
- JSON report section for output scan findings including category, count, threshold, severity, and sample locations.
|
|
63
70
|
|
|
71
|
+
[0.4.2]: https://github.com/ababic/dumpling/compare/v0.4.1...v0.4.2
|
|
64
72
|
[0.4.1]: https://github.com/ababic/dumpling/compare/v0.4.0...v0.4.1
|
|
65
73
|
[0.4.0]: https://github.com/ababic/dumpling/compare/v0.3.0...v0.4.0
|
|
66
74
|
[0.3.0]: https://github.com/ababic/dumpling/compare/v0.2.0...v0.3.0
|
|
@@ -224,25 +224,118 @@ fn replacement_to_json_value(repl: &Replacement) -> serde_json::Value {
|
|
|
224
224
|
.unwrap_or_else(|_| serde_json::Value::String(repl.value.clone()))
|
|
225
225
|
}
|
|
226
226
|
|
|
227
|
+
/// When rewriting JSON at a path, map `Replacement` back into [`serde_json::Value`] while keeping
|
|
228
|
+
/// the leaf's JSON type when the strategy still returns text (e.g. `Replacement::quoted` for
|
|
229
|
+
/// `string`, `hash`, etc.): numeric and boolean leaves stay JSON numbers/bools if the replacement
|
|
230
|
+
/// text parses as such.
|
|
231
|
+
fn coerce_json_path_replacement(
|
|
232
|
+
original: &serde_json::Value,
|
|
233
|
+
repl: &Replacement,
|
|
234
|
+
) -> serde_json::Value {
|
|
235
|
+
if repl.is_null {
|
|
236
|
+
return serde_json::Value::Null;
|
|
237
|
+
}
|
|
238
|
+
match original {
|
|
239
|
+
serde_json::Value::Bool(_) => {
|
|
240
|
+
if let Some(b) = parse_loose_json_bool(&repl.value) {
|
|
241
|
+
return serde_json::Value::Bool(b);
|
|
242
|
+
}
|
|
243
|
+
if !repl.force_quoted {
|
|
244
|
+
if let Ok(v) = serde_json::from_str::<serde_json::Value>(&repl.value) {
|
|
245
|
+
match v {
|
|
246
|
+
serde_json::Value::Bool(b) => return serde_json::Value::Bool(b),
|
|
247
|
+
serde_json::Value::Number(n) => {
|
|
248
|
+
if n.as_u64() == Some(0) || n.as_i64() == Some(0) {
|
|
249
|
+
return serde_json::Value::Bool(false);
|
|
250
|
+
}
|
|
251
|
+
if n.as_u64() == Some(1) || n.as_i64() == Some(1) {
|
|
252
|
+
return serde_json::Value::Bool(true);
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
_ => {}
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
serde_json::Value::String(repl.value.clone())
|
|
260
|
+
}
|
|
261
|
+
serde_json::Value::Number(_) => {
|
|
262
|
+
if let Some(n) = parse_loose_json_number(&repl.value) {
|
|
263
|
+
return serde_json::Value::Number(n);
|
|
264
|
+
}
|
|
265
|
+
if !repl.force_quoted {
|
|
266
|
+
if let Ok(serde_json::Value::Number(n)) =
|
|
267
|
+
serde_json::from_str::<serde_json::Value>(&repl.value)
|
|
268
|
+
{
|
|
269
|
+
return serde_json::Value::Number(n);
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
serde_json::Value::String(repl.value.clone())
|
|
273
|
+
}
|
|
274
|
+
serde_json::Value::String(_) => {
|
|
275
|
+
if repl.force_quoted {
|
|
276
|
+
serde_json::Value::String(repl.value.clone())
|
|
277
|
+
} else {
|
|
278
|
+
serde_json::from_str(&repl.value)
|
|
279
|
+
.unwrap_or_else(|_| serde_json::Value::String(repl.value.clone()))
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
serde_json::Value::Null => replacement_to_json_value(repl),
|
|
283
|
+
serde_json::Value::Array(_) | serde_json::Value::Object(_) => {
|
|
284
|
+
replacement_to_json_value(repl)
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
fn parse_loose_json_bool(s: &str) -> Option<bool> {
|
|
290
|
+
match s.trim().to_ascii_lowercase().as_str() {
|
|
291
|
+
"true" => Some(true),
|
|
292
|
+
"false" => Some(false),
|
|
293
|
+
_ => None,
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
fn parse_loose_json_number(s: &str) -> Option<serde_json::Number> {
|
|
298
|
+
let t = s.trim();
|
|
299
|
+
if t.is_empty() {
|
|
300
|
+
return None;
|
|
301
|
+
}
|
|
302
|
+
if let Ok(i) = t.parse::<i64>() {
|
|
303
|
+
return Some(i.into());
|
|
304
|
+
}
|
|
305
|
+
if let Ok(u) = t.parse::<u64>() {
|
|
306
|
+
return Some(u.into());
|
|
307
|
+
}
|
|
308
|
+
let f = t.parse::<f64>().ok()?;
|
|
309
|
+
serde_json::Number::from_f64(f)
|
|
310
|
+
}
|
|
311
|
+
|
|
227
312
|
fn apply_leaf_replacement(target: &mut serde_json::Value, repl: &Replacement) {
|
|
228
|
-
|
|
313
|
+
let original = target.clone();
|
|
314
|
+
*target = coerce_json_path_replacement(&original, repl);
|
|
229
315
|
}
|
|
230
316
|
|
|
231
317
|
/// Mutate JSON document strings at configured paths using the same path semantics as predicates.
|
|
318
|
+
///
|
|
319
|
+
/// Returns [`None`] when `raw_json` is not valid strict JSON (same tolerance as row-filter JSON
|
|
320
|
+
/// path extraction): path rules are skipped for that cell and callers should passthrough the
|
|
321
|
+
/// original value unchanged.
|
|
232
322
|
pub fn rewrite_json_paths_with_rules(
|
|
233
323
|
registry: &AnonymizerRegistry,
|
|
234
324
|
column_max_len: Option<usize>,
|
|
235
325
|
json_rules: &[(Vec<String>, AnonymizerSpec)],
|
|
236
326
|
raw_json: &str,
|
|
237
|
-
) -> anyhow::Result<String> {
|
|
238
|
-
let mut root = serde_json::from_str::<serde_json::Value>(raw_json)
|
|
327
|
+
) -> anyhow::Result<Option<String>> {
|
|
328
|
+
let mut root = match serde_json::from_str::<serde_json::Value>(raw_json) {
|
|
329
|
+
Ok(v) => v,
|
|
330
|
+
Err(_) => return Ok(None),
|
|
331
|
+
};
|
|
239
332
|
for (path, spec) in json_rules {
|
|
240
333
|
let mut apply = |original_cell: Option<String>| {
|
|
241
334
|
apply_anonymizer(registry, spec, original_cell.as_deref(), column_max_len)
|
|
242
335
|
};
|
|
243
336
|
mutate_json_at_path(&mut root, path, &mut apply)?;
|
|
244
337
|
}
|
|
245
|
-
Ok(root.to_string())
|
|
338
|
+
Ok(Some(root.to_string()))
|
|
246
339
|
}
|
|
247
340
|
|
|
248
341
|
fn mutate_json_at_path<F>(
|
|
@@ -457,7 +550,8 @@ fn get_cached_regex(pat: &str, case_insensitive: bool) -> regex::Regex {
|
|
|
457
550
|
#[cfg(test)]
|
|
458
551
|
mod tests {
|
|
459
552
|
use super::*;
|
|
460
|
-
use crate::settings::{ResolvedConfig, RowFilterSet};
|
|
553
|
+
use crate::settings::{AnonymizerSpec, ResolvedConfig, RowFilterSet};
|
|
554
|
+
use crate::transform::AnonymizerRegistry;
|
|
461
555
|
use std::collections::HashMap;
|
|
462
556
|
|
|
463
557
|
#[test]
|
|
@@ -630,4 +724,139 @@ mod tests {
|
|
|
630
724
|
&[Some(r#"{"items":[{"kind":"secondary"}]}"#.to_string())]
|
|
631
725
|
));
|
|
632
726
|
}
|
|
727
|
+
|
|
728
|
+
#[test]
|
|
729
|
+
fn rewrite_json_paths_skips_non_json_cells_like_row_filters() {
|
|
730
|
+
let mut rules: HashMap<String, HashMap<String, AnonymizerSpec>> = HashMap::new();
|
|
731
|
+
let spec = AnonymizerSpec {
|
|
732
|
+
strategy: "string".to_string(),
|
|
733
|
+
salt: None,
|
|
734
|
+
min: None,
|
|
735
|
+
max: None,
|
|
736
|
+
length: Some(4),
|
|
737
|
+
min_days: None,
|
|
738
|
+
max_days: None,
|
|
739
|
+
min_seconds: None,
|
|
740
|
+
max_seconds: None,
|
|
741
|
+
domain: None,
|
|
742
|
+
unique_within_domain: None,
|
|
743
|
+
as_string: Some(true),
|
|
744
|
+
locale: None,
|
|
745
|
+
faker: None,
|
|
746
|
+
format: None,
|
|
747
|
+
};
|
|
748
|
+
rules.insert("public.t".to_string(), HashMap::new());
|
|
749
|
+
let cfg = ResolvedConfig {
|
|
750
|
+
salt: None,
|
|
751
|
+
rules,
|
|
752
|
+
row_filters: HashMap::new(),
|
|
753
|
+
column_cases: HashMap::new(),
|
|
754
|
+
sensitive_columns: HashMap::new(),
|
|
755
|
+
output_scan: crate::settings::OutputScanConfig::default(),
|
|
756
|
+
source_path: None,
|
|
757
|
+
};
|
|
758
|
+
let registry = AnonymizerRegistry::from_config(&cfg);
|
|
759
|
+
let json_rules: Vec<(Vec<String>, AnonymizerSpec)> = vec![(
|
|
760
|
+
vec!["profile".to_string(), "secret".to_string()],
|
|
761
|
+
spec.clone(),
|
|
762
|
+
)];
|
|
763
|
+
assert!(
|
|
764
|
+
rewrite_json_paths_with_rules(&registry, None, &json_rules, "{not json")
|
|
765
|
+
.unwrap()
|
|
766
|
+
.is_none()
|
|
767
|
+
);
|
|
768
|
+
let out = rewrite_json_paths_with_rules(
|
|
769
|
+
&registry,
|
|
770
|
+
None,
|
|
771
|
+
&json_rules,
|
|
772
|
+
r#"{"profile":{"secret":"x"}}"#,
|
|
773
|
+
)
|
|
774
|
+
.unwrap()
|
|
775
|
+
.expect("valid JSON should rewrite");
|
|
776
|
+
let v: serde_json::Value = serde_json::from_str(&out).unwrap();
|
|
777
|
+
assert_ne!(v["profile"]["secret"], "x");
|
|
778
|
+
}
|
|
779
|
+
|
|
780
|
+
#[test]
|
|
781
|
+
fn rewrite_json_paths_preserves_number_and_bool_leaf_types_for_quoted_replacements() {
|
|
782
|
+
let mut rules: HashMap<String, HashMap<String, AnonymizerSpec>> = HashMap::new();
|
|
783
|
+
rules.insert("public.t".to_string(), HashMap::new());
|
|
784
|
+
let cfg = ResolvedConfig {
|
|
785
|
+
salt: None,
|
|
786
|
+
rules,
|
|
787
|
+
row_filters: HashMap::new(),
|
|
788
|
+
column_cases: HashMap::new(),
|
|
789
|
+
sensitive_columns: HashMap::new(),
|
|
790
|
+
output_scan: crate::settings::OutputScanConfig::default(),
|
|
791
|
+
source_path: None,
|
|
792
|
+
};
|
|
793
|
+
let registry = AnonymizerRegistry::from_config(&cfg);
|
|
794
|
+
|
|
795
|
+
let int_spec = AnonymizerSpec {
|
|
796
|
+
strategy: "int_range".to_string(),
|
|
797
|
+
salt: None,
|
|
798
|
+
min: Some(0),
|
|
799
|
+
max: Some(9),
|
|
800
|
+
length: None,
|
|
801
|
+
min_days: None,
|
|
802
|
+
max_days: None,
|
|
803
|
+
min_seconds: None,
|
|
804
|
+
max_seconds: None,
|
|
805
|
+
domain: Some("coerce_int_leaf".to_string()),
|
|
806
|
+
unique_within_domain: None,
|
|
807
|
+
as_string: None,
|
|
808
|
+
locale: None,
|
|
809
|
+
faker: None,
|
|
810
|
+
format: None,
|
|
811
|
+
};
|
|
812
|
+
let out = rewrite_json_paths_with_rules(
|
|
813
|
+
&registry,
|
|
814
|
+
None,
|
|
815
|
+
&[(vec!["n".to_string()], int_spec)],
|
|
816
|
+
r#"{"n":1,"b":true,"s":"x"}"#,
|
|
817
|
+
)
|
|
818
|
+
.unwrap()
|
|
819
|
+
.unwrap();
|
|
820
|
+
let v: serde_json::Value = serde_json::from_str(&out).unwrap();
|
|
821
|
+
assert!(
|
|
822
|
+
v["n"].is_number(),
|
|
823
|
+
"int_range replacement should stay JSON number, got {:?}",
|
|
824
|
+
v["n"]
|
|
825
|
+
);
|
|
826
|
+
assert_eq!(v["b"], true);
|
|
827
|
+
assert_eq!(v["s"], "x");
|
|
828
|
+
|
|
829
|
+
let string_spec = AnonymizerSpec {
|
|
830
|
+
strategy: "int_range".to_string(),
|
|
831
|
+
salt: None,
|
|
832
|
+
min: Some(0),
|
|
833
|
+
max: Some(0),
|
|
834
|
+
length: None,
|
|
835
|
+
min_days: None,
|
|
836
|
+
max_days: None,
|
|
837
|
+
min_seconds: None,
|
|
838
|
+
max_seconds: None,
|
|
839
|
+
domain: Some("coerce_bool_leaf".to_string()),
|
|
840
|
+
unique_within_domain: None,
|
|
841
|
+
as_string: None,
|
|
842
|
+
locale: None,
|
|
843
|
+
faker: None,
|
|
844
|
+
format: None,
|
|
845
|
+
};
|
|
846
|
+
let out2 = rewrite_json_paths_with_rules(
|
|
847
|
+
&registry,
|
|
848
|
+
None,
|
|
849
|
+
&[(vec!["b".to_string()], string_spec)],
|
|
850
|
+
r#"{"b":false}"#,
|
|
851
|
+
)
|
|
852
|
+
.unwrap()
|
|
853
|
+
.unwrap();
|
|
854
|
+
let v2: serde_json::Value = serde_json::from_str(&out2).unwrap();
|
|
855
|
+
assert!(
|
|
856
|
+
v2["b"].is_boolean(),
|
|
857
|
+
"unquoted 0 from int_range should coerce to bool at bool leaf, got {:?}",
|
|
858
|
+
v2["b"]
|
|
859
|
+
);
|
|
860
|
+
assert_eq!(v2["b"], false);
|
|
861
|
+
}
|
|
633
862
|
}
|
|
@@ -562,7 +562,11 @@ impl SqlStreamProcessor {
|
|
|
562
562
|
None => return Ok(None),
|
|
563
563
|
};
|
|
564
564
|
let specs: Vec<AnonymizerSpec> = json_owned.iter().map(|(_, s)| s.clone()).collect();
|
|
565
|
-
let out = rewrite_json_paths_with_rules(&self.anonymizers, col_len, &json_owned, raw)?;
|
|
565
|
+
let Some(out) =
|
|
566
|
+
rewrite_json_paths_with_rules(&self.anonymizers, col_len, &json_owned, raw)?
|
|
567
|
+
else {
|
|
568
|
+
return Ok(None);
|
|
569
|
+
};
|
|
566
570
|
let repl = Replacement::quoted(out);
|
|
567
571
|
Ok(Some((repl, specs)))
|
|
568
572
|
}
|
|
@@ -2398,6 +2402,156 @@ COPY public.events (id, payload) FROM stdin;
|
|
|
2398
2402
|
);
|
|
2399
2403
|
}
|
|
2400
2404
|
|
|
2405
|
+
#[test]
|
|
2406
|
+
fn pipeline_json_path_rules_passthrough_non_json_cells() {
|
|
2407
|
+
let mut rules: HashMap<String, HashMap<String, AnonymizerSpec>> = HashMap::new();
|
|
2408
|
+
let mut cols: HashMap<String, AnonymizerSpec> = HashMap::new();
|
|
2409
|
+
cols.insert(
|
|
2410
|
+
"payload.profile.secret".to_string(),
|
|
2411
|
+
AnonymizerSpec {
|
|
2412
|
+
strategy: "string".to_string(),
|
|
2413
|
+
salt: None,
|
|
2414
|
+
min: None,
|
|
2415
|
+
max: None,
|
|
2416
|
+
length: Some(8),
|
|
2417
|
+
min_days: None,
|
|
2418
|
+
max_days: None,
|
|
2419
|
+
min_seconds: None,
|
|
2420
|
+
max_seconds: None,
|
|
2421
|
+
domain: Some("secrets".to_string()),
|
|
2422
|
+
unique_within_domain: None,
|
|
2423
|
+
as_string: Some(true),
|
|
2424
|
+
locale: None,
|
|
2425
|
+
faker: None,
|
|
2426
|
+
format: None,
|
|
2427
|
+
},
|
|
2428
|
+
);
|
|
2429
|
+
rules.insert("public.events".to_string(), cols);
|
|
2430
|
+
let cfg = ResolvedConfig {
|
|
2431
|
+
salt: None,
|
|
2432
|
+
rules,
|
|
2433
|
+
row_filters: HashMap::new(),
|
|
2434
|
+
column_cases: HashMap::new(),
|
|
2435
|
+
sensitive_columns: HashMap::new(),
|
|
2436
|
+
output_scan: crate::settings::OutputScanConfig::default(),
|
|
2437
|
+
source_path: None,
|
|
2438
|
+
};
|
|
2439
|
+
let reg = AnonymizerRegistry::from_config(&cfg);
|
|
2440
|
+
let mut proc =
|
|
2441
|
+
SqlStreamProcessor::new(reg, cfg, Vec::new(), Vec::new(), None, DumpFormat::Postgres);
|
|
2442
|
+
let input = r#"
|
|
2443
|
+
CREATE TABLE public.events (id int, payload jsonb);
|
|
2444
|
+
INSERT INTO public.events (id, payload) VALUES
|
|
2445
|
+
(1, '{not strict json}'),
|
|
2446
|
+
(2, '{"profile":{"tier":"gold","secret":"alpha"}}');
|
|
2447
|
+
|
|
2448
|
+
COPY public.events (id, payload) FROM stdin;
|
|
2449
|
+
3 {not strict json}
|
|
2450
|
+
4 {"profile":{"tier":"gold","secret":"alpha"}}
|
|
2451
|
+
\.
|
|
2452
|
+
"#;
|
|
2453
|
+
let mut reader = std::io::BufReader::new(input.as_bytes());
|
|
2454
|
+
let mut out = Vec::new();
|
|
2455
|
+
proc.process(&mut reader, &mut out).unwrap();
|
|
2456
|
+
let s = String::from_utf8(out).unwrap();
|
|
2457
|
+
assert!(
|
|
2458
|
+
s.contains("(1, '{not strict json}')"),
|
|
2459
|
+
"non-JSON INSERT cell should passthrough unchanged, got:\n{s}"
|
|
2460
|
+
);
|
|
2461
|
+
assert!(
|
|
2462
|
+
!s.contains("alpha"),
|
|
2463
|
+
"valid JSON INSERT row should still anonymize nested paths, got:\n{s}"
|
|
2464
|
+
);
|
|
2465
|
+
assert!(
|
|
2466
|
+
s.contains("\n3\t{not strict json}\n"),
|
|
2467
|
+
"non-JSON COPY cell should passthrough unchanged, got:\n{s}"
|
|
2468
|
+
);
|
|
2469
|
+
assert!(
|
|
2470
|
+
!s.contains("\n4\t{\"profile\":{\"tier\":\"gold\",\"secret\":\"alpha\"}}\n"),
|
|
2471
|
+
"valid JSON COPY row should anonymize nested secret, got:\n{s}"
|
|
2472
|
+
);
|
|
2473
|
+
}
|
|
2474
|
+
|
|
2475
|
+
#[test]
|
|
2476
|
+
fn pipeline_json_path_int_range_preserves_json_number_type() {
|
|
2477
|
+
let mut rules: HashMap<String, HashMap<String, AnonymizerSpec>> = HashMap::new();
|
|
2478
|
+
let mut cols: HashMap<String, AnonymizerSpec> = HashMap::new();
|
|
2479
|
+
cols.insert(
|
|
2480
|
+
"payload.score".to_string(),
|
|
2481
|
+
AnonymizerSpec {
|
|
2482
|
+
strategy: "int_range".to_string(),
|
|
2483
|
+
salt: None,
|
|
2484
|
+
min: Some(0),
|
|
2485
|
+
max: Some(100),
|
|
2486
|
+
length: None,
|
|
2487
|
+
min_days: None,
|
|
2488
|
+
max_days: None,
|
|
2489
|
+
min_seconds: None,
|
|
2490
|
+
max_seconds: None,
|
|
2491
|
+
domain: Some("pipeline_json_num".to_string()),
|
|
2492
|
+
unique_within_domain: None,
|
|
2493
|
+
as_string: None,
|
|
2494
|
+
locale: None,
|
|
2495
|
+
faker: None,
|
|
2496
|
+
format: None,
|
|
2497
|
+
},
|
|
2498
|
+
);
|
|
2499
|
+
rules.insert("public.events".to_string(), cols);
|
|
2500
|
+
let cfg = ResolvedConfig {
|
|
2501
|
+
salt: None,
|
|
2502
|
+
rules,
|
|
2503
|
+
row_filters: HashMap::new(),
|
|
2504
|
+
column_cases: HashMap::new(),
|
|
2505
|
+
sensitive_columns: HashMap::new(),
|
|
2506
|
+
output_scan: crate::settings::OutputScanConfig::default(),
|
|
2507
|
+
source_path: None,
|
|
2508
|
+
};
|
|
2509
|
+
let reg = AnonymizerRegistry::from_config(&cfg);
|
|
2510
|
+
let mut proc =
|
|
2511
|
+
SqlStreamProcessor::new(reg, cfg, Vec::new(), Vec::new(), None, DumpFormat::Postgres);
|
|
2512
|
+
let input = r#"
|
|
2513
|
+
CREATE TABLE public.events (id int, payload jsonb);
|
|
2514
|
+
INSERT INTO public.events (id, payload) VALUES
|
|
2515
|
+
(1, '{"score":42,"label":"x"}');
|
|
2516
|
+
|
|
2517
|
+
COPY public.events (id, payload) FROM stdin;
|
|
2518
|
+
2 {"score":42,"label":"x"}
|
|
2519
|
+
\.
|
|
2520
|
+
"#;
|
|
2521
|
+
let mut reader = std::io::BufReader::new(input.as_bytes());
|
|
2522
|
+
let mut out = Vec::new();
|
|
2523
|
+
proc.process(&mut reader, &mut out).unwrap();
|
|
2524
|
+
let s = String::from_utf8(out).unwrap();
|
|
2525
|
+
let insert_pos = s.find("INSERT INTO public.events").unwrap();
|
|
2526
|
+
let insert_tail = &s[insert_pos..];
|
|
2527
|
+
let insert_end = insert_tail.find(";\n").unwrap() + insert_pos;
|
|
2528
|
+
let ins_stmt = &s[insert_pos..=insert_end];
|
|
2529
|
+
let vals_idx = ins_stmt.to_uppercase().find("VALUES").unwrap();
|
|
2530
|
+
let ins_block = strip_trailing_semicolon(ins_stmt[vals_idx + "VALUES".len()..].trim());
|
|
2531
|
+
let ins_rows = parse_values_rows(ins_block).unwrap();
|
|
2532
|
+
let copy_line = s
|
|
2533
|
+
.lines()
|
|
2534
|
+
.find(|l| l.starts_with("2\t{"))
|
|
2535
|
+
.expect("expected COPY data row");
|
|
2536
|
+
let copy_json = copy_line.split_once('\t').unwrap().1;
|
|
2537
|
+
let v_ins =
|
|
2538
|
+
serde_json::from_str::<serde_json::Value>(ins_rows[0][1].original.as_ref().unwrap())
|
|
2539
|
+
.unwrap();
|
|
2540
|
+
let v_copy = serde_json::from_str::<serde_json::Value>(copy_json).unwrap();
|
|
2541
|
+
assert!(
|
|
2542
|
+
v_ins["score"].is_number(),
|
|
2543
|
+
"INSERT payload.score should remain JSON number, got {:?}",
|
|
2544
|
+
v_ins["score"]
|
|
2545
|
+
);
|
|
2546
|
+
assert!(
|
|
2547
|
+
v_copy["score"].is_number(),
|
|
2548
|
+
"COPY payload.score should remain JSON number, got {:?}",
|
|
2549
|
+
v_copy["score"]
|
|
2550
|
+
);
|
|
2551
|
+
assert_eq!(v_ins["score"], v_copy["score"]);
|
|
2552
|
+
assert_eq!(v_ins["label"], "x");
|
|
2553
|
+
}
|
|
2554
|
+
|
|
2401
2555
|
#[test]
|
|
2402
2556
|
fn parse_values_rows_tracks_trailing_cast_for_quoted_literals() {
|
|
2403
2557
|
let rows =
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|