dumpling-cli 0.4.1__tar.gz → 0.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/CHANGELOG.md +8 -0
  2. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/Cargo.lock +1 -1
  3. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/Cargo.toml +1 -1
  4. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/PKG-INFO +1 -1
  5. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/pyproject.toml +1 -1
  6. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/src/filter.rs +234 -5
  7. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/src/sql.rs +155 -1
  8. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/.dumplingconf.example +0 -0
  9. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/.github/workflows/ci.yml +0 -0
  10. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/.github/workflows/docs-pr.yml +0 -0
  11. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/.github/workflows/docs.yml +0 -0
  12. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/.github/workflows/platform-compat-latest.yml +0 -0
  13. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/.github/workflows/platform-compat-matrix.yml +0 -0
  14. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/.github/workflows/policy-lint.yml +0 -0
  15. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/.github/workflows/publish.yml +0 -0
  16. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/.github/workflows/release.yml +0 -0
  17. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/.github/workflows/tests.yml +0 -0
  18. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/.gitignore +0 -0
  19. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/AGENTS.md +0 -0
  20. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/CONTRIBUTING.md +0 -0
  21. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/MAINTENANCE.md +0 -0
  22. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/README.md +0 -0
  23. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/assets/logo.svg +0 -0
  24. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/book.toml +0 -0
  25. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/datetime_out.sql +0 -0
  26. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/datetime_sample.sql +0 -0
  27. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/docs/src/SUMMARY.md +0 -0
  28. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/docs/src/ci-guardrails.md +0 -0
  29. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/docs/src/configuration.md +0 -0
  30. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/docs/src/getting-started.md +0 -0
  31. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/docs/src/index.md +0 -0
  32. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/docs/src/releasing.md +0 -0
  33. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/rust-toolchain.toml +0 -0
  34. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/scripts/setup-dev.sh +0 -0
  35. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/src/faker_dispatch.rs +0 -0
  36. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/src/lint.rs +0 -0
  37. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/src/main.rs +0 -0
  38. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/src/report.rs +0 -0
  39. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/src/scan.rs +0 -0
  40. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/src/settings.rs +0 -0
  41. {dumpling_cli-0.4.1 → dumpling_cli-0.4.2}/src/transform.rs +0 -0
@@ -7,6 +7,13 @@ and this project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.ht
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.4.2] - 2026-05-03
11
+
12
+ ### Fixed
13
+
14
+ - **JSON path rules on non-JSON cells**: Path-based `[rules]` anonymization is skipped when the cell is not strict JSON, leaving the original value unchanged (consistent with row-filter JSON path behavior).
15
+ - **JSON scalar types in path-based anonymization**: Replacements at JSON paths preserve number and boolean leaf types where possible (numeric and boolean coercion from generated text).
16
+
10
17
  ## [0.4.1] - 2026-05-03
11
18
 
12
19
  ### Fixed
@@ -61,6 +68,7 @@ and this project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.ht
61
68
  - Configurable output scan severities and per-category thresholds via `[output_scan]`.
62
69
  - JSON report section for output scan findings including category, count, threshold, severity, and sample locations.
63
70
 
71
+ [0.4.2]: https://github.com/ababic/dumpling/compare/v0.4.1...v0.4.2
64
72
  [0.4.1]: https://github.com/ababic/dumpling/compare/v0.4.0...v0.4.1
65
73
  [0.4.0]: https://github.com/ababic/dumpling/compare/v0.3.0...v0.4.0
66
74
  [0.3.0]: https://github.com/ababic/dumpling/compare/v0.2.0...v0.3.0
@@ -262,7 +262,7 @@ dependencies = [
262
262
 
263
263
  [[package]]
264
264
  name = "dumpling"
265
- version = "0.4.1"
265
+ version = "0.4.2"
266
266
  dependencies = [
267
267
  "anyhow",
268
268
  "chrono",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "dumpling"
3
- version = "0.4.1"
3
+ version = "0.4.2"
4
4
  edition = "2021"
5
5
  readme = "README.md"
6
6
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dumpling-cli
3
- Version: 0.4.1
3
+ Version: 0.4.2
4
4
  Classifier: Development Status :: 4 - Beta
5
5
  Classifier: Environment :: Console
6
6
  Classifier: Intended Audience :: Developers
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "dumpling-cli"
7
- version = "0.4.1"
7
+ version = "0.4.2"
8
8
  description = "Static anonymizer for plain SQL dumps (PostgreSQL, SQLite, SQL Server)."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -224,25 +224,118 @@ fn replacement_to_json_value(repl: &Replacement) -> serde_json::Value {
224
224
  .unwrap_or_else(|_| serde_json::Value::String(repl.value.clone()))
225
225
  }
226
226
 
227
+ /// When rewriting JSON at a path, map `Replacement` back into [`serde_json::Value`] while keeping
228
+ /// the leaf's JSON type when the strategy still returns text (e.g. `Replacement::quoted` for
229
+ /// `string`, `hash`, etc.): numeric and boolean leaves stay JSON numbers/bools if the replacement
230
+ /// text parses as such.
231
+ fn coerce_json_path_replacement(
232
+ original: &serde_json::Value,
233
+ repl: &Replacement,
234
+ ) -> serde_json::Value {
235
+ if repl.is_null {
236
+ return serde_json::Value::Null;
237
+ }
238
+ match original {
239
+ serde_json::Value::Bool(_) => {
240
+ if let Some(b) = parse_loose_json_bool(&repl.value) {
241
+ return serde_json::Value::Bool(b);
242
+ }
243
+ if !repl.force_quoted {
244
+ if let Ok(v) = serde_json::from_str::<serde_json::Value>(&repl.value) {
245
+ match v {
246
+ serde_json::Value::Bool(b) => return serde_json::Value::Bool(b),
247
+ serde_json::Value::Number(n) => {
248
+ if n.as_u64() == Some(0) || n.as_i64() == Some(0) {
249
+ return serde_json::Value::Bool(false);
250
+ }
251
+ if n.as_u64() == Some(1) || n.as_i64() == Some(1) {
252
+ return serde_json::Value::Bool(true);
253
+ }
254
+ }
255
+ _ => {}
256
+ }
257
+ }
258
+ }
259
+ serde_json::Value::String(repl.value.clone())
260
+ }
261
+ serde_json::Value::Number(_) => {
262
+ if let Some(n) = parse_loose_json_number(&repl.value) {
263
+ return serde_json::Value::Number(n);
264
+ }
265
+ if !repl.force_quoted {
266
+ if let Ok(serde_json::Value::Number(n)) =
267
+ serde_json::from_str::<serde_json::Value>(&repl.value)
268
+ {
269
+ return serde_json::Value::Number(n);
270
+ }
271
+ }
272
+ serde_json::Value::String(repl.value.clone())
273
+ }
274
+ serde_json::Value::String(_) => {
275
+ if repl.force_quoted {
276
+ serde_json::Value::String(repl.value.clone())
277
+ } else {
278
+ serde_json::from_str(&repl.value)
279
+ .unwrap_or_else(|_| serde_json::Value::String(repl.value.clone()))
280
+ }
281
+ }
282
+ serde_json::Value::Null => replacement_to_json_value(repl),
283
+ serde_json::Value::Array(_) | serde_json::Value::Object(_) => {
284
+ replacement_to_json_value(repl)
285
+ }
286
+ }
287
+ }
288
+
289
+ fn parse_loose_json_bool(s: &str) -> Option<bool> {
290
+ match s.trim().to_ascii_lowercase().as_str() {
291
+ "true" => Some(true),
292
+ "false" => Some(false),
293
+ _ => None,
294
+ }
295
+ }
296
+
297
+ fn parse_loose_json_number(s: &str) -> Option<serde_json::Number> {
298
+ let t = s.trim();
299
+ if t.is_empty() {
300
+ return None;
301
+ }
302
+ if let Ok(i) = t.parse::<i64>() {
303
+ return Some(i.into());
304
+ }
305
+ if let Ok(u) = t.parse::<u64>() {
306
+ return Some(u.into());
307
+ }
308
+ let f = t.parse::<f64>().ok()?;
309
+ serde_json::Number::from_f64(f)
310
+ }
311
+
227
312
  fn apply_leaf_replacement(target: &mut serde_json::Value, repl: &Replacement) {
228
- *target = replacement_to_json_value(repl);
313
+ let original = target.clone();
314
+ *target = coerce_json_path_replacement(&original, repl);
229
315
  }
230
316
 
231
317
  /// Mutate JSON document strings at configured paths using the same path semantics as predicates.
318
+ ///
319
+ /// Returns [`None`] when `raw_json` is not valid strict JSON (same tolerance as row-filter JSON
320
+ /// path extraction): path rules are skipped for that cell and callers should passthrough the
321
+ /// original value unchanged.
232
322
  pub fn rewrite_json_paths_with_rules(
233
323
  registry: &AnonymizerRegistry,
234
324
  column_max_len: Option<usize>,
235
325
  json_rules: &[(Vec<String>, AnonymizerSpec)],
236
326
  raw_json: &str,
237
- ) -> anyhow::Result<String> {
238
- let mut root = serde_json::from_str::<serde_json::Value>(raw_json)?;
327
+ ) -> anyhow::Result<Option<String>> {
328
+ let mut root = match serde_json::from_str::<serde_json::Value>(raw_json) {
329
+ Ok(v) => v,
330
+ Err(_) => return Ok(None),
331
+ };
239
332
  for (path, spec) in json_rules {
240
333
  let mut apply = |original_cell: Option<String>| {
241
334
  apply_anonymizer(registry, spec, original_cell.as_deref(), column_max_len)
242
335
  };
243
336
  mutate_json_at_path(&mut root, path, &mut apply)?;
244
337
  }
245
- Ok(root.to_string())
338
+ Ok(Some(root.to_string()))
246
339
  }
247
340
 
248
341
  fn mutate_json_at_path<F>(
@@ -457,7 +550,8 @@ fn get_cached_regex(pat: &str, case_insensitive: bool) -> regex::Regex {
457
550
  #[cfg(test)]
458
551
  mod tests {
459
552
  use super::*;
460
- use crate::settings::{ResolvedConfig, RowFilterSet};
553
+ use crate::settings::{AnonymizerSpec, ResolvedConfig, RowFilterSet};
554
+ use crate::transform::AnonymizerRegistry;
461
555
  use std::collections::HashMap;
462
556
 
463
557
  #[test]
@@ -630,4 +724,139 @@ mod tests {
630
724
  &[Some(r#"{"items":[{"kind":"secondary"}]}"#.to_string())]
631
725
  ));
632
726
  }
727
+
728
+ #[test]
729
+ fn rewrite_json_paths_skips_non_json_cells_like_row_filters() {
730
+ let mut rules: HashMap<String, HashMap<String, AnonymizerSpec>> = HashMap::new();
731
+ let spec = AnonymizerSpec {
732
+ strategy: "string".to_string(),
733
+ salt: None,
734
+ min: None,
735
+ max: None,
736
+ length: Some(4),
737
+ min_days: None,
738
+ max_days: None,
739
+ min_seconds: None,
740
+ max_seconds: None,
741
+ domain: None,
742
+ unique_within_domain: None,
743
+ as_string: Some(true),
744
+ locale: None,
745
+ faker: None,
746
+ format: None,
747
+ };
748
+ rules.insert("public.t".to_string(), HashMap::new());
749
+ let cfg = ResolvedConfig {
750
+ salt: None,
751
+ rules,
752
+ row_filters: HashMap::new(),
753
+ column_cases: HashMap::new(),
754
+ sensitive_columns: HashMap::new(),
755
+ output_scan: crate::settings::OutputScanConfig::default(),
756
+ source_path: None,
757
+ };
758
+ let registry = AnonymizerRegistry::from_config(&cfg);
759
+ let json_rules: Vec<(Vec<String>, AnonymizerSpec)> = vec![(
760
+ vec!["profile".to_string(), "secret".to_string()],
761
+ spec.clone(),
762
+ )];
763
+ assert!(
764
+ rewrite_json_paths_with_rules(&registry, None, &json_rules, "{not json")
765
+ .unwrap()
766
+ .is_none()
767
+ );
768
+ let out = rewrite_json_paths_with_rules(
769
+ &registry,
770
+ None,
771
+ &json_rules,
772
+ r#"{"profile":{"secret":"x"}}"#,
773
+ )
774
+ .unwrap()
775
+ .expect("valid JSON should rewrite");
776
+ let v: serde_json::Value = serde_json::from_str(&out).unwrap();
777
+ assert_ne!(v["profile"]["secret"], "x");
778
+ }
779
+
780
+ #[test]
781
+ fn rewrite_json_paths_preserves_number_and_bool_leaf_types_for_quoted_replacements() {
782
+ let mut rules: HashMap<String, HashMap<String, AnonymizerSpec>> = HashMap::new();
783
+ rules.insert("public.t".to_string(), HashMap::new());
784
+ let cfg = ResolvedConfig {
785
+ salt: None,
786
+ rules,
787
+ row_filters: HashMap::new(),
788
+ column_cases: HashMap::new(),
789
+ sensitive_columns: HashMap::new(),
790
+ output_scan: crate::settings::OutputScanConfig::default(),
791
+ source_path: None,
792
+ };
793
+ let registry = AnonymizerRegistry::from_config(&cfg);
794
+
795
+ let int_spec = AnonymizerSpec {
796
+ strategy: "int_range".to_string(),
797
+ salt: None,
798
+ min: Some(0),
799
+ max: Some(9),
800
+ length: None,
801
+ min_days: None,
802
+ max_days: None,
803
+ min_seconds: None,
804
+ max_seconds: None,
805
+ domain: Some("coerce_int_leaf".to_string()),
806
+ unique_within_domain: None,
807
+ as_string: None,
808
+ locale: None,
809
+ faker: None,
810
+ format: None,
811
+ };
812
+ let out = rewrite_json_paths_with_rules(
813
+ &registry,
814
+ None,
815
+ &[(vec!["n".to_string()], int_spec)],
816
+ r#"{"n":1,"b":true,"s":"x"}"#,
817
+ )
818
+ .unwrap()
819
+ .unwrap();
820
+ let v: serde_json::Value = serde_json::from_str(&out).unwrap();
821
+ assert!(
822
+ v["n"].is_number(),
823
+ "int_range replacement should stay JSON number, got {:?}",
824
+ v["n"]
825
+ );
826
+ assert_eq!(v["b"], true);
827
+ assert_eq!(v["s"], "x");
828
+
829
+ let string_spec = AnonymizerSpec {
830
+ strategy: "int_range".to_string(),
831
+ salt: None,
832
+ min: Some(0),
833
+ max: Some(0),
834
+ length: None,
835
+ min_days: None,
836
+ max_days: None,
837
+ min_seconds: None,
838
+ max_seconds: None,
839
+ domain: Some("coerce_bool_leaf".to_string()),
840
+ unique_within_domain: None,
841
+ as_string: None,
842
+ locale: None,
843
+ faker: None,
844
+ format: None,
845
+ };
846
+ let out2 = rewrite_json_paths_with_rules(
847
+ &registry,
848
+ None,
849
+ &[(vec!["b".to_string()], string_spec)],
850
+ r#"{"b":false}"#,
851
+ )
852
+ .unwrap()
853
+ .unwrap();
854
+ let v2: serde_json::Value = serde_json::from_str(&out2).unwrap();
855
+ assert!(
856
+ v2["b"].is_boolean(),
857
+ "unquoted 0 from int_range should coerce to bool at bool leaf, got {:?}",
858
+ v2["b"]
859
+ );
860
+ assert_eq!(v2["b"], false);
861
+ }
633
862
  }
@@ -562,7 +562,11 @@ impl SqlStreamProcessor {
562
562
  None => return Ok(None),
563
563
  };
564
564
  let specs: Vec<AnonymizerSpec> = json_owned.iter().map(|(_, s)| s.clone()).collect();
565
- let out = rewrite_json_paths_with_rules(&self.anonymizers, col_len, &json_owned, raw)?;
565
+ let Some(out) =
566
+ rewrite_json_paths_with_rules(&self.anonymizers, col_len, &json_owned, raw)?
567
+ else {
568
+ return Ok(None);
569
+ };
566
570
  let repl = Replacement::quoted(out);
567
571
  Ok(Some((repl, specs)))
568
572
  }
@@ -2398,6 +2402,156 @@ COPY public.events (id, payload) FROM stdin;
2398
2402
  );
2399
2403
  }
2400
2404
 
2405
+ #[test]
2406
+ fn pipeline_json_path_rules_passthrough_non_json_cells() {
2407
+ let mut rules: HashMap<String, HashMap<String, AnonymizerSpec>> = HashMap::new();
2408
+ let mut cols: HashMap<String, AnonymizerSpec> = HashMap::new();
2409
+ cols.insert(
2410
+ "payload.profile.secret".to_string(),
2411
+ AnonymizerSpec {
2412
+ strategy: "string".to_string(),
2413
+ salt: None,
2414
+ min: None,
2415
+ max: None,
2416
+ length: Some(8),
2417
+ min_days: None,
2418
+ max_days: None,
2419
+ min_seconds: None,
2420
+ max_seconds: None,
2421
+ domain: Some("secrets".to_string()),
2422
+ unique_within_domain: None,
2423
+ as_string: Some(true),
2424
+ locale: None,
2425
+ faker: None,
2426
+ format: None,
2427
+ },
2428
+ );
2429
+ rules.insert("public.events".to_string(), cols);
2430
+ let cfg = ResolvedConfig {
2431
+ salt: None,
2432
+ rules,
2433
+ row_filters: HashMap::new(),
2434
+ column_cases: HashMap::new(),
2435
+ sensitive_columns: HashMap::new(),
2436
+ output_scan: crate::settings::OutputScanConfig::default(),
2437
+ source_path: None,
2438
+ };
2439
+ let reg = AnonymizerRegistry::from_config(&cfg);
2440
+ let mut proc =
2441
+ SqlStreamProcessor::new(reg, cfg, Vec::new(), Vec::new(), None, DumpFormat::Postgres);
2442
+ let input = r#"
2443
+ CREATE TABLE public.events (id int, payload jsonb);
2444
+ INSERT INTO public.events (id, payload) VALUES
2445
+ (1, '{not strict json}'),
2446
+ (2, '{"profile":{"tier":"gold","secret":"alpha"}}');
2447
+
2448
+ COPY public.events (id, payload) FROM stdin;
2449
+ 3 {not strict json}
2450
+ 4 {"profile":{"tier":"gold","secret":"alpha"}}
2451
+ \.
2452
+ "#;
2453
+ let mut reader = std::io::BufReader::new(input.as_bytes());
2454
+ let mut out = Vec::new();
2455
+ proc.process(&mut reader, &mut out).unwrap();
2456
+ let s = String::from_utf8(out).unwrap();
2457
+ assert!(
2458
+ s.contains("(1, '{not strict json}')"),
2459
+ "non-JSON INSERT cell should passthrough unchanged, got:\n{s}"
2460
+ );
2461
+ assert!(
2462
+ !s.contains("alpha"),
2463
+ "valid JSON INSERT row should still anonymize nested paths, got:\n{s}"
2464
+ );
2465
+ assert!(
2466
+ s.contains("\n3\t{not strict json}\n"),
2467
+ "non-JSON COPY cell should passthrough unchanged, got:\n{s}"
2468
+ );
2469
+ assert!(
2470
+ !s.contains("\n4\t{\"profile\":{\"tier\":\"gold\",\"secret\":\"alpha\"}}\n"),
2471
+ "valid JSON COPY row should anonymize nested secret, got:\n{s}"
2472
+ );
2473
+ }
2474
+
2475
+ #[test]
2476
+ fn pipeline_json_path_int_range_preserves_json_number_type() {
2477
+ let mut rules: HashMap<String, HashMap<String, AnonymizerSpec>> = HashMap::new();
2478
+ let mut cols: HashMap<String, AnonymizerSpec> = HashMap::new();
2479
+ cols.insert(
2480
+ "payload.score".to_string(),
2481
+ AnonymizerSpec {
2482
+ strategy: "int_range".to_string(),
2483
+ salt: None,
2484
+ min: Some(0),
2485
+ max: Some(100),
2486
+ length: None,
2487
+ min_days: None,
2488
+ max_days: None,
2489
+ min_seconds: None,
2490
+ max_seconds: None,
2491
+ domain: Some("pipeline_json_num".to_string()),
2492
+ unique_within_domain: None,
2493
+ as_string: None,
2494
+ locale: None,
2495
+ faker: None,
2496
+ format: None,
2497
+ },
2498
+ );
2499
+ rules.insert("public.events".to_string(), cols);
2500
+ let cfg = ResolvedConfig {
2501
+ salt: None,
2502
+ rules,
2503
+ row_filters: HashMap::new(),
2504
+ column_cases: HashMap::new(),
2505
+ sensitive_columns: HashMap::new(),
2506
+ output_scan: crate::settings::OutputScanConfig::default(),
2507
+ source_path: None,
2508
+ };
2509
+ let reg = AnonymizerRegistry::from_config(&cfg);
2510
+ let mut proc =
2511
+ SqlStreamProcessor::new(reg, cfg, Vec::new(), Vec::new(), None, DumpFormat::Postgres);
2512
+ let input = r#"
2513
+ CREATE TABLE public.events (id int, payload jsonb);
2514
+ INSERT INTO public.events (id, payload) VALUES
2515
+ (1, '{"score":42,"label":"x"}');
2516
+
2517
+ COPY public.events (id, payload) FROM stdin;
2518
+ 2 {"score":42,"label":"x"}
2519
+ \.
2520
+ "#;
2521
+ let mut reader = std::io::BufReader::new(input.as_bytes());
2522
+ let mut out = Vec::new();
2523
+ proc.process(&mut reader, &mut out).unwrap();
2524
+ let s = String::from_utf8(out).unwrap();
2525
+ let insert_pos = s.find("INSERT INTO public.events").unwrap();
2526
+ let insert_tail = &s[insert_pos..];
2527
+ let insert_end = insert_tail.find(";\n").unwrap() + insert_pos;
2528
+ let ins_stmt = &s[insert_pos..=insert_end];
2529
+ let vals_idx = ins_stmt.to_uppercase().find("VALUES").unwrap();
2530
+ let ins_block = strip_trailing_semicolon(ins_stmt[vals_idx + "VALUES".len()..].trim());
2531
+ let ins_rows = parse_values_rows(ins_block).unwrap();
2532
+ let copy_line = s
2533
+ .lines()
2534
+ .find(|l| l.starts_with("2\t{"))
2535
+ .expect("expected COPY data row");
2536
+ let copy_json = copy_line.split_once('\t').unwrap().1;
2537
+ let v_ins =
2538
+ serde_json::from_str::<serde_json::Value>(ins_rows[0][1].original.as_ref().unwrap())
2539
+ .unwrap();
2540
+ let v_copy = serde_json::from_str::<serde_json::Value>(copy_json).unwrap();
2541
+ assert!(
2542
+ v_ins["score"].is_number(),
2543
+ "INSERT payload.score should remain JSON number, got {:?}",
2544
+ v_ins["score"]
2545
+ );
2546
+ assert!(
2547
+ v_copy["score"].is_number(),
2548
+ "COPY payload.score should remain JSON number, got {:?}",
2549
+ v_copy["score"]
2550
+ );
2551
+ assert_eq!(v_ins["score"], v_copy["score"]);
2552
+ assert_eq!(v_ins["label"], "x");
2553
+ }
2554
+
2401
2555
  #[test]
2402
2556
  fn parse_values_rows_tracks_trailing_cast_for_quoted_literals() {
2403
2557
  let rows =
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes