pg_dump_anonymize 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 56f41e435e9420140a7969c4d536a6a517836ca5d8d06e5be8bfca127fc939f0
4
- data.tar.gz: 016d7a8b99e9f67046e7192856a13eb9bcf5ab018ce7cd61a00ed5b3375783b0
3
+ metadata.gz: ee33e3ece88467be6ffb983cb2ff09f4f4097055540ec045dbc5257d6d44c442
4
+ data.tar.gz: b2d7472cd7b919a2c0b277a77d455a10f904d22cc8a9f5ebda44b7d2aa81553e
5
5
  SHA512:
6
- metadata.gz: a69b3a188689de3d2636a28ee45e7b7902f5ebea3292f675ff7283c834ff67b6c3550fb3b979bfee4487197eb20c5d71773b92b255023476663824b7aede7ceb
7
- data.tar.gz: e04e32850bb4ad0ea99e426e5e6fb222773ea570cac6b305527fe567b6bc80e6db81158a5bcc6ce9b05c49d5b224bf8f87fa4ba2624cf5425f34e277145450a4
6
+ metadata.gz: d8867ff2d50cfff7b2aaf4bca2ab1235727c5ad47f5f91bb3e1482680080de5e7b61f462eaaf7f27d8d86c18b12af410bef175e4e029b8294dc2399a5aff2d59
7
+ data.tar.gz: ab07bff2fa4502d8a5c6805ae7e1c2162d68cd927cccc51542ea91088974e8625a65bd1c81a90b179f60f1280c52d5458509de132428df8c6d559b6243b9aea9
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- pg_dump_anonymize (0.1.1)
4
+ pg_dump_anonymize (0.1.2)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -13,6 +13,10 @@ module PgDumpAnonymize
13
13
  if @current_table
14
14
  if end_stdin?(line)
15
15
  clear_current_table
16
+ elsif skip?(line)
17
+ # do nothing
18
+ elsif delete?(line)
19
+ line = ''
16
20
  else
17
21
  line = anonymize_line(line)
18
22
  end
@@ -27,13 +31,16 @@ module PgDumpAnonymize
27
31
  # This assumes the line is a tab delimited data line
28
32
  def anonymize_line(line)
29
33
  values = line.split("\t")
30
- row_context = {} # used to share state for a row
34
+ row_context = { row: row_to_hash(values) } # used to share state for a row
31
35
  @positional_substitutions.each do |index, val_def|
32
36
  values[index] = if val_def.is_a?(Proc)
33
37
  val_def.call(*[values[index], row_context].slice(0, val_def.arity))
34
38
  else
35
39
  val_def
36
40
  end
41
+
42
+ # Postgres represents nil/null as '\N' in SQL dumps
43
+ values[index] = '\N' if values[index].nil?
37
44
  end
38
45
  values.join("\t")
39
46
  end
@@ -52,10 +59,10 @@ module PgDumpAnonymize
52
59
  # Finds the positional range of the attribute to be replaced
53
60
  # returns an array of arrays. The inner array is [<field_index>, <anonymous_value>]
54
61
  def find_positions(fields_str, rules)
55
- fields = fields_str.gsub('"', '').split(', ')
62
+ @fields = fields_str.gsub('"', '').split(', ')
56
63
 
57
64
  rules.map do |target_field, val|
58
- index = fields.index(target_field.to_s)
65
+ index = @fields.index(target_field.to_s)
59
66
  [index, val] if index
60
67
  end.compact
61
68
  end
@@ -75,7 +82,36 @@ module PgDumpAnonymize
75
82
 
76
83
  def clear_current_table
77
84
  @current_table = nil
85
+ @fields = nil
78
86
  @positional_substitutions = nil
79
87
  end
88
+
89
+ def skip?(row)
90
+ if (skip_if = @attribute_rules.dig(@current_table, :_skip_if))
91
+ !!skip_if.call(row_to_hash(row))
92
+ else
93
+ false
94
+ end
95
+ end
96
+
97
+ def delete?(row)
98
+ if (delete_if = @attribute_rules.dig(@current_table, :_delete_if))
99
+ !!delete_if.call(row_to_hash(row))
100
+ else
101
+ false
102
+ end
103
+ end
104
+
105
+ def row_to_hash(row)
106
+ return nil unless @fields
107
+
108
+ values = row.is_a?(String) ? row.split("\t") : row
109
+
110
+ begin
111
+ Hash[*@fields.zip(values).flatten]
112
+ rescue StandardError => e
113
+ raise "#{e.message}, row_to_hash error encountered: current_table: #{@current_table} -- fields(#{@fields&.length}): #{@fields} -- values(#{values&.length}): #{values}"
114
+ end
115
+ end
80
116
  end
81
117
  end
@@ -1,3 +1,3 @@
1
1
  module PgDumpAnonymize
2
- VERSION = "0.1.1"
2
+ VERSION = "0.1.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pg_dump_anonymize
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sean McCleary
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-12-30 00:00:00.000000000 Z
11
+ date: 2021-01-07 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Given the default pg_dump text SQL dump, this can take a simple definition
14
14
  of tables and fields to anonymize and efficiently anonymize the dump.