pg_dump_anonymize 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 56f41e435e9420140a7969c4d536a6a517836ca5d8d06e5be8bfca127fc939f0
4
- data.tar.gz: 016d7a8b99e9f67046e7192856a13eb9bcf5ab018ce7cd61a00ed5b3375783b0
3
+ metadata.gz: ee33e3ece88467be6ffb983cb2ff09f4f4097055540ec045dbc5257d6d44c442
4
+ data.tar.gz: b2d7472cd7b919a2c0b277a77d455a10f904d22cc8a9f5ebda44b7d2aa81553e
5
5
  SHA512:
6
- metadata.gz: a69b3a188689de3d2636a28ee45e7b7902f5ebea3292f675ff7283c834ff67b6c3550fb3b979bfee4487197eb20c5d71773b92b255023476663824b7aede7ceb
7
- data.tar.gz: e04e32850bb4ad0ea99e426e5e6fb222773ea570cac6b305527fe567b6bc80e6db81158a5bcc6ce9b05c49d5b224bf8f87fa4ba2624cf5425f34e277145450a4
6
+ metadata.gz: d8867ff2d50cfff7b2aaf4bca2ab1235727c5ad47f5f91bb3e1482680080de5e7b61f462eaaf7f27d8d86c18b12af410bef175e4e029b8294dc2399a5aff2d59
7
+ data.tar.gz: ab07bff2fa4502d8a5c6805ae7e1c2162d68cd927cccc51542ea91088974e8625a65bd1c81a90b179f60f1280c52d5458509de132428df8c6d559b6243b9aea9
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- pg_dump_anonymize (0.1.1)
4
+ pg_dump_anonymize (0.1.2)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -13,6 +13,10 @@ module PgDumpAnonymize
13
13
  if @current_table
14
14
  if end_stdin?(line)
15
15
  clear_current_table
16
+ elsif skip?(line)
17
+ # do nothing
18
+ elsif delete?(line)
19
+ line = ''
16
20
  else
17
21
  line = anonymize_line(line)
18
22
  end
@@ -27,13 +31,16 @@ module PgDumpAnonymize
27
31
  # This assumes the line is a tab-delimited data line
28
32
  def anonymize_line(line)
29
33
  values = line.split("\t")
30
- row_context = {} # used to share state for a row
34
+ row_context = { row: row_to_hash(values) } # used to share state for a row
31
35
  @positional_substitutions.each do |index, val_def|
32
36
  values[index] = if val_def.is_a?(Proc)
33
37
  val_def.call(*[values[index], row_context].slice(0, val_def.arity))
34
38
  else
35
39
  val_def
36
40
  end
41
+
42
+ # Postgres represents nil/null as '\N' in SQL dumps
43
+ values[index] = '\N' if values[index].nil?
37
44
  end
38
45
  values.join("\t")
39
46
  end
@@ -52,10 +59,10 @@ module PgDumpAnonymize
52
59
  # Finds the positional index of each attribute to be replaced
53
60
  # Returns an array of arrays. Each inner array is [<field_index>, <anonymous_value>]
54
61
  def find_positions(fields_str, rules)
55
- fields = fields_str.gsub('"', '').split(', ')
62
+ @fields = fields_str.gsub('"', '').split(', ')
56
63
 
57
64
  rules.map do |target_field, val|
58
- index = fields.index(target_field.to_s)
65
+ index = @fields.index(target_field.to_s)
59
66
  [index, val] if index
60
67
  end.compact
61
68
  end
@@ -75,7 +82,36 @@ module PgDumpAnonymize
75
82
 
76
83
  def clear_current_table
77
84
  @current_table = nil
85
+ @fields = nil
78
86
  @positional_substitutions = nil
79
87
  end
88
+
89
+ def skip?(row)
90
+ if (skip_if = @attribute_rules.dig(@current_table, :_skip_if))
91
+ !!skip_if.call(row_to_hash(row))
92
+ else
93
+ false
94
+ end
95
+ end
96
+
97
+ def delete?(row)
98
+ if (delete_if = @attribute_rules.dig(@current_table, :_delete_if))
99
+ !!delete_if.call(row_to_hash(row))
100
+ else
101
+ false
102
+ end
103
+ end
104
+
105
+ def row_to_hash(row)
106
+ return nil unless @fields
107
+
108
+ values = row.is_a?(String) ? row.split("\t") : row
109
+
110
+ begin
111
+ Hash[*@fields.zip(values).flatten]
112
+ rescue StandardError => e
113
+ raise "#{e.message}, row_to_hash error encountered: current_table: #{@current_table} -- fields(#{@fields&.length}): #{@fields} -- values(#{values&.length}): #{values}"
114
+ end
115
+ end
80
116
  end
81
117
  end
@@ -1,3 +1,3 @@
1
1
  module PgDumpAnonymize
2
- VERSION = "0.1.1"
2
+ VERSION = "0.1.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pg_dump_anonymize
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sean McCleary
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-12-30 00:00:00.000000000 Z
11
+ date: 2021-01-07 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Given the default pg_dump text SQL dump, this can take a simple definition
14
14
  of tables and fields to anonymize and efficiently anonymize the dump.